| Moderator
Registriert seit: 11.05.2008
Beiträge: 6.269
| Hier nochmal die verständliche Version mit erweitertem Kommentar und Beispiel, ich hoffe, das klappt jetzt - habe das Akzent-a entfernt. Die API hat sich etwas verändert, das Alphabet muss jetzt nicht mehr übergeben werden, sondern ist vordefiniert. PHP-Code: <?php /** * @desc * Sort arrays with multibyte values using a predefined alphabet. * * @author Christian Reinecke <reinecke@bajoodoo.de> * @version 1.1 * @since 2009-01-15 * * @example * <code> * // quick * $array = array("Zebra", "Engländer", "England", "Chinese", "Äffchen", "Überseeboot", * "Unterseeboot", "ß", "Sz", "Bürger", "Burger", "burger", "bürger", "ÄÖÜ"); * AIS_Util_MultibyteSort::sortStatic($array); * print_r($array); * // Array ( * // [0] => Äffchen [1] => ÄÖÜ [2] => Burger [3] => Bürger [4] => burger [5] => bürger * // [6] => Chinese [7] => England [8] => Engländer [9] => Sz [10] => ß [11] => Unterseeboot * // [12] => Überseeboot [13] => Zebra * // ) * * * // with custom setup * $mbsort = new AIS_Util_MultibyteSort("UTF-8", true); * $mbsort->sort($array); * print_r($array); * // same result as above * $mbsort->sort($otherArray); * </code> * * @todo * This class cannot handle unknown special characters (fallback to default behaviour), * which makes it desirable to find a global, sorted alphabet. Feel free to add more special chars. * * Don't forget to send the correct charset in your header * <code> * header("Content-type: text/html; charset=utf-8"); * </code> */ class AIS_Util_MultibyteSort { const EncodingUtf8 = "UTF-8"; const EncodingDefault = self::EncodingUtf8;
// Case-preference flags accepted by the constructor and the static helpers.
// The default prefers upper-case characters over their lower-case counterparts.
const CasePreferenceUpper = true;
const CasePreferenceLower = false;
const CasePreferenceDefault = self::CasePreferenceUpper;

/**
 * Instances handed out by getInstance(), keyed by a value derived from the
 * encoding and the case preference they were created with.
 * @var array
 */
protected static $instances = array();

/**
 * Loose alphabet: each key is a plain letter, each value is a string of the
 * special characters that are sorted under that letter. The position of a
 * character inside the value string decides the order between two special
 * characters that share the same plain letter (see charCmp()).
 * @var array
 */
protected static $alphabetLoose = array("A" => "Ä", "a" => "äæå", "O" => "Ö", "o" => "ö", "U" => "Ü", "u" => "ü", "s" => "ß");

/**
 * Encoding passed to every mb_* call made by this instance.
 * @see self::EncodingDefault
 * @var string
 */
protected $encoding;

/**
 * Lookup map built by loadAlphabet(): single special character => the plain
 * letter (key of $alphabetLoose) it is sorted under.
 * @var array
 */
protected $alphabet;

/**
 * Case preference stored as a sign by setUpperCasePreference():
 * 1 when upper-case characters are preferred, -1 otherwise.
 * @see self::CasePreferenceDefault
 * @var int
 */
protected $case;
/**
 * Create a sorter with its own encoding and case preference.
 *
 * The alphabet itself is predefined by the class and is not passed in.
 *
 * @param string $encoding        [optional] encoding handed to the mb_* functions
 * @param bool   $preferUpperCase [optional] true sorts upper case before lower case (Aa),
 *                                false sorts lower case first (aA)
 */
public function __construct($encoding = self::EncodingDefault, $preferUpperCase = self::CasePreferenceDefault)
{
    $this->setUpperCasePreference($preferUpperCase);
    $this->setEncoding($encoding);

    // The alphabet is split with the configured encoding, so it is loaded last.
    $this->loadAlphabet();
}
/**
 * @desc
 * Return a shared instance for the given settings, creating it on first use.
 *
 * The instance used by the static shortcuts is kept in a static property so it can be
 * reused by later calls. If the class is only needed once, the constructor can be used
 * directly instead; nothing is cached in that case.
 *
 * @param string $encoding        [optional] encoding handed to the mb_* functions
 * @param bool   $preferUpperCase [optional] prefer upper-case to lower-case characters
 * @return AIS_Util_MultibyteSort
 */
public static function getInstance($encoding = self::EncodingDefault, $preferUpperCase = self::CasePreferenceDefault)
{
    // Build an unambiguous cache key: the flag is normalised to "1"/"0" (a plain string
    // cast turns false into "" and keeps 0 as "0", so equivalent flags would otherwise
    // produce different keys) and is separated from the encoding name. Using the settings
    // themselves instead of a CRC32 checksum also rules out hash collisions.
    $key = $encoding . "|" . ($preferUpperCase ? "1" : "0");

    if (!isset(self::$instances[$key])) {
        self::$instances[$key] = new self($encoding, $preferUpperCase);
    }
    return self::$instances[$key];
}
/**
 * @desc Static shortcut for sort(): fetches the shared instance for the given settings
 *       and sorts the array in place with it.
 *
 * @param array  $array           array to sort (by reference)
 * @param string $encoding        [optional] encoding handed to the mb_* functions
 * @param bool   $preferUpperCase [optional] prefer upper-case to lower-case characters
 */
public static function sortStatic(&$array, $encoding = self::EncodingDefault, $preferUpperCase = self::CasePreferenceDefault)
{
    self::getInstance($encoding, $preferUpperCase)->sort($array);
}
/**
 * @desc Static shortcut for asort(): fetches the shared instance for the given settings
 *       and sorts the array in place with it.
 *
 * @param array  $array           array to sort (by reference)
 * @param string $encoding        [optional] encoding handed to the mb_* functions
 * @param bool   $preferUpperCase [optional] prefer upper-case to lower-case characters
 */
public static function asortStatic(&$array, $encoding = self::EncodingDefault, $preferUpperCase = self::CasePreferenceDefault)
{
    self::getInstance($encoding, $preferUpperCase)->asort($array);
}
/**
 * @desc Sort the array in place using this instance's settings. The work is done by
 *       usort(), which assigns new keys to the elements.
 *
 * @param array $array array to sort (by reference)
 */
public function sort(&$array)
{
    // Author's troubleshooting note: if usort() warns about an
    // "Invalid comparison function", do not call this method from a static context,
    // and change the error_reporting level to E_ALL | E_STRICT.
    $comparator = array($this, "sortString");
    usort($array, $comparator);
}
/**
 * @desc Sort the array in place using this instance's settings. The work is done by
 *       uasort(), which keeps the association between keys and values.
 *
 * @param array $array array to sort (by reference)
 */
public function asort(&$array)
{
    $comparator = array($this, "sortString");
    uasort($array, $comparator);
}
/**
 * @desc Multibyte string compare, used as the comparison callback by sort() and asort().
 *       The strings are compared character by character with charCmp(); if one string
 *       is a prefix of the other, the shorter one sorts first.
 *
 * @param string $a
 * @param string $b
 * @return int sort order value: the first non-zero charCmp() result, otherwise the
 *             result of comparing the two lengths
 */
protected function sortString($a, $b)
{
    $lengthA = mb_strlen($a, $this->encoding);
    $lengthB = mb_strlen($b, $this->encoding);
    $shared = min($lengthA, $lengthB);

    $pos = 0;
    while ($pos < $shared) {
        $order = $this->charCmp(
            mb_substr($a, $pos, 1, $this->encoding),
            mb_substr($b, $pos, 1, $this->encoding)
        );
        if ($order != 0) {
            // first differing character decides
            return $order;
        }
        ++$pos;
    }

    // all shared positions are equal, so the length decides
    return $this->intCmp($lengthA, $lengthB);
}
/**
 * @desc Three-way compare of two values using PHP's loose comparison operators.
 *
 * @param int $a
 * @param int $b
 * @return int 0 if both compare equal, -1 if $a is smaller, 1 otherwise
 */
protected function intCmp($a, $b)
{
    if ($a == $b) {
        return 0;
    }
    if ($a < $b) {
        return -1;
    }
    return 1;
}
/**
 * @desc Multibyte char compare. Special characters found in the alphabet map are
 *       compared through the plain letter they are filed under; a special character
 *       sorts after a plain character that compares equal to its plain letter.
 *
 * @param string $a single character
 * @param string $b single character
 * @return int sort order value as produced by charCaseCmp() / intCmp()
 */
protected function charCmp($a, $b)
{
    $aKnown = isset($this->alphabet[$a]);
    $bKnown = isset($this->alphabet[$b]);

    // neither character is a known special char: plain case-aware compare
    if (!$aKnown && !$bKnown) {
        return $this->charCaseCmp($a, $b);
    }

    // only $a is a known special char
    if (!$bKnown) {
        $order = $this->charCaseCmp($this->alphabet[$a], $b);
        // the characters are not equal, so a tie means $b is "smaller"
        return ($order == 0) ? 1 : $order;
    }

    // only $b is a known special char
    if (!$aKnown) {
        $order = $this->charCaseCmp($a, $this->alphabet[$b]);
        // the characters are not equal, so a tie means $a is "smaller"
        return ($order == 0) ? -1 : $order;
    }

    // both are known special chars: compare the plain letters they are filed under
    $plainA = $this->alphabet[$a];
    $plainB = $this->alphabet[$b];
    $order = $this->charCaseCmp($plainA, $plainB);
    if ($order == 0 && $a != $b) {
        // different characters under the same plain letter: their position in the
        // loose alphabet string decides
        $posA = mb_strpos(self::$alphabetLoose[$plainA], $a, 0, $this->encoding);
        $posB = mb_strpos(self::$alphabetLoose[$plainB], $b, 0, $this->encoding);
        $order = $this->intCmp($posA, $posB);
    }
    return $order;
}
/**
 * @desc Multibyte char case compare. Characters are compared by their upper-case form;
 *       if only the case differs, the instance's case preference decides.
 *
 * @param string $a single character
 * @param string $b single character
 * @return int sort order value (-1, 0, 1)
 */
protected function charCaseCmp($a, $b)
{
    if ($a == $b) {
        // they are equal, no check required
        return 0;
    }

    $A = mb_strtoupper($a, $this->encoding);
    $B = mb_strtoupper($b, $this->encoding);
    $result = strcmp($A, $B);

    if ($result == 0) {
        // Same upper-case form, so only the case differs. $this->case is 1 or -1:
        // a lower-case $a gets the preference sign, an upper-case $a the opposite one.
        return ($A != $a) ? $this->case : ($this->case * -1);
    }

    // Before PHP 8.2 strcmp() returns an arbitrary negative or positive number;
    // reduce it to its sign so the documented (-1, 0, 1) contract holds everywhere.
    return ($result < 0) ? -1 : 1;
}
/**
 * @desc Build the lookup map $this->alphabet (special character => plain letter) from
 *       the static loose alphabet. Does nothing if the map is already filled.
 *
 *       The loose alphabet is still needed afterwards to tell apart special characters
 *       that share the same plain letter.
 */
protected function loadAlphabet()
{
    if (!empty($this->alphabet)) {
        // load only once per instance
        return;
    }

    foreach (self::$alphabetLoose as $plain => $specials) {
        $count = mb_strlen($specials, $this->encoding);
        for ($pos = 0; $pos < $count; ++$pos) {
            // each multibyte char becomes a key, its plain letter the value
            $special = mb_substr($specials, $pos, 1, $this->encoding);
            $this->alphabet[$special] = $plain;
        }
    }
}
/**
 * @desc Remember the encoding that is handed to every mb_* call of this instance.
 *       The value is stored as given; it is not validated here.
 *
 * @param string $encoding
 */
protected function setEncoding($encoding)
{
    $this->encoding = $encoding;
}
/**
 * @desc Store the case preference as a sign: 1 if upper-case characters are preferred,
 *       -1 otherwise. charCaseCmp() returns this sign (or its negation) when two
 *       characters differ only in case.
 *
 * @param bool $preferUpperCase
 */
protected function setUpperCasePreference($preferUpperCase) { $this->case = $preferUpperCase ? 1 : -1; } } ?> |