// Fragment of the urlize() body: the function header and several intermediate
// lines are not part of this listing, so only the visible steps are described.
//
// Step 1: only when hasNormalizerSupport() reports a working normalizer, run the
// input through normalizer_normalize() (called without an explicit form argument).
// The is_string() guard skips a failed normalization; what happens inside the
// guard is not visible here (presumably $string is replaced by $nfc -- confirm).
45 if (self::hasNormalizerSupport()) {
46 $nfc = normalizer_normalize($string);
47 if (is_string($nfc)) {
// Step 2: replace mapped multibyte characters with their ASCII stand-ins.
53 $string = self::transliterateAscii($string);
// Step 3: anything shaped like "<" + ASCII letter + ... + ">" loses its angle
// brackets and is padded with spaces. The pattern requires a letter right after
// "<", so closing tags such as "</b>" are not matched by it.
56 $string = preg_replace(
'~<([a-zA-Z][^>]*)>~',
' $1 ', $string);
// Step 4: spell out currency symbols as space-padded words:
// U+20AC (euro sign) -> ' E ', U+00A3 (pound sign) -> ' GBP '.
// Further entries of this array may exist on lines missing from this listing.
60 $string = strtr($string, [
62 "\xE2\x82\xAC" =>
' E ',
63 "\xC2\xA3" =>
' GBP ',
// Step 5: build a character class of ASCII control characters and punctuation.
// The ranges visible here leave out tab, LF, CR, space, '-', digits, letters,
// '_' and '`'. The pattern's opening/closing parts (and possibly more ranges)
// are on lines not shown in this listing.
69 $pattern .=
'\x00-\x08';
70 $pattern .=
'\x0b\x0c';
71 $pattern .=
'\x0e-\x1f';
72 $pattern .=
'\x21-\x2c';
73 $pattern .=
'\x2e\x2f';
74 $pattern .=
'\x3a-\x40';
75 $pattern .=
'\x5b-\x5e';
77 $pattern .=
'\x7b-\x7f';
// Delete every character matched by the pattern built above.
79 $string = preg_replace($pattern,
'', $string);
// NOTE(review): as listed, both the "from" and "to" arguments are empty strings,
// which makes this strtr() call effectively a no-op. The literal contents may
// have been lost when this listing was extracted -- verify against the real file.
80 $string = strtr($string,
'',
'');
// Step 6: lower-case the string, preferring the multibyte-aware mb_strtolower()
// (with an explicit UTF-8 encoding) and falling back to strtolower() when the
// mbstring function is not callable.
84 $string = is_callable(
'mb_strtolower') ? mb_strtolower($string,
'UTF-8') : strtolower($string);
// Step 7: build a second character class. These four ranges together cover every
// ASCII byte that is not a digit (0x30-0x39) or a letter (0x41-0x5a, 0x61-0x7a).
// As above, the surrounding pattern syntax is on lines not shown here.
90 $pattern .=
'\x00-\x2f';
91 $pattern .=
'\x3a-\x40';
92 $pattern .=
'\x5b-\x60';
93 $pattern .=
'\x7b-\x7f';
// Split into words on that pattern; PREG_SPLIT_NO_EMPTY drops empty pieces and
// the limit of -1 means "no limit".
97 $words = preg_split($pattern, $string, -1, PREG_SPLIT_NO_EMPTY);
// Percent-encode each word individually so any remaining non-ASCII bytes are URL-safe.
100 $words = array_map(
'urlencode', $words);
// Fragment of the transliterateAscii() body (function header not shown).
// Fetch the replacement table and apply it in a single strtr() pass; the value
// passed to strtr() as the replacement map is whatever getAsciiTranslitMap() returns.
119 $map = self::getAsciiTranslitMap();
// Byte sequences of $utf8 that have no entry in $map are returned unchanged.
122 return strtr($utf8, $map);
// Replacement-table entries (the enclosing array/function syntax is not part of
// this listing). Keys are UTF-8 byte sequences, values are ASCII replacements.
// This section covers the Latin-1 Supplement block: U+00AA and U+00BA (feminine
// and masculine ordinal indicators) followed by the letters in U+00C0..U+00FF.
// The multiplication sign U+00D7 and division sign U+00F7 have no entry.
133 "\xC2\xAA" =>
'a',
"\xC2\xBA" =>
'o',
"\xC3\x80" =>
'A',
134 "\xC3\x81" =>
'A',
"\xC3\x82" =>
'A',
"\xC3\x83" =>
'A',
135 "\xC3\x84" =>
'A',
"\xC3\x85" =>
'A',
"\xC3\x86" =>
'AE',
136 "\xC3\x87" =>
'C',
"\xC3\x88" =>
'E',
"\xC3\x89" =>
'E',
137 "\xC3\x8A" =>
'E',
"\xC3\x8B" =>
'E',
"\xC3\x8C" =>
'I',
138 "\xC3\x8D" =>
'I',
"\xC3\x8E" =>
'I',
"\xC3\x8F" =>
'I',
139 "\xC3\x90" =>
'D',
"\xC3\x91" =>
'N',
"\xC3\x92" =>
'O',
140 "\xC3\x93" =>
'O',
"\xC3\x94" =>
'O',
"\xC3\x95" =>
'O',
141 "\xC3\x96" =>
'O',
// NOTE(review): the listing jumps from U+00D6 to U+00D9 here. Skipping U+00D7
// (multiplication sign) looks deliberate, but U+00D8 ("\xC3\x98", capital O with
// stroke) is also absent even though its lowercase form U+00F8 is mapped to 'o'
// further down -- probably an omission; confirm and consider adding => 'O'.
"\xC3\x99" =>
'U',
"\xC3\x9A" =>
'U',
142 "\xC3\x9B" =>
'U',
"\xC3\x9C" =>
'U',
"\xC3\x9D" =>
'Y',
// U+00DE (capital thorn) and U+00DF (sharp s) expand to two ASCII letters.
143 "\xC3\x9E" =>
'TH',
"\xC3\x9F" =>
'ss',
"\xC3\xA0" =>
'a',
144 "\xC3\xA1" =>
'a',
"\xC3\xA2" =>
'a',
"\xC3\xA3" =>
'a',
145 "\xC3\xA4" =>
'a',
"\xC3\xA5" =>
'a',
"\xC3\xA6" =>
'ae',
146 "\xC3\xA7" =>
'c',
"\xC3\xA8" =>
'e',
"\xC3\xA9" =>
'e',
147 "\xC3\xAA" =>
'e',
"\xC3\xAB" =>
'e',
"\xC3\xAC" =>
'i',
148 "\xC3\xAD" =>
'i',
"\xC3\xAE" =>
'i',
"\xC3\xAF" =>
'i',
149 "\xC3\xB0" =>
'd',
"\xC3\xB1" =>
'n',
"\xC3\xB2" =>
'o',
150 "\xC3\xB3" =>
'o',
"\xC3\xB4" =>
'o',
"\xC3\xB5" =>
'o',
151 "\xC3\xB6" =>
'o',
"\xC3\xB8" =>
'o',
"\xC3\xB9" =>
'u',
152 "\xC3\xBA" =>
'u',
"\xC3\xBB" =>
'u',
"\xC3\xBC" =>
'u',
153 "\xC3\xBD" =>
'y',
"\xC3\xBE" =>
'th',
"\xC3\xBF" =>
'y',
// Replacement-table entries for the Latin Extended-A block, U+0100..U+017F
// (UTF-8 lead bytes 0xC4 and 0xC5). Keys are UTF-8 byte sequences, values are
// the ASCII replacement; ligatures (IJ, OE) expand to two letters.
156 "\xC4\x80" =>
'A',
"\xC4\x81" =>
'a',
"\xC4\x82" =>
'A',
157 "\xC4\x83" =>
'a',
"\xC4\x84" =>
'A',
"\xC4\x85" =>
'a',
158 "\xC4\x86" =>
'C',
"\xC4\x87" =>
'c',
"\xC4\x88" =>
'C',
159 "\xC4\x89" =>
'c',
"\xC4\x8A" =>
'C',
"\xC4\x8B" =>
'c',
160 "\xC4\x8C" =>
'C',
"\xC4\x8D" =>
'c',
"\xC4\x8E" =>
'D',
161 "\xC4\x8F" =>
'd',
"\xC4\x90" =>
'D',
"\xC4\x91" =>
'd',
162 "\xC4\x92" =>
'E',
"\xC4\x93" =>
'e',
"\xC4\x94" =>
'E',
163 "\xC4\x95" =>
'e',
"\xC4\x96" =>
'E',
"\xC4\x97" =>
'e',
164 "\xC4\x98" =>
'E',
"\xC4\x99" =>
'e',
"\xC4\x9A" =>
'E',
165 "\xC4\x9B" =>
'e',
"\xC4\x9C" =>
'G',
"\xC4\x9D" =>
'g',
166 "\xC4\x9E" =>
'G',
"\xC4\x9F" =>
'g',
"\xC4\xA0" =>
'G',
167 "\xC4\xA1" =>
'g',
"\xC4\xA2" =>
'G',
"\xC4\xA3" =>
'g',
168 "\xC4\xA4" =>
'H',
"\xC4\xA5" =>
'h',
"\xC4\xA6" =>
'H',
169 "\xC4\xA7" =>
'h',
"\xC4\xA8" =>
'I',
"\xC4\xA9" =>
'i',
170 "\xC4\xAA" =>
'I',
"\xC4\xAB" =>
'i',
"\xC4\xAC" =>
'I',
171 "\xC4\xAD" =>
'i',
"\xC4\xAE" =>
'I',
"\xC4\xAF" =>
'i',
// U+0130 (capital I with dot above) and U+0131 (dotless small i) map to plain I / i.
172 "\xC4\xB0" =>
'I',
"\xC4\xB1" =>
'i',
"\xC4\xB2" =>
'IJ',
173 "\xC4\xB3" =>
'ij',
"\xC4\xB4" =>
'J',
"\xC4\xB5" =>
'j',
174 "\xC4\xB6" =>
'K',
"\xC4\xB7" =>
'k',
// U+0138 (small letter kra) has no uppercase partner in this block; from here
// on the uppercase letters sit on odd code points until U+0148.
"\xC4\xB8" =>
'k',
175 "\xC4\xB9" =>
'L',
"\xC4\xBA" =>
'l',
"\xC4\xBB" =>
'L',
176 "\xC4\xBC" =>
'l',
"\xC4\xBD" =>
'L',
"\xC4\xBE" =>
'l',
177 "\xC4\xBF" =>
'L',
"\xC5\x80" =>
'l',
"\xC5\x81" =>
'L',
178 "\xC5\x82" =>
'l',
"\xC5\x83" =>
'N',
"\xC5\x84" =>
'n',
179 "\xC5\x85" =>
'N',
"\xC5\x86" =>
'n',
"\xC5\x87" =>
'N',
180 "\xC5\x88" =>
'n',
// NOTE(review): the case of the next three replacements looks inverted.
// U+0149 (small n preceded by apostrophe) is a lowercase letter but maps to 'N',
// U+014A (capital eng) is uppercase but maps to 'n', and U+014B (small eng) is
// lowercase but maps to 'N'. Expected: 'n', 'N', 'n'. The effect is hidden when
// the result is lower-cased afterwards, but it is wrong for direct callers.
"\xC5\x89" =>
'N',
"\xC5\x8A" =>
'n',
181 "\xC5\x8B" =>
'N',
"\xC5\x8C" =>
'O',
"\xC5\x8D" =>
'o',
182 "\xC5\x8E" =>
'O',
"\xC5\x8F" =>
'o',
"\xC5\x90" =>
'O',
183 "\xC5\x91" =>
'o',
"\xC5\x92" =>
'OE',
"\xC5\x93" =>
'oe',
184 "\xC5\x94" =>
'R',
"\xC5\x95" =>
'r',
"\xC5\x96" =>
'R',
185 "\xC5\x97" =>
'r',
"\xC5\x98" =>
'R',
"\xC5\x99" =>
'r',
186 "\xC5\x9A" =>
'S',
"\xC5\x9B" =>
's',
"\xC5\x9C" =>
'S',
187 "\xC5\x9D" =>
's',
"\xC5\x9E" =>
'S',
"\xC5\x9F" =>
's',
188 "\xC5\xA0" =>
'S',
"\xC5\xA1" =>
's',
"\xC5\xA2" =>
'T',
189 "\xC5\xA3" =>
't',
"\xC5\xA4" =>
'T',
"\xC5\xA5" =>
't',
190 "\xC5\xA6" =>
'T',
"\xC5\xA7" =>
't',
"\xC5\xA8" =>
'U',
191 "\xC5\xA9" =>
'u',
"\xC5\xAA" =>
'U',
"\xC5\xAB" =>
'u',
192 "\xC5\xAC" =>
'U',
"\xC5\xAD" =>
'u',
"\xC5\xAE" =>
'U',
193 "\xC5\xAF" =>
'u',
"\xC5\xB0" =>
'U',
"\xC5\xB1" =>
'u',
194 "\xC5\xB2" =>
'U',
"\xC5\xB3" =>
'u',
"\xC5\xB4" =>
'W',
195 "\xC5\xB5" =>
'w',
"\xC5\xB6" =>
'Y',
"\xC5\xB7" =>
'y',
// U+0178 (capital Y with diaeresis) stands alone; its lowercase form is U+00FF
// in the Latin-1 Supplement block.
196 "\xC5\xB8" =>
'Y',
"\xC5\xB9" =>
'Z',
"\xC5\xBA" =>
'z',
197 "\xC5\xBB" =>
'Z',
"\xC5\xBC" =>
'z',
"\xC5\xBD" =>
'Z',
198 "\xC5\xBE" =>
'z',
// U+017F is the long s.
"\xC5\xBF" =>
's',
// S and T with comma below, U+0218..U+021B (Latin Extended-B; used in Romanian),
// in upper/lower pairs.
200 "\xC8\x98" =>
'S',
"\xC8\x99" =>
's',
201 "\xC8\x9A" =>
'T',
"\xC8\x9B" =>
't',
// O and U with horn, U+01A0/U+01A1 and U+01AF/U+01B0 (used in Vietnamese),
// in upper/lower pairs.
203 "\xC6\xA0" =>
'O',
"\xC6\xA1" =>
'o',
204 "\xC6\xAF" =>
'U',
"\xC6\xB0" =>
'u',
// Latin Extended Additional (U+1Exx), precomposed letters carrying a grave accent
// (alone or stacked on circumflex, breve or horn): U+1EA6/7, U+1EB0/1, U+1EC0/1,
// U+1ED2/3, U+1EDC/D, U+1EEA/B, U+1EF2/3. Each pair is upper case then lower case.
206 "\xE1\xBA\xA6" =>
'A',
"\xE1\xBA\xA7" =>
'a',
207 "\xE1\xBA\xB0" =>
'A',
"\xE1\xBA\xB1" =>
'a',
208 "\xE1\xBB\x80" =>
'E',
"\xE1\xBB\x81" =>
'e',
209 "\xE1\xBB\x92" =>
'O',
"\xE1\xBB\x93" =>
'o',
210 "\xE1\xBB\x9C" =>
'O',
"\xE1\xBB\x9D" =>
'o',
211 "\xE1\xBB\xAA" =>
'U',
"\xE1\xBB\xAB" =>
'u',
212 "\xE1\xBB\xB2" =>
'Y',
"\xE1\xBB\xB3" =>
'y',
// Latin Extended Additional (U+1Exx), precomposed letters carrying a hook above
// (alone or stacked on circumflex, breve or horn): U+1EA2/3, U+1EA8/9, U+1EB2/3,
// U+1EBA/B, U+1EC2/3, U+1EC8/9, U+1ECE/F, U+1ED4/5, U+1EDE/F, U+1EE6/7,
// U+1EEC/D, U+1EF6/7. Each pair is upper case then lower case.
214 "\xE1\xBA\xA2" =>
'A',
"\xE1\xBA\xA3" =>
'a',
215 "\xE1\xBA\xA8" =>
'A',
"\xE1\xBA\xA9" =>
'a',
216 "\xE1\xBA\xB2" =>
'A',
"\xE1\xBA\xB3" =>
'a',
217 "\xE1\xBA\xBA" =>
'E',
"\xE1\xBA\xBB" =>
'e',
218 "\xE1\xBB\x82" =>
'E',
"\xE1\xBB\x83" =>
'e',
219 "\xE1\xBB\x88" =>
'I',
"\xE1\xBB\x89" =>
'i',
220 "\xE1\xBB\x8E" =>
'O',
"\xE1\xBB\x8F" =>
'o',
221 "\xE1\xBB\x94" =>
'O',
"\xE1\xBB\x95" =>
'o',
222 "\xE1\xBB\x9E" =>
'O',
"\xE1\xBB\x9F" =>
'o',
223 "\xE1\xBB\xA6" =>
'U',
"\xE1\xBB\xA7" =>
'u',
224 "\xE1\xBB\xAC" =>
'U',
"\xE1\xBB\xAD" =>
'u',
225 "\xE1\xBB\xB6" =>
'Y',
"\xE1\xBB\xB7" =>
'y',
// Latin Extended Additional (U+1Exx), precomposed letters carrying a tilde
// (alone or stacked on circumflex, breve or horn): U+1EAA/B, U+1EB4/5, U+1EBC/D,
// U+1EC4/5, U+1ED6/7, U+1EE0/1, U+1EEE/F, U+1EF8/9. Upper case then lower case.
227 "\xE1\xBA\xAA" =>
'A',
"\xE1\xBA\xAB" =>
'a',
228 "\xE1\xBA\xB4" =>
'A',
"\xE1\xBA\xB5" =>
'a',
229 "\xE1\xBA\xBC" =>
'E',
"\xE1\xBA\xBD" =>
'e',
230 "\xE1\xBB\x84" =>
'E',
"\xE1\xBB\x85" =>
'e',
231 "\xE1\xBB\x96" =>
'O',
"\xE1\xBB\x97" =>
'o',
232 "\xE1\xBB\xA0" =>
'O',
"\xE1\xBB\xA1" =>
'o',
233 "\xE1\xBB\xAE" =>
'U',
"\xE1\xBB\xAF" =>
'u',
234 "\xE1\xBB\xB8" =>
'Y',
"\xE1\xBB\xB9" =>
'y',
// Latin Extended Additional (U+1Exx), precomposed letters carrying an acute accent
// stacked on circumflex, breve or horn: U+1EA4/5, U+1EAE/F, U+1EBE/F, U+1ED0/1,
// U+1EDA/B, U+1EE8/9. Each pair is upper case then lower case.
236 "\xE1\xBA\xA4" =>
'A',
"\xE1\xBA\xA5" =>
'a',
237 "\xE1\xBA\xAE" =>
'A',
"\xE1\xBA\xAF" =>
'a',
238 "\xE1\xBA\xBE" =>
'E',
"\xE1\xBA\xBF" =>
'e',
239 "\xE1\xBB\x90" =>
'O',
"\xE1\xBB\x91" =>
'o',
240 "\xE1\xBB\x9A" =>
'O',
"\xE1\xBB\x9B" =>
'o',
241 "\xE1\xBB\xA8" =>
'U',
"\xE1\xBB\xA9" =>
'u',
// Latin Extended Additional (U+1Exx), precomposed letters carrying a dot below
// (alone or stacked on circumflex, breve or horn): U+1EA0/1, U+1EAC/D, U+1EB6/7,
// U+1EB8/9, U+1EC6/7, U+1ECA/B, U+1ECC/D, U+1ED8/9, U+1EE2/3, U+1EE4/5,
// U+1EF0/1, U+1EF4/5. Each pair is upper case then lower case.
243 "\xE1\xBA\xA0" =>
'A',
"\xE1\xBA\xA1" =>
'a',
244 "\xE1\xBA\xAC" =>
'A',
"\xE1\xBA\xAD" =>
'a',
245 "\xE1\xBA\xB6" =>
'A',
"\xE1\xBA\xB7" =>
'a',
246 "\xE1\xBA\xB8" =>
'E',
"\xE1\xBA\xB9" =>
'e',
247 "\xE1\xBB\x86" =>
'E',
"\xE1\xBB\x87" =>
'e',
248 "\xE1\xBB\x8A" =>
'I',
"\xE1\xBB\x8B" =>
'i',
249 "\xE1\xBB\x8C" =>
'O',
"\xE1\xBB\x8D" =>
'o',
250 "\xE1\xBB\x98" =>
'O',
"\xE1\xBB\x99" =>
'o',
251 "\xE1\xBB\xA2" =>
'O',
"\xE1\xBB\xA3" =>
'o',
252 "\xE1\xBB\xA4" =>
'U',
"\xE1\xBB\xA5" =>
'u',
253 "\xE1\xBB\xB0" =>
'U',
"\xE1\xBB\xB1" =>
'u',
254 "\xE1\xBB\xB4" =>
'Y',
"\xE1\xBB\xB5" =>
'y',
// Fragment of the hasNormalizerSupport() body (function header and return are not shown).
// Probe pair: the same character in two encodings.
// $form_c is the precomposed U+00C5 (capital A with ring above);
// $form_d is "A" followed by U+030A (combining ring above).
266 $form_c =
"\xC3\x85";
267 $form_d =
"A\xCC\x8A";
// $ret is true only if normalizer_normalize() exists AND actually composes the
// decomposed probe into the precomposed one; function_exists() is checked first so
// the call is never attempted when the function is missing.
268 $ret = (function_exists(
'normalizer_normalize')
269 && $form_c === normalizer_normalize($form_d));
static transliterateAscii($utf8)
Transliterate Western multibyte chars to ASCII.
static hasNormalizerSupport()
Tests that "normalizer_normalize" exists and works.
static urlize($string, $separator= '-')
Create a version of a string for embedding in a URL.
static getAsciiTranslitMap()
Get array of UTF-8 (NFC) character replacements.