- preg_match: documentation (source)
- preg_replace: documentation (source)
<?php
// Sample input for charset_decode_utf_8().
// NOTE(review): PHP does not expand \uXXXX escapes in string literals, so this
// value is the literal backslash-u text (6 ASCII bytes per escape), not the
// Unicode characters themselves -- confirm that this is the intended input.
$crap = '\u200e\u206a\u200f\u200e\u202d\u206f\u200c\u206a\u202c\u202b\u200f\u206f\u202b\u206e\u206e\u202b\u206e\u202e\u206c\u202b\u206d\u200e\u202a\u206d\u200b\u206c\u206a\u202e\u206b\u200b\u200f\u200b\u202d\u202d\u206f\u206b\u200c\u206a\u206f\u206b\u202e';
/**
 * Replace multi-byte UTF-8 sequences in a string with HTML numeric entities.
 *
 * Two-, three- and four-byte sequences are rewritten as "&#NNNN;", where NNNN
 * is the decimal code point computed from the lead and continuation bytes.
 * Strings containing no 8-bit bytes are returned untouched.
 *
 * The replacements use preg_replace_callback(): the "/e" (eval) pattern
 * modifier was deprecated in PHP 5.5 and removed in PHP 7.0, where
 * preg_replace() with "/e" emits a warning and returns NULL.
 *
 * @param string $string Byte string, expected to be UTF-8 encoded.
 * @return string The input with multi-byte sequences replaced by entities.
 */
function charset_decode_utf_8 ($string) {
    /* Only do the slow convert if there are 8-bit characters */
    /* avoid using 0xA0 (\240) in ereg ranges. RH73 does not like that */
    if (!preg_match("/[\200-\237]/", $string)
        && !preg_match("/[\241-\377]/", $string)
    ) {
        return $string;
    }

    // The patterns are matched byte-wise (no "u" modifier) on purpose: the
    // octal escapes below are expanded by PHP into raw bytes inside the pattern.

    // decode four byte unicode characters
    $string = preg_replace_callback(
        "/([\360-\367])([\200-\277])([\200-\277])([\200-\277])/",
        function ($m) {
            $code = (ord($m[1]) - 240) * 262144
                + (ord($m[2]) - 128) * 4096
                + (ord($m[3]) - 128) * 64
                + (ord($m[4]) - 128);
            return '&#' . $code . ';';
        },
        $string
    );

    // decode three byte unicode characters
    $string = preg_replace_callback(
        "/([\340-\357])([\200-\277])([\200-\277])/",
        function ($m) {
            $code = (ord($m[1]) - 224) * 4096
                + (ord($m[2]) - 128) * 64
                + (ord($m[3]) - 128);
            return '&#' . $code . ';';
        },
        $string
    );

    // decode two byte unicode characters
    $string = preg_replace_callback(
        "/([\300-\337])([\200-\277])/",
        function ($m) {
            $code = (ord($m[1]) - 192) * 64 + (ord($m[2]) - 128);
            return '&#' . $code . ';';
        },
        $string
    );

    return $string;
}
// Run the sample string through the decoder and write the result to the output.
$str = charset_decode_utf_8($crap);
print($str);