<?php
// Sample byte strings used as benchmark input.
// Each entry maps a human-readable description to array(bytes, expected),
// where `expected` is the result a strict UTF-8 validity check should give.
$examples = array(
    'Valid ASCII' => array("a", true),
    'Valid 2 Octet Sequence' => array("\xc3\xb1", true),
    'Invalid 2 Octet Sequence' => array("\xc3\x28", false),
    'Invalid Sequence Identifier' => array("\xa0\xa1", false),
    'Valid 3 Octet Sequence' => array("\xe2\x82\xa1", true),
    'Invalid 3 Octet Sequence (in 2nd Octet)' => array("\xe2\x28\xa1", false),
    'Invalid 3 Octet Sequence (in 3rd Octet)' => array("\xe2\x82\x28", false),
    'Valid 4 Octet Sequence' => array("\xf0\x90\x8c\xbc", true),
    'Invalid 4 Octet Sequence (in 2nd Octet)' => array("\xf0\x28\x8c\xbc", false),
    'Invalid 4 Octet Sequence (in 3rd Octet)' => array("\xf0\x90\x28\xbc", false),
    // Only the final octet is broken here, so the case really exercises the
    // 4th position instead of failing early on the 2nd octet.
    'Invalid 4 Octet Sequence (in 4th Octet)' => array("\xf0\x90\x8c\x28", false),
    // 5- and 6-octet forms are structurally well-formed under the original
    // UTF-8 scheme but are not permitted by RFC 3629, hence expected false.
    'Valid 5 Octet Sequence (but not Unicode!)' => array("\xf8\xa1\xa1\xa1\xa1", false),
    'Valid 6 Octet Sequence (but not Unicode!)' => array("\xfc\xa1\xa1\xa1\xa1\xa1", false),
);
/**
 * Check whether a string is valid UTF-8 using PCRE.
 *
 * An empty pattern with the /u modifier makes PCRE validate the subject as
 * UTF-8: preg_match() yields 1 for well-formed input and false otherwise.
 * The result is normalised to a boolean so it matches checkWithMbstring().
 *
 * @param string $str Byte string to validate.
 * @return bool True when $str is well-formed UTF-8, false otherwise.
 */
function checkWithPreg($str)
{
    return preg_match('//u', $str) === 1;
}
/**
 * Check whether a string is valid UTF-8 using the mbstring extension.
 *
 * @param string $str Byte string to validate.
 * @return bool Result of mb_check_encoding() for the 'UTF-8' encoding.
 */
function checkWithMbstring($str)
{
    $isValidUtf8 = mb_check_encoding($str, 'UTF-8');

    return $isValidUtf8;
}
/**
 * Time how long one validator takes to process every example string.
 *
 * Calls $method once for each entry of the global $examples array, repeats
 * that $count times, then echoes "<method> <elapsed seconds>" and a newline.
 * The validator's return value is discarded; only elapsed time is reported.
 *
 * @param string $method Name of a function taking a single string argument.
 * @param int    $count  Number of passes over the example set.
 * @return void
 */
function runTests($method, $count)
{
    global $examples;

    // microtime(true) yields a float. The argument-less form returns a
    // "msec sec" string, and subtracting two of those raises a
    // non-numeric-value notice/warning.
    $start = microtime(true);

    for ($i = 0; $i < $count; $i++) {
        foreach ($examples as $arr) {
            // Element 0 is the byte string under test.
            $method($arr[0]);
        }
    }

    $bench = microtime(true) - $start;

    echo "$method $bench\n";
}
// Benchmark each validator over the same number of passes, mbstring first.
$iterations = 10000;
foreach (array('checkWithMbstring', 'checkWithPreg') as $validator) {
    runTests($validator, $iterations);
}
// preferences:
// 33.21 ms | 402 KiB | 5 Q