<?php
// Number of random strings to generate; also the loop bound used when a share of them is replaced later.
$iterations = 10000;
/**
 * Build a random string of ASCII digits and letters.
 *
 * The character pool is repeated, shuffled with str_shuffle(), and the first
 * $length characters are returned. The pool is repeated at least 5 times (as
 * before) and more often when $length would otherwise exceed the shuffled
 * material, so the result always has exactly $length characters.
 *
 * Not suitable for security purposes: str_shuffle() is not a cryptographic
 * source of randomness.
 *
 * @param int $length Number of characters wanted; values <= 0 yield ''.
 * @return string Random string of exactly $length characters from [0-9a-zA-Z].
 */
function str_random($length=16) {
    $pool = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
    $length = (int) $length;
    if ($length <= 0) {
        // A negative length would make substr() trim from the end instead.
        return '';
    }
    // 5 copies cover up to 310 characters; scale up for longer requests so
    // the result is never silently truncated.
    $copies = max(5, (int) ceil($length / strlen($pool)));
    return substr(str_shuffle(str_repeat($pool, $copies)), 0, $length);
}
// Corpus: $iterations random strings, each between 16 and 64 characters long.
$strings = array();
$made = 0;
while ($made < $iterations) {
    $len = rand(16, 64);
    $strings[] = str_random($len);
    $made++;
}

// Report the serialized size of the corpus in kilobytes.
$corpus_bytes = strlen(serialize($strings));
echo 'String size: ', round($corpus_bytes / 1024, 2), 'kb', PHP_EOL;

$hashes_md5 = array();
$hashes_crc32 = array();

// First pass: one key per string, built as 'x' followed by the string's
// CRC32 shifted right by 8 bits. Only the keys matter; every value is 0.
// (Despite the variable name, this pass also uses crc32().)
$pass_started = microtime(true);
foreach ($strings as $text) {
    $key = 'x' . (crc32($text) >> 8);
    $hashes_md5[$key] = 0;
}
$t_md5 = microtime(true) - $pass_started;

// Notes from earlier experiments with different mask widths:
//$max = bindec('111111111111111111111111111111');
//$max = 0xFFFFFFF; //F; +5% size boost, collision: 0.1689%, time boost: 0%
//$max = 0xFFFFF; //FFF; +47% size boost, collision: 35.5696%, time boost: 0%
$max = 0xFFFFFF; //FF; +14% size boost, collision: 2.908%, time boost: -9%

// Replace roughly 10% of the strings: each index has a 10-in-100 chance.
for ($idx = 0; $idx < $iterations; ++$idx) {
    if (rand(1, 100) <= 90) {
        continue;
    }
    $strings[$idx] = str_random(rand(16, 64));
}

// Second pass: same key derivation, now over the partly replaced corpus.
$pass_started = microtime(true);
foreach ($strings as $text) {
    $key = 'x' . (crc32($text) >> 8);
    $hashes_crc32[$key] = 0;
}
$t_crc32 = microtime(true) - $pass_started;
/**
 * Print the intersection and union sizes of two key sets, computed two ways,
 * together with the time each way took.
 *
 * Only the array keys are compared; values are ignored.
 *
 * "func" way: array_intersect_key() for the intersection and the array union
 * operator for the union. The union operator is used instead of array_merge()
 * because array_merge() renumbers integer keys and appends, which would
 * report count($first) + count($second) for integer-keyed sets.
 *
 * "calculated" way: a single array_diff_key() call, from which both numbers
 * are derived arithmetically.
 *
 * @param array $first  First set, members stored as keys.
 * @param array $second Second set, members stored as keys.
 * @return void Results are echoed, nothing is returned.
 */
function compareShingles(array $first, array $second)
{
    // Variant 1: built-in set operations.
    $started = microtime(true);
    echo 'func intersect: ', count(array_intersect_key($first, $second)), PHP_EOL;
    echo ' func unique: ', count($first + $second), PHP_EOL;
    $elapsed = microtime(true) - $started;
    echo ' time: ', $elapsed, PHP_EOL;

    // Variant 2: one diff, then arithmetic.
    $started = microtime(true);
    $diff_count = count(array_diff_key($first, $second)); // keys only in $first
    $intersect_count = count($first) - $diff_count;       // keys in both
    $unique_count = count($second) + $diff_count;         // keys in either
    echo 'calculated intersect: ', $intersect_count, PHP_EOL;
    echo ' calculated unique: ', $unique_count, PHP_EOL;
    $elapsed = microtime(true) - $started;
    echo ' time: ', $elapsed, PHP_EOL;
    //return round(($intersect_count / $unique_count) / 0.01, 2);
}
// The hash arrays already hold their members as keys, so they are used as-is
// (an earlier version flipped them first).
$hashes_md5_flip = $hashes_md5; //array_flip($hashes_md5);
$hashes_crc32_flip = $hashes_crc32; //array_flip($hashes_crc32);
//ksort($hashes_md5_flip);
//ksort($hashes_crc32_flip);

// Time each set operation into the variable that matches its name, so the
// report below pairs every label with the right measurement.
$t_diff = microtime(true);
$hashes_diff = array_diff_key($hashes_md5_flip, $hashes_crc32_flip);
$t_diff = microtime(true) - $t_diff;

$t_merge = microtime(true);
$hashes_merge = array_merge($hashes_md5_flip, $hashes_crc32_flip);
$t_merge = microtime(true) - $t_merge;

$t_intersect = microtime(true);
$hashes_intersect = array_intersect_key($hashes_md5_flip, $hashes_crc32_flip);
$t_intersect = microtime(true) - $t_intersect;

echo PHP_EOL;
echo ' merge time: ', $t_merge, PHP_EOL;
echo ' merge count: ', count($hashes_merge), PHP_EOL;
echo ' diff time: ', $t_diff, PHP_EOL;
echo ' diff count: ', count($hashes_diff), PHP_EOL;
echo ' intersect time: ', $t_intersect, PHP_EOL;
echo 'intersect count: ', count($hashes_intersect), PHP_EOL;
// Counts of each set, plus that count with the diff size added / subtracted.
echo ' md5 count: ', count($hashes_md5_flip), ' with diff:', count($hashes_md5_flip)+count($hashes_diff), ' intersect: ', count($hashes_md5_flip)-count($hashes_diff), PHP_EOL;
echo ' crc32 count: ', count($hashes_crc32_flip), ' with diff:', count($hashes_crc32_flip)+count($hashes_diff), ' intersect: ', count($hashes_crc32_flip)-count($hashes_diff), PHP_EOL;
echo PHP_EOL;
compareShingles($hashes_md5_flip, $hashes_crc32_flip);
echo PHP_EOL;
// The script stops here; nothing below this line is executed.
exit;
// Size / time / collision report for the two hash arrays.
// NOTE(review): this section follows an unconditional exit, so it does not
// run. It also applies array_unique() to the values, and every value stored
// in these arrays is 0 - confirm whether it is still wanted.

// Serialized size of each array as built.
$s_md5 = strlen(serialize($hashes_md5));
$s_crc32 = strlen(serialize($hashes_crc32));

// The same arrays with duplicate values removed, and their serialized sizes.
$u_hashes_md5 = array_unique($hashes_md5);
$u_hashes_crc32 = array_unique($hashes_crc32);
$su_md5 = strlen(serialize($u_hashes_md5));
$su_crc32 = strlen(serialize($u_hashes_crc32));

// Timing of the two hashing passes and the relative difference.
echo 'time md5: ', $t_md5, PHP_EOL;
echo 'time crc32: ', $t_crc32, PHP_EOL;
echo 'time boost: ';
echo round(($t_md5 - $t_crc32) / $t_md5, 2) * 100;
echo '%', PHP_EOL;
echo PHP_EOL;

// Serialized sizes and the relative difference.
echo 'size md5: ', $s_md5, PHP_EOL;
echo 'size crc32: ', $s_crc32, PHP_EOL;
echo 'size boost: ';
echo round(($s_md5 - $s_crc32) / $s_md5, 2) * 100;
echo '%', PHP_EOL;
echo PHP_EOL;

// De-duplicated sizes with element counts in parentheses.
echo 'unique size md5: ', $su_md5, ' (';
$cnt_hashes_md5 = count($u_hashes_md5);
echo $cnt_hashes_md5, ')', PHP_EOL;
echo 'unique size crc32: ', $su_crc32, ' (';
$cnt_hashes_crc32 = count($u_hashes_crc32);
echo $cnt_hashes_crc32, ')', PHP_EOL;
echo 'unique size boost: ';
echo round(($su_md5 - $su_crc32) / $su_md5, 2) * 100;
echo '%', PHP_EOL;

// Share of entries lost between the two de-duplicated arrays.
echo ' collision: ';
echo round(($cnt_hashes_md5 - $cnt_hashes_crc32) / $cnt_hashes_md5, 6) * 100;
echo '%', PHP_EOL;
// preferences:
// 31.49 ms | 402 KiB | 5 Q