<?php
// Number of sample strings generated for the benchmark; the same bound is
// reused later when a random subset of those strings is replaced.
$iterations = 100000;
/**
 * Build a pseudo-random alphanumeric string of the requested length.
 *
 * The 62-character pool (digits, lower- and upper-case letters) is repeated,
 * shuffled with str_shuffle() and then truncated, so a character may appear
 * several times in the result. str_shuffle() is not a cryptographically
 * secure source, so the output is only suitable as test/benchmark data.
 *
 * @param int $length Number of characters wanted (default 16).
 * @return string Random string made of [0-9a-zA-Z].
 */
function str_random($length=16) {
    $pool = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
    // A fixed repeat count of 5 caps the shuffled buffer at 310 characters, so
    // longer requests used to come back silently truncated. Scale the repeat
    // count with the requested length, never going below the historical 5 so
    // that short strings keep the same character distribution as before.
    $repeats = max(5, (int) ceil($length / strlen($pool)));
    return substr(str_shuffle(str_repeat($pool, $repeats)), 0, $length);
}
// Build the sample set: $iterations random alphanumeric strings whose length
// is picked by rand(16, 64) for each entry.
$strings = array();
for ($i = 0; $i < $iterations; $i++) {
$strings[] = str_random(rand(16, 64));
}
// Report the serialized size of the sample set, divided by 1024.
echo 'String size: ', round(strlen(serialize($strings))/ 1024, 2), 'kb', PHP_EOL;
// Hash sets are stored as array keys (value is always 0) so duplicates collapse.
$hashes_md5 = array();
$hashes_crc32 = array();
// First pass over the ORIGINAL strings. Note: despite the variable name, this
// pass uses crc32() as well (not md5); the hash is shifted right by 8 bits and
// prefixed with 'x' so the key is always a non-numeric string.
$t_md5 = microtime(true);
foreach ($strings as $s) {
$hashes_md5['x'.(crc32($s) >> 8)] = 0;
}
$t_md5 = microtime(true) - $t_md5;
// Candidate bit masks with the author's measured trade-offs. $max is not
// referenced anywhere else in the visible code.
// NOTE(review): 0xFFFFFF looks like the mask equivalent of the ">> 8" truncation above - confirm.
//$max = bindec('111111111111111111111111111111');
//$max = 0xFFFFFFF; //F; +5% size boost, collision: 0.1689%, time boost: 0%
$max = 0xFFFFFF; //FF; +14% size boost, collision: 2.908%, time boost: -9%
//$max = 0xFFFFF; //FFF; +47% size boost, collision: 35.5696%, time boost: 0%
// modify 10% of data
// Each entry is replaced with probability rand(1,100) > 90 by 16-50 random bytes.
for ($i = 0; $i < $iterations; $i++) {
if (rand(1,100) > 90) {
$strings[$i] = openssl_random_pseudo_bytes(rand(16, 50));
}
}
// Second pass: identical hashing, now over the partially MODIFIED strings, so
// $hashes_crc32 differs from $hashes_md5 only where the data changed (or collided).
$t_crc32 = microtime(true);
foreach ($strings as $s) {
$hashes_crc32['x'.(crc32($s) >> 8)] = 0;
}
$t_crc32 = microtime(true) - $t_crc32;
/**
 * Print intersection and union sizes of two key sets, computed two ways,
 * together with the wall-clock time each way took.
 *
 * Way 1 ("func") asks array_intersect_key() and array_merge() directly.
 * Way 2 ("calculated") runs a single array_diff_key() and derives both numbers
 * arithmetically from the array sizes.
 *
 * Only the array keys matter; values are ignored. The function prints its
 * findings and returns nothing.
 *
 * @param array $first  First set, members stored as keys.
 * @param array $second Second set, members stored as keys.
 */
function compareShingles(array $first, array $second)
{
    // Way 1: let the built-in set functions do all the work.
    $startedAt = microtime(true);
    $sharedKeys = array_intersect_key($first, $second);
    echo 'func intersect: ' . count($sharedKeys) . PHP_EOL;
    $combined = array_merge($first, $second);
    echo ' func unique: ' . count($combined) . PHP_EOL;
    $elapsed = microtime(true) - $startedAt;
    echo ' time: ' . $elapsed . PHP_EOL;

    // Way 2: one diff, then plain arithmetic on the sizes.
    $startedAt = microtime(true);
    $onlyInFirst = count(array_diff_key($first, $second));
    $sharedTotal = count($first) - $onlyInFirst;
    $unionTotal = count($second) + $onlyInFirst;
    echo 'calculated intersect: ' . $sharedTotal . PHP_EOL;
    echo ' calculated unique: ' . $unionTotal . PHP_EOL;
    $elapsed = microtime(true) - $startedAt;
    echo ' time: ' . $elapsed . PHP_EOL;

    // Similarity percentage is left disabled, as in the original:
    //return round(($sharedTotal / $unionTotal) / 0.01, 2);
}
// The hashes are already stored as array keys, so no array_flip() is needed;
// the *_flip names are kept for the rest of the report.
$hashes_md5_flip = $hashes_md5;
$hashes_crc32_flip = $hashes_crc32;
//ksort($hashes_md5_flip);
//ksort($hashes_crc32_flip);

// Time each set operation under its own timer. Previously the two timers were
// swapped (the "merge" timer wrapped array_diff_key() and vice versa) and the
// report printed the same variable for both lines, so the real merge time was
// never shown.
$t_diff = microtime(true);
$hashes_diff = array_diff_key($hashes_md5_flip, $hashes_crc32_flip);
$t_diff = microtime(true) - $t_diff;

// With string keys array_merge() lets later entries overwrite earlier ones, so
// the size of the result is the size of the union of both key sets.
$t_merge = microtime(true);
$hashes_merge = array_merge($hashes_md5_flip, $hashes_crc32_flip);
$t_merge = microtime(true) - $t_merge;

$t_intersect = microtime(true);
$hashes_intersect = array_intersect_key($hashes_md5_flip, $hashes_crc32_flip);
$t_intersect = microtime(true) - $t_intersect;

echo PHP_EOL;
echo ' merge time: ', $t_merge, PHP_EOL;
echo ' merge count: ', count($hashes_merge), PHP_EOL;
echo ' diff time: ', $t_diff, PHP_EOL;
echo ' diff count: ', count($hashes_diff), PHP_EOL;
echo ' intersect time: ', $t_intersect, PHP_EOL;
echo 'intersect count: ', count($hashes_intersect), PHP_EOL;
// Cross-check figures derived from the diff count alone. The diff holds keys
// present only in the first set, so "first count - diff" is the intersection
// size and "second count + diff" is the union size.
// NOTE(review): the other two derived numbers (first + diff, second - diff)
// do not correspond to an intersection/union in general - confirm they are wanted.
echo ' md5 count: ', count($hashes_md5_flip), ' with diff:', count($hashes_md5_flip)+count($hashes_diff), ' intersect: ', count($hashes_md5_flip)-count($hashes_diff), PHP_EOL;
echo ' crc32 count: ', count($hashes_crc32_flip), ' with diff:', count($hashes_crc32_flip)+count($hashes_diff), ' intersect: ', count($hashes_crc32_flip)-count($hashes_diff), PHP_EOL;
echo PHP_EOL;
// Run the same comparison through the helper, which prints its own timings.
compareShingles($hashes_md5_flip, $hashes_crc32_flip);
echo PHP_EOL;
// preferences:
// 55.85 ms | 402 KiB | 5 Q