<?php
$needle = '0772-51-AA-655';
$haystack = array(
'0772-51-AA-6559',
'0772-51-AA-6557',
'0772-51-AA-6552'
);
$result = findFuzzyMatches($needle, $haystack);
var_dump($result);
function findFuzzyMatches ($needle, $haystack, $tolerance = 1) {
/*
Returns (fuzzy) matches between $needle and elements of $haystack.
The selected matches are those having Levenshtein distance up to
$tolerance. Give $tolerance = -1 to get all matches.
Return type is an associative array: the keys are the Levenshtein
distance (integer), the values are sub-arrays containing the
elements of $haystack having that distance.
E.g.:
$needle = 'abc';
$haystack = array('abc', 'abd', 'abe', 'xyz');
returns:
array(
0 => array('abc'),
1 => array('abd', 'abe'),
3 => array('xyz')
)
*/
$matches = array(); // key: distance, value: sub-array with candidates having that distance
foreach ($haystack as $candidate) {
// NOTE: Slowness? Use levenshtein, which is the faster function of the two.
//$distance = levenshtein($needle, $candidate); // Downside: character swaps cost 2 instead of 1.
$distance = DamerauLevenshtein($needle, $candidate);
if ($distance <= $tolerance || $tolerance == -1) {
if (!array_key_exists($distance, $matches)) {
$matches[$distance] = array();
}
$matches[$distance][] = $candidate;
}
}
return $matches;
}
function DamerauLevenshtein($str1, $str2) {
$d = Array();
$lenStr1 = strlen($str1);
$lenStr2 = strlen($str2);
if ($lenStr1 == 0) {
return $lenStr2;
}
if ($lenStr2 == 0) {
return $lenStr1;
}
for ($i=0; $i <= $lenStr1; $i++) {
$d[$i] = Array();
$d[$i][0] = $i;
}
for ($j=0; $j <= $lenStr2; $j++) {
$d[0][$j] = $j;
}
for ($i=1; $i <= $lenStr1; $i++) {
for ($j=1; $j <= $lenStr2; $j++) {
$cost = substr($str1, $i - 1, 1) == substr($str2, $j - 1, 1) ? 0 : 1;
$d[$i][$j] = min(
$d[$i - 1][$j] + 1, // deletion
$d[$i][$j - 1] + 1, // insertion
$d[$i - 1][$j - 1] + $cost // substitution
);
if (
$i > 1 &&
$j > 1 &&
substr($str1, $i - 1, 1) == substr($str2, $j - 2, 1) &&
substr($str1, $i - 2, 1) == substr($str2, $j - 1, 1)
) {
$d[$i][$j] = min(
$d[$i][$j],
$d[$i - 2][$j - 2] + $cost // transposition
);
}
}
}
return $d[$lenStr1][$lenStr2];
}
?>
- Output for git.master, git.master_jit, rfc.property-hooks
- array(1) {
[1]=>
array(3) {
[0]=>
string(15) "0772-51-AA-6559"
[1]=>
string(15) "0772-51-AA-6557"
[2]=>
string(15) "0772-51-AA-6552"
}
}
This tab shows result from various feature-branches currently under review by the php developers. Contact me to have additional branches featured.
Active branches
Archived branches
Once feature-branches are merged or declined, they are no longer available. Their functionality (when merged) can be viewed from the main output page
preferences:
32.35 ms | 401 KiB | 8 Q