<?php // 2014-01-30T05:39:59Z
/**
 * Fuzzy matcher for multi-word strings.
 *
 * Both the reference text and the search text are lower-cased and split into
 * words. Whichever of the two has fewer words becomes the pattern; every
 * ordered selection of that many words from the longer text is joined with
 * single spaces and handed to a distance function. A match is reported as
 * soon as one candidate scores at or below the configured threshold.
 */
class Similarity
{
    /** @var string Reference text that every search is compared against. */
    protected $data = null;

    /** @var int Highest checker result that still counts as a match. */
    protected $distance = null;

    /**
     * @param mixed $data     Reference text; cast to string.
     * @param mixed $distance Match threshold; cast to int.
     */
    public function __construct($data, $distance)
    {
        $this->data = (string)$data;
        $this->distance = (int)$distance;
    }

    /**
     * Tests whether $search fuzzily matches the reference text.
     *
     * @param string        $search  Text to compare with the reference text.
     * @param callable|null $checker Invoked as $checker($candidate, $pattern, ...$args);
     *                               its result is compared with <= against the threshold.
     *                               When null, PHP's levenshtein() is used.
     * @param array         $args    Extra arguments appended to every checker call.
     * @param bool          $return  When truthy, return the matching candidate string
     *                               (or null on no match) instead of true/false.
     *
     * @return bool|string|null
     */
    public function checkMatch($search, callable $checker=null, array $args=[], $return=false)
    {
        // The declared default used to reach call_user_func_array() as null and fail there.
        if ($checker === null) {
            $checker = 'levenshtein';
        }

        $solid = $this->normalizeInput($this->data, $search);
        $search = $solid['search'];
        $words = preg_split('/\s+/', $solid['data']);
        // The pattern is joined with single spaces, so spaces + 1 is its word count.
        $size = substr_count($search, ' ') + 1;

        foreach ($this->getAssoc($words, $size) as $assoc) {
            foreach ($this->getPermutations($assoc) as $ordered) {
                $ordered = join(' ', $ordered);
                $result = call_user_func_array($checker, array_merge([$ordered, $search], $args));
                if ($result <= $this->distance) {
                    return $return ? $ordered : true;
                }
            }
        }

        return $return ? null : false;
    }

    /**
     * Builds every ordering of the given elements.
     *
     * @param array $input Elements to permute; keys may be sparse.
     *
     * @return array[] List of permutations (empty when $input is empty).
     */
    protected function getPermutations(array $input)
    {
        if (count($input) === 1) {
            return [$input];
        }

        $result = [];
        foreach ($input as $key => $element) {
            // Permute everything except the current element, then put it in front.
            $rest = array_diff_key($input, [$key => 0]);
            foreach ($this->getPermutations($rest) as $subarray) {
                $result[] = array_merge([$element], $subarray);
            }
        }

        return $result;
    }

    /**
     * Advances a '0'/'1' selection mask to the next mask with the same number of ones.
     *
     * The right-most "01" pair is flipped to "10" and everything after it is
     * rewritten as all zeros followed by all ones.
     *
     * @param string $assoc Current mask.
     *
     * @return string|false The next mask, or false when no "01" pair is left.
     */
    protected function nextAssoc($assoc)
    {
        $pos = strrpos($assoc, '01');
        if ($pos === false) {
            return false;
        }

        $tail = substr($assoc, $pos + 2);

        return substr($assoc, 0, $pos) . '10'
            . str_repeat('0', substr_count($tail, '0'))
            . str_repeat('1', substr_count($tail, '1'));
    }

    /**
     * Lists every selection of $count elements from $data, keeping original keys.
     *
     * @param array $data  List of words (expected to be keyed 0..n-1).
     * @param int   $count Number of elements per selection.
     *
     * @return array[] Selections; empty when $data has fewer than $count elements.
     */
    protected function getAssoc(array $data, $count=2)
    {
        if (count($data) < $count) {
            // An empty list (rather than null) keeps callers' foreach loops warning-free.
            return [];
        }

        $assoc = str_repeat('0', count($data) - $count) . str_repeat('1', $count);
        $result = [];
        do {
            // array_filter() drops the '0' flags, leaving the positions to pick.
            $result[] = array_intersect_key($data, array_filter(str_split($assoc)));
            $assoc = $this->nextAssoc($assoc);
        } while ($assoc !== false);

        return $result;
    }

    /**
     * Lower-cases both texts, collapses whitespace and orders them by word count.
     *
     * @param string $data   Reference text.
     * @param string $search Search text.
     *
     * @return array ['data' => text with more (or equal) words, 'search' => the other one],
     *               each with words separated by single spaces.
     */
    protected function normalizeInput($data, $search)
    {
        $data = preg_split('/\s+/', trim($this->toLower($data)), -1, PREG_SPLIT_NO_EMPTY);
        $search = preg_split('/\s+/', trim($this->toLower($search)), -1, PREG_SPLIT_NO_EMPTY);

        if (count($data) < count($search)) {
            // The shorter word list always plays the role of the pattern.
            $temp = $data;
            $data = $search;
            $search = $temp;
        }

        return ['data' => join(' ', $data), 'search' => join(' ', $search)];
    }

    /**
     * Lower-cases text, including non-ASCII letters when that is safely possible.
     *
     * Multibyte folding is used only if mbstring is loaded and the text is valid
     * UTF-8; otherwise this falls back to byte-wise strtolower().
     *
     * @param string $text
     *
     * @return string
     */
    protected function toLower($text)
    {
        if (function_exists('mb_strtolower') && mb_check_encoding($text, 'UTF-8')) {
            return mb_strtolower($text, 'UTF-8');
        }

        return strtolower($text);
    }
}
// Demo driver: compare one reference name against a list of candidate names
// and print, for each candidate, either the matched word combination or
// "mismatched".
$data = 'Louise Bro';
$search = [
    'Louise Gulbæk Bro',
    'Niels Faurskov',
    'Niels Faurskov Andersen',
    'Nils Faurskov Andersen',
    'Nils Andersen',
    'niels faurskov',
    'niels Fæurskov',
    'niffddels Faurskævffre',
    'jens grunnet'
];

// Threshold of 2: the checker result must be 2 or lower to count as a match.
$checker = new Similarity($data, 2);

printf('Testing "%s"'.PHP_EOL.PHP_EOL, $data);

foreach ($search as $name) {
    // Truthy fourth argument: ask for the matched string rather than a boolean.
    $result = $checker->checkMatch($name, 'levenshtein', [], 1);
    if ($result) {
        printf('Name "%s" has %s'.PHP_EOL, $name, sprintf('matched with "%s"', $result));
    } else {
        printf('Name "%s" has %s'.PHP_EOL, $name, 'mismatched');
    }
}
Enable JavaScript to submit. You have JavaScript disabled. You will not be able to edit any code.
Output for git.master , git.master_jit , rfc.property-hooks Testing "Louise Bro"
Name "Louise Gulbæk Bro" has matched with "louise bro"
Name "Niels Faurskov" has mismatched
Name "Niels Faurskov Andersen" has mismatched
Name "Nils Faurskov Andersen" has mismatched
Name "Nils Andersen" has mismatched
Name "niels faurskov" has mismatched
Name "niels Fæurskov" has mismatched
Name "niffddels Faurskævffre" has mismatched
Name "jens grunnet" has mismatched
This tab shows results from various feature branches currently under review by the PHP developers. Contact me to have additional branches featured.
Active branches Archived branches Once feature-branches are merged or declined, they are no longer available. Their functionality (when merged) can be viewed from the main output page
preferences:dark mode live preview
37.68 ms | 402 KiB | 8 Q