- substr: documentation (source)
- var_dump: documentation (source)
- array_intersect: documentation (source)
- array_values: documentation (source)
- explode: documentation (source)
<?php
//http://stackoverflow.com/questions/653157/a-better-similarity-ranking-algorithm-for-variable-length-strings
/**
 * String similarity based on shared adjacent-character pairs (bigrams).
 *
 * Each input is split into words on single spaces, every word is broken into
 * its adjacent character pairs, and the two resulting sets of distinct pairs
 * are compared with the Dice coefficient: 2 * |A ∩ B| / (|A| + |B|).
 * The comparison is case-sensitive and independent of word order.
 */
class StringMatch
{
    /**
     * Compute the similarity of two strings.
     *
     * @param string $a First string.
     * @param string $b Second string.
     * @return int|float Score between 0 (no pair in common) and 1 (identical
     *                   pair sets). The result is an int when the division is
     *                   exact (e.g. 0 or 1) and a float otherwise.
     */
    public static function compare($a, $b)
    {
        $pairsA = self::wordLetterPairs($a);
        $pairsB = self::wordLetterPairs($b);

        // Sum of both set sizes: the denominator of the Dice coefficient.
        $total = count($pairsA) + count($pairsB);

        if ($total === 0) {
            // Neither input contains a word of two or more characters, so
            // there is nothing to compare pair-wise and the formula would
            // divide by zero. Fall back to an exact comparison.
            return (string) $a === (string) $b ? 1 : 0;
        }

        $shared = count(array_intersect($pairsA, $pairsB));

        return 2 * $shared / $total;
    }

    /**
     * Split a single word into its adjacent character pairs.
     *
     * The word is split into UTF-8 characters so that multi-byte characters
     * are never cut in half; if the input is not valid UTF-8 it is treated
     * as a plain byte string instead.
     *
     * @param string $str Word to split.
     * @return array List of two-character strings in order of appearance
     *               (duplicates kept); empty when the word has fewer than
     *               two characters.
     */
    public static function letterPairs($str)
    {
        // The "u" modifier makes the empty pattern match between code points.
        $chars = preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            // preg_split() fails on invalid UTF-8: fall back to single bytes.
            $chars = str_split($str);
        }

        $pairCount = count($chars) - 1; // computed once, not on every iteration
        $pairs = array();
        for ($i = 0; $i < $pairCount; $i++) {
            $pairs[] = $chars[$i] . $chars[$i + 1];
        }

        return $pairs;
    }

    /**
     * Collect the distinct character pairs of every word in a string.
     *
     * @param string $str Text whose words are separated by single spaces.
     * @return array List of unique pairs, in order of first appearance.
     */
    public static function wordLetterPairs($str)
    {
        $allPairs = array();

        foreach (explode(" ", $str) as $word) {
            foreach (self::letterPairs($word) as $pair) {
                // Keying by the pair itself drops duplicates.
                $allPairs[$pair] = $pair;
            }
        }

        return array_values($allPairs);
    }
}
// Demo: both arguments contain the same three words in reversed order.
// Pairs are collected per word, so the ordering does not affect the score.
var_dump(
    StringMatch::compare(
        "GutenTag Frau Fickerin",
        "Fickerin Frau GutenTag"
    )
);