3v4l.org

run code in 200+ php & hhvm versions
Bugs & Features
<?php
/**
 * Find, for every word of $needle, the most similar word of $str.
 *
 * Both inputs are split into words on non-word characters (mb_split('\W')).
 * For each needle word the haystack words with the smallest levenshtein()
 * distance are shortlisted; ties are broken by the levenshtein() distance
 * between the metaphone() keys, the first best-sounding candidate winning.
 *
 * Side effects: calls setlocale(LC_CTYPE, 'en_GB.UTF8') and leaves the
 * mbstring regex encoding set to the detected encoding of $str.
 *
 * @param string $needle            Words to look for.
 * @param string $str               Haystack text the matches are taken from.
 * @param bool   $keep_needle_order When true the matches follow the order of
 *                                  the needle words. Otherwise they are keyed
 *                                  and sorted by their position in the
 *                                  haystack, so several needle words matching
 *                                  the same haystack word yield one entry.
 *
 * @return array|false List of matched haystack words (in their original
 *                     form), an empty array when the haystack contains no
 *                     words, or false when an argument is not a string.
 */
function find_similar3($needle, $str, $keep_needle_order = false)
{
    if (!is_string($needle) || !is_string($str)) {
        return false;
    }

    // Get encodings and words from haystack and needle.
    setlocale(LC_CTYPE, 'en_GB.UTF8');
    $encoding_s = mb_detect_encoding($str);
    $encoding_n = mb_detect_encoding($needle);
    mb_regex_encoding($encoding_n);
    $pneed = array_filter(mb_split('\W', $needle));
    mb_regex_encoding($encoding_s);
    $pstr = array_filter(mb_split('\W', $str));

    // Without haystack words there is nothing to rank; bail out instead of
    // calling min() on an empty set.
    if (empty($pstr)) {
        return array();
    }

    $mixed_encodings = ($encoding_n !== $encoding_s);

    // Comparison form of every haystack word => array(position, original word).
    // This does not depend on the needle, so it is built only once. A word
    // occurring several times keeps the position of its last occurrence.
    $candidates = array();
    foreach ($pstr as $key => $w) {
        $tmp_w = $w;
        if ($mixed_encodings && $encoding_s !== 'ASCII') {
            // NOTE(review): mb_convert_encoding() takes (string, to_encoding,
            // from_encoding), so this treats the word as ASCII and converts it
            // *to* its detected encoding - probably the reverse of the intended
            // transliteration. Left unchanged because swapping the arguments
            // alters which words match; confirm before changing.
            $tmp_w = mb_convert_encoding($w, $encoding_s, 'ASCII');
        }
        $candidates[$tmp_w] = array($key, $w);
    }

    $valid = array();
    foreach ($pneed as $k => $word) { // loop through the needle's words
        $tmp_word = $word;
        if ($mixed_encodings && $encoding_n !== 'ASCII') {
            // NOTE(review): same argument-order caveat as for the haystack.
            $tmp_word = mb_convert_encoding($word, $encoding_n, 'ASCII');
        }

        // Collect levenshtein distances. Purely numeric words become integer
        // array keys, hence the casts back to string.
        $distances = array();
        foreach ($candidates as $tmp_w => $origin) {
            $distances[$tmp_w] = levenshtein((string) $tmp_w, $tmp_word);
        }

        // The nominees are all candidates sharing the smallest distance.
        $nominees = array_keys($distances, min($distances));

        // Compare how the nominees sound to get more precision. The best score
        // is tracked so the closest-sounding nominee wins, not simply the last.
        $needle_sound = metaphone($tmp_word);
        $answer = null;
        $best = null;
        foreach ($nominees as $nominee) {
            $score = levenshtein(metaphone((string) $nominee), $needle_sound);
            if ($best === null || $score < $best) {
                $best = $score;
                $answer = $nominee;
            }
        }

        // Store the original (unconverted) form of the winner.
        $origin = $candidates[$answer];
        if ($keep_needle_order) {
            $valid[$k] = $origin[1];
        } else {
            $valid[$origin[0]] = $origin[1];
        }
    }

    if (!$keep_needle_order) {
        ksort($valid); // haystack order
    }

    // Return only the values: the closest haystack words found for the needle.
    return array_values($valid);
}

var_dump(find_similar3('i knew you love me', 'finally i know you loved me and all my pets'));
var_dump(find_similar3('I you love', 'This is a demo text and I love you about this'));
// Demo: needle containing misspelled words.
var_dump(
    find_similar3(
        'a unik idia',
        'I have a unique idea. Do you need?'
    )
);

// Demo: ASCII spellings in the needle against umlauts and sharp s in the haystack.
var_dump(
    find_similar3(
        "Goebel, Weiss, Goethe, Goethe und Goetz",
        'Weiß, Goldmann, Göbel, Weiss, Göthe, Goethe und Götz'
    )
);

// Demo: needle and haystack both written with heavily accented characters;
// the haystack starts with the needle text and continues beyond it.
var_dump(
    find_similar3(
        'Ḽơᶉëᶆ ȋṕšᶙṁ ḍỡḽǭᵳ ʂǐť ӓṁệẗ, ĉṓɲṩḙċťᶒțûɾ ấɖḯƥĭṩčįɳġ ḝłįʈ',
        'Ḽơᶉëᶆ ȋṕšᶙṁ ḍỡḽǭᵳ ʂǐť ӓṁệẗ, ĉṓɲṩḙċťᶒțûɾ ấɖḯƥĭṩčįɳġ ḝłįʈ, șếᶑ ᶁⱺ ẽḭŭŝḿꝋď ṫĕᶆᶈṓɍ ỉñḉīḑȋᵭṵńť ṷŧ ḹẩḇőꝛế éȶ đꝍꞎôꝛȇ ᵯáꞡᶇā ąⱡîɋṹẵ.'
    )
);
based on l3Bot
Output for 4.3.4 - 5.0.3, 5.5.0 - 5.6.38, hhvm-3.10.1 - 3.22.0, 7.0.0 - 7.3.0rc3
array(5) { [0]=> string(1) "i" [1]=> string(4) "know" [2]=> string(3) "you" [3]=> string(5) "loved" [4]=> string(2) "me" } array(3) { [0]=> string(1) "I" [1]=> string(4) "love" [2]=> string(3) "you" } array(3) { [0]=> string(1) "a" [1]=> string(6) "unique" [2]=> string(4) "idea" } array(3) { [0]=> string(5) "Weiss" [1]=> string(6) "Goethe" [2]=> string(3) "und" } array(8) { [0]=> string(13) "Ḽơᶉëᶆ" [1]=> string(13) "ȋṕšᶙṁ" [2]=> string(14) "ḍỡḽǭᵳ" [3]=> string(6) "ʂǐť" [4]=> string(11) "ӓṁệẗ" [5]=> string(26) "ĉṓɲṩḙċťᶒțûɾ" [6]=> string(23) "ấɖḯƥĭṩčįɳġ" [7]=> string(9) "ḝłįʈ" }
Output for 5.0.4 - 5.4.45
array(5) { [0]=> string(1) "i" [1]=> string(4) "know" [2]=> string(3) "you" [3]=> string(5) "loved" [4]=> string(2) "me" } array(3) { [0]=> string(1) "I" [1]=> string(4) "love" [2]=> string(3) "you" } array(3) { [0]=> string(1) "a" [1]=> string(6) "unique" [2]=> string(4) "idea" } array(3) { [0]=> string(5) "Weiss" [1]=> string(6) "Goethe" [2]=> string(3) "und" } array(11) { [0]=> string(5) "Ḽơ" [1]=> string(2) "ë" [2]=> string(7) "ȋṕš" [3]=> string(3) "ṁ" [4]=> string(11) "ḍỡḽǭ" [5]=> string(6) "ʂǐť" [6]=> string(11) "ӓṁệẗ" [7]=> string(17) "ĉṓɲṩḙċť" [8]=> string(6) "țûɾ" [9]=> string(23) "ấɖḯƥĭṩčįɳġ" [10]=> string(9) "ḝłįʈ" }
Output for 4.3.2 - 4.3.3
Fatal error: Call to undefined function: mb_regex_encoding() in /in/5D8j2 on line 15
Process exited with code 255.
Output for 4.3.0 - 4.3.1
Fatal error: Call to undefined function: mb_regex_encoding() in /in/5D8j2 on line 15