3v4l.org

run code in 300+ PHP versions simultaneously
<?php

/**
 * Performs a word comparison between $s1 and $s2.
 *
 * Every word is counted only once, so 'hello hello' is 100% similar to 'hello'. Both strings are
 * converted to lowercase, with their diacritical marks (´, ¨, ^, `, ~, etc.) stripped out. This
 * behaviour is very much like a 'keywords search'.
 *
 * Skip words are matched case-insensitively, so the skip word 'the' also removes 'The'.
 *
 * @param string        $s1        First string to compare
 * @param string        $s2        Second string to compare
 * @param null|string[] $skipWords Value-only array of words that shouldn't be taken into account
 *                                 when comparing $s1 and $s2
 * @param null|string[] $skipText  Value-only array of text that shouldn't be taken into account
 *                                 when comparing $s1 and $s2. Please note that this text is
 *                                 stripped from the start, middle and end parts of words.
 * @return float|int Similarity between $s1 and $s2, where 1 represents 100% and 0 represents 0%.
 *                   Computed as (number of distinct words present in both strings) divided by
 *                   (the larger of the two distinct-word counts).
 */
function compareWords(
    $s1,
    $s2,
    $skipWords = ['en', 'de', 'del', 'los', 'la', 'in', 'from', 'the'],
    $skipText = ['.', ',', ';', ':']
) {
    if ($s1 === null || $s2 === null) {
        return 0;
    }

    // Strip the ignorable text fragments wherever they occur, even inside words.
    if ($skipText !== null && count($skipText) > 0) {
        $s1 = str_replace($skipText, '', $s1);
        $s2 = str_replace($skipText, '', $s2);
    }

    if ($skipWords !== null && count($skipWords) > 0) {
        $quotedWords = array_map(function ($item) {
            return preg_quote($item, '/');
        }, $skipWords);

        // A skip word only matches as a whole word: it must be delimited by whitespace or by the
        // start/end of the string on both sides.
        //
        // The "i" modifier is required because this step runs BEFORE the text is lowercased:
        // without it a capitalised skip word (e.g. 'The') survives, is lowercased afterwards and
        // then counts as a regular word, lowering the score.
        // NOTE: the pattern has no "u" modifier, so only ASCII letters are reliably case-folded.
        $skipWordsRegex = '/(?:(?<=\s)|^)(?:' . implode('|', $quotedWords) . ')(?:(?=\s)|$)/i';

        $s1 = preg_replace($skipWordsRegex, '', $s1);
        $s2 = preg_replace($skipWordsRegex, '', $s2);
    }

    // Normalise: strip diacritics, collapse whitespace runs to a single space, lowercase, trim.
    $s1 = trim(UString::lowerCase(preg_replace('/\s+/', ' ', UString::removeDiacritics($s1))));
    $s2 = trim(UString::lowerCase(preg_replace('/\s+/', ' ', UString::removeDiacritics($s2))));

    if ($s1 === '' || $s2 === '') {
        return 0;
    }

    $s1Words = array_unique(explode(' ', $s1));
    $s2Words = array_unique(explode(' ', $s2));

    $maxWords = max(count($s1Words), count($s2Words));

    // Both lists hold distinct words, so flipping them (word => index) and intersecting by key
    // yields exactly the words that appear in both strings.
    $matches = count(array_intersect_key(array_flip($s1Words), array_flip($s2Words)));

    return $matches / $maxWords;
}

var_dump(compareWords('The tomato sauce.', 'tomato sauce'));

preferences:
57.04 ms | 402 KiB | 5 Q