<?php
/**
 * Return the similarity coefficient of two delimited strings, based on the
 * Jaccard index: size of the intersection divided by size of the union.
 * http://en.wikipedia.org/wiki/Jaccard_index
 *
 * Each string is lower-cased with strtolower(), split on $separator, and every
 * token is trimmed. Empty tokens (produced by leading, trailing or repeated
 * separators) are discarded so they are never counted as a shared item.
 * Duplicate tokens count once.
 *
 * Example:
 *
 * $tags1 = "code, php, jaccard, test, items";
 * $tags2 = "test, code";
 * echo getSimilarityCoefficient( $tags1, $tags2 ); // 0.4 (2 shared / 5 distinct)
 *
 * $str1 = "similarity coefficient of two items";
 * $str2 = "two items are cool";
 * echo round( getSimilarityCoefficient( $str1, $str2, " " ), 2 ); // 0.29 (2 shared / 7 distinct)
 *
 * @param string $item1     First list of items, joined by $separator
 * @param string $item2     Second list of items, joined by $separator
 * @param string $separator Delimiter between items; must not be empty, because
 *                          explode() rejects an empty delimiter
 * @return float Value between 0.0 (nothing shared) and 1.0 (same items);
 *               1.0 when neither string contains any item
 * @author Henrique Hohmann
 * @author Alex Rock
 * @version 0.2
 */
function getSimilarityCoefficient( $item1, $item2, $separator = "," ) {
    // Turn a delimited string into a set of normalised, non-empty tokens.
    $toSet = static function ( $text ) use ( $separator ) {
        $tokens = array_map( 'trim', explode( $separator, strtolower( $text ) ) );
        // Compare against '' explicitly: a plain truthiness filter would also drop the token "0".
        $tokens = array_filter( $tokens, static function ( $token ) {
            return $token !== '';
        } );
        return array_unique( $tokens );
    };

    $set1 = $toSet( $item1 );
    $set2 = $toSet( $item2 );

    $unionSize = count( array_unique( array_merge( $set1, $set2 ) ) );
    if ( $unionSize === 0 ) {
        // Both inputs are empty: avoid 0/0 and treat two empty sets as identical.
        return 1.0;
    }

    $intersectionSize = count( array_intersect( $set2, $set1 ) );

    // int / int yields an int when the division is exact; always hand back a float.
    return (float) ( $intersectionSize / $unionSize );
}
// Demo: print the coefficient for a few sample pairs, one result per line.
// Each entry is [first string, second string, separator].
$samples = [
    ["code, test", "test, code", ","],
    ["test, code", "test, code", ","],
    ["two items that are cool", "two items are cool", " "],
    ["I've got two COOL items", "two items are cool", " "],
];

$results = [];
foreach ($samples as $sample) {
    list($left, $right, $delimiter) = $sample;
    $results[] = getSimilarityCoefficient($left, $right, $delimiter);
}

// Newlines go between results only, so nothing trails the last value.
echo implode("\n", $results);
// Output for 7.2.0 - 7.2.34, 7.3.0 - 7.3.33, 7.4.0 - 7.4.33, 8.0.0 - 8.0.30, 8.1.0 - 8.1.33, 8.2.0 - 8.2.29, 8.3.0 - 8.3.25, 8.4.1 - 8.4.12:
// 1
// 1
// 0.8
// 0.5
// preferences:
// 137.97 ms | 407 KiB | 5 Q