<?php
// Demo script: groups stories whose keyword lists are similar.
//
// Input ($array): a list of sub-arrays, each holding stories of the form
//   ['header' => string, 'keywords' => list of strings].
// Output variables:
//   $new    - all stories flattened into a single 0-indexed list.
//   $res    - merged groups, keyed by the index (in $new) of the first story of
//             the group: ['header' => list of headers, 'keywords' => unique keywords].
//   $merged - indexes (in $new) of stories that were absorbed into an earlier story.
// Stories that match nothing do not appear in $res.
$array = [
    // Sub-array 1
    [
        // Story 'Monkey 1' - shares keywords with 'Monkey 2' and 'Monkey 3' and is merged with them.
        [
            'header' => "This is a story about a monkey.",
            'keywords' => ["Trees", "Monkey", "Flying", "Drink", "Vacation", "Coconut", "Big", "Bonobo", "Climbing"],
        ],
        // Story 'Cat 1' - shares keywords with 'Cat 2' and is merged with it.
        [
            'header' => "Here's a catarific story about a cat",
            'keywords' => ["meauw", "raaaw", "kitty", "growup", "Fun", "claws", "fish", "salmon"],
        ],
    ],
    // Sub-array 2
    [
        // Story 'Monkey 2' - shares keywords with 'Monkey 1' and 'Monkey 3' and is merged with them.
        [
            'header' => "This is another, but different story, about a monkey.",
            'keywords' => ["Monkey", "Big", "Trees", "Bonobo", "Fun", "Dance", "Cow", "Coconuts"],
        ],
        // Story 'Cat 2' - shares keywords with 'Cat 1' and is merged with it.
        [
            'header' => "Here's a different story about a cat",
            'keywords' => ["meauwe", "ball", "cat", "kitten", "claws", "sleep", "fish", "purr"],
        ],
    ],
    // Sub-array 3
    [
        // Story 'Monkey 3' - shares keywords with 'Monkey 1' and 'Monkey 2' and is merged with them.
        [
            'header' => "This is a third story about a monkey.",
            'keywords' => ["Jungle", "tree", "monkey", "Bonobo", "Fun", "Dance", "climbing", "Coconut", "pretty"],
        ],
        // Story 'Fireman 1' - has no similar story and is not merged.
        [
            'header' => "This is a story about a fireman",
            'keywords' => ["fire", "explosion", "burning", "rescue", "happy", "help", "water", "car"],
        ],
    ],
];

// Flatten one level so every story gets a unique, sequential integer index.
$new = [];
foreach ($array as $subarr) {
    $new = array_merge($new, $subarr);
}

// Minimum number of keywords of a story that must have a near-match in another
// story before the two are considered the same topic.
$threshold = 3;
// Maximum Levenshtein distance at which two keywords count as "the same word"
// (levenshtein() is case-sensitive, so "Monkey"/"monkey" has distance 1).
$lev_point_value = 1;

$merged = [];
$res = [];

foreach ($new as $key => $story) {
    // A story that was already absorbed into an earlier group must not start
    // a second group of its own.
    if (in_array($key, $merged, true)) {
        continue;
    }

    foreach ($new as $key2 => $story2) {
        // Only compare with later stories that have not been merged yet.
        if ($key2 <= $key || in_array($key2, $merged, true)) {
            continue;
        }

        // The similarity score belongs to this pair only, so it restarts for
        // every candidate instead of accumulating across candidates.
        $word_count = 0;
        foreach ($story['keywords'] as $item1) {
            foreach ($story2['keywords'] as $item2) {
                if (levenshtein($item1, $item2) <= $lev_point_value) {
                    $word_count++;
                    // Count each keyword of $story at most once, even if it
                    // resembles several keywords of $story2.
                    break;
                }
            }
            if ($word_count >= $threshold) {
                // Enough evidence; no need to scan the remaining keywords.
                break;
            }
        }

        if ($word_count < $threshold) {
            continue;
        }

        // First merge for this story: seed the group with the story itself.
        if (!isset($res[$key])) {
            $res[$key] = ['header' => [$story['header']], 'keywords' => $story['keywords']];
        }

        // array_unique() preserves keys, so re-index to keep plain lists.
        $res[$key]['header'][] = $story2['header'];
        $res[$key]['header'] = array_values(array_unique($res[$key]['header']));
        $res[$key]['keywords'] = array_values(array_unique(
            array_merge($res[$key]['keywords'], $story2['keywords'])
        ));

        // Remember the absorbed story so it is merged exactly once.
        $merged[] = $key2;
    }
}

// NOTE(review): this dumps the flattened input only; the merge result is in
// $res and is never printed. Left unchanged to keep the script's output as it
// was - confirm whether var_dump($res) was intended.
var_dump($new);