3v4l.org

run code in 300+ PHP versions simultaneously
<?php

// Sample markup: phrases wrapped in "positive" spans, some of them nested.
$HTML = <<<HTML
some data from <span class="positive">blahblah test</span> was not <span class="positive">statistically <span class="positive">valid</span></span> <span class="positive">not statistically valid</span>
HTML;

// Phrases that must end up wrapped in a "negative" span instead.
$listOfNegatives = ['not statistically valid', 'blahblah test'];

/**
 * Extract all words that are outside of tags, keyed by their byte offset.
 *
 * A word is skipped when it directly follows "<" or "/" (a tag name) or when
 * the text after it runs into ">" without crossing another "<" or ">"
 * (an attribute name or value inside a tag).
 *
 * @param string $HTML Markup to scan.
 * @return array Map of byte offset => word, in document order.
 */
function extractWords($HTML)
{
    // A failed or empty match yields no words instead of reading a missing index.
    if (!preg_match_all("~\b(?<![</])\w+\b(?![^<>]+>)~", $HTML, $words, PREG_OFFSET_CAPTURE)) {
        return [];
    }

    $HTMLWords = [];
    foreach ($words[0] as $word) {
        // PREG_OFFSET_CAPTURE entries are [matched text, byte offset].
        $HTMLWords[$word[1]] = $word[0];
    }

    return $HTMLWords;
}

/**
 * Find every occurrence of each negative phrase in an ordered word list.
 *
 * Each occurrence is a list of [word, word offset, end offset of the word
 * that precedes it]; the preceding-end offset is 0 for the first word of the
 * document. Occurrences are keyed by a running counter, so keys can be sparse.
 *
 * @param array $HTMLWords       Map of byte offset => word (see extractWords()).
 * @param array $listOfNegatives Phrases whose words are separated by single spaces.
 * @return array Occurrences found, grouped phrase by phrase.
 */
function checkNegativesExistence($HTMLWords, $listOfNegatives)
{
    $counter = 0;
    $subStrings = [];

    foreach ($listOfNegatives as $string) {
        $stringWords = explode(" ", $string);
        $wordCount = count($stringWords);
        $wordIndex = 0;
        // Restart for every phrase; a value left over from the previous pass
        // would point at the end of the document.
        $previousWordOffset = 0;

        foreach ($HTMLWords as $offset => $HTMLWord) {
            if ($wordIndex > $wordCount - 1) {
                // The previous occurrence is complete: open a new slot.
                $wordIndex = 0;
                $counter++;
            }

            if ($wordIndex > 0 && $stringWords[$wordIndex] !== $HTMLWord) {
                // Broken partial match: discard it, then fall through so this
                // same word can still start a fresh occurrence.
                unset($subStrings[$counter]);
                $wordIndex = 0;
            }

            // Strict comparison: numeric-looking words such as "10" and "1e1"
            // must not be treated as equal.
            if ($stringWords[$wordIndex] === $HTMLWord) {
                $subStrings[$counter][] = [$HTMLWord, $offset, $previousWordOffset];
                $wordIndex++;
            }

            $previousWordOffset = $offset + strlen($HTMLWord);
        }

        if ($wordIndex > 0 && $wordIndex < $wordCount) {
            // The word list ended in the middle of a phrase: not an occurrence.
            unset($subStrings[$counter]);
        }
        $counter++;
    }

    return $subStrings;
}

/**
 * Replace every matched occurrence with a "negative" span.
 *
 * The replaced range runs from the end of the word preceding the occurrence
 * up to the end of its last word, so whitespace and tags in front of and
 * inside the occurrence are dropped. Closing tags that become orphaned are
 * left in place for the caller to clean up.
 *
 * @param array  $subStrings Occurrences as returned by checkNegativesExistence().
 * @param string $HTML       The markup the offsets refer to.
 * @return string Markup with the occurrences wrapped.
 */
function negativeHighlight($subStrings, $HTML)
{
    $replacements = [];
    foreach ($subStrings as $words) {
        if (empty($words)) {
            continue;
        }
        $words = array_values($words);
        $first = $words[0];
        $last = $words[count($words) - 1];
        $replacements[] = [
            'start' => (int) $first[2],
            'end' => $last[1] + strlen($last[0]),
            'text' => implode(" ", array_column($words, 0)),
        ];
    }

    // Work from the end of the document backwards so that every offset still
    // refers to untouched text; no shift bookkeeping is needed.
    usort($replacements, function ($a, $b) {
        if ($a['start'] !== $b['start']) {
            return $b['start'] - $a['start'];
        }
        // Same start: the longer occurrence goes first.
        return $b['end'] - $a['end'];
    });

    $limit = strlen($HTML);
    foreach ($replacements as $replacement) {
        // Skip ranges that are malformed or that overlap text already replaced.
        if ($replacement['end'] > $limit || $replacement['end'] < $replacement['start']) {
            continue;
        }
        $string = $replacement['text'];
        $HTML = substr_replace(
            $HTML,
            "<span class=\"negative\">{$string}</span>",
            $replacement['start'],
            $replacement['end'] - $replacement['start']
        );
        $limit = $replacement['start'];
    }

    return $HTML;
}

$newHTML = negativeHighlight(checkNegativesExistence(extractWords($HTML), $listOfNegatives), $HTML);

// Keep spans the pattern can match as a whole and drop every other lone tag.
// NOTE(review): the pattern is a double-quoted string, so "\3" is read by PHP
// as an octal escape (byte 0x03) and never reaches PCRE as a back-reference.
// Left untouched because changing it alters which spans survive - confirm intent.
echo preg_replace_callback(
    "~(<span[^>]+>([^<]*+<(?!/)(?:([a-zA-Z0-9]++)[^>]*>[^<]*</\3>|(?2)))*[^<]*</span>|(?'single'</[^<>]+>|<[^<>]+>))~",
    function ($match) {
        // 'single' is only present when the lone-tag alternative matched.
        return isset($match['single']) ? '' : $match[1];
    },
    $newHTML
);
Output for 5.5.0 - 5.5.37, 5.6.0 - 5.6.28, 7.0.0 - 7.0.20, 7.1.0 - 7.1.25, 7.2.0 - 7.2.33, 7.3.0 - 7.3.33, 7.4.0 - 7.4.33, 8.0.0 - 8.0.30, 8.1.0 - 8.1.33, 8.2.0 - 8.2.29, 8.3.0 - 8.3.27, 8.4.1 - 8.4.14
some data from<span class="negative">blahblah test</span> was<span class="negative">not statistically valid</span><span class="negative">not statistically valid</span>

preferences:
549.67 ms | 408 KiB | 5 Q