<?php
/**
 * Wraps every whole-word, case-insensitive occurrence of the given keywords
 * in an HTML element, leaving text inside <a> elements untouched.
 *
 * The input is treated as a UTF-8 HTML fragment. Matching uses Unicode-aware
 * word boundaries, so "bár" or "food" do not match the keywords "bar"/"foo".
 *
 * @param string                $html   UTF-8 HTML fragment to process.
 * @param array<string, string> $lookup Map of keyword => tag name used to wrap it
 *                                      (e.g. ['foo' => 'h3']). Keys are matched
 *                                      case-insensitively.
 *
 * @return string The rewritten fragment. The input is returned unchanged when
 *                $lookup is empty or the markup cannot be loaded.
 *
 * @throws DOMException If a tag name in $lookup is not a valid element name.
 */
function wrapKeywords(string $html, array $lookup): string
{
    if ($lookup === []) {
        return $html;
    }

    // Normalise the keys once so that lookups agree with the /i regex flag
    // even for mixed-case or non-ASCII keywords.
    $tagByWord = [];
    $needles = [];
    foreach ($lookup as $word => $tagName) {
        $word = (string) $word;
        $tagByWord[mb_strtolower($word, 'UTF-8')] = $tagName;
        $needles[] = preg_quote($word, '~');
    }
    $pattern = '~\b(' . implode('|', $needles) . ')\b~iu';

    // libxml reads undeclared HTML input as ISO-8859-1. Turning every
    // non-ASCII character into a numeric entity keeps the DOM text correct
    // without the deprecated utf8_decode() round trip.
    $encoded = mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');

    $previousErrorMode = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    // An explicit <div> root replaces the reliance on libxml's implied <p>
    // wrapper, so fragments with several top-level elements survive intact.
    $loaded = $dom->loadHTML(
        '<div>' . $encoded . '</div>',
        LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
    );
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    if ($loaded === false || $dom->documentElement === null) {
        return $html;
    }

    $xpath = new DOMXPath($dom);
    // ancestor::a (rather than a parent check) also protects text nested
    // deeper inside a link, e.g. <a><em>foo</em></a>.
    foreach ($xpath->query('//text()[not(ancestor::a)]') as $textNode) {
        $pieces = preg_split(
            $pattern,
            $textNode->nodeValue,
            -1,
            PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE
        );
        if ($pieces === false) {
            continue; // PCRE failure: leave this text node as it is.
        }

        $replacement = $dom->createDocumentFragment();
        $hasReplacement = false;
        foreach ($pieces as $piece) {
            $key = mb_strtolower($piece, 'UTF-8');
            if (isset($tagByWord[$key])) {
                // A text node child keeps the matched text literal; no
                // entity parsing is applied to it.
                $element = $dom->createElement($tagByWord[$key]);
                $element->appendChild($dom->createTextNode($piece));
                $replacement->appendChild($element);
                $hasReplacement = true;
            } else {
                $replacement->appendChild($dom->createTextNode($piece));
            }
        }

        if ($hasReplacement) {
            $textNode->parentNode->replaceChild($replacement, $textNode);
        }
    }

    // Serialise only the children of the wrapper <div>.
    $output = '';
    foreach ($dom->documentElement->childNodes as $child) {
        $output .= $dom->saveHTML($child);
    }

    return $output;
}

$html = <<<HTML
foo <a href='http://test.com'>fóo</a> lórem
bár ipsum bar food foo bark. <a>bar</a> not á test
HTML;
$lookup = [
    'foo' => 'h3',
    'bar' => 'h2',
];
// Kept at script level so libxml warnings stay suppressed for the rest of the run.
libxml_use_internal_errors(true);
echo wrapKeywords($html, $lookup);
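// NOTE: the two lines below were not valid PHP (apparently run statistics pasted along with the snippet); kept as comments.
// preferences:
// 77.06 ms | 402 KiB | 5 Q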