- preg_match: documentation (source)
- str_replace: documentation (source)
- strtolower: documentation (source)
<?php
// Sample markup exercising the id generator: plain headings, headings that
// wrap an anchor, a heading with leading whitespace, and headings that start
// with an "N." ordinal.
$html = <<<HTML
<div>
<p>Paragraph one is okay </p>
<h2>This will work without problem</h2>
<p>Paragraph two is okay </p>
<h2><a href="#">This heading has anchor</a></h2>
<p>Paragraph one is okay </p>
<h2> This heading start with space</h2>
<p>Paragraph two is okay </p>
<h3>1. <a href="https://www.example1.com/">This wont work</a></h3>
<p>Paragraph one is okay </p>
<h3>2. <a href="https://www.example2.com/">Not working</a></h3>
<p>Paragraph two is okay </p>
<h3>3. Neither this one</h3>
<h3>But this works again</h3>
</div>
HTML;

// Load the markup as a fragment: the two libxml flags stop the parser from
// adding implied <html>/<body> wrappers and a default doctype, so saveHTML()
// emits only what was given.
$dom = new DOMDocument;
$dom->loadHTML($html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
$xpath = new DOMXPath($dom);

// Ids assigned so far in this run, used to keep every generated id unique.
$usedIds = [];

foreach ($xpath->query("//h2 | //h3") as $node) {
    // Skip leading whitespace and an optional "N." ordinal (\K discards that
    // prefix from the match), then take the first two or three
    // whitespace-separated words of the heading's text content. Headings
    // with fewer than two words do not match and are left without an id.
    if (!preg_match('~^\s*(?:\d+\.)?\s*\K\S+(?:\s+\S+){1,2}~', $node->nodeValue, $m)) {
        continue;
    }

    // The words may be separated by any whitespace run (tabs, newlines,
    // several spaces); collapse each run to a single hyphen so the id never
    // contains whitespace or doubled hyphens.
    $baseId = preg_replace('~\s+~', '-', strtolower($m[0]));

    // Two headings that share their leading words would otherwise get the
    // same id; append -2, -3, ... until the id is unused.
    $id = $baseId;
    for ($suffix = 2; isset($usedIds[$id]); $suffix++) {
        $id = "{$baseId}-{$suffix}";
    }
    $usedIds[$id] = true;

    $node->setAttribute('id', $id);
}

echo $dom->saveHTML();