- mb_convert_encoding: documentation ( source)
<?php
$html_string = <<<STR
<p>paragraph
<a>link</a>
</p>
<div class="myclass">
<div>something</div>
<div style="mystyle">something</div>
<b><a href="#">link</a></b>
<b><a href="#" name="a name">link</a></b>
<b style="color:red">bold</b>
<img src="../path" alt="something" />
<img src="../path" alt="something" class="myclass" />
</div>
STR;
$dom = new DOMDocument; // init new DOMDocument
$dom->loadHTML(mb_convert_encoding($html_string, 'HTML-ENTITIES', 'UTF-8'), LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
$xpath = new DOMXPath($dom);
$nodes = $xpath->query('//@*');
foreach ($nodes as $node) {
if($node->nodeName != "src" && $node->nodeName != "href" && $node->nodeName != "alt") {
$node->parentNode->removeAttribute($node->nodeName);
}
}
echo $dom->saveHTML(); // output cleaned HTML