- libxml_use_internal_errors: documentation (source)
<?php
/*
 * Demo: append a suffix to the human-readable parts of an HTML document
 * (text nodes, <a title="..."> and <img alt="...">) while leaving alone
 * everything that sits inside an element carrying one of the excluded
 * classes, as well as the contents of <script> and <style>.
 */

// Sample markup used as input for the demo.
$html=<<<doc
<html>
<head>
<title>My page</title>
<script>
//<![CDATA[
$(function(){
$('.ajax').trigger('change');
})
//]]></script>
<style>ul li ol li{color;red;}</style>
</head>
<body>
<div>
<ul>
<li>Languages
<ol>
<li>PHP</li>
<li class='noparse'>C++</li>
</ol>
</li>
</ul>
<span>inline text</span>
<p class="generic">some long text data</p>
<a href="https://stackoverflow.com" title="resource hub">Stack Overflow</a>
<a href="https://google.nl" title="Google" class="inline-a noparse otherclass">Google</a>
<img class="img-responsive parse round red" src="" alt="round image" />
<img class="img-responsive noparse round red" src="" alt="square image" />
</div>
</body>
</html>
doc;

// Collect libxml parse problems internally instead of raising PHP warnings.
// The previous mode is remembered so it can be put back once parsing is done.
$previousLibxmlMode = libxml_use_internal_errors(true);

$dom = new DOMDocument();
$dom->preserveWhiteSpace = false;
// No parser options are passed: LIBXML_SCHEMA_CREATE is an XSD
// schema-validation flag and is not a meaningful option for loadHTML().
$loaded = $dom->loadHTML($html);

// Drop the buffered parse errors and restore the caller's error mode.
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlMode);

if ($loaded === false) {
    throw new RuntimeException('Unable to parse the HTML input.');
}

$xpath = new DOMXPath($dom);

// Comma-separated, CSS-style list of classes. Any element carrying one of
// them is skipped together with everything it contains.
$exclude='.generic,.noparse';
$suffix = ' powered by sometext';

// XPath tests describing an ancestor that makes a node off-limits.
// <script> and <style> hold code, not prose, so they are always skipped.
$skipTests = array('self::script', 'self::style');
foreach (explode(',', $exclude) as $selector) {
    $class = ltrim(trim($selector), '.');
    // Only plain class tokens are embedded into the XPath string literal;
    // anything else (empty entries, quotes, ...) is ignored.
    if (preg_match('/^[A-Za-z0-9_-]+$/', $class) === 1) {
        // Whole-token match, so "parse" is not mistaken for "noparse".
        $skipTests[] = "contains(concat(' ', normalize-space(@class), ' '), ' $class ')";
    }
}

// The ancestor axis of a text node starts at its parent element and the
// ancestor axis of an attribute node starts at its owner element, so the
// same predicate works for both kinds of node.
$notSkipped = 'not(ancestor::*[' . implode(' or ', $skipTests) . '])';

$nodes = $xpath->query(
    // [normalize-space()] drops whitespace-only text nodes between tags.
    "//text()[normalize-space()][$notSkipped]"
    . "|//a/@title[$notSkipped]"
    . "|//img/@alt[$notSkipped]"
);

if ($nodes === false) {
    throw new RuntimeException('Invalid XPath expression.');
}

foreach ($nodes as $node) {
    $node->textContent = $node->textContent . $suffix;
}

echo $dom->saveHTML();
?>