- libxml_use_internal_errors: documentation (source)
<?php
// Parses an HTML fragment with DOMDocument and prints the tag name of every
// top-level element of that fragment, one name per line.

// Nowdoc (quoted marker): the fragment is literal text, so no variable
// interpolation should ever be applied to it.
$txt = <<<'EOT'
<p>The breed was first...</p>
<p>Semencic credits his...</p>
<h1>Appearance</h1>
<p>The breed's distinctive...</p>
<p>It should be symmetrical...</p>
<figure id="attachment_6" style="width: 840px" class="wp-caption alignnone">
<img class="size-large wp-image-6" src="...jpg" alt="boerboel appearance" width="840" height="746">
<figcaption class="wp-caption-text">The dog appearance.</figcaption>
</figure>
<h1>Requirements</h1>
<p>Prospective owners....</p>
<p>These dogs....</p>
<h2>A Little Warning!</h2>
<p>If you are considering...</p>
<blockquote>
<p>According to...</p>
<p>Source: http://...</p>
</blockquote>
<p>Although more suitable...</p>
EOT;

$doc = new DOMDocument();

// Buffer libxml parse errors internally instead of letting them surface as
// PHP warnings. The previous setting is remembered so this script does not
// permanently change the process-wide libxml error mode.
$previousErrorMode = libxml_use_internal_errors(true);

// The fragment is wrapped in <body> so the document has a single root element.
// LIBXML_HTML_NOIMPLIED stops libxml from adding implied <html>/<body>
// elements; LIBXML_HTML_NODEFDTD stops it from adding a default doctype.
$loaded = $doc->loadHTML("<body>$txt</body>", LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);

// Drop the buffered errors (they would otherwise keep accumulating) and put
// the error mode back the way it was found.
libxml_clear_errors();
libxml_use_internal_errors($previousErrorMode);

if ($loaded === false) {
    throw new RuntimeException('Unable to parse the HTML fragment.');
}

$xpath = new DOMXPath($doc);

// "/*/*" selects the element children of the root element, i.e. the
// top-level elements of the original fragment (text nodes are not matched).
$nodes = $xpath->query('/*/*');
if ($nodes === false) {
    throw new RuntimeException('The XPath expression could not be evaluated.');
}

foreach ($nodes as $node) {
    echo $node->nodeName . PHP_EOL;
}