- strtr: documentation ( source)
- str_replace: documentation ( source)
<?php
/**
 * Parses an HTML snippet into a DOM document.
 *
 * The snippet is injected into the <body> of a minimal XHTML 1.0 Strict
 * wrapper document whose <head> declares a UTF-8 charset, and the result is
 * parsed with \DOMDocument::loadHTML().
 *
 * @param string $html
 *   The HTML fragment to parse. It may be malformed ("HTML soup").
 *
 * @return \DOMDocument
 *   The parsed document; the fragment's nodes are found inside its <body>.
 */
function load($html) {
  $document = <<<EOD
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head>
<body>!html</body>
</html>
EOD;
  // PHP's \DOMDocument::saveXML() encodes carriage returns as &#13; so
  // normalize all newlines to line feeds.
  $html = str_replace(["\r\n", "\r"], "\n", $html);
  // PHP's \DOMDocument serialization adds extra whitespace when the markup
  // of the wrapping document contains newlines, so ensure we remove all
  // newlines before injecting the actual HTML body to be processed. strtr()
  // does not rescan replaced text, so newlines inside $html are preserved.
  $document = strtr($document, ["\n" => '', '!html' => $html]);

  $dom = new \DOMDocument();
  // HTML soup makes libxml report parse errors. Buffer them inside libxml
  // instead of using the @ operator: @ hides every kind of diagnostic on the
  // call and still invokes user-defined error handlers for each warning.
  $previous = libxml_use_internal_errors(TRUE);
  try {
    $dom->loadHTML($document, LIBXML_NOBLANKS);
  }
  finally {
    // Put the caller's setting back. When it was disabled, disabling it again
    // also discards the errors buffered by this parse.
    libxml_use_internal_errors($previous);
  }
  return $dom;
}
/**
 * Serializes the contents of a document's <body> element to a string.
 *
 * @param \DOMDocument $document
 *   The document whose first <body> element is serialized.
 *
 * @return string
 *   The concatenated saveXML() output of every child node of <body>, or an
 *   empty string when the document has no <body> element.
 */
function _serialize(\DOMDocument $document) {
  $body = $document->getElementsByTagName('body')->item(0);
  // Guard clause: nothing to serialize without a <body>.
  if ($body === NULL) {
    return '';
  }

  // Collect each child's markup in document order, then join once.
  $fragments = [];
  foreach ($body->childNodes as $child) {
    $fragments[] = $document->saveXML($child);
  }
  return implode('', $fragments);
}
// Demo: round-trip a well-formed anchor and one truncated mid-attribute,
// printing the serialized results separated by a single line break.
$samples = [
  '<a class="sample" href="http://www.example.com/partial/path">foo</a>',
  '<a class="sample" href="http://www.example.com/partial/path',
];
foreach ($samples as $position => $markup) {
  if ($position > 0) {
    echo PHP_EOL;
  }
  echo _serialize(load($markup));
}