<?php
/**
 * Helpers for round-tripping an HTML snippet through \DOMDocument.
 */
class Html {

  /**
   * Parses an HTML snippet into a \DOMDocument.
   *
   * The snippet is injected into the <body> of a minimal XHTML 1.0 Strict
   * wrapper document whose <head> declares a UTF-8 content type, and the
   * result is handed to \DOMDocument::loadHTML().
   *
   * Parser diagnostics caused by malformed markup are not reported: they are
   * routed to libxml's internal error buffer instead of PHP's error handling.
   * If the caller had not enabled internal error collection, the buffer is
   * cleared and the previous setting restored before returning; if the caller
   * had enabled it, the setting and the collected errors are left in place.
   *
   * @param string $html
   *   The HTML snippet to place inside the wrapper document's <body>.
   *
   * @return \DOMDocument
   *   The document produced from the wrapper plus the snippet.
   */
  public static function load($html) {
    $document = <<<EOD
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head>
<body>!html</body>
</html>
EOD;
    // PHP's \DOMDocument serialization adds extra whitespace when the markup
    // of the wrapping document contains newlines, so ensure we remove all
    // newlines before injecting the actual HTML body to be processed. strtr()
    // does not rescan replaced text, so newlines inside $html are preserved.
    $document = strtr($document, ["\n" => '', '!html' => $html]);

    $dom = new \DOMDocument();

    // Collect tag-soup diagnostics inside libxml rather than using the "@"
    // operator, which would still trigger custom error handlers.
    $previous = libxml_use_internal_errors(TRUE);
    try {
      $dom->loadHTML($document);
    }
    finally {
      if (!$previous) {
        // We switched collection on ourselves: drop what we gathered and put
        // the setting back so the caller's error handling is unaffected.
        libxml_clear_errors();
        libxml_use_internal_errors(FALSE);
      }
    }

    return $dom;
  }

  /**
   * Converts the body of a \DOMDocument back to a markup snippet.
   *
   * Every child node of the first <body> element is serialized with
   * \DOMDocument::saveXML() and the results are concatenated in document
   * order, so the snippet follows XML serialization rules.
   *
   * @param \DOMDocument $document
   *   A \DOMDocument object to serialize, only the nodes below the first
   *   <body> element will be converted.
   *
   * @return string
   *   The serialized children of the first <body> element, or an empty string
   *   when the document has no <body> element.
   */
  public static function serialize(\DOMDocument $document) {
    $body_node = $document->getElementsByTagName('body')->item(0);
    $html = '';
    if ($body_node !== NULL) {
      foreach ($body_node->childNodes as $node) {
        $html .= $document->saveXML($node);
      }
    }
    return $html;
  }

}
// Demo: parse a small table snippet and dump several serializations of the
// resulting document so they can be compared side by side.
$source = <<<EOT
<table>
<tr>
<td>L</td>
<td>R</td>
</tr>
</table>
EOT;

$document = Html::load($source);

// The wrapper's <body> element and its first child node.
$body = $document->getElementsByTagName('body')->item(0);
$table = $body->childNodes[0];

// Each entry starts with a newline so the dumped markup begins on its own
// line in the var_export() output.
$out = [
  // Whole document, serialized as HTML.
  'document' => "\n" . $document->saveHTML(),
  // The <body> element including its own tags, serialized as XML.
  'body' => "\n" . $document->saveXML($body),
  // Only the first child of <body>, serialized as XML.
  'table' => "\n" . $document->saveXML($table),
  // All children of <body> via the helper.
  'serialize' => "\n" . Html::serialize($document),
];

var_export($out);