- var_dump: documentation (source)
- strpos: documentation (source)
- trim: documentation (source)
- preg_replace: documentation (source)
<?php
/**
 * Round-trips a piece of HTML through DOMDocument while forcing the parser
 * to read the input as UTF-8, then strips the doctype and the
 * <html><body> wrapper that saveHTML() adds around the content.
 *
 * @param string $content HTML fragment or document, expected to be UTF-8.
 *
 * @return string The serialised markup without the doctype/<html><body>
 *                wrapper. Surrounding whitespace is NOT trimmed here.
 *
 * @throws \RuntimeException If the markup cannot be loaded, serialised, or
 *                           post-processed.
 */
function roundTripHtmlFragment($content)
{
    $dom = new \DOMDocument('1.0', 'UTF-8');
    $dom->encoding = "UTF-8";
    $dom->strictErrorChecking = false;
    $dom->substituteEntities = false;
    $dom->formatOutput = false;

    // Little hack to force UTF-8: prepend an "<?xml encoding>" processing
    // instruction unless the content already carries one. When the content
    // has no <body> of its own, one is opened explicitly as well.
    if (strpos($content, '<?xml encoding') === false) {
        $hack = strpos($content, '<body') === false
            ? '<?xml encoding="UTF-8"><body>'
            : '<?xml encoding="UTF-8">';
        $loaded = $dom->loadHTML($hack . $content);
    } else {
        $loaded = $dom->loadHTML($content);
    }

    // The load result used to be ignored; fail loudly instead of exporting
    // whatever an unparsed document happens to serialise to.
    if ($loaded === false) {
        throw new \RuntimeException('DOMDocument::loadHTML() could not parse the supplied markup.');
    }

    // Remove the encoding hack again. Only the first processing instruction
    // is dropped, and the loop stops right after the removal so the child
    // list is never iterated after being modified.
    foreach ($dom->childNodes as $item) {
        if ($item->nodeType === XML_PI_NODE) {
            $dom->removeChild($item);
            break;
        }
    }

    $html = $dom->saveHTML();
    if ($html === false) {
        throw new \RuntimeException('DOMDocument::saveHTML() could not serialise the document.');
    }

    // Remove added body & doctype
    $exported = preg_replace(array(
        "/^\<\!DOCTYPE.*?<html><body>/si",
        "!</body></html>$!si"),
        "", $html);
    if ($exported === null) {
        // preg_replace() signals a PCRE failure with null.
        throw new \RuntimeException('preg_replace() failed while stripping the HTML wrapper.');
    }

    return $exported;
}

$content = "λ";
$exportedContent = roundTripHtmlFragment($content);
// The 8-byte length expected here matches the entity form "&lambda;"
// (presumably what saveHTML() emits for the character); a raw UTF-8 "λ"
// would be reported as string(2).
var_dump(trim($exportedContent)); // Should return string(8) "&lambda;"