<?php
$content = "<p>äöü</p><p>ß</p>"; // multibyte UTF-8 input
$doc = new DOMDocument('1.0', 'UTF-8');
// DOMDocument has no info about the charset/encoding, defaults to latin-1
$doc->loadHTML($content, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
var_dump($doc->saveHTML());
// Output: string(75) "<p>äöü<p>ß</p></p> "
// 1) element "reordering" issue (see: http://stackoverflow.com/questions/29493678/loadhtml-libxml-html-noimplied-on-an-html-fragment-generates-incorrect-tags)
// 2) encoding issue
- Output for 5.6.0 - 5.6.40, 7.0.0 - 7.0.33, 7.1.0 - 7.1.33, 7.2.0 - 7.2.34, 7.3.0 - 7.3.33, 7.4.0 - 7.4.33, 8.0.0 - 8.0.30, 8.1.0 - 8.1.33, 8.2.0 - 8.2.29, 8.3.0 - 8.3.25, 8.4.1 - 8.4.12
- string(75) "<p>äöü<p>ß</p></p>
"
preferences:
152.47 ms | 408 KiB | 5 Q