<?php
$html = 'This should be extracted <p>I do not want this</p> This should also be extracted <a>This may appear after other tags and I do not want this</a>';
$doc = new DOMDocument();
$doc->loadHTML("<div>$html</div>", LIBXML_HTML_NODEFDTD | LIBXML_HTML_NOIMPLIED);
$xpath = new DOMXPath($doc);
$texts = array();
foreach ($xpath->query('/div/text()') as $text) {
$texts[] = $text->nodeValue;
}
print_r($texts);
Array
(
[0] => This should be extracted
[1] => This should also be extracted
)
Output for 8.3.5
Warning: PHP Startup: Unable to load dynamic library 'sodium.so' (tried: /usr/lib/php/8.3.5/modules/sodium.so (libsodium.so.23: cannot open shared object file: No such file or directory), /usr/lib/php/8.3.5/modules/sodium.so.so (/usr/lib/php/8.3.5/modules/sodium.so.so: cannot open shared object file: No such file or directory)) in Unknown on line 0
Array
(
[0] => This should be extracted
[1] => This should also be extracted
)