- trim: documentation ( source)
- array_keys: documentation ( source)
- json_encode: documentation ( source)
- libxml_use_internal_errors: documentation ( source)
<?php
/**
* @link http://stackoverflow.com/a/23543493/367456
*/
// Sample markup: three header cells tagged with the "name" class, each wrapping a link.
$html = <<<'HTML'
<body>...
<th class="name" align="left" scope="col">
<a class="foo" href="foo.html">foo</a>
</th>
...
<th class="name" align="left" scope="col">
<a class="bar" href="bar.html">bar</a>
</th>
...
<th class="name" align="left" scope="col">
<a class="ba" href="baz.html">baz</a>
</th></body>
HTML;
/**
 * Extracts the distinct, trimmed text content of every element whose class
 * attribute contains the token "name".
 *
 * @param string $html HTML markup to scan; fragments and malformed markup are tolerated.
 *
 * @return string[] Unique names in document order (the first occurrence wins).
 */
$names = function($html) {
    // Nothing to parse; this also keeps DOMDocument::loadHTML() from rejecting an empty string.
    if (trim((string) $html) === '') {
        return [];
    }

    $doc = new DOMDocument();

    // Collect parser complaints internally instead of emitting PHP warnings,
    // drop them once parsing is done, then restore the caller's setting.
    // NOTE(review): loadHTML() reportedly assumes ISO-8859-1 when the markup declares
    // no charset - confirm that non-ASCII input carries an encoding declaration.
    $last = libxml_use_internal_errors(true);
    $doc->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($last);

    $xp = new DOMXPath($doc);

    // $seen is only a membership index. The names themselves are kept in a list,
    // because PHP casts integer-like string keys ("123") to int, which would
    // otherwise change the type of the returned value.
    $seen = [];
    $result = [];
    foreach ($xp->query("//*[contains(concat(' ', normalize-space(@class), ' '), ' name ')]") as $node) {
        $name = trim($node->textContent);
        if (!isset($seen[$name])) {
            $seen[$name] = true;
            $result[] = $name;
        }
    }

    return $result;
};
// Emit the extracted names as a JSON object of the form {"names": [...]}.
$payload = ['names' => $names($html)];
echo json_encode($payload);