3v4l.org

run code in 200+ php & hhvm versions
Bugs & Features
<?php
/**
 * Generic DOM scraper using DOMDocument and cURL.
 *
 * Usage: setSite() -> setSource() -> getInnerHTML(). The class is declared
 * before the example code below so the script does not rely on class hoisting.
 */
class DOMScraper extends DOMDocument
{
    /** @var string|null URL of the page to scrape (set via setSite()). */
    public $site;

    /** @var string|false|null Raw response body fetched by setSource(). */
    private $source;

    /**
     * Initialise the underlying DOMDocument and relax its parsing strictness.
     */
    public function __construct()
    {
        // Without this call the inherited document is never initialised and
        // DOM methods fail with "Couldn't fetch DOMScraper".
        parent::__construct();

        // Real-world HTML is rarely valid; buffer libxml errors instead of
        // emitting a warning for every malformed tag.
        libxml_use_internal_errors(true);
        $this->preserveWhiteSpace = false;
        $this->strictErrorChecking = false;
    }

    /**
     * Set the URL to scrape.
     *
     * @param string $site URL handed to get_data() by setSource().
     * @return $this
     */
    public function setSite($site)
    {
        $this->site = $site;

        return $this;
    }

    /**
     * Fetch the configured site and keep its body for later queries.
     *
     * @return $this|string The scraper itself, or an error message when no
     *                      site has been set (kept for backward compatibility).
     */
    public function setSource()
    {
        if (empty($this->site)) {
            return 'Error: Missing $this->site, use setSite() first';
        }

        $this->source = $this->get_data($this->site);

        return $this;
    }

    /**
     * Collect the content of every <$tag> element, optionally filtered by an
     * attribute, concatenated in document order.
     *
     * @param string      $tag       Tag name to look for, e.g. 'div'.
     * @param string|null $class     Optional filter written as "attribute=value",
     *                               e.g. 'class=foo' or 'id=bar'. Only the first
     *                               '=' separates name from value.
     * @param bool        $nodeValue When truthy, return the trimmed text content
     *                               instead of the inner HTML.
     * @return string|null Concatenated result, null when nothing matched, or an
     *                     error message when no source has been fetched.
     */
    public function getInnerHTML($tag, $class = null, $nodeValue = false)
    {
        // Guard on the fetched source (not the site): an empty or failed fetch
        // must never reach loadHTML().
        if (empty($this->source)) {
            return 'Error: Missing $this->source, use setSource() first';
        }

        $loaded = $this->loadHTML($this->source);
        // Drop the buffered parser errors so they do not pile up across calls.
        libxml_clear_errors();

        $ret = null;
        if ($loaded === false) {
            return $ret;
        }

        $attrName = null;
        $attrValue = '';
        if ($class !== null) {
            // Limit of 2 keeps any '=' inside the attribute value intact.
            $parts = explode('=', $class, 2);
            $attrName = $parts[0];
            $attrValue = isset($parts[1]) ? $parts[1] : '';
        }

        foreach ($this->getElementsByTagName($tag) as $node) {
            if ($attrName !== null && $node->getAttribute($attrName) !== $attrValue) {
                continue;
            }

            $ret .= $nodeValue ? trim($node->nodeValue) : $this->innerHTML($node);
        }

        return $ret;
    }

    /**
     * Serialise the children of a node to an HTML string.
     *
     * @param DOMNode $dom Node whose children are serialised.
     * @return string Trimmed HTML of each child, concatenated.
     */
    public function innerHTML($dom)
    {
        $ret = "";

        foreach ($dom->childNodes as $child) {
            // Import each child into a scratch document so only that subtree
            // is serialised.
            $tmp = new DOMDocument();
            $tmp->appendChild($tmp->importNode($child, true));
            $ret .= trim($tmp->saveHTML());
        }

        return $ret;
    }

    /**
     * Download a URL, using cURL when available and file_get_contents() otherwise.
     *
     * @param string $url URL to fetch.
     * @return string|false Response body, or false when the request failed.
     */
    public function get_data($url)
    {
        if (!function_exists('curl_init')) {
            return file_get_contents($url);
        }

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, 5);
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        $data = curl_exec($ch);
        curl_close($ch);

        return $data;
    }
}

$scraper = new DOMScraper();

// Example: couldn't think of a site with an example table.
$scraper->setSite('http://www.farsnews.com/economy/agriculture')->setSource();

// Get only elements matching an attribute, e.g. id="some_table_id" or class="something".
echo '<div>' . $scraper->getInnerHTML('div', 'class=ctgnewsmainpane') . '</div>';
?>
Output for 5.3.18 - 5.4.45, 5.5.33, 5.6.18 - 5.6.19, 7.0.4, 7.0.20, 7.1.5 - 7.2.0
Warning: file_get_contents(): php_network_getaddresses: getaddrinfo failed: System error in /in/ekWpF on line 84 Warning: file_get_contents(http://www.farsnews.com/economy/agriculture): failed to open stream: php_network_getaddresses: getaddrinfo failed: System error in /in/ekWpF on line 84 Warning: DOMDocument::loadHTML(): Empty string supplied as input in /in/ekWpF on line 38 Warning: DOMDocument::getElementsByTagName(): Couldn't fetch DOMScraper in /in/ekWpF on line 39 Warning: Invalid argument supplied for foreach() in /in/ekWpF on line 41 <div></div>
Output for 5.5.24 - 5.5.32, 5.5.34 - 5.6.17, 5.6.20 - 7.0.3, 7.0.5 - 7.0.6, 7.1.0
Warning: file_get_contents(): php_network_getaddresses: getaddrinfo failed: Name or service not known in /in/ekWpF on line 84 Warning: file_get_contents(http://www.farsnews.com/economy/agriculture): failed to open stream: php_network_getaddresses: getaddrinfo failed: Name or service not known in /in/ekWpF on line 84 Warning: DOMDocument::loadHTML(): Empty string supplied as input in /in/ekWpF on line 38 Warning: DOMDocument::getElementsByTagName(): Couldn't fetch DOMScraper in /in/ekWpF on line 39 Warning: Invalid argument supplied for foreach() in /in/ekWpF on line 41 <div></div>