<?php
/**
 * Scrapes Yelp search results and writes one CSV row per business.
 *
 * Usage: php <this-script> [search-url]
 *
 * The optional search URL is paged by appending a "start" query parameter
 * (0, 10, 20, ... 1000). When no URL is given, a built-in "tasty burger in
 * San Francisco" search is used. Output goes to "<unix-timestamp>_data.csv"
 * in the current working directory.
 *
 * For every result link on a search page the business detail page is fetched
 * as well, and address / phone / website are read from the list items inside
 * the div with class "mapbox-text". The address split is purely positional
 * (word and comma offsets); NOTE(review): it presumably matches the markup
 * Yelp served when this was written - confirm against a current page.
 */

/**
 * Returns $list[$index], or an empty string when that offset does not exist.
 *
 * @param array $list
 * @param int   $index
 * @return string
 */
function yelp_at(array $list, $index)
{
    return isset($list[$index]) ? $list[$index] : '';
}

/**
 * Returns the text of the node at $index, or an empty string when the list
 * has no node at that position (DOMNodeList::item() returns null then).
 *
 * @param DOMNodeList $nodes
 * @param int         $index
 * @return string
 */
function yelp_node_text(DOMNodeList $nodes, $index)
{
    $node = $nodes->item($index);

    return $node === null ? '' : (string) $node->nodeValue;
}

/**
 * Downloads $url and parses it as HTML.
 *
 * @param string $url
 * @return DOMXPath|null XPath helper over the document, or null when the
 *                       download failed or returned nothing parseable.
 */
function yelp_fetch_xpath($url)
{
    $html = file_get_contents($url);
    if ($html === false || trim($html) === '') {
        return null;
    }

    $doc = new DOMDocument();
    $loaded = $doc->loadHTML($html);
    libxml_clear_errors(); // discard the parse errors real-world HTML produces

    return $loaded ? new DOMXPath($doc) : null;
}

/**
 * Splits the raw address text into its CSV columns.
 *
 * Carriage returns are dropped and every whitespace run becomes one space.
 * The text before the first comma supplies street (words 0-2) and city
 * (words 3-4); the text between the first and second comma supplies state
 * and zip (words 1 and 2) followed by an optional "b/t ..." cross-street
 * part. Missing pieces come back as empty strings instead of raising
 * undefined-offset warnings.
 *
 * @param string $rawAddress
 * @return array [street, city, state, zip, crossStreets]
 */
function yelp_split_address($rawAddress)
{
    $flat = (string) preg_replace('/[\s]+/', ' ', (string) preg_replace('/[\r]+/', '', $rawAddress));

    $segments = explode(',', $flat);
    $words = explode(' ', $segments[0]);
    $street = yelp_at($words, 0) . ' ' . yelp_at($words, 1) . ' ' . yelp_at($words, 2);
    $city = yelp_at($words, 3) . ' ' . yelp_at($words, 4);

    $tail = explode('b/t', yelp_at($segments, 1));
    $stateZip = explode(' ', $tail[0]);
    $state = yelp_at($stateZip, 1);
    $zip = yelp_at($stateZip, 2);
    // Only emit the "b/t" prefix when the address really has cross streets.
    $crossStreets = isset($tail[1]) ? 'b/t' . $tail[1] : '';

    return array($street, $city, $state, $zip, $crossStreets);
}

/**
 * Builds the CSV row for one search-result link, fetching its detail page.
 *
 * When the detail page cannot be fetched or lacks the expected list items,
 * the affected columns are left empty rather than aborting the whole run.
 *
 * @param DOMElement $link The <a> element of a search result.
 * @return array Nine column values in header order.
 */
function yelp_business_row(DOMElement $link)
{
    $name = preg_replace('/[\s]+/', ' ', preg_replace('/[\n\r]+/', '', $link->nodeValue));

    // Use one normalised URL both for the CSV column and for the download
    // (the two used to be built differently, one with a double slash).
    $href = $link->getAttribute('href');
    $yelpUrl = preg_match('#^https?://#i', $href)
        ? $href
        : 'http://www.yelp.com/' . ltrim($href, '/');

    $rawAddress = '';
    $rawPhone = '';
    $rawWebsite = '';

    $detail = yelp_fetch_xpath($yelpUrl);
    if ($detail !== null) {
        $items = $detail->query('/html/body//div[@class="mapbox-text"]/ul/li');
        if ($items !== false) {
            $rawAddress = yelp_node_text($items, 0);
            $rawPhone = yelp_node_text($items, 2);
            // The website entry is item 3 when it carries the
            // "Business website" label, otherwise item 4 is used.
            $candidate = yelp_node_text($items, 3);
            $rawWebsite = preg_match('/Business website/', $candidate)
                ? $candidate
                : yelp_node_text($items, 4);
        }
    }

    list($street, $city, $state, $zip, $crossStreets) = yelp_split_address($rawAddress);

    $phone = preg_replace('/[\n\r]+/', '', $rawPhone);
    $phone = preg_replace('/[\s]+/', ' ', $phone);
    $phone = preg_replace('/Phone number/', '', $phone);

    $website = preg_replace('/[\n\r]+/', '', $rawWebsite);
    $website = preg_replace('/Business website/', '', $website);
    $website = preg_replace('/[\s]+/', '', $website);
    if ($website !== '') {
        $website = 'http://' . $website;
    }

    // htmlspecialchars()/stripslashes() are kept so the CSV content stays in
    // the format earlier runs of this script produced.
    return array_map('stripslashes', array(
        htmlspecialchars($name),
        $street,
        $city,
        $state,
        $zip,
        $crossStreets,
        $phone,
        $website,
        $yelpUrl,
    ));
}

$t = time();
// CSV output file
#$filename=date("Y-m-d",$t)."_work.csv";
$filename = $t . "_data.csv";
$handle = fopen($filename, "w");
if ($handle === false) {
    // Nothing can be written, so stop before any page is downloaded.
    error_log("Unable to open " . $filename . " for writing");
    exit(1);
}

// Header row of the CSV file
fputcsv($handle, array("Business Name", "Address1", "City", "State", "Zip code", "Address2", "Phone", "Website", "Yelp Url"));

// Search URL: first command-line argument, else the built-in default search.
$args = isset($_SERVER['argv']) ? $_SERVER['argv'] : array();
$searchUrl = (isset($args[1]) && $args[1] !== '')
    ? $args[1]
    : 'http://www.yelp.com/search?find_desc=tasty+burger&find_loc=San+Francisco%2C+CA';
$separator = strpos($searchUrl, '?') === false ? '?' : '&';

libxml_use_internal_errors(true); // collect libxml parse errors instead of printing them

for ($page = 0; $page <= 100; $page++) {
    $start = $page * 10; // result offset used for pagination

    $xpath = yelp_fetch_xpath($searchUrl . $separator . 'start=' . $start);
    if ($xpath === null) {
        continue; // this page could not be fetched; try the next offset
    }

    // Each result link sits in a span with class "indexed-biz-name".
    $links = $xpath->query('/html/body//span[@class="indexed-biz-name"]/a');
    if ($links === false || $links->length === 0) {
        break; // a page without results: no point requesting further offsets
    }

    foreach ($links as $link) {
        fputcsv($handle, yelp_business_row($link));
    }
}

fclose($handle);
?>
preferences:
27.99 ms | 402 KiB | 5 Q