<?php
/* Author: Yash Gupta | File: search.php
Complete code source and tutorial: http://thetechnofreak.com/technofreak/get-scrape-google-search-results-php/
Description: This script searches google for a given search term and outputs all the results' urls for up to 1000 results.
Usage:
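Make a form in some other page and pass the following parameters with the GET method, or modify the URL as search.php?data=1&pages=50&q=get+google+results+php
data => Must be present (any value) for the search to run; the built-in form sends it through its submit button.
input-text => The search term typed into the built-in form. Used as the query when q is absent.
q => Your search query. Defaults to the value of input-text.
pages => The number of pages you want to parse. (default is 10, maximum of 100)
start => The zero-based page to start from. Default is 0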
10/04/2012 mods/fixes/updates by Jay at http://he.tel
*/
// Emit the static document preamble: XHTML doctype, <head> with title and
// charset, and the opening <body> tag. A nowdoc is used so the markup is
// written out verbatim with no interpolation or escape processing.
echo <<<'PAGE_HEAD'
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Scrape Google Results with PHP</title>
<meta http-equiv="Content-type" content="text/html; charset=utf-8" />
</head>
<body>
PAGE_HEAD;
/**
 * Normalise a raw search term.
 *
 * Runs of two or more whitespace characters are collapsed to a single space
 * and the result is trimmed. Anything that is not a string (a missing
 * parameter, or an array such as `input-text[]=x`) yields an empty string.
 *
 * @param mixed $raw Raw value of the `input-text` request parameter.
 * @return string The cleaned-up search term, possibly empty.
 */
function gg_normalize_keyword($raw)
{
    if (!is_string($raw)) {
        return '';
    }
    return trim(preg_replace("/\s\s+/", " ", $raw));
}

// PHP has already URL-decoded everything in $_GET; decoding a second time
// would corrupt terms that contain a literal "%" (e.g. "100%25 cotton").
$input = isset($_GET["input-text"]) ? $_GET["input-text"] : '';
$keyword = gg_normalize_keyword($input);

// After a submission, show the term that was searched and a link back to
// the empty form.
if (isset($_GET["data"])) {
    echo "Your Search Term ==> <b>";
    echo htmlspecialchars($keyword, ENT_QUOTES, 'UTF-8');
    echo "</b><br><br><br>";

    // PHP_SELF includes any client-supplied path info, so it must be escaped
    // before being placed inside the href attribute.
    $self = htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES, 'UTF-8');
    if (substr($_SERVER['DOCUMENT_ROOT'], 0, 2) === "C:") {
        // Document root on a C: drive: build an absolute localhost link.
        echo '<a href="http://localhost' . $self . '">Reset Search</a>';
    } else {
        echo '<a href="' . $self . '">Reset Search</a>';
    }
}
// Render the search form and open the <pre> block that the result list is
// printed into.
echo
'<br>
<br>
<center>'.PHP_EOL;
// The form submits with GET. Browsers replace the query string of the action
// URL with the form fields on a GET submit, so extra parameters have to be
// sent as hidden inputs rather than appended to the action. No user input is
// interpolated into the markup.
echo
'<form action="search.php" method="get">'.PHP_EOL.
'<input type="hidden" name="pages" value="10"/>'.PHP_EOL.
'<input type="text" name="input-text" size="60"/>'.PHP_EOL.
'<input type="submit" name="data" value="Web Search"/>'.PHP_EOL.'</form>'.PHP_EOL;
echo
'</center>
<pre>
<br>';
/**
 * Parse a request value as a non-negative integer.
 *
 * @param mixed $value   Raw value, typically a string taken from $_GET.
 * @param int   $default Returned when $value is not a plain non-negative integer.
 * @return int
 */
function gg_non_negative_int($value, $default)
{
    if (is_int($value)) {
        return $value >= 0 ? $value : $default;
    }
    if (is_string($value) && preg_match('/^\d+$/', $value) === 1) {
        return (int) $value;
    }
    return $default;
}

/**
 * Work out which result pages to fetch.
 *
 * @param mixed $start   Requested zero-based first page (invalid values fall back to 0).
 * @param mixed $pages   Requested number of pages (invalid values fall back to 10).
 * @param int   $maxPage Exclusive upper bound on the page index.
 * @return array Two-element array: first page index and exclusive end page index.
 */
function gg_page_range($start, $pages, $maxPage = 100)
{
    // Clamp both values before adding so oversized input cannot overflow.
    $first = min(gg_non_negative_int($start, 0), $maxPage);
    $count = min(gg_non_negative_int($pages, 10), $maxPage);
    return array($first, min($first + $count, $maxPage));
}

/**
 * Pull the result URLs out of a fetched results page.
 *
 * The pattern matches anchors of the form `<a href="..." class=l ...>...</a>`
 * and captures the href value exactly as it appears in the markup.
 *
 * @param mixed $html Page body as returned by curl_exec(); non-strings yield no URLs.
 * @return array List of captured href values, in document order.
 */
function gg_extract_result_urls($html)
{
    if (!is_string($html) || $html === '') {
        return array();
    }
    $matches = array();
    preg_match_all('/a href="([^"]+)" class=l.+?>.+?<\/a>/', $html, $matches);
    return isset($matches[1]) ? $matches[1] : array();
}

/**
 * Build the HTML link printed for one scraped URL.
 *
 * The URL comes from remote markup, so quotes and angle brackets are escaped
 * before it is placed in the single-quoted href. Entities already present in
 * the scraped value (such as `&amp;`) are left as they are.
 *
 * @param string $url Href value captured from the results page.
 * @return string One anchor element, without a trailing newline.
 */
function gg_result_link($url)
{
    $safe = htmlspecialchars($url, ENT_QUOTES, 'UTF-8', false);
    return " <a href='" . $safe . "'>" . $safe . "</a>";
}

// Run the search only when the request carries the `data` parameter.
if (isset($_GET["data"])) {
    // Fetching many pages can outlast the default execution limit.
    ini_set("max_execution_time", 0);
    set_time_limit(0);

    // An explicit q parameter wins; otherwise use the keyword taken from the form.
    $query = (isset($_GET['q']) && is_string($_GET['q'])) ? $_GET['q'] : $keyword;

    list($firstPage, $endPage) = gg_page_range(
        isset($_GET['start']) ? $_GET['start'] : 0,
        isset($_GET['pages']) ? $_GET['pages'] : 10
    );
    if ((string) $query === '') {
        // Nothing to search for: skip the network round-trips entirely.
        $endPage = $firstPage;
    }

    $gg_url = 'http://www.google.com/search?hl=en&safe=off&filter=0&q=' . urlencode($query) . '&start=';

    $options = array(
        CURLOPT_RETURNTRANSFER => true, // return web page
        CURLOPT_HEADER => false, // don't return headers
        CURLOPT_FOLLOWLOCATION => true, // follow redirects
        CURLOPT_ENCODING => "", // handle all encodings
        CURLOPT_AUTOREFERER => true, // set referer on redirect
        CURLOPT_CONNECTTIMEOUT => 120, // timeout on connect
        CURLOPT_TIMEOUT => 120, // timeout on response
        CURLOPT_MAXREDIRS => 10, // stop after 10 redirects
        CURLOPT_COOKIEFILE => "cookie.txt",
        CURLOPT_COOKIEJAR => "cookie.txt",
        CURLOPT_USERAGENT => "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:11.0) Gecko/20100101 Firefox/11.0",
        CURLOPT_REFERER => "http://www.google.com/");

    $count = 0;  // links printed so far
    $size = 0;   // bytes downloaded so far
    $failed = 0; // pages that could not be fetched

    for ($page = $firstPage; $page < $endPage; $page++) {
        // The `start` query parameter is a result offset: ten results per page.
        $ch = curl_init($gg_url . ($page * 10));
        if ($ch === false) {
            $failed++;
            continue;
        }
        curl_setopt_array($ch, $options);
        $scraped = curl_exec($ch);
        curl_close($ch);

        if ($scraped === false) {
            // Best effort: note the failure and carry on with the next page.
            $failed++;
            continue;
        }

        foreach (gg_extract_result_urls($scraped) as $url) {
            echo gg_result_link($url) . PHP_EOL;
            $count++;
        }
        $size += strlen($scraped);
    }

    echo "<br><br>Number of results: " . $count . PHP_EOL;
    echo "Total KB read: " . ($size / 1024.0) . PHP_EOL;
    if ($failed > 0) {
        echo "Pages that could not be fetched: " . $failed . PHP_EOL;
    }
}

// Close the elements opened earlier in the page.
echo PHP_EOL . '</pre>' . PHP_EOL . '</body>' . PHP_EOL . '</html>' . PHP_EOL;
?>
preferences:
49.75 ms | 402 KiB | 5 Q