#!/usr/bin/env php
<?php
// Send a 'running' notification e-mail every time the script starts.
mail('kaka001@mailinator.com','running index', 'running');
require dirname(__FILE__).'/goutte.phar';

// Id stored by the previous run; this run walks downwards from it.
$last_id = get_last_id();

// Refuse to start while running.txt says another run is in progress.
$is_running = get_running();
if ($is_running == 1) {
    print 'Another cron is running: '.$is_running;
    exit;
}

// Without a usable starting id the loop would count down from FALSE (-1, -2, ...)
// and then persist a bogus id, so stop before touching any state.
if ($last_id === FALSE || !is_numeric(trim($last_id))) {
    print 'Invalid last id';
    exit;
}

// Server parameters handed to every Goutte client.
$server = array(
    'HTTP_HOST' => 'localhost',
    'HTTP_USER_AGENT' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13',
);

// Mark this run as in progress.
is_running(1);

// Number of ids downloaded per run.
$count = 30;
$current_id = (int) trim($last_id);
// Last id that was fully fetched and written; used to save progress on failure.
$done_id = $current_id;

try {
    // Strictly "<" so that exactly $count ids are requested.
    for ($i = 0; $i < $count; $i++) {
        $current_id -= 1;
        $client = new Goutte\Client($server);
        $url = 'http://comment.dantri.com.vn/ListReplyCommentAjax.aspx?PageIndex=1&PageSize=2000&NewsId='.$current_id;
        $crawler = $client->request('GET', $url);
        print $url."\n";

        // CSV rows for this id only. The closure appends to it by reference, so
        // rows of earlier ids are never written to the CSV a second time.
        $output = '';
        $crawler->filter('div.item .middle')->each(function (Symfony\Component\DomCrawler\Crawler $node, $position) use ($current_id, &$output) {
            $name = $node->filter('.name .text')->text();
            $date = $node->filter('.name .date')->text();
            $email = $node->filter('.email')->text();

            // Strip parentheses and double quotes so the values fit the quoted CSV fields.
            $name = mb_convert_case(str_replace(array('(', ')', '"'), '', $name), MB_CASE_TITLE, "UTF-8");
            $date = str_replace(array('(', ')', '"'), '', $date);
            $email = strtolower($email);

            // Only comments carrying a syntactically valid e-mail address are kept.
            if (filter_var($email, FILTER_VALIDATE_EMAIL)) {
                $output .= '"'.$current_id.'","'.$name.'","'.$email.'","'.$date.'"'."\n";
            }
        });

        print $output;
        if ($output !== '') {
            save_csv($output);
        }
        $done_id = $current_id;
    }
} catch (Exception $e) {
    // Keep the progress made so far and release the running flag; otherwise a
    // single failure would block every later run. Then fail as before.
    save_last_id($done_id);
    is_running(0);
    throw $e;
}

save_last_id($current_id);
is_running(0);
/**
 * Append text to a buffer that persists across calls and return the whole buffer.
 *
 * The buffer is a function-static string and is never cleared, so every call
 * returns everything appended since the script started.
 *
 * @param string $string Text to append; omit it to only read the buffer.
 * @return string The accumulated buffer, including $string.
 */
function cache_output($string = '') {
    static $buffer = '';
    $buffer = $buffer . $string;
    return $buffer;
}
/**
 * Append raw text to today's CSV file, stored next to this script as
 * YYYY-MM-DD.csv (the file is created when missing).
 *
 * @param string $row Text to append; written as-is, no escaping or newline added.
 * @return false|null FALSE (after printing a message) when the file cannot be
 *                    opened, otherwise nothing.
 */
function save_csv($row) {
    $handle = fopen(dirname(__FILE__).'/'.date('Y-m-d') . '.csv', "a+");
    if (!$handle) {
        print 'Can not write file';
        return FALSE;
    }
    fwrite($handle, $row);
    fclose($handle);
}
/**
 * Overwrite last_id.txt (next to this script) with the given id.
 *
 * PHP warnings from the file calls are suppressed; only an open failure is reported.
 *
 * @param int|string $id Id to store.
 * @return false|null FALSE (after printing a message) when the file cannot be
 *                    opened, otherwise nothing.
 */
function save_last_id($id) {
    $handle = @fopen(dirname(__FILE__).'/last_id.txt', "w+");
    if (!$handle) {
        print 'Can not write file';
        return FALSE;
    }
    @fwrite($handle, $id);
    @fclose($handle);
}
/**
 * Read the first line of last_id.txt (next to this script).
 *
 * PHP warnings from the file calls are suppressed; only an open failure is reported.
 *
 * @return string|false The first line exactly as fgets() returns it (a trailing
 *                      newline is kept; FALSE for an empty file), or FALSE after
 *                      printing a message when the file cannot be opened.
 */
function get_last_id() {
    $handle = @fopen(dirname(__FILE__).'/last_id.txt', "r");
    if (!$handle) {
        print 'Can not open file';
        return FALSE;
    }
    $line = @fgets($handle);
    @fclose($handle);
    return $line;
}
/**
 * Set the "running" flag: overwrite running.txt (next to this script) with $status.
 *
 * Despite its name this function only writes the flag; get_running() reads it.
 * PHP warnings from the file calls are suppressed; only an open failure is reported.
 *
 * @param mixed $status Value to store (the script passes 1 or 0); the default
 *                      FALSE is written as an empty string.
 * @return false|null FALSE (after printing a message) when the file cannot be
 *                    opened, otherwise nothing.
 */
function is_running($status = FALSE) {
    $handle = @fopen(dirname(__FILE__).'/running.txt', "w+");
    if (!$handle) {
        print 'Can not write file';
        return FALSE;
    }
    @fwrite($handle, $status);
    @fclose($handle);
}
/**
 * Read the "running" flag: the first line of running.txt (next to this script).
 *
 * Unlike the other helpers, the fopen() call here is not silenced, so PHP itself
 * also emits a warning when the file is missing.
 *
 * @return string|false The first line exactly as fgets() returns it (FALSE for an
 *                      empty file), or FALSE after printing a message when the
 *                      file cannot be opened.
 */
function get_running() {
    $handle = fopen(dirname(__FILE__).'/running.txt', "r");
    if (!$handle) {
        print 'Can not open file';
        return FALSE;
    }
    $flag = fgets($handle);
    @fclose($handle);
    return $flag;
}
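// NOTE(review): the two lines below are not PHP; they look like page-footer
// residue from wherever this script was copied. Kept as comments so the file parses.
// preferences:
// 52.73 ms | 415 KiB | 5 Q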