<?php // @ 2019-04-21T17:41:58Z
declare (strict_types = 1);
// Switch the outer condition to if (0) to permit fpm/apache/etc modes.
if (1) {
    if (PHP_SAPI !== 'cli') {
        exit("for security reasons, only cli mode is allowed.");
    }
}
init();
// get_url() declares $save_path global and fills it in as a side effect.
$save_path = '';
$url = get_url();
// One shared curl handle; fetch() and m200() reach it through `global $ch`.
$ch = curl_init();
curl_setopt_array($ch, [
    // An empty string asks libcurl to accept every encoding it supports.
    CURLOPT_ENCODING => '',
    CURLOPT_USERAGENT => sprintf(
        '4chan_backuper_php; libcurl/%s php/%s',
        curl_version()['version'],
        PHP_VERSION
    ),
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_FOLLOWLOCATION => true,
    CURLOPT_AUTOREFERER => true,
]);
// Initial download of the thread page; anything other than HTTP 200 aborts.
$html = fetch($url);
m200();
$new_domd = my_dom_loader($html);
$new_xp = new DOMXPath($new_domd);
// First run for this thread: persist a copy of the page whose thread container
// has been emptied. The main loop later appends every post to that skeleton.
if (!file_exists($save_path . "index.html")) {
    $clone = my_dom_loader($new_domd->saveHTML());
    $clone_xp = new DOMXPath($clone);
    $clone_thread = $clone_xp->query("//div[@class='thread']")->item(0);
    if ($clone_thread === null) {
        throw new \RuntimeException("could not find the thread container (div with class \"thread\") in the fetched html!");
    }
    $should_be_removed = $clone_thread->childNodes->length;
    $removed = 0;
    // childNodes is a live list: removing nodes while iterating it with
    // foreach skips entries, so drain the container from the front instead.
    while (($child = $clone_thread->firstChild) !== null) {
        $clone_thread->removeChild($child);
        ++$removed;
    }
    // Sanity check: every child counted up front must have been removed.
    if ($removed !== $should_be_removed) {
        throw new \LogicException("removed: {$removed} - should be removed: {$should_be_removed}");
    }
    file_put_contents($save_path . "index.html", $clone->saveHTML(), LOCK_EX);
    unset($clone, $clone_xp, $clone_thread, $child);
}
// Load the persisted index. Posts whose id already exists in it are skipped.
$old_domd = my_dom_loader(file_get_contents($save_path . "index.html"));
$old_xp = new DOMXPath($old_domd);
$old_thread = $old_xp->query("//div[@class='thread']")->item(0);
if ($old_thread === null) {
    throw new \RuntimeException("could not find the thread container (div with class \"thread\") in the saved index.html!");
}
// Poll forever: merge new posts into the saved document, download their
// images, persist the result, sleep, refetch. The loop only ends when an
// exception is thrown (e.g. m200() on a non-200 response).
while (true) {
    $new_posts = 0;
    $new_images = 0;
    foreach ($new_xp->query("//div[@class='thread']//div[contains(@class,'postContainer') and @id]") as $post) {
        $id = $post->getAttribute("id");
        assert(!empty($id));
        if ($old_xp->query('//div[@id=' . xpath_quote($id) . ']')->length > 0) {
            // already processed this post.
            continue;
        }
        // Copy the post into the saved document; from here on $post is the
        // imported node, so attribute rewrites below land in the saved copy.
        $post = $old_domd->importNode($post, true);
        $old_thread->appendChild($post);
        ++$new_posts;
        $img = $post->getElementsByTagName("img")->item(0);
        if ($img === null) {
            continue;
        }
        // The leading "." keeps the search inside $post. A bare "//" is an
        // absolute path and would match anchors anywhere in the document.
        $a = $old_xp->query(".//a[contains(@class,'fileThumb')]", $post)->item(0);
        if ($a === null) {
            continue;
        }
        $full_url = $a->getAttribute("href");
        $full_bname = basename($full_url);
        if (empty($full_url) || empty($full_bname)) {
            continue;
        }
        $thumb_url = $img->getAttribute("src");
        // Name the thumbnail after its own URL, not the full image's URL.
        $thumb_bname = basename($thumb_url);
        if (empty($thumb_url) || empty($thumb_bname)) {
            continue;
        }
        ++$new_images;
        $thumb_binary = fetch($thumb_url);
        m200();
        $full_binary = fetch($full_url);
        m200();
        file_put_contents($save_path . "images" . DIRECTORY_SEPARATOR . "thumbnails" . DIRECTORY_SEPARATOR . $thumb_bname, $thumb_binary);
        file_put_contents($save_path . "images" . DIRECTORY_SEPARATOR . $full_bname, $full_binary);
        // Point the saved markup at the local copies.
        $a->setAttribute("href", "images/" . $full_bname);
        $img->setAttribute("src", "images/thumbnails/" . $thumb_bname);
    }
    if ($new_posts > 0 || $new_images > 0) {
        // Persist the merged document; without this write nothing collected
        // above survives the process, and a restart would start from scratch.
        file_put_contents($save_path . "index.html", $old_domd->saveHTML(), LOCK_EX);
    }
    echo "new posts: {$new_posts} - new images: {$new_images}\n";
    $sleeptime = 10;
    echo "sleeping {$sleeptime} seconds and refetching..";
    sleep($sleeptime);
    echo ".\nfetching again.\n";
    $html = fetch($url);
    m200();
    $new_domd = my_dom_loader($html);
    $new_xp = new DOMXPath($new_domd);
}
/**
 * Parses an HTML string into a DOMDocument, forcing UTF-8 decoding.
 *
 * @param string $html raw HTML markup; surrounding whitespace is trimmed.
 * @return \DOMDocument the parsed document.
 * @throws \Exception if the input is empty or no document could be built.
 */
function my_dom_loader(string $html): \DOMDocument
{
    $html = trim($html);
    if ($html === '') {
        throw new \Exception("failed to create DOMDocument from input html! (input is empty)");
    }
    // Prepend an encoding declaration unless the markup already carries one,
    // so the parser decodes the input as UTF-8.
    if (false === stripos($html, '<?xml encoding=')) {
        $html = '<?xml encoding="UTF-8">' . $html;
    }
    // loadHTML() is an instance method; calling it statically is an Error on
    // PHP 8. Collect parser diagnostics internally instead of silencing them
    // with "@", then restore the previous libxml error mode.
    $ret = new \DOMDocument();
    $previous_mode = libxml_use_internal_errors(true);
    try {
        $loaded = $ret->loadHTML($html, LIBXML_NOBLANKS | LIBXML_NONET | LIBXML_BIGLINES);
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previous_mode);
    }
    if (!$loaded) {
        throw new \Exception("failed to create DOMDocument from input html!");
    }
    return $ret;
}
/**
 * "must be http 200": verifies that the last transfer on the shared curl
 * handle (global $ch) ended with HTTP status 200.
 *
 * @throws \RuntimeException if the status code is anything other than 200.
 */
function m200(): void
{
    global $ch;
    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    if ($code !== 200) {
        $url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
        throw new \RuntimeException("expected http \"200 OK\" but got {$code} - url: \"{$url}\"");
    }
}
/**
 * Downloads a URL through the shared curl handle (global $ch) and returns the
 * response body. Progress is echoed to standard output.
 *
 * @param string   $url  URL to fetch; a protocol-relative "//host/..." URL is
 *                       prefixed with "http:".
 * @param int|null $code receives the HTTP status code of the response.
 * @return string the response body.
 * @throws \RuntimeException on a curl error or when no body was returned.
 */
function fetch(string $url, ?int &$code = null): string
{
    if (substr($url, 0, 2) === "//") {
        $url = "http:" . $url;
    }
    echo "fetching \"{$url}\"..";
    global $ch;
    curl_setopt($ch, CURLOPT_URL, $url);
    $data = curl_exec($ch);
    if (curl_errno($ch) !== CURLE_OK) {
        throw new \RuntimeException("curl error: " . curl_errno($ch) . ": " . curl_error($ch));
    }
    // curl_exec() only returns the body as a string when the handle has
    // CURLOPT_RETURNTRANSFER enabled; fail clearly instead of with a TypeError.
    if (!is_string($data)) {
        throw new \RuntimeException("curl_exec() did not return a response body (is CURLOPT_RETURNTRANSFER enabled?)");
    }
    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    echo "http code \"{$code}\".";
    return $data;
}
/**
 * Determines the thread URL to back up and prepares the output folders.
 *
 * The URL is taken from $_REQUEST['url'], else from the command-line
 * arguments (joined together), else read interactively from STDIN in cli
 * mode. It is normalised to http://boards.4chan.org/<board>/thread/<id>.
 *
 * Side effects: sets the global $save_path to
 * <cwd>/backups/<board>/<id>/ (with trailing separator) and creates that
 * folder plus its images/ and images/thumbnails/ subfolders.
 *
 * @return string the normalised thread URL.
 * @throws \RuntimeException if no URL is available or it does not match.
 */
function get_url(): string
{
    global $argv;
    global $save_path;
    if (isset($_REQUEST['url'])) {
        echo "got url from \$_REQUEST\n";
        $url = (string)$_REQUEST['url'];
    } elseif (!empty($argv) && count($argv) > 1) {
        echo "got url from \$argv\n";
        // Everything after the script name is glued together into one URL.
        $url = implode("", array_slice($argv, 1));
    } elseif (php_sapi_name() === 'cli') {
        //interactive mode
        stream_set_blocking(STDIN, true);
        echo "enter 4chan url: ";
        $url = fgets(STDIN);
        if ($url === false) {
            // EOF or read error: trim(false) would be a TypeError under strict_types.
            throw new \RuntimeException("url not specified! (could not read a line from STDIN)");
        }
    } else {
        throw new \RuntimeException("url not specified! (and not running interactively)");
    }
    $url = trim($url);
    // The board name becomes a folder name below, so it is limited to
    // alphanumerics; a looser pattern would let ".." through into the path.
    // NOTE(review): assumes board names are purely alphanumeric - confirm.
    if (!preg_match('/^(?:https?\:\/\/)?boards\.4chan\.org\/(?<board_name>[a-zA-Z0-9]+)\/.*?\/(?<thread_id>\d+)/', $url, $matches)) {
        throw new \RuntimeException("url \"{$url}\" does not look like a 4chan board url! - they are supposed to look something like http://boards.4chan.org/hc/thread/1501699#p1508333");
    }
    $board_name = $matches['board_name'];
    $thread_id = $matches['thread_id'];
    $url = "http://boards.4chan.org/{$board_name}/thread/{$thread_id}";
    echo "url parsed: \"{$url}\"\n";
    // mymkdir() is not recursive, so create the folders one level at a time.
    $save_path = getcwd() . DIRECTORY_SEPARATOR . "backups";
    mymkdir($save_path);
    $save_path .= DIRECTORY_SEPARATOR . $board_name;
    mymkdir($save_path);
    $save_path .= DIRECTORY_SEPARATOR . $thread_id;
    mymkdir($save_path);
    $save_path .= DIRECTORY_SEPARATOR;
    mymkdir($save_path . "images");
    mymkdir($save_path . "images" . DIRECTORY_SEPARATOR . "thumbnails");
    return $url;
}
/**
 * Creates a single folder unless it already exists, echoing progress.
 *
 * @param string $path folder to create (parents are not created).
 * @throws \RuntimeException if mkdir() reports failure.
 */
function mymkdir(string $path): void
{
    if (!is_dir($path)) {
        echo 'making folder "' . $path . '"..';
        $created = mkdir($path);
        if ($created === false) {
            throw new \RuntimeException("ERROR: could not make folder!");
        }
        echo ". done.\n";
    }
}
/**
 * One-time runtime setup: report every error level, convert PHP errors into
 * exceptions, and route failed assertions to hhb_assert_handler where the
 * runtime still supports assert_options(). Later calls are no-ops.
 */
function init(): void
{
    static $firstrun = true;
    if ($firstrun !== true) {
        return;
    }
    $firstrun = false;
    error_reporting(E_ALL);
    set_error_handler("hhb_exception_error_handler");
    // assert_options() is deprecated as of PHP 8.3. With the throwing error
    // handler installed above, that deprecation notice would abort the script,
    // so only configure assertions on older runtimes. Newer runtimes keep the
    // engine's own assertion settings.
    if (PHP_VERSION_ID < 80300) {
        assert_options(ASSERT_ACTIVE, 1);
        assert_options(ASSERT_WARNING, 0);
        // ASSERT_QUIET_EVAL was removed in PHP 8.0; look it up dynamically so
        // a missing constant is not an Error.
        if (defined('ASSERT_QUIET_EVAL')) {
            assert_options(constant('ASSERT_QUIET_EVAL'), 1);
        }
        assert_options(ASSERT_CALLBACK, 'hhb_assert_handler');
    }
}
/**
 * Error handler that turns a PHP error into a thrown ErrorException, unless
 * the error's level is masked out of the current error_reporting() value.
 *
 * @throws ErrorException carrying the message, severity, file and line.
 */
function hhb_exception_error_handler($errno, $errstr, $errfile, $errline)
{
    $active_levels = error_reporting();
    if ($active_levels & $errno) {
        throw new ErrorException($errstr, 0, $errno, $errfile, $errline);
    }
    // This error level is not included in error_reporting: ignore it.
    return;
}
/**
 * Assertion callback: always throws an ErrorException (severity 1) describing
 * where the assertion failed, with its description and code text.
 *
 * @throws ErrorException on every call.
 */
function hhb_assert_handler($file, $line, $code, $desc = null)
{
    $message = "Assertion failed at {$file}:{$line} {$desc} code: {$code}";
    throw new ErrorException($message, 0, 1, $file, $line);
}
/**
 * Turns an arbitrary string into an XPath string expression.
 *
 * A value without double quotes is wrapped in double quotes; otherwise a value
 * without single quotes is wrapped in single quotes. A value containing both
 * becomes a concat() call that joins the double-quote-free pieces with
 * literal '"' arguments, e.g. concat("'foo'", '"', "bar").
 *
 * based on https://stackoverflow.com/a/1352556/1067003
 *
 * @param string $value text to embed in an XPath expression.
 * @return string the quoted literal or concat(...) expression.
 */
function xpath_quote(string $value): string
{
    $has_double = strpos($value, '"') !== false;
    if (!$has_double) {
        return '"' . $value . '"';
    }
    $has_single = strpos($value, '\'') !== false;
    if (!$has_single) {
        return '\'' . $value . '\'';
    }
    // Both quote styles occur: split on the double quotes and emit each
    // non-empty piece double-quoted, with a '"' literal between pieces.
    $pieces = explode('"', $value);
    $last_index = count($pieces) - 1;
    $arguments = [];
    foreach ($pieces as $index => $piece) {
        if ($piece !== '') {
            $arguments[] = '"' . $piece . '"';
        }
        if ($index < $last_index) {
            $arguments[] = "'\"'";
        }
    }
    return 'concat(' . implode(', ', $arguments) . ')';
}
Enable javascript to submit You have javascript disabled. You will not be able to edit any code.
Here you find the average performance (time & memory) of each version. A grayed out version indicates it didn't complete successfully (based on exit-code).
Version System time (s) User time (s) Memory (MiB) 8.3.6 0.004 0.011 18.55 8.3.5 0.009 0.009 19.16 8.3.4 0.006 0.010 19.06 8.3.3 0.000 0.018 18.79 8.3.2 0.006 0.003 21.10 8.3.1 0.006 0.003 21.90 8.3.0 0.008 0.000 22.04 8.2.18 0.009 0.013 16.73 8.2.17 0.015 0.006 22.96 8.2.16 0.009 0.006 20.47 8.2.15 0.003 0.005 24.18 8.2.14 0.004 0.004 24.66 8.2.13 0.009 0.000 17.97 8.2.12 0.008 0.000 26.35 8.2.11 0.003 0.013 22.14 8.2.10 0.010 0.006 18.03 8.2.9 0.004 0.004 18.00 8.2.8 0.004 0.004 17.97 8.2.7 0.003 0.006 18.16 8.2.6 0.004 0.004 18.16 8.2.5 0.004 0.007 18.10 8.2.4 0.005 0.003 19.59 8.2.3 0.004 0.004 20.80 8.2.2 0.003 0.005 18.34 8.2.1 0.004 0.004 19.66 8.2.0 0.004 0.004 18.29 8.1.28 0.015 0.004 25.92 8.1.27 0.008 0.000 22.18 8.1.26 0.004 0.004 26.35 8.1.25 0.008 0.000 28.09 8.1.24 0.009 0.000 22.48 8.1.23 0.011 0.000 17.95 8.1.22 0.004 0.004 17.91 8.1.21 0.003 0.006 18.77 8.1.20 0.008 0.003 17.60 8.1.19 0.012 0.000 17.22 8.1.18 0.004 0.004 18.10 8.1.17 0.003 0.005 18.83 8.1.16 0.003 0.006 19.05 8.1.15 0.008 0.000 19.10 8.1.14 0.005 0.002 19.09 8.1.13 0.010 0.000 20.30 8.1.12 0.000 0.009 17.52 8.1.11 0.009 0.003 17.59 8.1.10 0.004 0.004 17.54 8.1.9 0.005 0.003 17.60 8.1.8 0.007 0.003 17.62 8.1.7 0.004 0.004 17.55 8.1.6 0.005 0.003 17.74 8.1.5 0.000 0.008 17.59 8.1.4 0.005 0.003 17.57 8.1.3 0.000 0.008 17.70 8.1.2 0.003 0.006 17.73 8.1.1 0.006 0.003 17.60 8.1.0 0.004 0.004 17.67 8.0.30 0.000 0.008 21.70 8.0.29 0.003 0.005 17.00 8.0.28 0.000 0.008 18.46 8.0.27 0.004 0.004 17.48 8.0.26 0.005 0.005 18.47 8.0.25 0.007 0.000 17.18 8.0.24 0.011 0.000 17.14 8.0.23 0.005 0.002 17.08 8.0.22 0.000 0.007 17.12 8.0.21 0.000 0.007 17.02 8.0.20 0.000 0.008 17.23 8.0.19 0.000 0.009 17.09 8.0.18 0.000 0.007 17.07 8.0.17 0.009 0.000 17.06 8.0.16 0.009 0.000 17.11 8.0.15 0.000 0.008 17.05 8.0.14 0.005 0.003 17.11 8.0.13 0.000 0.006 13.52 8.0.12 0.000 0.008 17.09 8.0.11 0.000 0.008 17.08 8.0.10 0.005 0.003 17.04 8.0.9 0.004 0.004 16.95 8.0.8 0.008 0.011 17.04 8.0.7 0.003 0.006 16.97 
8.0.6 0.000 0.008 16.95 8.0.5 0.000 0.007 17.14 8.0.3 0.010 0.007 17.37 8.0.2 0.016 0.006 17.27 8.0.1 0.000 0.008 17.24 8.0.0 0.009 0.010 17.07 7.4.33 0.006 0.000 16.71 7.4.32 0.000 0.007 16.78 7.4.30 0.006 0.000 16.53 7.4.29 0.000 0.009 16.57 7.4.28 0.003 0.006 16.52 7.4.27 0.000 0.007 16.80 7.4.26 0.000 0.007 16.80 7.4.25 0.004 0.004 16.64 7.4.24 0.007 0.000 16.81 7.4.23 0.004 0.004 16.75 7.4.22 0.008 0.000 16.85 7.4.21 0.010 0.008 16.76 7.4.20 0.008 0.000 16.84 7.4.16 0.011 0.008 16.71 7.4.14 0.012 0.012 17.86 7.4.13 0.004 0.013 16.82 7.4.12 0.006 0.017 16.72 7.4.11 0.006 0.012 16.71 7.4.10 0.011 0.009 16.88 7.4.9 0.011 0.014 16.66 7.4.8 0.011 0.011 19.39 7.4.7 0.009 0.009 16.80 7.4.6 0.007 0.010 16.69 7.4.5 0.008 0.008 16.63 7.4.4 0.003 0.016 16.89 7.4.0 0.007 0.007 14.98 7.3.33 0.005 0.000 13.45 7.3.32 0.000 0.006 13.55 7.3.31 0.000 0.008 16.62 7.3.30 0.002 0.005 16.59 7.3.29 0.004 0.011 16.63 7.3.28 0.010 0.008 16.58 7.3.26 0.011 0.009 16.73 7.3.24 0.010 0.008 16.73 7.3.23 0.012 0.006 16.62 7.3.21 0.012 0.009 16.83 7.3.20 0.006 0.011 16.53 7.3.19 0.010 0.010 16.59 7.3.18 0.010 0.007 16.64 7.3.17 0.010 0.006 16.68 7.3.16 0.013 0.007 16.66 7.3.4 0.007 0.007 14.96 7.3.3 0.007 0.004 15.02 7.3.2 0.087 0.010 15.23 7.3.1 0.010 0.004 15.18 7.3.0 0.012 0.018 15.02 7.2.33 0.013 0.007 16.79 7.2.32 0.012 0.011 16.96 7.2.31 0.014 0.004 16.98 7.2.30 0.018 0.000 16.90 7.2.29 0.007 0.011 16.82 7.2.17 0.006 0.006 15.34 7.2.16 0.000 0.011 14.98 7.2.15 0.010 0.003 15.24 7.2.14 0.007 0.007 15.13 7.2.13 0.013 0.003 15.26 7.2.12 0.006 0.006 15.18 7.2.11 0.010 0.003 15.48 7.2.10 0.010 0.007 15.11 7.2.9 0.010 0.003 15.39 7.2.8 0.010 0.016 15.12 7.2.7 0.011 0.006 15.05 7.2.6 0.016 0.010 15.31 7.2.5 0.021 0.006 15.13 7.2.4 0.019 0.006 15.35 7.2.3 0.008 0.009 15.50 7.2.2 0.013 0.006 15.25 7.2.1 0.009 0.009 15.41 7.2.0 0.012 0.006 15.29 7.1.28 0.010 0.003 14.16 7.1.27 0.009 0.004 14.11 7.1.26 0.008 0.008 14.09 7.1.25 0.009 0.006 14.05
preferences:dark mode live preview
69.71 ms | 401 KiB | 5 Q