<?php
// Lookup table: mnemonic => raw (UTF-8 encoded) byte sequence of each special
// character that appears in the sample strings.
$charmap = [
    'NUL'  => "\x00",             // NULL (U+0000)
    'SOH'  => "\x01",             // START OF HEADING (U+0001)
    'STX'  => "\x02",             // START OF TEXT (U+0002)
    'ETX'  => "\x03",             // END OF TEXT (U+0003)
    'EOT'  => "\x04",             // END OF TRANSMISSION (U+0004)
    'ENQ'  => "\x05",             // ENQUIRY (U+0005)
    'HT'   => "\x09",             // HORIZONTAL TAB (U+0009)
    'LF'   => "\x0a",             // LINE FEED (U+000A)
    'VT'   => "\x0b",             // VERTICAL TAB (U+000B)
    'CR'   => "\x0d",             // CARRIAGE RETURN (U+000D)
    'ETB'  => "\x17",             // END OF TRANSMISSION BLOCK (U+0017)
    'SP'   => "\x20",             // SPACE (U+0020)
    'ZWS'  => "\xe2\x80\x8b",     // ZERO WIDTH SPACE (U+200B)
    'MSBS' => "\xf0\x9d\x85\xb7", // MUSICAL SYMBOL BEGIN SLUR (U+1D177)
    'MSBP' => "\xf0\x9d\x85\xb9", // MUSICAL SYMBOL BEGIN PHRASE (U+1D179)
];

// Publish every mnemonic as a global constant so the fixtures can be written
// as plain concatenations such as LF . SP . 'a'.
foreach ($charmap as $mnemonic => $bytes) {
    define($mnemonic, $bytes);
}
// Sample inputs for the whitespace-cleaning strategies, keyed by a label.
// Each value is assembled from the character constants defined from $charmap,
// one source line per line of the sample text, so that otherwise invisible
// characters stay readable here.
// NOTE(review): the labels look like the user names of whoever supplied each
// sample - confirm.
$strings = [
// Blank lines at the start, middle and end; single and repeated spaces inside
// and at the ends of lines; and the multi-byte characters MSBS, MSBP and ZWS
// placed before, between and after the letters.
'user6003859' =>
LF .
LF .
LF .
'a' . SP . 'b'. SP . SP . 'c' . SP . SP . SP . LF .
SP . 'd' . SP . SP . SP . SP . 'e' . LF .
MSBS . 'f' . MSBS . 'g' . MSBS . MSBS . 'h' . MSBS . MSBS . ZWS . ZWS . MSBP . MSBP . 'i' . MSBP . SP . MSBP . SP . 'j' . LF .
LF .
LF .
LF .
LF .
'k' . SP . 'l' . SP . 'm' . SP . 'n' . SP . SP . SP . LF .
MSBP . 'o' . MSBP . MSBP . 'p' . LF .
LF .
LF .
LF,
// ASCII control characters (NUL, SOH, STX, ETX, EOT, ENQ, ETB), tabs, a
// vertical tab, and a mix of LF, CR LF and lone CR line endings around letters,
// including non-ASCII letters.
'mickmackusa' =>
NUL . LF .
LF .
SOH . LF .
CR . LF .
VT .
ETB . 'a' . SP . 'ab' . CR . LF .
HT . HT . CR .
CR . LF .
'cà' . SOH . 'ê߀' . NUL . NUL . 'abcbc'. SP . SP . SP . 'd' . LF .
LF .
HT . CR . LF .
ENQ . SP . SP . SP . 'e' . STX . LF .
ETX . LF .
EOT . LF
];
/**
 * Print a string line by line with its special characters made visible.
 *
 * Every byte sequence listed in $charmap is replaced by its mnemonic wrapped
 * in braces (a line feed is shown as "{LF}"), and PHP_EOL is written after
 * each line read from the string.
 *
 * @param string $str     text to print
 * @param array  $charmap map of mnemonic => byte sequence
 * @return void
 * @throws RuntimeException if the temporary in-memory stream cannot be opened
 */
function display($str, $charmap) {
    // byte sequence => "{MNEMONIC}"
    $converter = array_map(function ($i) { return '{'.$i.'}'; }, array_flip($charmap));

    // Read the lines back from an in-memory stream, which stores the bytes
    // verbatim. Interpolating $str into a "data:text/plain," URI is not safe:
    // the payload gets percent-decoded, a NUL byte makes fopen() reject the
    // path, and the data: wrapper depends on allow_url_fopen.
    $handle = fopen('php://memory', 'w+');
    if ($handle === false) {
        throw new RuntimeException('unable to open an in-memory stream');
    }
    fwrite($handle, $str);
    rewind($handle);

    while ( false !== $line = fgets($handle) ) {
        echo strtr($line, $converter), PHP_EOL;
    }
    fclose($handle);
}
/**
 * Uniform wrapper around one string-transformation strategy.
 *
 * A strategy is either an ordered map of regex pattern => replacement, applied
 * with a single preg_replace() call, or a callable that receives the string
 * and returns the transformed string.
 */
class Replacements {
    /** Strategy kind: user-supplied callable. */
    const FUNC = 0;
    /** Strategy kind: list of regex patterns with their replacements. */
    const REGEX = 1;

    /** @var int one of self::FUNC or self::REGEX */
    protected $type;
    /** @var string[] patterns, in application order (REGEX strategies only) */
    protected $patterns;
    /** @var string[] replacements, index-aligned with $patterns */
    protected $replacements;
    /** @var callable transformation callback (FUNC strategies only) */
    protected $func;

    /**
     * @param array|callable $arg map of pattern => replacement, or a callable
     *                            taking the string (it may declare the
     *                            parameter by reference) and returning the
     *                            result. Arrays are tested first, so an
     *                            array-style callable is treated as a pattern
     *                            map.
     * @throws InvalidArgumentException if $arg is neither an array nor callable
     */
    public function __construct($arg) {
        if ( is_array($arg) ) {
            $this->type = self::REGEX;
            $this->patterns = [];
            $this->replacements = [];
            $this->addPatterns($arg);
        } elseif ( is_callable($arg) ) {
            $this->type = self::FUNC;
            $this->addFunction($arg);
        } else {
            throw new InvalidArgumentException('invalid argument type');
        }
    }

    /**
     * Split a pattern => replacement map into the two parallel lists that
     * preg_replace() expects.
     *
     * @param array $replacements map of pattern => replacement
     * @return void
     */
    protected function addPatterns($replacements) {
        foreach ($replacements as $pattern => $replacement) {
            $this->patterns[] = $pattern;
            $this->replacements[] = $replacement;
        }
    }

    /**
     * Store the callback used by FUNC strategies.
     *
     * @param callable $func
     * @return void
     */
    protected function addFunction($func) {
        $this->func = $func;
    }

    /**
     * Apply the strategy to a string.
     *
     * @param string $str input text
     * @return mixed the preg_replace() result for REGEX strategies (null when
     *               PCRE reports an error), otherwise whatever the callback
     *               returns
     */
    public function execute($str) {
        if ( $this->type === self::REGEX ) {
            return preg_replace($this->patterns, $this->replacements, $str);
        }

        // The argument is wrapped as a reference so that callbacks declaring
        // their parameter by reference can be invoked without a warning.
        return call_user_func_array($this->func, [&$str]);
    }
}
// The four competing strategies compared by the benchmark below. Each one is
// wrapped in a Replacements object; for the regex-based ones the patterns are
// applied in the order listed.

// Strategy "original": four passes.
//  1. turn every newline sequence (\R) into "\n";
//  2. per line (m flag), delete separator (\pZ) and non-newline "other" (\pC)
//     characters at the start of a line, a run of them directly before the end
//     of a line, and any run that is immediately followed by another such
//     character;
//  3. replace each remaining run of \pZ, or single non-newline \pC character,
//     with one space;
//  4. delete newlines at the very start and end of the string and every
//     newline that is followed by two more newlines.
$original = new Replacements([
'~\R~u' => "\n",
'/(?:^((\pZ)+|((?!\n)\pC)+)(?1)*)|((?1)$)|(?:((?2)+|(?3)+)(?=(?2)|(?3)))/um' => '',
'/(\pZ+)|((?!\n)\pC)/u' => ' ',
'/(^\n+)|(\n+$)|(\n(?=\n{2}))/u' => ''
]);
// Strategy "simple": five straightforward passes, described line by line.
$simple = new Replacements([
'~\A[\pZ\pC]+|[\pZ\pC]+\z~u' => '', # trim the whole string
'~\R~u' => "\n", # normalize newlines
'~\pZ+|[^\n\PC]+~u' => ' ', # replace runs of Z, and runs of C other than "\n", with a space
'~^ +| +$| \K +~m' => '', # trim each line, collapse consecutive spaces
'~\n\n\K\n+~' => '' # remove newlines beyond two consecutive ones
]);
// Strategy "optimized": three passes.
//  1. byte-level newline normalization without the u flag: CR LF, CR, VT, FF
//     and the UTF-8 encodings of U+2028 / U+2029 become "\n" (S requests extra
//     pattern analysis);
//  2. one anchored (A), UTF-8 (u), extended-syntax (x) pattern that walks the
//     string: it either skips a run of ordinary characters (\K) and then
//     consumes the Z/C characters that follow, capturing at most two newlines,
//     or consumes a run of Z/C characters on its own. The match is replaced by
//     the captured newlines followed by one space;
//  3. per line (m flag), delete a space that ends a line or that is the first
//     character of a line.
$optimized = new Replacements([
'~\r\n?|\x0b|\f|\xe2\x80[\xa8\xa9]~S' => "\n",
'~
[^\pZ\pC]+ \K
\pZ* (?:[^\PC\n]+\pZ*)*
(?: (\n) \pZ*+ (?:[^\PC\n]+\pZ*)*+ (?: (\n) [\pZ\pC]* )?+ (?!\z) | [\pZ\pC]+ )?
|
[\pZ\pC]+
~Aux' => '$1$2 ',
'~ (?:$|(?<=^ ))~m' => ''
]);
// Strategy "function": split, clean, re-join.
// The string is split on a leading or trailing run of C/Z characters and on
// newline sequences. When a second newline sequence follows (possibly
// surrounded by other C/Z characters) it is captured and, because of
// PREG_SPLIT_DELIM_CAPTURE, returned as a part of its own;
// PREG_SPLIT_NO_EMPTY drops empty parts. Every part then has its C/Z runs
// replaced by a space and is trimmed, so a captured newline becomes an empty
// part, and the parts are joined with "\n".
// The parameter is declared by reference but is not modified.
$func = new Replacements(function (&$str) {
$parts = preg_split('~^[\pC\pZ]+|[\pC\pZ]+$|\R(?:[\pC\pZ]*?(\R)[\pC\pZ]*)?~u', $str, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
return implode("\n", array_map(function($i) { return trim(preg_replace('~[\pC\pZ]+~u', ' ', $i));}, $parts));
});
// tests
// Benchmark harness: check that all strategies agree on the sample input,
// then time each of them and print the mean duration per call.
$tests = ['original' => $original, 'simple' => $simple, 'function' => $func, 'optimized' => $optimized];

// Input: both samples concatenated and repeated ten times.
$str = $strings['user6003859'] . $strings['mickmackusa'];
$str = str_repeat($str, 10);

// Correctness check. The loop iterates by value: nothing is modified through
// the loop variable, and a by-reference loop would leave $test aliased to the
// last element of $tests after the loop.
$res = [];
foreach ($tests as $k => $test) {
    $res[$k] = $test->execute($str);
}
// var_dump() prints by itself and returns nothing, so it is called as a
// statement instead of being passed to echo.
echo 'same result: ';
var_dump(count(array_unique($res)) === 1);

// Timing. The execution order is reshuffled on every repetition so that no
// strategy systematically benefits from its position in the sequence.
$names = array_keys($tests);
$times = array_fill_keys($names, 0);
define('REPETITIONS', 100);
for ($i = 0; $i < REPETITIONS; $i++) {
    shuffle($names);
    foreach ($names as $name) {
        $start = microtime(true);
        $tests[$name]->execute($str);
        $stop = microtime(true);
        $times[$name] += $stop - $start;
    }
}

// Report the mean time per call, in seconds, for each strategy.
foreach ($times as $k => $v) {
    printf("%-12s: %.2es\n", $k, $v/REPETITIONS);
}
// display($res['optimized'], $charmap);
- Output for 7.3.3
- same result: bool(true)
original : 1.41e-4s
simple : 8.26e-5s
function : 9.19e-5s
optimized : 5.14e-5s
- Output for 7.3.2
- same result: bool(true)
original : 8.08e-5s
simple : 4.54e-5s
function : 5.06e-5s
optimized : 2.76e-5s
- Output for 7.3.1
- same result: bool(true)
original : 8.30e-5s
simple : 4.46e-5s
function : 4.92e-5s
optimized : 2.82e-5s
- Output for 7.3.0
- same result: bool(true)
original : 9.03e-5s
simple : 5.11e-5s
function : 5.48e-5s
optimized : 3.13e-5s
- Output for 7.2.16
- same result: bool(true)
original : 1.60e-4s
simple : 8.24e-5s
function : 1.04e-4s
optimized : 5.50e-5s
- Output for 7.2.15
- same result: bool(true)
original : 8.42e-5s
simple : 4.68e-5s
function : 5.81e-5s
optimized : 3.12e-5s
- Output for 7.2.14
- same result: bool(true)
original : 1.36e-4s
simple : 7.50e-5s
function : 9.41e-5s
optimized : 5.04e-5s
- Output for 7.2.13
- same result: bool(true)
original : 1.52e-4s
simple : 8.57e-5s
function : 1.02e-4s
optimized : 5.52e-5s
- Output for 7.2.12
- same result: bool(true)
original : 7.12e-5s
simple : 4.08e-5s
function : 5.05e-5s
optimized : 2.66e-5s
- Output for 7.2.11
- same result: bool(true)
original : 1.41e-4s
simple : 7.92e-5s
function : 9.40e-5s
optimized : 5.01e-5s
- Output for 7.2.10
- same result: bool(true)
original : 1.41e-4s
simple : 8.01e-5s
function : 9.55e-5s
optimized : 5.24e-5s
- Output for 7.2.9
- same result: bool(true)
original : 8.68e-5s
simple : 4.92e-5s
function : 6.22e-5s
optimized : 3.29e-5s
- Output for 7.2.8
- same result: bool(true)
original : 7.68e-5s
simple : 4.37e-5s
function : 5.35e-5s
optimized : 2.82e-5s
- Output for 7.2.7
- same result: bool(true)
original : 1.25e-4s
simple : 7.14e-5s
function : 9.09e-5s
optimized : 4.66e-5s
- Output for 7.2.6
- same result: bool(true)
original : 7.82e-5s
simple : 4.54e-5s
function : 5.30e-5s
optimized : 2.88e-5s
- Output for 7.2.5
- same result: bool(true)
original : 8.50e-5s
simple : 4.35e-5s
function : 6.94e-5s
optimized : 4.00e-5s
- Output for 7.2.4
- same result: bool(true)
original : 8.07e-5s
simple : 4.51e-5s
function : 5.42e-5s
optimized : 3.00e-5s
- Output for 7.2.3
- same result: bool(true)
original : 1.39e-4s
simple : 7.86e-5s
function : 9.74e-5s
optimized : 5.20e-5s
- Output for 7.2.2
- same result: bool(true)
original : 1.32e-4s
simple : 7.64e-5s
function : 9.24e-5s
optimized : 4.81e-5s
- Output for 7.2.1
- same result: bool(true)
original : 1.35e-4s
simple : 7.45e-5s
function : 9.41e-5s
optimized : 5.04e-5s
- Output for 7.2.0
- same result: bool(true)
original : 7.35e-5s
simple : 4.12e-5s
function : 5.16e-5s
optimized : 2.72e-5s
- Output for 7.1.27
- same result: bool(true)
original : 8.75e-5s
simple : 7.07e-5s
function : 6.35e-5s
optimized : 3.30e-5s
- Output for 7.1.26
- same result: bool(true)
original : 8.69e-5s
simple : 6.85e-5s
function : 6.38e-5s
optimized : 3.41e-5s
- Output for 7.1.25
- same result: bool(true)
original : 1.40e-4s
simple : 1.10e-4s
function : 1.00e-4s
optimized : 5.38e-5s
- Output for 7.1.24
- same result: bool(true)
original : 1.32e-4s
simple : 1.01e-4s
function : 9.60e-5s
optimized : 5.08e-5s
- Output for 7.1.23
- same result: bool(true)
original : 9.88e-5s
simple : 7.87e-5s
function : 7.21e-5s
optimized : 3.86e-5s
- Output for 7.1.22
- same result: bool(true)
original : 9.61e-5s
simple : 7.76e-5s
function : 7.07e-5s
optimized : 3.70e-5s
- Output for 7.1.21
- same result: bool(true)
original : 9.43e-5s
simple : 7.58e-5s
function : 6.88e-5s
optimized : 3.64e-5s
- Output for 7.1.20
- same result: bool(true)
original : 1.84e-4s
simple : 1.39e-4s
function : 1.37e-4s
optimized : 7.16e-5s
- Output for 7.1.19
- same result: bool(true)
original : 1.26e-4s
simple : 1.02e-4s
function : 9.35e-5s
optimized : 4.92e-5s
- Output for 7.1.18
- same result: bool(true)
original : 1.16e-4s
simple : 8.96e-5s
function : 8.33e-5s
optimized : 4.48e-5s
- Output for 7.1.17
- same result: bool(true)
original : 1.20e-4s
simple : 9.45e-5s
function : 8.80e-5s
optimized : 4.64e-5s
- Output for 7.1.16
- same result: bool(true)
original : 1.26e-4s
simple : 9.82e-5s
function : 9.49e-5s
optimized : 4.92e-5s
- Output for 7.1.15
- same result: bool(true)
original : 8.99e-5s
simple : 7.16e-5s
function : 6.76e-5s
optimized : 3.47e-5s
- Output for 7.1.14
- same result: bool(true)
original : 1.23e-4s
simple : 9.82e-5s
function : 9.33e-5s
optimized : 4.91e-5s
- Output for 7.1.13
- same result: bool(true)
original : 1.35e-4s
simple : 1.05e-4s
function : 9.70e-5s
optimized : 5.38e-5s
- Output for 7.1.12
- same result: bool(true)
original : 1.06e-4s
simple : 8.36e-5s
function : 8.23e-5s
optimized : 4.06e-5s
- Output for 7.1.11
- same result: bool(true)
original : 1.15e-4s
simple : 8.92e-5s
function : 8.32e-5s
optimized : 4.20e-5s
- Output for 7.1.10
- same result: bool(true)
original : 1.31e-4s
simple : 1.02e-4s
function : 9.45e-5s
optimized : 5.04e-5s
- Output for 7.1.9
- same result: bool(true)
original : 1.20e-4s
simple : 9.47e-5s
function : 8.70e-5s
optimized : 4.49e-5s
- Output for 7.1.8
- same result: bool(true)
original : 1.09e-4s
simple : 8.30e-5s
function : 8.22e-5s
optimized : 4.18e-5s
- Output for 7.1.7
- same result: bool(true)
original : 1.23e-4s
simple : 9.85e-5s
function : 8.93e-5s
optimized : 4.68e-5s
- Output for 7.1.6
- same result: bool(true)
original : 9.26e-5s
simple : 7.75e-5s
function : 6.86e-5s
optimized : 3.54e-5s
- Output for 7.1.5
- same result: bool(true)
original : 1.01e-4s
simple : 8.17e-5s
function : 7.49e-5s
optimized : 3.98e-5s
- Output for 7.1.4
- same result: bool(true)
original : 1.43e-4s
simple : 1.15e-4s
function : 1.10e-4s
optimized : 5.46e-5s
- Output for 7.1.3
- same result: bool(true)
original : 1.05e-4s
simple : 7.90e-5s
function : 7.37e-5s
optimized : 3.99e-5s
- Output for 7.1.2
- same result: bool(true)
original : 1.29e-4s
simple : 1.03e-4s
function : 9.59e-5s
optimized : 4.81e-5s
- Output for 7.1.1
- same result: bool(true)
original : 1.67e-4s
simple : 1.26e-4s
function : 1.23e-4s
optimized : 6.61e-5s
- Output for 7.1.0
- same result: bool(true)
original : 9.75e-5s
simple : 7.71e-5s
function : 7.38e-5s
optimized : 5.61e-5s
- Output for 7.0.33
- same result: bool(true)
original : 1.19e-4s
simple : 9.39e-5s
function : 8.74e-5s
optimized : 4.55e-5s
- Output for 7.0.32
- same result: bool(true)
original : 1.29e-4s
simple : 1.01e-4s
function : 9.60e-5s
optimized : 4.82e-5s
- Output for 7.0.31
- same result: bool(true)
original : 1.60e-4s
simple : 1.15e-4s
function : 1.14e-4s
optimized : 6.22e-5s
- Output for 7.0.30
- same result: bool(true)
original : 1.37e-4s
simple : 1.08e-4s
function : 9.80e-5s
optimized : 5.15e-5s
- Output for 7.0.29
- same result: bool(true)
original : 9.21e-5s
simple : 7.20e-5s
function : 6.61e-5s
optimized : 3.51e-5s
- Output for 7.0.28
- same result: bool(true)
original : 1.18e-4s
simple : 9.21e-5s
function : 9.07e-5s
optimized : 4.57e-5s
- Output for 7.0.27
- same result: bool(true)
original : 9.94e-5s
simple : 7.84e-5s
function : 7.41e-5s
optimized : 4.58e-5s
- Output for 7.0.26
- same result: bool(true)
original : 1.32e-4s
simple : 1.01e-4s
function : 9.58e-5s
optimized : 4.76e-5s
- Output for 7.0.25
- same result: bool(true)
original : 1.85e-4s
simple : 1.34e-4s
function : 1.30e-4s
optimized : 7.17e-5s
- Output for 7.0.24
- same result: bool(true)
original : 1.18e-4s
simple : 9.47e-5s
function : 8.76e-5s
optimized : 4.66e-5s
- Output for 7.0.23
- same result: bool(true)
original : 9.39e-5s
simple : 7.44e-5s
function : 6.80e-5s
optimized : 3.57e-5s
- Output for 7.0.22
- same result: bool(true)
original : 1.26e-4s
simple : 9.56e-5s
function : 8.81e-5s
optimized : 4.68e-5s
- Output for 7.0.21
- same result: bool(true)
original : 9.38e-5s
simple : 7.61e-5s
function : 6.87e-5s
optimized : 3.72e-5s
- Output for 7.0.20
- same result: bool(true)
original : 1.05e-4s
simple : 8.54e-5s
function : 7.87e-5s
optimized : 4.03e-5s
- Output for 7.0.19
- same result: bool(true)
original : 1.05e-4s
simple : 8.24e-5s
function : 7.67e-5s
optimized : 3.92e-5s
- Output for 7.0.18
- same result: bool(true)
original : 1.23e-4s
simple : 9.85e-5s
function : 9.06e-5s
optimized : 4.77e-5s
- Output for 7.0.17
- same result: bool(true)
original : 9.12e-5s
simple : 7.36e-5s
function : 6.85e-5s
optimized : 3.51e-5s
- Output for 7.0.16
- same result: bool(true)
original : 1.42e-4s
simple : 1.04e-4s
function : 9.94e-5s
optimized : 5.53e-5s
- Output for 7.0.15
- same result: bool(true)
original : 1.05e-4s
simple : 8.21e-5s
function : 8.10e-5s
optimized : 3.96e-5s
- Output for 7.0.14
- same result: bool(true)
original : 1.26e-4s
simple : 9.93e-5s
function : 9.51e-5s
optimized : 4.83e-5s
- Output for 7.0.13
- same result: bool(true)
original : 1.21e-4s
simple : 9.24e-5s
function : 8.68e-5s
optimized : 6.00e-5s
- Output for 7.0.12
- same result: bool(true)
original : 1.25e-4s
simple : 1.00e-4s
function : 9.87e-5s
optimized : 4.88e-5s
- Output for 7.0.11
- same result: bool(true)
original : 9.09e-5s
simple : 7.46e-5s
function : 6.77e-5s
optimized : 3.67e-5s
- Output for 7.0.10
- same result: bool(true)
original : 1.08e-4s
simple : 8.52e-5s
function : 7.90e-5s
optimized : 4.04e-5s
- Output for 7.0.9
- same result: bool(true)
original : 1.18e-4s
simple : 9.09e-5s
function : 8.44e-5s
optimized : 4.27e-5s
- Output for 7.0.8
- same result: bool(true)
original : 1.00e-4s
simple : 7.87e-5s
function : 7.44e-5s
optimized : 3.90e-5s
- Output for 7.0.7
- same result: bool(true)
original : 9.42e-5s
simple : 7.63e-5s
function : 6.94e-5s
optimized : 3.85e-5s
- Output for 7.0.6
- same result: bool(true)
original : 1.06e-4s
simple : 8.24e-5s
function : 8.21e-5s
optimized : 3.93e-5s
- Output for 7.0.5
- same result: bool(true)
original : 1.23e-4s
simple : 9.75e-5s
function : 9.45e-5s
optimized : 4.78e-5s
- Output for 7.0.4
- same result: bool(true)
original : 1.56e-4s
simple : 1.23e-4s
function : 1.15e-4s
optimized : 6.01e-5s
- Output for 7.0.3
- same result: bool(true)
original : 1.45e-4s
simple : 1.18e-4s
function : 1.12e-4s
optimized : 5.71e-5s
- Output for 7.0.2
- same result: bool(true)
original : 1.09e-4s
simple : 8.48e-5s
function : 8.05e-5s
optimized : 3.85e-5s
- Output for 7.0.1
- same result: bool(true)
original : 1.05e-4s
simple : 8.28e-5s
function : 7.84e-5s
optimized : 3.87e-5s
- Output for 7.0.0
- same result: bool(true)
original : 1.75e-4s
simple : 1.32e-4s
function : 1.32e-4s
optimized : 6.59e-5s
- Output for 5.6.40
- same result: bool(true)
original : 6.70e-4s
simple : 3.99e-4s
function : 3.04e-4s
optimized : 1.45e-4s
- Output for 5.6.39
- same result: bool(true)
original : 5.55e-4s
simple : 3.19e-4s
function : 2.53e-4s
optimized : 1.21e-4s
- Output for 5.6.38
- same result: bool(true)
original : 6.53e-4s
simple : 3.77e-4s
function : 3.10e-4s
optimized : 1.41e-4s
preferences:
103.02 ms | 401 KiB | 91 Q