<?php
// Mnemonic => raw byte sequence (UTF-8 encoded) for every special character
// that appears in the fixture strings.
$charmap = [
    'NUL'  => "\x00",             // NULL (U+0000)
    'SOH'  => "\x01",             // START OF HEADING (U+0001)
    'STX'  => "\x02",             // START OF TEXT (U+0002)
    'ETX'  => "\x03",             // END OF TEXT (U+0003)
    'EOT'  => "\x04",             // END OF TRANSMISSION (U+0004)
    'ENQ'  => "\x05",             // ENQUIRY (U+0005)
    'HT'   => "\x09",             // HORIZONTAL TAB (U+0009)
    'LF'   => "\x0a",             // LINE FEED (U+000A)
    'VT'   => "\x0b",             // VERTICAL TAB (U+000B)
    'CR'   => "\x0d",             // CARRIAGE RETURN (U+000D)
    'ETB'  => "\x17",             // END OF TRANSMISSION BLOCK (U+0017)
    'SP'   => "\x20",             // SPACE (U+0020)
    'ZWS'  => "\xe2\x80\x8b",     // ZERO WIDTH SPACE (U+200B)
    'MSBS' => "\xf0\x9d\x85\xb7", // MUSICAL SYMBOL BEGIN SLUR (U+1D177)
    'MSBP' => "\xf0\x9d\x85\xb9", // MUSICAL SYMBOL BEGIN PHRASE (U+1D179)
];

// Publish every mnemonic as a global constant (NUL, LF, SP, ...) so that the
// fixtures can be written as plain concatenations of those names.
array_walk($charmap, function ($bytes, $mnemonic) {
    define($mnemonic, $bytes);
});
// Benchmark fixtures, keyed by a label (apparently the handle of whoever
// supplied the sample - not verifiable from this file).
// Each value is spelled out as a concatenation of the character constants
// (LF, SP, NUL, ...) and literal letters; every source line below ends where
// the fixture itself has a line break, except where noted.
$strings = [
// Sample 1: blank lines at the start, in the middle and at the end; single
// and repeated SP; ZERO WIDTH SPACE and the two musical-symbol characters
// (MSBS, MSBP) placed before, between and after letters.
'user6003859' =>
LF .
LF .
LF .
'a' . SP . 'b'. SP . SP . 'c' . SP . SP . SP . LF .
SP . 'd' . SP . SP . SP . SP . 'e' . LF .
MSBS . 'f' . MSBS . 'g' . MSBS . MSBS . 'h' . MSBS . MSBS . ZWS . ZWS . MSBP . MSBP . 'i' . MSBP . SP . MSBP . SP . 'j' . LF .
LF .
LF .
LF .
LF .
'k' . SP . 'l' . SP . 'm' . SP . 'n' . SP . SP . SP . LF .
MSBP . 'o' . MSBP . MSBP . 'p' . LF .
LF .
LF .
LF,
// Sample 2: C0 control characters (NUL, SOH, STX, ETX, EOT, ENQ, ETB), other
// line-break style characters (CR LF pairs, a lone CR, VT), tabs, and letters
// outside ASCII.
'mickmackusa' =>
NUL . LF .
LF .
SOH . LF .
CR . LF .
// VT is not followed by LF here, so the next source line continues the same fixture line.
VT .
ETB . 'a' . SP . 'ab' . CR . LF .
// Two tabs followed by a lone CR, then a CR LF pair.
HT . HT . CR .
CR . LF .
'cà' . SOH . 'ê߀' . NUL . NUL . 'abcbc'. SP . SP . SP . 'd' . LF .
LF .
HT . CR . LF .
ENQ . SP . SP . SP . 'e' . STX . LF .
ETX . LF .
EOT . LF
];
/**
 * Print a string line by line with its special characters made visible.
 *
 * Every character sequence listed in $charmap is replaced by its mnemonic
 * wrapped in braces (a line feed is printed as "{LF}", for example). Each
 * line of the input - including its terminating LF, if any - is printed
 * followed by PHP_EOL.
 *
 * @param string $str     Text to display. An empty string prints nothing.
 * @param array  $charmap Map of mnemonic => character sequence.
 *
 * @return void
 */
function display($str, $charmap) {
    // strtr() table: raw character sequence => "{MNEMONIC}".
    $converter = array_map(function ($mnemonic) { return '{' . $mnemonic . '}'; }, array_flip($charmap));

    // Split right after every LF so each piece keeps its own line terminator.
    // This replaces the former fopen("data:text/plain,$str") round trip, which
    // URL-decoded the payload (turning "%41" into "A") and could not be opened
    // at all when the text contained a NUL byte.
    $lines = preg_split('/(?<=\n)/', (string) $str, -1, PREG_SPLIT_NO_EMPTY);

    foreach ($lines as $line) {
        echo strtr($line, $converter), PHP_EOL;
    }
}
/**
 * A text transformation that is either an ordered batch of regex
 * replacements or a single user-supplied callable.
 *
 * Construct it with an array (pattern => replacement) to get the regex
 * behaviour, or with a callable to delegate the work to that callable.
 */
class Replacements {
    /** Strategy marker: delegate to the stored callable. */
    const FUNC = 0;
    /** Strategy marker: run preg_replace() with the stored patterns. */
    const REGEX = 1;

    /** @var string[] Regex patterns in application order (REGEX strategy only). */
    protected $patterns;
    /** @var string[] Replacements, index-aligned with $patterns (REGEX strategy only). */
    protected $replacements;
    /** @var callable|null Callback to invoke (FUNC strategy only). */
    protected $func;
    /**
     * @var int|null Selected strategy: self::FUNC or self::REGEX.
     *
     * Previously this was assigned without being declared, i.e. created as a
     * dynamic property at construction time.
     */
    protected $type;
    /** @var mixed Never read or written by this class; retained only so subclasses that mention it keep working. */
    protected $typeRegex;

    /**
     * @param array|callable $arg Either a map of regex pattern => replacement
     *                            string, or a callable receiving the subject
     *                            string and returning the transformed string.
     *                            Arrays are always treated as pattern maps,
     *                            even if they would also be valid callables.
     *
     * @throws InvalidArgumentException When $arg is neither an array nor callable.
     */
    public function __construct($arg) {
        if (is_array($arg)) {
            $this->type = self::REGEX;
            $this->patterns = [];
            $this->replacements = [];
            $this->addPatterns($arg);
        } elseif (is_callable($arg)) {
            $this->type = self::FUNC;
            $this->addFunction($arg);
        } else {
            // InvalidArgumentException extends Exception, so callers that
            // catch the generic type are unaffected.
            throw new InvalidArgumentException('invalid argument type');
        }
    }

    /**
     * Append pattern/replacement pairs, preserving the order of $replacements.
     *
     * @param array $replacements Map of regex pattern => replacement string.
     *
     * @return void
     */
    protected function addPatterns($replacements) {
        foreach ($replacements as $pattern => $replacement) {
            $this->patterns[] = $pattern;
            $this->replacements[] = $replacement;
        }
    }

    /**
     * Store the callable used by the FUNC strategy.
     *
     * @param callable $func
     *
     * @return void
     */
    protected function addFunction($func) {
        $this->func = $func;
    }

    /**
     * Apply the configured transformation to $str.
     *
     * @param string $str Subject string.
     *
     * @return mixed The preg_replace() result (null if the regex engine
     *               reports an error) or whatever the callable returns.
     */
    public function execute($str) {
        if ($this->type === self::REGEX) {
            return preg_replace($this->patterns, $this->replacements, $str);
        }

        // The subject is handed over as a reference so callables declared
        // with a by-reference parameter are supported as well.
        return call_user_func_array($this->func, [&$str]);
    }
}
// Three regex pipelines that are benchmarked against each other further down.
// Within each pipeline the patterns are applied in the listed order.

// Pipeline labelled 'original' (four passes, all with /u).
$original = new Replacements([
// Pass 1: every newline sequence matched by \R becomes a single LF.
'~\R~u' => "\n",
// Pass 2 (/m): deletes separator (\pZ) or non-LF "other" (\pC) characters
// found at the start of a line, right before the end of a line, or directly
// followed by another such character - so at most the last character of a
// run survives.
'/(?:^((\pZ)+|((?!\n)\pC)+)(?1)*)|((?1)$)|(?:((?2)+|(?3)+)(?=(?2)|(?3)))/um' => '',
// Pass 3: each remaining separator run, and each remaining non-LF \pC
// character, becomes one space.
'/(\pZ+)|((?!\n)\pC)/u' => ' ',
// Pass 4 (no /m, so ^ and $ refer to the whole subject): drops leading LFs,
// trailing LFs, and any LF that is followed by two more LFs.
'/(^\n+)|(\n+$)|(\n(?=\n{2}))/u' => ''
]);
// Pipeline labelled 'simple' (five passes).
$simple = new Replacements([
'~\A[\pZ\pC]+|[\pZ\pC]+\z~u' => '', # trim the string
'~\R~u' => "\n", # normalize newlines
'~\pZ+|[^\n\PC]+~u' => ' ', # replace runs of Z, and runs of C other than LF, with one space
'~^ +| +$| \K +~m' => '', # trim lines, delete consecutive spaces
'~\n\n\K\n+~' => '' # remove any newlines beyond two consecutive ones
]);
// Pipeline labelled 'optimized' (three passes).
$optimized = new Replacements([
// Pass 1: byte-level (no /u) newline normalisation. CR LF, lone CR, VT, FF
// and the UTF-8 byte sequences of U+2028 / U+2029 become LF.
// NOTE(review): unlike \R under /u, this alternation has no entry for
// U+0085 (NEL) - confirm that the difference is intended.
'~\r\n?|\x0b|\f|\xe2\x80[\xa8\xa9]~S' => "\n",
// Pass 2: anchored (/A), extended (/x), UTF-8 (/u). The first branch skips a
// run of characters that are neither \pZ nor \pC (\K drops them from the
// match) and then consumes the separators/controls that follow; the second
// branch consumes a separator/control run on its own. Up to two LFs found in
// the consumed run are captured as $1 and $2, and the match is replaced by
// those captures followed by one space.
// The pattern text below is whitespace-sensitive only in the sense that it
// must stay a single string literal; do not insert PHP comments inside it.
'~
[^\pZ\pC]+ \K
\pZ* (?:[^\PC\n]+\pZ*)*
(?: (\n) \pZ*+ (?:[^\PC\n]+\pZ*)*+ (?: (\n) [\pZ\pC]* )?+ (?!\z) | [\pZ\pC]+ )?
|
[\pZ\pC]+
~Aux' => '$1$2 ',
// Pass 3 (/m): removes a space that sits at the end of a line, or that is the
// first character of a line.
'~ (?:$|(?<=^ ))~m' => ''
]);
// tests
// Label => pipeline under test.
$tests = ['original' => $original, 'simple' => $simple, 'optimized' => $optimized];

// Subject: both fixtures back to back, repeated ten times.
$str = $strings['user6003859'] . $strings['mickmackusa'];
$str = str_repeat($str, 10);

// Step 1 - equivalence check: run every pipeline once and compare the outputs.
// Iterating by value: the former by-reference loop variable was never
// written through and stayed bound to the last element after the loop.
$res = [];
foreach ($tests as $k => $test) {
    $res[$k] = $test->execute($str);
}

// preg_replace() yields null on a regex engine error; without the null guard
// three failed runs would compare equal and be reported as "same result".
$sameResult = !in_array(null, $res, true) && count(array_unique($res)) === 1;
echo 'same result: ';
var_dump($sameResult);

// Step 2 - timing: accumulate wall-clock time per pipeline, shuffling the
// execution order on every repetition so no pipeline always runs first.
$names = array_keys($tests);
$times = array_fill_keys($names, 0);
define('REPETITIONS', 100);

for ($i = 0; $i < REPETITIONS; $i++) {
    shuffle($names);
    foreach ($names as $name) {
        $start = microtime(true);
        $tests[$name]->execute($str);
        $stop = microtime(true);
        $times[$name] += $stop - $start;
    }
}

// Report the mean duration of one execute() call, in seconds.
foreach ($times as $k => $v) {
    printf("%-12s: %.2es\n", $k, $v / REPETITIONS);
}

// Uncomment to inspect the normalised text with special characters spelled out.
// display($res['optimized'], $charmap);
- Output for 7.3.12
- same result: bool(true)
original : 1.04e-4s
simple : 6.59e-5s
optimized : 3.85e-5s
- Output for 7.3.11
- same result: bool(true)
original : 1.16e-4s
simple : 7.36e-5s
optimized : 4.28e-5s
- Output for 7.3.10
- same result: bool(true)
original : 1.04e-4s
simple : 6.62e-5s
optimized : 3.84e-5s
- Output for 7.3.9
- same result: bool(true)
original : 1.16e-4s
simple : 7.29e-5s
optimized : 4.49e-5s
- Output for 7.3.8
- same result: bool(true)
original : 1.19e-4s
simple : 7.44e-5s
optimized : 4.60e-5s
- Output for 7.3.7
- same result: bool(true)
original : 1.03e-4s
simple : 6.51e-5s
optimized : 3.85e-5s
- Output for 7.3.6
- same result: bool(true)
original : 1.09e-4s
simple : 7.02e-5s
optimized : 4.19e-5s
- Output for 7.3.5
- same result: bool(true)
original : 1.05e-4s
simple : 6.65e-5s
optimized : 3.97e-5s
- Output for 7.3.4
- same result: bool(true)
original : 1.11e-4s
simple : 7.15e-5s
optimized : 4.43e-5s
- Output for 7.3.3
- same result: bool(true)
original : 1.06e-4s
simple : 6.91e-5s
optimized : 4.01e-5s
- Output for 7.3.2
- same result: bool(true)
original : 1.02e-4s
simple : 6.68e-5s
optimized : 3.85e-5s
- Output for 7.3.1
- same result: bool(true)
original : 1.03e-4s
simple : 6.62e-5s
optimized : 3.84e-5s
- Output for 7.3.0
- same result: bool(true)
original : 1.01e-4s
simple : 6.30e-5s
optimized : 3.67e-5s
- Output for 7.2.24
- same result: bool(true)
original : 1.04e-4s
simple : 6.83e-5s
optimized : 4.02e-5s
- Output for 7.2.23
- same result: bool(true)
original : 1.12e-4s
simple : 7.13e-5s
optimized : 4.30e-5s
- Output for 7.2.22
- same result: bool(true)
original : 9.74e-5s
simple : 6.38e-5s
optimized : 3.75e-5s
- Output for 7.2.21
- same result: bool(true)
original : 1.20e-4s
simple : 7.73e-5s
optimized : 4.58e-5s
- Output for 7.2.20
- same result: bool(true)
original : 9.88e-5s
simple : 6.40e-5s
optimized : 3.70e-5s
- Output for 7.2.19
- same result: bool(true)
original : 1.11e-4s
simple : 7.19e-5s
optimized : 4.09e-5s
- Output for 7.2.18
- same result: bool(true)
original : 1.06e-4s
simple : 6.87e-5s
optimized : 4.06e-5s
- Output for 7.2.17
- same result: bool(true)
original : 1.12e-4s
simple : 7.22e-5s
optimized : 4.24e-5s
- Output for 7.2.16
- same result: bool(true)
original : 9.69e-5s
simple : 6.18e-5s
optimized : 3.70e-5s
- Output for 7.2.15
- same result: bool(true)
original : 9.79e-5s
simple : 6.38e-5s
optimized : 3.72e-5s
- Output for 7.2.14
- same result: bool(true)
original : 9.76e-5s
simple : 6.29e-5s
optimized : 3.78e-5s
- Output for 7.2.13
- same result: bool(true)
original : 9.61e-5s
simple : 6.25e-5s
optimized : 7.21e-5s
- Output for 7.2.12
- same result: bool(true)
original : 9.32e-5s
simple : 6.05e-5s
optimized : 3.49e-5s
- Output for 7.2.11
- same result: bool(true)
original : 9.44e-5s
simple : 6.12e-5s
optimized : 3.55e-5s
- Output for 7.2.10
- same result: bool(true)
original : 9.69e-5s
simple : 6.21e-5s
optimized : 3.67e-5s
- Output for 7.2.9
- same result: bool(true)
original : 9.77e-5s
simple : 6.22e-5s
optimized : 3.72e-5s
- Output for 7.2.8
- same result: bool(true)
original : 1.09e-4s
simple : 7.05e-5s
optimized : 4.33e-5s
- Output for 7.2.7
- same result: bool(true)
original : 9.67e-5s
simple : 6.28e-5s
optimized : 3.71e-5s
- Output for 7.2.6
- same result: bool(true)
original : 1.10e-4s
simple : 7.26e-5s
optimized : 4.25e-5s
- Output for 7.2.5
- same result: bool(true)
original : 9.85e-5s
simple : 6.49e-5s
optimized : 3.69e-5s
- Output for 7.2.4
- same result: bool(true)
original : 9.54e-5s
simple : 6.29e-5s
optimized : 3.62e-5s
- Output for 7.2.3
- same result: bool(true)
original : 1.10e-4s
simple : 6.98e-5s
optimized : 4.11e-5s
- Output for 7.2.2
- same result: bool(true)
original : 1.05e-4s
simple : 6.75e-5s
optimized : 3.99e-5s
- Output for 7.2.1
- same result: bool(true)
original : 9.68e-5s
simple : 6.25e-5s
optimized : 3.64e-5s
- Output for 7.2.0
- same result: bool(true)
original : 9.58e-5s
simple : 6.18e-5s
optimized : 3.62e-5s
- Output for 7.1.33
- same result: bool(true)
original : 1.21e-4s
simple : 9.75e-5s
optimized : 4.68e-5s
- Output for 7.1.32
- same result: bool(true)
original : 1.11e-4s
simple : 8.90e-5s
optimized : 4.25e-5s
- Output for 7.1.31
- same result: bool(true)
original : 1.34e-4s
simple : 1.06e-4s
optimized : 5.23e-5s
- Output for 7.1.30
- same result: bool(true)
original : 1.25e-4s
simple : 9.93e-5s
optimized : 4.84e-5s
- Output for 7.1.29
- same result: bool(true)
original : 1.24e-4s
simple : 9.96e-5s
optimized : 4.88e-5s
- Output for 7.1.28
- same result: bool(true)
original : 1.19e-4s
simple : 9.68e-5s
optimized : 4.49e-5s
- Output for 7.1.27
- same result: bool(true)
original : 1.08e-4s
simple : 8.75e-5s
optimized : 4.18e-5s
- Output for 7.1.26
- same result: bool(true)
original : 1.07e-4s
simple : 8.72e-5s
optimized : 4.22e-5s
- Output for 7.1.25
- same result: bool(true)
original : 1.14e-4s
simple : 9.12e-5s
optimized : 4.39e-5s
- Output for 7.1.11
- same result: bool(true)
original : 1.57e-4s
simple : 1.19e-4s
optimized : 5.50e-5s
- Output for 7.1.10
- same result: bool(true)
original : 2.98e-4s
simple : 2.31e-4s
optimized : 1.14e-4s
- Output for 7.1.9
- same result: bool(true)
original : 2.49e-4s
simple : 1.95e-4s
optimized : 9.21e-5s
- Output for 7.1.8
- same result: bool(true)
original : 2.94e-4s
simple : 2.23e-4s
optimized : 1.13e-4s
- Output for 7.1.7
- same result: bool(true)
original : 1.63e-4s
simple : 1.27e-4s
optimized : 5.97e-5s
- Output for 7.1.6
- same result: bool(true)
original : 1.46e-4s
simple : 1.15e-4s
optimized : 5.27e-5s
- Output for 7.1.5
- same result: bool(true)
original : 1.52e-4s
simple : 1.15e-4s
optimized : 5.22e-5s
- Output for 7.1.4
- same result: bool(true)
original : 1.47e-4s
simple : 1.13e-4s
optimized : 5.91e-5s
- Output for 7.1.3
- same result: bool(true)
original : 1.49e-4s
simple : 1.16e-4s
optimized : 5.41e-5s
- Output for 7.1.2
- same result: bool(true)
original : 1.66e-4s
simple : 1.40e-4s
optimized : 6.60e-5s
- Output for 7.1.1
- same result: bool(true)
original : 1.48e-4s
simple : 1.15e-4s
optimized : 5.46e-5s
- Output for 7.1.0
- same result: bool(true)
original : 1.60e-4s
simple : 1.31e-4s
optimized : 5.88e-5s
- Output for 7.0.25
- same result: bool(true)
original : 1.68e-4s
simple : 1.28e-4s
optimized : 6.09e-5s
- Output for 7.0.24
- same result: bool(true)
original : 1.53e-4s
simple : 1.21e-4s
optimized : 5.41e-5s
- Output for 7.0.23
- same result: bool(true)
original : 1.91e-4s
simple : 1.50e-4s
optimized : 6.93e-5s
- Output for 7.0.22
- same result: bool(true)
original : 1.67e-4s
simple : 1.31e-4s
optimized : 6.40e-5s
- Output for 7.0.21
- same result: bool(true)
original : 1.87e-4s
simple : 1.40e-4s
optimized : 6.73e-5s
- Output for 7.0.20
- same result: bool(true)
original : 1.63e-4s
simple : 1.29e-4s
optimized : 5.98e-5s
- Output for 7.0.19
- same result: bool(true)
original : 1.42e-4s
simple : 1.13e-4s
optimized : 5.23e-5s
- Output for 7.0.18
- same result: bool(true)
original : 1.41e-4s
simple : 1.17e-4s
optimized : 5.20e-5s
- Output for 7.0.17
- same result: bool(true)
original : 1.48e-4s
simple : 1.16e-4s
optimized : 5.74e-5s
- Output for 7.0.16
- same result: bool(true)
original : 1.67e-4s
simple : 1.34e-4s
optimized : 6.48e-5s
- Output for 7.0.15
- same result: bool(true)
original : 1.71e-4s
simple : 1.39e-4s
optimized : 6.34e-5s
- Output for 7.0.14
- same result: bool(true)
original : 1.51e-4s
simple : 1.21e-4s
optimized : 5.61e-5s
- Output for 7.0.13
- same result: bool(true)
original : 2.54e-4s
simple : 1.86e-4s
optimized : 9.53e-5s
- Output for 7.0.12
- same result: bool(true)
original : 2.39e-4s
simple : 1.72e-4s
optimized : 8.66e-5s
- Output for 7.0.11
- same result: bool(true)
original : 1.54e-4s
simple : 1.18e-4s
optimized : 5.49e-5s
- Output for 7.0.10
- same result: bool(true)
original : 1.52e-4s
simple : 1.24e-4s
optimized : 5.97e-5s
- Output for 7.0.9
- same result: bool(true)
original : 1.52e-4s
simple : 1.25e-4s
optimized : 5.43e-5s
- Output for 7.0.8
- same result: bool(true)
original : 1.87e-4s
simple : 1.52e-4s
optimized : 6.44e-5s
- Output for 7.0.7
- same result: bool(true)
original : 2.86e-4s
simple : 2.25e-4s
optimized : 1.10e-4s
- Output for 7.0.6
- same result: bool(true)
original : 1.75e-4s
simple : 1.32e-4s
optimized : 6.52e-5s
- Output for 7.0.5
- same result: bool(true)
original : 1.51e-4s
simple : 1.25e-4s
optimized : 5.56e-5s
- Output for 7.0.4
- same result: bool(true)
original : 1.76e-4s
simple : 1.37e-4s
optimized : 6.39e-5s
- Output for 7.0.3
- same result: bool(true)
original : 1.58e-4s
simple : 1.23e-4s
optimized : 5.66e-5s
- Output for 7.0.2
- same result: bool(true)
original : 1.47e-4s
simple : 1.16e-4s
optimized : 5.55e-5s
- Output for 7.0.1
- same result: bool(true)
original : 1.88e-4s
simple : 1.47e-4s
optimized : 7.28e-5s
- Output for 7.0.0
- same result: bool(true)
original : 2.41e-4s
simple : 1.89e-4s
optimized : 7.93e-5s
preferences:
53.19 ms | 530 KiB | 5 Q