<?php // 2023-09-19T15:56:54Z
namespace LLSDN\Token;
/**
 * Kinds of token the tokenizer can emit.
 *
 * Each case is backed by a short marker string; for most cases this is the
 * chunk that introduces the token in the input.
 */
enum Type: string
{
    // Structural tokens.
    case LeftMapDelimiter = '{';
    case RightMapDelimiter = '}';
    case LeftArrayDelimiter = '[';
    case RightArrayDelimiter = ']';
    case ItemSeparator = ',';
    case PairSeparator = ':';

    // Map key.
    case Name = '#'; // <-- placeholder symbol

    // Value-type tokens.
    case Undef = '!';
    case BooleanTrue = 'true';
    case BooleanFalse = 'false';
    case Integer = 'i';
    case Real = 'r';
    case UUID = 'u';
    case String = 's';
    case RawBinary = 'b';
    case Binary16 = 'b16';
    case Binary64 = 'b64';
    case URI = 'l';
    case Date = 'd';

    /**
     * Tells whether this token terminates the value that precedes it.
     *
     * @return bool True for a closing map or array delimiter and for the item separator.
     */
    public function endsValue(): bool
    {
        $terminators = [
            self::RightMapDelimiter,
            self::RightArrayDelimiter,
            self::ItemSeparator,
        ];

        return in_array($this, $terminators, true);
    }

    /**
     * Tells whether this token is pure structure (delimiter or separator).
     *
     * @return bool True for the four delimiters and the two separators.
     */
    public function isStructural(): bool
    {
        $structural = [
            self::LeftMapDelimiter,
            self::RightMapDelimiter,
            self::LeftArrayDelimiter,
            self::RightArrayDelimiter,
            self::PairSeparator,
            self::ItemSeparator,
        ];

        return in_array($this, $structural, true);
    }
}
/**
 * Base shape shared by every token: a kind and an optional textual payload.
 *
 * Both properties default to null so that a freshly constructed token can be
 * inspected without triggering PHP's "must not be accessed before
 * initialization" error on typed properties.
 */
abstract class AbstractToken
{
    /** Kind of token, or null while none has been assigned. */
    public ?Type $type = null;

    /** Payload carried by the token, or null when it has none. */
    public ?string $value = null;
}
/**
 * Concrete token that adds nothing to AbstractToken.
 *
 * An instance of this class is the default prototype of the Tokenizer
 * constructor's $token parameter.
 */
class Token extends AbstractToken
{
}
/**
 * Streams typed tokens out of a sequence of pre-split LLSDN chunks.
 *
 * The chunks are expected to be the pieces obtained by splitting the input on
 * its delimiters (punctuation, quotes, type markers, whitespace runs), each
 * delimiter being a chunk of its own.
 */
class Tokenizer
{
    /** Source of chunks; its cursor is advanced as tokens are produced. */
    protected \Iterator $chunk;

    /** Prototype cloned for every emitted token. */
    protected AbstractToken $token;

    /**
     * Content of a string literal whose role (map key or plain string value)
     * is only known once the following token has been read.
     */
    protected ?string $onHold = null;

    /**
     * @param \Iterator|array $chunks Chunks to read; an array is wrapped in an ArrayIterator.
     * @param AbstractToken   $token  Prototype cloned for each emitted token.
     */
    public function __construct(\Iterator|array $chunks, AbstractToken $token = new Token())
    {
        $this->chunk = is_array($chunks)
            ? new \ArrayIterator($chunks)
            : $chunks;
        $this->token = $token;
    }

    /**
     * Walks the chunks and yields one token at a time.
     *
     * String literals are not emitted immediately: they are held until the
     * next structural token tells whether they are a map key (Name) or a
     * value (String). A literal still held when the input ends is emitted as
     * a String.
     *
     * Keys of the returned generator are not unique because sub-generators
     * are delegated to with `yield from`; iterate with foreach, or pass
     * `preserve_keys: false` to iterator_to_array().
     *
     * @return \Generator<AbstractToken>
     *
     * @throws \UnhandledMatchError      When a chunk is not a recognised marker.
     * @throws \UnexpectedValueException When the chunks run out inside a literal.
     */
    public function tokenize(): \Generator
    {
        // The input may open with a whitespace chunk, which no match arm accepts.
        $this->skipBlankChunk();

        while ($this->chunk->valid()) {
            $chunk = $this->chunk->current();
            if (in_array($chunk, ["'", '"', 's'], true)) {
                $this->onHold = $this->stringContent();
            } else {
                yield from match ($chunk) {
                    '{', '}', '[', ']', ',', ':', '!' => $this->noValue(),
                    '0', 'f', 'false', 'F', 'FALSE' => $this->boolean(false),
                    '1', 't', 'true', 'T', 'TRUE' => $this->boolean(true),
                    'b16', 'b64', 'd', 'l' => $this->quoted(),
                    'b' => $this->rawBinary(),
                    'i', 'r', 'u' => $this->simple(),
                };
            }
            $this->skipWS();
        }

        // A string that is the last thing in the input has no following
        // separator to release it, so flush it here instead of losing it.
        if ($this->onHold !== null) {
            yield $this->buildToken(Type::String, $this->onHold);
            $this->onHold = null;
        }
    }

    /**
     * Emits the token for a chunk that carries no value of its own.
     *
     * If a string is on hold it is released first: as a String when the
     * current token ends a value, as a Name when it is the pair separator.
     * A held string followed by any other token is discarded.
     *
     * @return \Generator<AbstractToken>
     */
    protected function noValue(): \Generator
    {
        $type = Type::from($this->chunk->current());
        if ($this->onHold !== null) {
            if ($type->endsValue()) {
                yield $this->buildToken(Type::String, $this->onHold);
            } elseif ($type === Type::PairSeparator) {
                yield $this->buildToken(Type::Name, $this->onHold);
            }
            $this->onHold = null;
        }
        yield $this->buildToken($type);
    }

    /**
     * Emits a token whose value is the single chunk following the type marker.
     *
     * @return \Generator<AbstractToken>
     */
    protected function simple(): \Generator
    {
        $type = Type::from($this->chunk->current());
        $this->chunk->next();
        yield $this->buildToken($type, $this->chunk->current());
    }

    /**
     * Emits a token whose value is a quoted literal following the type marker.
     *
     * @return \Generator<AbstractToken>
     */
    protected function quoted(): \Generator
    {
        $type = Type::from($this->chunk->current());
        $this->chunk->next();
        $value = $this->escapedContent();
        yield $this->buildToken($type, $value);
    }

    /**
     * Emits a boolean token with the canonical value 'true' or 'false'.
     *
     * @return \Generator<AbstractToken>
     */
    protected function boolean(bool $value): \Generator
    {
        yield $value
            ? $this->buildToken(Type::BooleanTrue, 'true')
            : $this->buildToken(Type::BooleanFalse, 'false');
    }

    /**
     * Emits a RawBinary token from a size-prefixed literal.
     *
     * @return \Generator<AbstractToken>
     */
    protected function rawBinary(): \Generator
    {
        $this->chunk->next(); // opening parenthesis
        yield $this->buildToken(Type::RawBinary, $this->sizedContent());
    }

    /**
     * Reads a string literal, either quoted or size-prefixed (s(N)"...").
     */
    protected function stringContent(): string
    {
        if (in_array($this->chunk->current(), ["'", '"'], true)) {
            return $this->escapedContent();
        }

        $this->chunk->next(); // opening parenthesis

        return $this->sizedContent();
    }

    /**
     * Reads a delimited literal; the cursor must be on the opening delimiter.
     *
     * Chunks are concatenated until the same delimiter is met unescaped; the
     * cursor is left on that closing delimiter. A backslash in front of the
     * delimiter is dropped, any other backslash is kept as is.
     *
     * @throws \UnexpectedValueException When the chunks run out before the closing delimiter.
     */
    protected function escapedContent(): string
    {
        $delimiter = $this->chunk->current();
        $content = '';

        while (true) {
            $this->chunk->next();
            $current = $this->chunk->current();

            if ($current === '\\') {
                $this->chunk->next();
                if (!$this->chunk->valid()) {
                    throw $this->truncated('an escape sequence');
                }
                $current = $this->chunk->current();
                if ($current !== $delimiter) {
                    $content .= '\\';
                }
                $content .= $current;
                continue;
            }

            if ($current === $delimiter) {
                return $content;
            }

            // Without this guard an unterminated literal would loop forever.
            if (!$this->chunk->valid()) {
                throw $this->truncated('a quoted literal');
            }

            $content .= $current;
        }
    }

    /**
     * Reads a size-prefixed literal; the cursor must be on the opening parenthesis.
     *
     * Chunks are concatenated until at least the announced number of bytes
     * has been collected, then the cursor is moved onto the closing delimiter.
     *
     * @throws \UnexpectedValueException When the chunks run out before the size is reached.
     */
    protected function sizedContent(): string
    {
        $this->chunk->next(); // size
        $size = (int) $this->chunk->current();
        $this->chunk->next(); // closing parenthesis
        $this->chunk->next(); // opening delimiter

        $content = '';
        while (strlen($content) < $size) {
            $this->chunk->next();
            // Without this guard a truncated payload would loop forever.
            if (!$this->chunk->valid()) {
                throw $this->truncated('a sized literal');
            }
            $content .= $this->chunk->current();
        }
        $this->chunk->next(); // closing delimiter

        return $content;
    }

    /**
     * Moves to the next chunk, then past it too if it is only whitespace.
     */
    protected function skipWS(): void
    {
        $this->chunk->next();
        $this->skipBlankChunk();
    }

    /**
     * Steps over the current chunk when it consists solely of whitespace.
     */
    private function skipBlankChunk(): void
    {
        if ($this->chunk->valid() && preg_match('~\A \s+ \z~ux', $this->chunk->current())) {
            $this->chunk->next();
        }
    }

    /**
     * Builds the exception reported when the chunks end inside a literal.
     */
    private function truncated(string $where): \UnexpectedValueException
    {
        return new \UnexpectedValueException("Unexpected end of input inside {$where}.");
    }

    /**
     * Clones the prototype token and fills in its type and value.
     */
    protected function buildToken(Type $tokenType, ?string $value = null): AbstractToken
    {
        $token = clone $this->token;
        $token->type = $tokenType;
        $token->value = $value;

        return $token;
    }
}
/**
 * Serialises a token stream into a JSON document.
 *
 * Structural tokens are copied verbatim, names become JSON strings, Undef
 * becomes null, and every other token becomes an object with a "value" and a
 * "type" member.
 *
 * Names are passed through json_encode() so that quotes or backslashes in a
 * key cannot break the surrounding JSON, matching how values are encoded.
 *
 * @param \Iterator $tokens Tokens exposing `type` (a Type case) and `value`.
 *
 * @return string The concatenated JSON text; empty when there are no tokens.
 */
function toJSON(\Iterator $tokens): string
{
    $result = '';
    foreach ($tokens as $token) {
        $type = $token->type;
        $result .= match (true) {
            $type->isStructural() => $type->value,
            $type === Type::Name => json_encode($token->value),
            $type === Type::Undef => 'null',
            $type === Type::BooleanTrue, $type === Type::BooleanFalse
                => '{"value":"' . $token->value . '", "type":"Boolean"}',
            default => '{"value":'
                . json_encode($token->value)
                . ',"type":"'
                . $type->name . '"}',
        };
    }

    return $result;
}
// Sample LLSDN document used as the subject of the split further down. It
// mixes a map, sized and quoted strings, sized and base64 binaries, the
// undefined marker and several boolean spellings.
// NOTE(review): the b(158) payload is only 117 bytes long as it appears here
// (its indentation and a blank line look to have been stripped), so a sized
// read would run past the closing quote; restore the original layout or
// adjust the declared size before relying on the output.
$test = <<<'LLSDN'
[
{
'creation-date':d"2007-03-15T18:30:18Z",
'creator-id':u3c115e51-04f4-523c-9fa6-98aff1034730
},
s(10)"0123456789",
"Where are the beef & the <pig>?",
'Over here.',
b(158)"default
{
state_entry()
{
llSay(0, "Hello, Avatar!");
}
touch_start(integer total_number)
{
llSay(0, "Touched.");
}
}",
b64"AABAAAAAAAAAAAIAAAA//wAAP/8AAADgAAAA5wAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AABkAAAAZAAAAAAAAAAAAAAAZAAAAAAAAAABAAAAAAAAAAAAAAAAAAAABQAAAAEAAAAQAAAAAAAA
AAUAAAAFAAAAABAAAAAAAAAAPgAAAAQAAAAFAGNbXgAAAABgSGVsbG8sIEF2YXRhciEAZgAAAABc
XgAAAAhwEQjRABeVAAAABQBjW14AAAAAYFRvdWNoZWQuAGYAAAAAXF4AAAAIcBEI0QAXAZUAAEAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
!, 0, f, t, T, TRUE, FALSE, F
]
LLSDN;
// Delimiter pattern for the split further down. One named group matches, in
// turn:
//   - a single punctuation character: brackets, parentheses, braces, colon,
//     comma, exclamation mark, either quote, or a backslash;
//   - a type marker starting at a word boundary: a lone 0/1 boolean, the
//     f/false/t/true spellings, b/b16/b64, or s/d/l, each of which must also
//     end at a word boundary, or i/r/u, which need not;
//   - a run of whitespace.
// (?xx) lets whitespace be used for layout even inside character classes;
// the trailing flags are `u` (UTF-8) and `n` (no auto-capture, so only the
// named group is captured; this modifier needs PHP 8.2 or later).
$pattern = <<<'REGEX'
~(?xx)
(?<chunk>
[ ] [ ) ( } { : , ! ' " \\ ]
|
[[:<:]]
(
( # boolean
(?<! [ - + . ] ) [ 0 1 ] (?= [ ] \s , } ])
|
f (alse)? | t (rue)? | F (ALSE)? | T (RUE)?
| # binary
b ( 16 | 64 )?
| # string, date, URI
[ s d l ]
)
[[:>:]]
| # integer, real, UUID
[ i r u ]
)
|
\s+
)
~un
REGEX;
// Cut the sample into chunks: every delimiter matched by the pattern is kept
// as a chunk of its own, and empty pieces are dropped.
$chunks = preg_split(
    pattern: $pattern,
    subject: $test,
    flags: PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY,
);

// preg_split() returns false when the pattern fails to compile or PCRE gives
// up; report that instead of handing false to the Tokenizer constructor.
if ($chunks === false) {
    throw new \RuntimeException('Unable to split the input: ' . preg_last_error_msg());
}

// Tokenize lazily, serialise the tokens to JSON, and dump the decoded result.
$myTKZ = new Tokenizer($chunks);
$tokenGen = $myTKZ->tokenize();
print_r(json_decode(toJSON($tokenGen), true));
Enable javascript to submit You have javascript disabled. You will not be able to edit any code.
Here you find the average performance (time & memory) of each version. A grayed out version indicates it didn't complete successfully (based on exit-code).
Version System time (s) User time (s) Memory (MiB) 8.4.12 0.005 0.005 22.66 8.4.11 0.012 0.010 22.50 8.4.10 0.011 0.010 17.92 8.4.9 0.010 0.011 20.47 8.4.8 0.007 0.003 20.15 8.4.7 0.010 0.007 18.25 8.4.6 0.013 0.011 20.55 8.4.5 0.009 0.012 20.46 8.4.4 0.011 0.015 18.02 8.4.3 0.007 0.014 20.85 8.4.2 0.007 0.014 20.07 8.4.1 0.006 0.003 19.81 8.3.25 0.011 0.009 19.20 8.3.24 0.011 0.009 20.94 8.3.23 0.007 0.003 16.87 8.3.22 0.007 0.001 19.23 8.3.21 0.011 0.009 16.81 8.3.20 0.008 0.002 17.06 8.3.19 0.008 0.011 19.16 8.3.18 0.007 0.010 18.81 8.3.17 0.019 0.003 17.41 8.3.16 0.012 0.006 18.80 8.3.15 0.003 0.005 20.74 8.3.14 0.000 0.016 18.84 8.3.13 0.008 0.000 18.57 8.3.12 0.010 0.010 16.90 8.3.5 0.009 0.009 18.67 8.2.29 0.012 0.008 20.48 8.2.28 0.012 0.009 18.64 8.2.27 0.016 0.003 17.30 8.2.26 0.015 0.000 19.36 8.2.25 0.006 0.012 18.48 8.2.24 0.006 0.003 16.86 8.2.10 0.018 0.004 19.55 8.1.33 0.011 0.008 22.07 8.1.32 0.007 0.012 16.41 8.1.31 0.004 0.004 16.84 8.1.30 0.004 0.015 16.45
preferences:dark mode live preview ace vim emacs key bindings
34.7 ms | 403 KiB | 5 Q