3v4l.org

run code in 300+ PHP versions simultaneously
<?php

namespace LLSDN\Token;

/**
 * Kinds of token emitted by the Tokenizer.
 *
 * For structural tokens and type markers the backing value is the chunk text
 * that introduces the token, which is what lets the tokenizer resolve them
 * with Type::from().
 */
enum Type: string
{
    // structural tokens
    case LeftMapDelimiter = '{';
    case RightMapDelimiter = '}';
    case LeftArrayDelimiter = '[';
    case RightArrayDelimiter = ']';
    case ItemSeparator = ',';
    case PairSeparator = ':';

    // key
    case Name = '#'; // <-- dummy symbol: Name tokens are only built by Tokenizer::noValue(), never via Type::from()

    // type tokens
    case Undef = '!';
    case BooleanTrue = 'true';
    case BooleanFalse = 'false';
    case Integer = 'i';
    case Real = 'r';
    case UUID = 'u';
    case String = 's';
    case RawBinary = 'b';
    case Binary16 = 'b16';
    case Binary64 = 'b64';
    case URI = 'l';
    case Date = 'd';

    /**
     * Tells whether this token terminates a value: a closing map or array
     * delimiter, or an item separator.
     */
    public function endsValue(): bool
    {
        return match ($this) {
            self::RightMapDelimiter,
            self::RightArrayDelimiter,
            self::ItemSeparator => true,
            default => false,
        };
    }

    /**
     * Tells whether this token is one of the six structural tokens
     * (delimiters and separators) rather than a key or a typed value.
     */
    public function isStructural(): bool
    {
        return match ($this) {
            self::LeftMapDelimiter,
            self::RightMapDelimiter,
            self::LeftArrayDelimiter,
            self::RightArrayDelimiter,
            self::PairSeparator,
            self::ItemSeparator => true,
            default => false,
        };
    }
}

/**
 * Base shape of a token: a type plus an optional textual value.
 *
 * Both properties are left uninitialised here; Tokenizer::buildToken() sets
 * them on a clone of the prototype token.
 */
abstract class AbstractToken
{
    public ?Type $type;
    public ?string $value;
}

/**
 * Default concrete token used as the Tokenizer prototype.
 */
class Token extends AbstractToken
{
}

/**
 * Turns a sequence of pre-split chunks (delimiters, type markers, whitespace
 * runs and free text) into a stream of tokens.
 *
 * A string is not emitted as soon as it is read: it is kept on hold until the
 * next structural token shows whether it was a map key (followed by ':') or a
 * plain string value (followed by ',', '}' or ']').
 */
class Tokenizer
{
    protected \Iterator $chunk;
    protected AbstractToken $token;
    protected ?string $onHold = null;

    /**
     * @param \Iterator|array $chunks chunks to read; an array is wrapped in an ArrayIterator
     * @param AbstractToken   $token  prototype that is cloned for every emitted token
     */
    public function __construct(\Iterator|array $chunks, AbstractToken $token = new Token())
    {
        $this->chunk = is_array($chunks) ? new \ArrayIterator($chunks) : $chunks;
        $this->token = $token;
    }

    /**
     * Lazily yields one token per recognised construct.
     *
     * Note that the sub-generators are delegated to with `yield from`, so the
     * yielded keys are not unique; collect the tokens with foreach or
     * iterator_to_array($generator, false).
     *
     * @return \Generator<int, AbstractToken>
     *
     * @throws \UnhandledMatchError      when a chunk is not a known token introducer
     * @throws \UnexpectedValueException when the input ends inside a quoted or sized value
     */
    public function tokenize()
    {
        while ($this->chunk->valid()) {
            $chunk = $this->chunk->current();

            // Whitespace found at a token boundary (typically at the very
            // start of the input) carries no meaning: step over it.
            if ($this->isWhitespace($chunk)) {
                $this->chunk->next();
                continue;
            }

            if (in_array($chunk, ["'", '"', 's'], true)) {
                // Key or value? Unknown until the next structural token.
                $this->onHold = $this->stringContent();
            } else {
                yield from match ($chunk) {
                    '{', '}', '[', ']', ',', ':', '!' => $this->noValue(),
                    '0', 'f', 'false', 'F', 'FALSE' => $this->boolean(false),
                    '1', 't', 'true', 'T', 'TRUE' => $this->boolean(true),
                    'b16', 'b64', 'd', 'l' => $this->quoted(),
                    'b' => $this->rawBinary(),
                    'i', 'r', 'u' => $this->simple(),
                };
            }

            $this->skipWS();
        }

        // A string still on hold when the input ends was never followed by a
        // structural token, so it can only be a string value.
        if ($this->onHold !== null) {
            $held = $this->onHold;
            $this->onHold = null;

            yield $this->buildToken(Type::String, $held);
        }
    }

    /**
     * Emits the value-less token for the current chunk, preceded by the string
     * on hold when there is one.
     *
     * The held string becomes a String token if the current token ends a
     * value, a Name token if the current token is the pair separator, and is
     * discarded otherwise.
     *
     * @return \Generator<int, AbstractToken>
     */
    protected function noValue()
    {
        $type = Type::from($this->chunk->current());

        if ($this->onHold !== null) {
            if ($type->endsValue()) {
                yield $this->buildToken(Type::String, $this->onHold);
            } elseif ($type === Type::PairSeparator) {
                yield $this->buildToken(Type::Name, $this->onHold);
            }

            $this->onHold = null;
        }

        yield $this->buildToken($type);
    }

    /**
     * Emits a token whose value is the single chunk following the type marker
     * (used for the 'i', 'r' and 'u' markers).
     *
     * @return \Generator<int, AbstractToken>
     */
    protected function simple()
    {
        $type = Type::from($this->chunk->current());
        $this->chunk->next();

        yield $this->buildToken($type, $this->chunk->current());
    }

    /**
     * Emits a token whose value is the quoted text following the type marker
     * (used for the 'b16', 'b64', 'd' and 'l' markers).
     */
    protected function quoted(): \Generator
    {
        $type = Type::from($this->chunk->current());
        $this->chunk->next(); // opening delimiter
        $value = $this->escapedContent();

        yield $this->buildToken($type, $value);
    }

    /**
     * Emits a boolean token with the normalised value 'true' or 'false',
     * whatever spelling was used in the input.
     */
    protected function boolean($value): \Generator
    {
        yield $value
            ? $this->buildToken(Type::BooleanTrue, 'true')
            : $this->buildToken(Type::BooleanFalse, 'false');
    }

    /**
     * Emits a RawBinary token from a size-prefixed payload: b(size)"...".
     */
    protected function rawBinary(): \Generator
    {
        $this->chunk->next(); // opening parenthesis

        yield $this->buildToken(Type::RawBinary, $this->sizedContent());
    }

    /**
     * Reads a string in either of its two forms: quoted ('...' or "...") or
     * size-prefixed (s(size)"...").
     */
    protected function stringContent(): string
    {
        if (in_array($this->chunk->current(), ["'", '"'], true)) {
            return $this->escapedContent();
        }

        $this->chunk->next(); // opening parenthesis

        return $this->sizedContent();
    }

    /**
     * Reads the chunks between the current chunk (taken as the opening
     * delimiter) and the next unescaped occurrence of that same delimiter.
     *
     * A backslash followed by the delimiter yields the bare delimiter; any
     * other backslash sequence is kept verbatim, backslash included. On return
     * the iterator is positioned on the closing delimiter.
     *
     * @throws \UnexpectedValueException when the input ends before the closing delimiter
     */
    protected function escapedContent(): string
    {
        $delimiter = $this->chunk->current();
        $content = '';

        do {
            $escaped = false;
            $this->chunk->next();
            // Without this guard an unterminated value would loop forever:
            // current() keeps returning null, which never equals the delimiter.
            $this->requireChunk('a quoted value');

            if ($this->chunk->current() === '\\') {
                $escaped = true;
                $this->chunk->next();
                $this->requireChunk('an escape sequence');

                if ($this->chunk->current() !== $delimiter) {
                    $content .= '\\';
                }

                $content .= $this->chunk->current();
            } elseif ($this->chunk->current() !== $delimiter) {
                $content .= $this->chunk->current();
            }
        } while ($this->chunk->current() !== $delimiter || $escaped === true);

        return $content;
    }

    /**
     * Reads a size-prefixed payload. The iterator must be positioned on the
     * opening parenthesis; on return it is positioned on the closing delimiter.
     *
     * Whole chunks are appended until at least `size` bytes have been
     * collected, so the result may exceed the declared size when the size does
     * not fall on a chunk boundary.
     *
     * @throws \UnexpectedValueException when the input ends before `size` bytes were read
     */
    protected function sizedContent(): string
    {
        $this->chunk->next(); // size
        $size = (int) $this->chunk->current();
        $this->chunk->next(); // closing parenthesis
        $this->chunk->next(); // opening delimiter

        $content = '';

        while (strlen($content) < $size) {
            $this->chunk->next();
            // Same guard as in escapedContent(): appending null forever would
            // never reach the declared size.
            $this->requireChunk('a sized value');
            $content .= $this->chunk->current();
        }

        $this->chunk->next(); // closing delimiter

        return $content;
    }

    /**
     * Moves past the chunk that was just consumed and past the whitespace
     * chunk that may follow it.
     */
    protected function skipWS()
    {
        $this->chunk->next();

        if ($this->chunk->valid() && $this->isWhitespace($this->chunk->current())) {
            $this->chunk->next();
        }
    }

    /**
     * Builds a token by cloning the prototype and filling in type and value.
     */
    protected function buildToken(Type $tokenType, ?string $value = null): AbstractToken
    {
        $token = clone $this->token;
        $token->type = $tokenType;
        $token->value = $value;

        return $token;
    }

    /**
     * Tells whether a chunk is a string made only of whitespace.
     */
    private function isWhitespace(mixed $chunk): bool
    {
        return is_string($chunk) && preg_match('~\A \s+ \z~ux', $chunk) === 1;
    }

    /**
     * Fails when the chunk iterator is exhausted while a value is being read.
     *
     * @throws \UnexpectedValueException
     */
    private function requireChunk(string $context): void
    {
        if (!$this->chunk->valid()) {
            throw new \UnexpectedValueException("Unexpected end of input while reading {$context}.");
        }
    }
}

/**
 * Renders a token stream as a JSON document.
 *
 * Structural tokens are copied as-is, names become JSON strings, Undef becomes
 * null, and every other token becomes a {"value": ..., "type": ...} object.
 *
 * @param \Iterator $tokens tokens as produced by Tokenizer::tokenize()
 *
 * @throws \JsonException when a name cannot be encoded (e.g. invalid UTF-8)
 */
function toJSON(\Iterator $tokens): string
{
    $result = '';

    foreach ($tokens as $token) {
        $type = $token->type;

        $result .= match (true) {
            $type->isStructural() => $type->value,
            // Names are encoded rather than merely wrapped in quotes, so that
            // a key containing '"' or '\' still produces valid JSON.
            $type === Type::Name => json_encode(
                (string) $token->value,
                JSON_THROW_ON_ERROR | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE,
            ),
            $type === Type::Undef => 'null',
            str_starts_with($type->name, 'Boolean') => '{"value":"' . $token->value . '", "type":"Boolean"}',
            default => '{"value":' . json_encode($token->value) . ',"type":"' . $type->name . '"}',
        };
    }

    return $result;
}

// --- demo -------------------------------------------------------------------

// The b(158) payload is size-prefixed: its 158 bytes include the line feeds
// and the 4-space indentation, so it must not be re-indented or reflowed.
$test = <<<'LLSDN'
[
  {
    'creation-date':d"2007-03-15T18:30:18Z",
    'creator-id':u3c115e51-04f4-523c-9fa6-98aff1034730
  },
  s(10)"0123456789",
  "Where are the beef & the <pig>?",
  'Over here.',
  b(158)"default
{
    state_entry()
    {
        llSay(0, "Hello, Avatar!");
    }

    touch_start(integer total_number)
    {
        llSay(0, "Touched.");
    }
}",
  b64"AABAAAAAAAAAAAIAAAA//wAAP/8AAADgAAAA5wAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AABkAAAAZAAAAAAAAAAAAAAAZAAAAAAAAAABAAAAAAAAAAAAAAAAAAAABQAAAAEAAAAQAAAAAAAA
AAUAAAAFAAAAABAAAAAAAAAAPgAAAAQAAAAFAGNbXgAAAABgSGVsbG8sIEF2YXRhciEAZgAAAABc
XgAAAAhwEQjRABeVAAAABQBjW14AAAAAYFRvdWNoZWQuAGYAAAAAXF4AAAAIcBEI0QAXAZUAAEAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
  !, 0, f, t, T, TRUE, FALSE, F
]
LLSDN;

// Extended-mode pattern: each '#' comment runs to the end of its line, so the
// line breaks below are significant.
$pattern = <<<'REGEX'
~(?xx)
(?<chunk>
    [ ] [ ) ( } { : , ! ' " \\ ]
  |
    [[:<:]]
    (
        ( # boolean
            (?<! [ - + . ] ) [ 0 1 ] (?= [ ] \s , } ])
          |
            f (alse)? | t (rue)? | F (ALSE)? | T (RUE)?
          | # binary
            b ( 16 | 64 )?
          | # string, date, URI
            [ s d l ]
        ) [[:>:]]
      | # integer, real, UUID
        [ i r u ]
    )
  |
    \s+
)
~un
REGEX;

$chunks = preg_split(
    pattern: $pattern,
    subject: $test,
    flags: PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
);

$myTKZ = new Tokenizer($chunks);
$tokenGen = $myTKZ->tokenize();

print_r(json_decode(toJSON($tokenGen), true));
Output for 8.2.10
Array ( [0] => Array ( [creation-date] => Array ( [value] => 2007-03-15T18:30:18Z [type] => Date ) [creator-id] => Array ( [value] => 3c115e51-04f4-523c-9fa6-98aff1034730 [type] => UUID ) ) [1] => Array ( [value] => 0123456789 [type] => String ) [2] => Array ( [value] => Where are the beef & the <pig>? [type] => String ) [3] => Array ( [value] => Over here. [type] => String ) [4] => Array ( [value] => default { state_entry() { llSay(0, "Hello, Avatar!"); } touch_start(integer total_number) { llSay(0, "Touched."); } } [type] => RawBinary ) [5] => Array ( [value] => AABAAAAAAAAAAAIAAAA//wAAP/8AAADgAAAA5wAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AABkAAAAZAAAAAAAAAAAAAAAZAAAAAAAAAABAAAAAAAAAAAAAAAAAAAABQAAAAEAAAAQAAAAAAAA AAUAAAAFAAAAABAAAAAAAAAAPgAAAAQAAAAFAGNbXgAAAABgSGVsbG8sIEF2YXRhciEAZgAAAABc XgAAAAhwEQjRABeVAAAABQBjW14AAAAAYFRvdWNoZWQuAGYAAAAAXF4AAAAIcBEI0QAXAZUAAEAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA [type] => Binary64 ) [6] => [7] => Array ( [value] => false [type] => Boolean ) [8] => Array ( [value] => false [type] => Boolean ) [9] => Array ( [value] => true [type] => Boolean ) [10] => Array ( [value] => true [type] => Boolean ) [11] => Array ( [value] => true [type] => Boolean ) [12] => Array ( [value] => false [type] => Boolean ) [13] => Array ( [value] => false [type] => Boolean ) )

preferences:
140.69 ms | 1406 KiB | 8 Q