<?php
namespace LLSDN\Token;
/**
 * Kinds of token the tokenizer can emit.
 *
 * The backing string of most cases is the literal marker that introduces
 * the token in the input, which is why cases can be looked up with
 * Type::from() on a raw chunk.
 */
enum Type: string
{
    // Structural tokens
    case LeftMapDelimiter = '{';
    case RightMapDelimiter = '}';
    case LeftArrayDelimiter = '[';
    case RightArrayDelimiter = ']';
    case ItemSeparator = ',';
    case PairSeparator = ':';

    // Map key
    case Name = '#'; // dummy symbol: only there because a backed enum needs a value

    // Typed value tokens
    case Undef = '!';
    case BooleanTrue = 'true';
    case BooleanFalse = 'false';
    case Integer = 'i';
    case Real = 'r';
    case UUID = 'u';
    case String = 's';
    case RawBinary = 'b';
    case Binary16 = 'b16';
    case Binary64 = 'b64';
    case URI = 'l';
    case Date = 'd';

    /**
     * Tells whether this case is one of `}`, `]` or `,`.
     *
     * @return bool true for RightMapDelimiter, RightArrayDelimiter and
     *              ItemSeparator, false for every other case
     */
    public function endsValue(): bool
    {
        $terminators = [
            self::RightMapDelimiter,
            self::RightArrayDelimiter,
            self::ItemSeparator,
        ];

        return in_array($this, $terminators, true);
    }

    /**
     * Tells whether this case is a delimiter or separator rather than a
     * key or a typed value.
     *
     * @return bool true for the four map/array delimiters and the two
     *              separators, false for every other case
     */
    public function isStructural(): bool
    {
        $structural = [
            self::LeftMapDelimiter,
            self::RightMapDelimiter,
            self::LeftArrayDelimiter,
            self::RightArrayDelimiter,
            self::PairSeparator,
            self::ItemSeparator,
        ];

        return in_array($this, $structural, true);
    }
}
/**
 * Base shape of a token: a kind plus an optional textual payload.
 *
 * Both properties default to null so that a freshly constructed token can
 * be inspected safely; without a default, reading a typed property before
 * it has been assigned raises an Error.
 */
abstract class AbstractToken
{
    /** Kind of the token, or null while it has not been assigned. */
    public ?Type $type = null;

    /** Textual payload of the token, or null when it carries none. */
    public ?string $value = null;
}
/**
 * Plain concrete token: adds nothing to AbstractToken.
 *
 * It exists so that the abstract base can be instantiated; the Tokenizer
 * constructor uses an instance of it as its default token prototype.
 */
class Token extends AbstractToken
{
}
/**
 * Converts a forward-only stream of pre-split text chunks into tokens.
 *
 * The input is not raw text: it is a sequence of chunks in which every
 * marker (`{`, `:`, `"`, `b64`, `i`, …), every run of whitespace and every
 * piece of content in between is a separate element.
 *
 * A string cannot be classified when it is read, because the same syntax is
 * used for map keys and for string values. Its content is therefore kept
 * "on hold" and emitted once the following structural token (or the end of
 * the input) shows which one it was.
 */
class Tokenizer
{
    /** Chunk source, consumed strictly forward. */
    protected \Iterator $chunk;

    /** Prototype that is cloned for every emitted token. */
    protected AbstractToken $token;

    /** Content of a string already read but not yet emitted as Name or String. */
    protected ?string $onHold = null;

    /**
     * @param \Iterator|array $chunks pre-split chunks; an array is wrapped in an ArrayIterator
     * @param AbstractToken   $token  prototype cloned for each emitted token
     */
    public function __construct(\Iterator|array $chunks, AbstractToken $token = new Token())
    {
        $this->chunk = is_array($chunks)
            ? new \ArrayIterator($chunks)
            : $chunks;
        $this->token = $token;
    }

    /**
     * Lazily yields one token per value, key, delimiter or separator.
     *
     * Keys of the returned generator are NOT unique because sub-generators
     * are delegated to with `yield from`; iterate with foreach, or pass
     * preserve_keys: false to iterator_to_array().
     *
     * @return \Generator<AbstractToken>
     *
     * @throws \UnhandledMatchError     when a chunk is not a known marker
     * @throws \UnexpectedValueException when the input ends inside a quoted or sized value
     */
    public function tokenize()
    {
        // Input that starts with whitespace would otherwise reach the match
        // below and be rejected as an unknown marker.
        if ($this->chunk->valid() && $this->isWhitespace($this->chunk->current())) {
            $this->chunk->next();
        }

        while ($this->chunk->valid()) {
            $chunk = $this->chunk->current();
            if (in_array($chunk, ["'", '"', 's'], true)) {
                // Key or value? Decided by the next structural token.
                $this->onHold = $this->stringContent();
            } else {
                yield from match ($chunk) {
                    '{', '}', '[', ']', ',', ':', '!' => $this->noValue(),
                    '0', 'f', 'false', 'F', 'FALSE' => $this->boolean(false),
                    '1', 't', 'true', 'T', 'TRUE' => $this->boolean(true),
                    'b16', 'b64', 'd', 'l' => $this->quoted(),
                    'b' => $this->rawBinary(),
                    'i', 'r', 'u' => $this->simple(),
                };
            }
            $this->skipWS();
        }

        // A string that is the very last value of the input is followed by
        // no structural token, so nothing has flushed it yet.
        if ($this->onHold !== null) {
            yield $this->buildToken(Type::String, $this->onHold);
            $this->onHold = null;
        }
    }

    /**
     * Emits the token for the current structural or undef chunk, preceded
     * by the held string when there is one.
     *
     * The held string becomes a String when the current token ends a value
     * and a Name when it is the pair separator. In front of any other token
     * it is discarded.
     *
     * @return \Generator<AbstractToken>
     */
    protected function noValue()
    {
        $type = Type::from($this->chunk->current());
        if ($this->onHold !== null) {
            if ($type->endsValue()) {
                yield $this->buildToken(Type::String, $this->onHold);
            } elseif ($type === Type::PairSeparator) {
                yield $this->buildToken(Type::Name, $this->onHold);
            }
            $this->onHold = null;
        }
        yield $this->buildToken($type);
    }

    /**
     * Emits a token whose value is the single chunk that follows the marker.
     *
     * @return \Generator<AbstractToken>
     */
    protected function simple()
    {
        $type = Type::from($this->chunk->current());
        $this->chunk->next();
        yield $this->buildToken($type, $this->chunk->current());
    }

    /**
     * Emits a token whose value is the delimited text that follows the marker.
     *
     * @throws \UnexpectedValueException when the input ends before the closing delimiter
     */
    protected function quoted(): \Generator
    {
        $type = Type::from($this->chunk->current());
        $this->chunk->next();
        $value = $this->escapedContent();
        yield $this->buildToken($type, $value);
    }

    /**
     * Emits a boolean token with the canonical value 'true' or 'false',
     * whatever spelling was used in the input.
     *
     * @param bool $value the boolean the current chunk stands for
     */
    protected function boolean($value): \Generator
    {
        yield $value
            ? $this->buildToken(Type::BooleanTrue, 'true')
            : $this->buildToken(Type::BooleanFalse, 'false');
    }

    /**
     * Emits a RawBinary token from a size-prefixed payload.
     *
     * @throws \UnexpectedValueException when the input ends before the announced size is read
     */
    protected function rawBinary(): \Generator
    {
        $this->chunk->next(); // opening parenthesis
        yield $this->buildToken(Type::RawBinary, $this->sizedContent());
    }

    /**
     * Reads the string starting at the current chunk: delimited when the
     * chunk is a quote, size-prefixed otherwise.
     *
     * @throws \UnexpectedValueException when the input ends inside the string
     */
    protected function stringContent(): string
    {
        if (in_array($this->chunk->current(), ["'", '"'], true)) {
            return $this->escapedContent();
        }

        $this->chunk->next(); // opening parenthesis
        return $this->sizedContent();
    }

    /**
     * Collects chunks up to the next unescaped occurrence of the delimiter
     * found at the current position, and leaves the iterator on that
     * closing delimiter.
     *
     * A backslash in front of the delimiter is dropped; in front of
     * anything else it is kept in the returned content.
     *
     * @throws \UnexpectedValueException when the input ends before the closing delimiter
     */
    protected function escapedContent(): string
    {
        $delimiter = $this->chunk->current();
        $content = '';
        do {
            $escaped = false;
            $this->chunk->next();
            // Past the end current() is null and can never equal the
            // delimiter: without this check the loop would never stop.
            $this->requireChunk('the closing delimiter of a quoted value');
            if ($this->chunk->current() === '\\') {
                $escaped = true;
                $this->chunk->next();
                $this->requireChunk('the chunk following an escape character');
                if ($this->chunk->current() !== $delimiter) {
                    $content .= '\\';
                }
                $content .= $this->chunk->current();
            } elseif ($this->chunk->current() !== $delimiter) {
                $content .= $this->chunk->current();
            }
        } while ($this->chunk->current() !== $delimiter || $escaped === true);

        return $content;
    }

    /**
     * Reads a size-prefixed payload; the iterator must be on the opening
     * parenthesis and is left on the closing delimiter.
     *
     * Chunks are concatenated until at least the announced number of bytes
     * has been gathered.
     *
     * @throws \UnexpectedValueException when the input ends before that many bytes are available
     */
    protected function sizedContent(): string
    {
        $this->chunk->next(); // size
        $size = (int) $this->chunk->current();
        $this->chunk->next(); // closing parenthesis
        $this->chunk->next(); // opening delimiter
        $content = '';
        while (strlen($content) < $size) {
            $this->chunk->next();
            // Past the end current() is null, the content stops growing and
            // the loop would never terminate.
            $this->requireChunk("{$size} bytes of sized content");
            $content .= $this->chunk->current();
        }
        $this->chunk->next(); // closing delimiter

        return $content;
    }

    /**
     * Moves to the next chunk, and one further when that chunk is whitespace.
     */
    protected function skipWS()
    {
        $this->chunk->next();
        if ($this->chunk->valid() && $this->isWhitespace($this->chunk->current())) {
            $this->chunk->next();
        }
    }

    /**
     * Creates a token by cloning the prototype and filling it in.
     *
     * @param Type        $tokenType kind of the token
     * @param string|null $value     payload, null for tokens that carry none
     */
    protected function buildToken(Type $tokenType, ?string $value = null): AbstractToken
    {
        $token = clone $this->token;
        $token->type = $tokenType;
        $token->value = $value;

        return $token;
    }

    /** Tells whether a chunk is a non-empty run of whitespace only. */
    private function isWhitespace(mixed $chunk): bool
    {
        return is_string($chunk) && preg_match('~\A \s+ \z~ux', $chunk) === 1;
    }

    /**
     * Fails when the chunk source is exhausted.
     *
     * @param string $expected what the caller was still waiting for, used in the message
     *
     * @throws \UnexpectedValueException when no chunk is left
     */
    private function requireChunk(string $expected): void
    {
        if (!$this->chunk->valid()) {
            throw new \UnexpectedValueException("Unexpected end of input: missing {$expected}.");
        }
    }
}
/**
 * Renders a token stream as a JSON document string.
 *
 * - structural tokens are written as their own symbol;
 * - a Name becomes a JSON string, escaped so that it is a valid object key;
 * - Undef becomes `null`;
 * - booleans become `{"value":"true"|"false", "type":"Boolean"}`;
 * - every other token becomes `{"value":<JSON string>,"type":"<case name>"}`.
 *
 * @param \Iterator $tokens tokens exposing a `type` (Type case) and a `value`
 *
 * @return string the concatenated JSON text
 */
function toJSON(\Iterator $tokens): string
{
    $result = '';
    foreach ($tokens as $token) {
        if ($token->type->isStructural()) {
            $result .= $token->type->value;
        } elseif ($token->type === Type::Name) {
            // Plain concatenation between quotes breaks the document as soon
            // as a key contains a quote, a backslash or a control character.
            // The two flags keep ordinary keys byte-identical to the raw text.
            $result .= json_encode(
                (string) $token->value,
                JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE,
            );
        } elseif ($token->type === Type::Undef) {
            $result .= 'null';
        } elseif (str_starts_with(needle: 'Boolean', haystack: $token->type->name)) {
            $result .= '{"value":"' . $token->value . '", "type":"Boolean"}';
        } else {
            $result .= '{"value":'
                . json_encode($token->value)
                . ',"type":"'
                . $token->type->name . '"}';
        }
    }

    return $result;
}
// Demo: sample document fed to the tokenizer below.
$test = <<<'LLSDN'
[
{
'creation-date':d"2007-03-15T18:30:18Z",
'creator-id':u3c115e51-04f4-523c-9fa6-98aff1034730
},
s(10)"0123456789",
"Where are the beef & the <pig>?",
'Over here.',
b(158)"default
{
state_entry()
{
llSay(0, "Hello, Avatar!");
}
touch_start(integer total_number)
{
llSay(0, "Touched.");
}
}",
b64"AABAAAAAAAAAAAIAAAA//wAAP/8AAADgAAAA5wAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AABkAAAAZAAAAAAAAAAAAAAAZAAAAAAAAAABAAAAAAAAAAAAAAAAAAAABQAAAAEAAAAQAAAAAAAA
AAUAAAAFAAAAABAAAAAAAAAAPgAAAAQAAAAFAGNbXgAAAABgSGVsbG8sIEF2YXRhciEAZgAAAABc
XgAAAAhwEQjRABeVAAAABQBjW14AAAAAYFRvdWNoZWQuAGYAAAAAXF4AAAAIcBEI0QAXAZUAAEAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
!, 0, f, t, T, TRUE, FALSE, F
]
LLSDN;

// Delimiter pattern for preg_split(): every marker and every run of
// whitespace is a delimiter. The pattern is written in extended mode, so the
// spaces inside it (character classes included) are not significant.
$pattern = <<<'REGEX'
~(?xx)
(?<chunk>
[ ] [ ) ( } { : , ! ' " \\ ]
|
[[:<:]]
(
( # boolean
(?<! [ - + . ] ) [ 0 1 ] (?= [ ] \s , } ])
|
f (alse)? | t (rue)? | F (ALSE)? | T (RUE)?
| # binary
b ( 16 | 64 )?
| # string, date, URI
[ s d l ]
)
[[:>:]]
| # integer, real, UUID
[ i r u ]
)
|
\s+
)
~un
REGEX;

// PREG_SPLIT_DELIM_CAPTURE keeps the captured delimiters in the result and
// PREG_SPLIT_NO_EMPTY drops the empty pieces between adjacent delimiters.
$chunks = preg_split(
    pattern: $pattern,
    subject: $test,
    flags: PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
);

// preg_split() returns false on a PCRE failure; stop here with the real
// cause instead of letting the Tokenizer constructor reject the value.
if ($chunks === false) {
    throw new \RuntimeException('Unable to split the sample into chunks: ' . preg_last_error_msg());
}

$myTKZ = new Tokenizer($chunks);
$tokenGen = $myTKZ->tokenize();

// Round-trip through JSON to display the token stream as nested arrays.
print_r(json_decode(toJSON($tokenGen), true));
- Output for 8.2.10
- Array
(
[0] => Array
(
[creation-date] => Array
(
[value] => 2007-03-15T18:30:18Z
[type] => Date
)
[creator-id] => Array
(
[value] => 3c115e51-04f4-523c-9fa6-98aff1034730
[type] => UUID
)
)
[1] => Array
(
[value] => 0123456789
[type] => String
)
[2] => Array
(
[value] => Where are the beef & the <pig>?
[type] => String
)
[3] => Array
(
[value] => Over here.
[type] => String
)
[4] => Array
(
[value] => default
{
state_entry()
{
llSay(0, "Hello, Avatar!");
}
touch_start(integer total_number)
{
llSay(0, "Touched.");
}
}
[type] => RawBinary
)
[5] => Array
(
[value] => AABAAAAAAAAAAAIAAAA//wAAP/8AAADgAAAA5wAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AABkAAAAZAAAAAAAAAAAAAAAZAAAAAAAAAABAAAAAAAAAAAAAAAAAAAABQAAAAEAAAAQAAAAAAAA
AAUAAAAFAAAAABAAAAAAAAAAPgAAAAQAAAAFAGNbXgAAAABgSGVsbG8sIEF2YXRhciEAZgAAAABc
XgAAAAhwEQjRABeVAAAABQBjW14AAAAAYFRvdWNoZWQuAGYAAAAAXF4AAAAIcBEI0QAXAZUAAEAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
[type] => Binary64
)
[6] =>
[7] => Array
(
[value] => false
[type] => Boolean
)
[8] => Array
(
[value] => false
[type] => Boolean
)
[9] => Array
(
[value] => true
[type] => Boolean
)
[10] => Array
(
[value] => true
[type] => Boolean
)
[11] => Array
(
[value] => true
[type] => Boolean
)
[12] => Array
(
[value] => false
[type] => Boolean
)
[13] => Array
(
[value] => false
[type] => Boolean
)
)
preferences:
140.69 ms | 1406 KiB | 8 Q