<?php
namespace LLSDN\Token;
/**
 * Kinds of token handled by the tokenizer, each backed by the chunk text
 * (or a marker string) that identifies it.
 */
enum Type: string
{
    // Structural tokens
    case LeftMapDelimiter = '{';
    case RightMapDelimiter = '}';
    case LeftArrayDelimiter = '[';
    case RightArrayDelimiter = ']';
    case ItemSeparator = ',';
    case PairSeparator = ':';

    // Map key; '#' is only a dummy backing symbol
    case Name = '#';

    // Value type tokens
    case Undef = '!';
    case BooleanTrue = 'true';
    case BooleanFalse = 'false';
    case Integer = 'i';
    case Real = 'r';
    case UUID = 'u';
    case String = 's';
    case RawBinary = 'b';
    case Binary16 = 'b16';
    case Binary64 = 'b64';
    case URI = 'l';
    case Date = 'd';

    /**
     * Tells whether this token closes the value that precedes it.
     *
     * @return bool true for '}', ']' and ',', false for every other case
     */
    public function endsValue(): bool
    {
        $terminators = [
            self::RightMapDelimiter,
            self::RightArrayDelimiter,
            self::ItemSeparator,
        ];

        return in_array($this, $terminators, true);
    }

    /**
     * Tells whether this token is a delimiter or a separator.
     *
     * @return bool true for the four delimiters and the two separators
     */
    public function isStructural(): bool
    {
        $structural = [
            self::LeftMapDelimiter,
            self::RightMapDelimiter,
            self::LeftArrayDelimiter,
            self::RightArrayDelimiter,
            self::PairSeparator,
            self::ItemSeparator,
        ];

        return in_array($this, $structural, true);
    }
}
/**
 * Base shape shared by every token: a kind and an optional textual value.
 *
 * Both properties are declared without a default, so they stay
 * uninitialized until something assigns them.
 */
abstract class AbstractToken
{
    /** Kind of the token, or null. */
    public ?Type $type;

    /** Text carried by the token, or null when it carries none. */
    public ?string $value;
}
/**
 * Concrete token that adds nothing to AbstractToken.
 */
class Token extends AbstractToken
{
}
/**
 * Turns a sequence of pre-split chunks into typed tokens.
 *
 * The chunks are consumed through an iterator; each emitted token is a clone
 * of the prototype token given to the constructor, with its type and value set.
 *
 * Strings are not emitted as soon as they are read: they are kept on hold
 * until the following structural token tells whether they are a map key
 * (followed by ':') or a plain string value (followed by ',', '}' or ']').
 */
class Tokenizer
{
    /** Pattern matching a chunk made of whitespace only. */
    private const WHITESPACE_CHUNK = '~\A \s+ \z~ux';

    /** Source of chunks; its cursor is advanced as tokens are produced. */
    protected \Iterator $chunk;

    /** Prototype cloned for every emitted token. */
    protected AbstractToken $token;

    /** Content of the last string read, not yet emitted as Name or String. */
    protected ?string $onHold = null;

    /**
     * @param \Iterator|array $chunks chunks to tokenize; an array is wrapped in an ArrayIterator
     * @param AbstractToken   $token  prototype cloned for each produced token
     */
    public function __construct(\Iterator|array $chunks, AbstractToken $token = new Token())
    {
        $this->chunk = is_array($chunks)
            ? new \ArrayIterator($chunks)
            : $chunks;
        $this->token = $token;
    }

    /**
     * Lazily produces the tokens for the whole chunk sequence.
     *
     * @return \Generator yields AbstractToken instances
     *
     * @throws \UnhandledMatchError      when a chunk is not a recognised token introducer
     * @throws \UnexpectedValueException when the input ends inside quoted or sized content
     */
    public function tokenize()
    {
        // A whitespace chunk is otherwise only skipped *after* a token, so a
        // leading one would reach the match below and be rejected.
        if ($this->chunk->valid() && $this->isWhitespace($this->chunk->current())) {
            $this->chunk->next();
        }

        while ($this->chunk->valid()) {
            $chunk = $this->chunk->current();
            if (in_array($chunk, ["'", '"', 's'], true)) {
                // Key or value? Decided by the next structural token.
                $this->onHold = $this->stringContent();
            } else {
                yield from match ($chunk) {
                    '{', '}', '[', ']', ',', ':', '!' => $this->noValue(),
                    '0', 'f', 'false', 'F', 'FALSE' => $this->boolean(false),
                    '1', 't', 'true', 'T', 'TRUE' => $this->boolean(true),
                    'b16', 'b64', 'd', 'l' => $this->quoted(),
                    'b' => $this->rawBinary(),
                    'i', 'r', 'u' => $this->simple(),
                };
            }
            $this->skipWS();
        }

        // A string that ends the input (e.g. a document that is a single
        // string) has no following delimiter to flush it: emit it here.
        if ($this->onHold !== null) {
            yield $this->buildToken(Type::String, $this->onHold);
            $this->onHold = null;
        }
    }

    /**
     * Emits the token for a value-less chunk, preceded by the string on hold if any.
     *
     * The held string becomes a String token before ',', '}' or ']', and a
     * Name token before ':'. Before any other chunk it is discarded.
     *
     * @return \Generator yields AbstractToken instances
     */
    protected function noValue()
    {
        $type = Type::from($this->chunk->current());
        if ($this->onHold !== null) {
            if ($type->endsValue()) {
                yield $this->buildToken(Type::String, $this->onHold);
            } elseif ($type === Type::PairSeparator) {
                yield $this->buildToken(Type::Name, $this->onHold);
            }
            $this->onHold = null;
        }
        yield $this->buildToken($type);
    }

    /**
     * Emits a token whose value is the single chunk following the type chunk.
     *
     * @return \Generator yields one AbstractToken
     */
    protected function simple()
    {
        $type = Type::from($this->chunk->current());
        $this->chunk->next();
        yield $this->buildToken($type, $this->chunk->current());
    }

    /**
     * Emits a token whose value is the delimited content following the type chunk.
     *
     * @throws \UnexpectedValueException when the closing delimiter is missing
     */
    protected function quoted(): \Generator
    {
        $type = Type::from($this->chunk->current());
        $this->chunk->next(); // opening delimiter
        $value = $this->escapedContent();
        yield $this->buildToken($type, $value);
    }

    /**
     * Emits a normalised boolean token, whatever spelling the chunk used.
     *
     * @param mixed $value truthy for a BooleanTrue token, falsy for BooleanFalse
     */
    protected function boolean($value): \Generator
    {
        yield $value
            ? $this->buildToken(Type::BooleanTrue, 'true')
            : $this->buildToken(Type::BooleanFalse, 'false');
    }

    /**
     * Emits a RawBinary token from size-prefixed content.
     *
     * @throws \UnexpectedValueException when the input ends before the announced size is read
     */
    protected function rawBinary(): \Generator
    {
        $this->chunk->next(); // opening parenthesis
        yield $this->buildToken(Type::RawBinary, $this->sizedContent());
    }

    /**
     * Reads a string in either form: delimited by quotes, or size-prefixed after 's'.
     *
     * @throws \UnexpectedValueException when the input ends inside the string
     */
    protected function stringContent(): string
    {
        if (in_array($this->chunk->current(), ["'", '"'], true)) {
            return $this->escapedContent();
        }

        $this->chunk->next(); // opening parenthesis
        return $this->sizedContent();
    }

    /**
     * Reads content up to the next unescaped occurrence of the current chunk.
     *
     * The cursor must be on the opening delimiter; it is left on the closing
     * one. A backslash followed by the delimiter yields the delimiter alone;
     * a backslash followed by anything else is kept together with that chunk.
     *
     * @throws \UnexpectedValueException when the input ends before the closing delimiter
     */
    protected function escapedContent(): string
    {
        $delimiter = $this->chunk->current();
        $content = '';

        while (true) {
            $piece = $this->nextChunk('delimited content');

            if ($piece === '\\') {
                $escapedPiece = $this->nextChunk('delimited content');
                $content .= $escapedPiece === $delimiter
                    ? $escapedPiece
                    : '\\' . $escapedPiece;
                continue;
            }

            if ($piece === $delimiter) {
                return $content;
            }

            $content .= $piece;
        }
    }

    /**
     * Reads size-prefixed content.
     *
     * The cursor must be on the opening parenthesis; it is left on the
     * closing delimiter. Whole chunks are appended until the content is at
     * least as long, in bytes, as the announced size.
     *
     * @throws \UnexpectedValueException when the input ends before the announced size is read
     */
    protected function sizedContent(): string
    {
        $this->chunk->next(); // size
        $size = (int) $this->chunk->current();
        $this->chunk->next(); // closing parenthesis
        $this->chunk->next(); // opening delimiter

        $content = '';
        while (strlen($content) < $size) {
            $content .= $this->nextChunk('sized content');
        }

        $this->chunk->next(); // closing delimiter
        return $content;
    }

    /**
     * Moves past the current chunk and past one whitespace chunk following it, if any.
     */
    protected function skipWS()
    {
        $this->chunk->next();
        if ($this->chunk->valid() && $this->isWhitespace($this->chunk->current())) {
            $this->chunk->next();
        }
    }

    /**
     * Creates a token by cloning the prototype and filling it in.
     *
     * @param Type        $tokenType kind given to the new token
     * @param string|null $value     value given to the new token
     */
    protected function buildToken(Type $tokenType, ?string $value = null): AbstractToken
    {
        $token = clone $this->token;
        $token->type = $tokenType;
        $token->value = $value;
        return $token;
    }

    /**
     * Advances the cursor and returns the chunk it lands on.
     *
     * @param string $context what was being read, used in the error message
     *
     * @throws \UnexpectedValueException when there is no further chunk
     */
    private function nextChunk(string $context): mixed
    {
        $this->chunk->next();
        if (!$this->chunk->valid()) {
            // Without this guard current() keeps returning null and the
            // reading loops would never terminate on truncated input.
            throw new \UnexpectedValueException(
                "Unexpected end of input while reading {$context}."
            );
        }

        return $this->chunk->current();
    }

    /** Tells whether a chunk consists of whitespace only. */
    private function isWhitespace(mixed $chunk): bool
    {
        return is_string($chunk) && preg_match(self::WHITESPACE_CHUNK, $chunk) === 1;
    }
}
/**
 * Serializes a token stream into an XML document string.
 *
 * Delimiters open and close `map` / `array` elements, separators produce no
 * output, and every other token becomes one element named after its type.
 * A Name token is not output by itself: its value becomes the `name`
 * attribute of the next element that is opened.
 *
 * @param \Iterator $tokens tokens exposing `type` (a Type case) and `value`
 *
 * @return string the XML declaration followed by the generated markup
 */
function toXML(\Iterator $tokens): string
{
    $result = '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
    $nameAttribute = null;

    foreach ($tokens as $token) {
        $type = $token->type;

        if ($type === Type::Name) {
            $nameAttribute = $token->value;
            continue;
        }

        if ($type === Type::ItemSeparator || $type === Type::PairSeparator) {
            continue;
        }

        $tagName = match ($type) {
            Type::LeftMapDelimiter, Type::RightMapDelimiter => 'map',
            Type::LeftArrayDelimiter, Type::RightArrayDelimiter => 'array',
            Type::BooleanFalse, Type::BooleanTrue => 'boolean',
            Type::UUID => 'UUID',
            Type::URI => 'URI',
            default => strtolower($type->name),
        };

        if (str_starts_with($type->name, 'Right')) {
            $result .= '</' . $tagName . '>';
            continue;
        }

        $result .= '<' . $tagName;

        // Compare against null rather than testing truthiness, so that the
        // keys "0" and "" are not silently dropped.
        if ($nameAttribute !== null) {
            // The key is arbitrary text: escape it (quotes included) so it
            // cannot break out of the attribute.
            $result .= ' name="'
                . htmlspecialchars($nameAttribute, ENT_XML1 | ENT_QUOTES, 'UTF-8')
                . '"';
            $nameAttribute = null;
        }

        if ($type === Type::Undef) {
            $result .= '/>';
            continue;
        }

        $result .= '>';

        if (str_starts_with($type->name, 'Left')) {
            continue;
        }

        // A token may carry no value; treat that as empty content.
        $value = $token->value ?? '';

        $result .= $type === Type::RawBinary
            // Split any "]]>" so it cannot terminate the CDATA section early.
            ? '<![CDATA[' . strtr($value, [']]>' => ']]]]><![CDATA[>']) . ']]>'
            : htmlspecialchars($value, ENT_XML1, 'UTF-8');
        $result .= '</' . $tagName . '>';
    }

    return $result;
}
// Sample document fed to the tokenizer. It is a nowdoc, so nothing inside it
// is interpolated or unescaped by PHP. It holds an array containing a map
// (date and UUID values under quoted keys), a sized string, two quoted
// strings, a sized raw binary, a base64 binary, an undef and several boolean
// spellings.
$test = <<<'LLSDN'
[
{
'creation-date':d"2007-03-15T18:30:18Z",
'creator-id':u3c115e51-04f4-523c-9fa6-98aff1034730
},
s(10)"0123456789",
"Where are the beef & the <pig>?",
'Over here.',
b(158)"default
{
state_entry()
{
llSay(0, "Hello, Avatar!");
}
touch_start(integer total_number)
{
llSay(0, "Touched.");
}
}",
b64"AABAAAAAAAAAAAIAAAA//wAAP/8AAADgAAAA5wAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AABkAAAAZAAAAAAAAAAAAAAAZAAAAAAAAAABAAAAAAAAAAAAAAAAAAAABQAAAAEAAAAQAAAAAAAA
AAUAAAAFAAAAABAAAAAAAAAAPgAAAAQAAAAFAGNbXgAAAABgSGVsbG8sIEF2YXRhciEAZgAAAABc
XgAAAAhwEQjRABeVAAAABQBjW14AAAAAYFRvdWNoZWQuAGYAAAAAXF4AAAAIcBEI0QAXAZUAAEAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
!, 0, f, t, T, TRUE, FALSE, F
]
LLSDN;
// Pattern used to cut the document into chunks. Each match of the named group
// `chunk` is one chunk: a delimiter/punctuation character, a type marker, or
// a run of whitespace; the text between matches forms the remaining chunks.
// Modifiers: `(?xx)` lets whitespace be used freely for layout (inside
// character classes too), `u` treats pattern and subject as UTF-8, and `n`
// stops the unnamed groups from capturing.
// `[[:<:]]` and `[[:>:]]` are start-of-word and end-of-word assertions.
$pattern = <<<'REGEX'
~(?xx)
(?<chunk>
[ ] [ ) ( } { : , ! ' " \\ ]
|
[[:<:]]
(
( # boolean
(?<! [ - + . ] ) [ 0 1 ] (?= [ ] \s , } ])
|
f (alse)? | t (rue)? | F (ALSE)? | T (RUE)?
| # binary
b ( 16 | 64 )?
| # string, date, URI
[ s d l ]
)
[[:>:]]
| # integer, real, UUID
[ i r u ]
)
|
\s+
)
~un
REGEX;
// Cut the sample into chunks: the matched delimiters are kept as chunks of
// their own (DELIM_CAPTURE) and empty pieces are dropped (NO_EMPTY).
$chunks = preg_split(
    pattern: $pattern,
    subject: $test,
    flags: PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY,
);
if ($chunks === false) {
    // preg_split() reports a pattern or engine failure by returning false.
    throw new \RuntimeException('Unable to split the input: ' . preg_last_error_msg());
}

$myTKZ = new Tokenizer($chunks);
$tokenGen = $myTKZ->tokenize();

$dom = new \DOMDocument();
// preserveWhiteSpace is read while the document is parsed, so it has to be
// set before loadXML() to have any effect.
$dom->preserveWhiteSpace = false;
$dom->formatOutput = true;
if ($dom->loadXML(toXML($tokenGen)) === false) {
    throw new \RuntimeException('The generated XML is not well-formed.');
}

echo $dom->saveXML();
- Output for 8.2.10
- <?xml version="1.0" encoding="UTF-8"?>
<array>
<map>
<date name="creation-date">2007-03-15T18:30:18Z</date>
<UUID name="creator-id">3c115e51-04f4-523c-9fa6-98aff1034730</UUID>
</map>
<string>0123456789</string>
<string>Where are the beef &amp; the &lt;pig&gt;?</string>
<string>Over here.</string>
<rawbinary><![CDATA[default
{
state_entry()
{
llSay(0, "Hello, Avatar!");
}
touch_start(integer total_number)
{
llSay(0, "Touched.");
}
}]]></rawbinary>
<binary64>AABAAAAAAAAAAAIAAAA//wAAP/8AAADgAAAA5wAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AABkAAAAZAAAAAAAAAAAAAAAZAAAAAAAAAABAAAAAAAAAAAAAAAAAAAABQAAAAEAAAAQAAAAAAAA
AAUAAAAFAAAAABAAAAAAAAAAPgAAAAQAAAAFAGNbXgAAAABgSGVsbG8sIEF2YXRhciEAZgAAAABc
XgAAAAhwEQjRABeVAAAABQBjW14AAAAAYFRvdWNoZWQuAGYAAAAAXF4AAAAIcBEI0QAXAZUAAEAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA</binary64>
<undef/>
<boolean>false</boolean>
<boolean>false</boolean>
<boolean>true</boolean>
<boolean>true</boolean>
<boolean>true</boolean>
<boolean>false</boolean>
<boolean>false</boolean>
</array>
preferences:
137.53 ms | 1401 KiB | 8 Q