3v4l.org

run code in 300+ PHP versions simultaneously
<?php

/**
 * A node in a character trie used to recognise a fixed set of tokens.
 *
 * $data maps a single character to the child node reached by consuming it.
 * $value holds the token type when the path from the root to this node
 * spells a complete token, and false otherwise.
 */
class Trie
{
    public $data = [];
    public $value = false;

    /**
     * Tell whether the path leading to this node spells a complete token.
     *
     * The "no token" sentinels are compared explicitly instead of relying on
     * truthiness, so a falsy token type such as "0" still counts as a token.
     *
     * @return bool
     */
    public function isAccepting()
    {
        return $this->value !== false && $this->value !== null;
    }
}

/**
 * Build a trie from a map of token text => token type.
 *
 * @param array $tokens Token text as keys, token type as values.
 *
 * @return Trie Root node of the populated trie.
 */
function buildTrie(array $tokens)
{
    $root = new Trie;

    foreach ($tokens as $name => $value) {
        // PHP turns numeric-string array keys such as "1" into integers;
        // cast back so the name can be walked one character at a time.
        $name = (string) $name;
        $length = strlen($name);
        $node = $root;

        for ($i = 0; $i < $length; $i++) {
            $char = $name[$i];

            if (!isset($node->data[$char])) {
                // No child for this character yet, so create it.
                $node->data[$char] = new Trie;
            }

            // Descend for the next character.
            $node = $node->data[$char];
        }

        // The node reached by the last character carries the token type.
        $node->value = $value;
    }

    return $root;
}

/**
 * Split a string into tokens by walking the trie greedily.
 *
 * Characters are consumed for as long as the current node has a matching
 * child. When no child matches, the token ending at the current node is
 * emitted and matching restarts at the root. There is no backtracking: if
 * the walk stops on a node that does not complete a token, lexing fails.
 *
 * @param string $string Input to tokenise.
 * @param Trie   $root   Root of the token trie.
 *
 * @return array List of [token type, matched text] pairs.
 *
 * @throws Exception When the input cannot be split into known tokens.
 */
function lex($string, Trie $root)
{
    $length = strlen($string);
    $i = 0;
    $tokens = [];
    $node = $root;
    $buffer = '';

    while ($i < $length) {
        $char = $string[$i];

        if (isset($node->data[$char])) {
            // Valid next character: consume it and move to the next state.
            $buffer .= $char;
            $node = $node->data[$char];
            $i++;
            continue;
        }

        // An empty buffer means we are at the root and made no progress.
        // Emitting here would never advance $i, so treat it as an error
        // even if an empty-string token made the root accepting.
        if ($buffer === '' || !$node->isAccepting()) {
            throw new Exception("Syntax error at offset $i");
        }

        // Emit the finished token and restart at the root; $i is left
        // untouched so the current character starts the next token.
        $tokens[] = [$node->value, $buffer];
        $buffer = '';
        $node = $root;
    }

    if ($buffer !== '') {
        // Input ended in the middle of a walk: flush it if it is a token.
        if (!$node->isAccepting()) {
            throw new Exception("Syntax error at offset $i");
        }

        $tokens[] = [$node->value, $buffer];
    }

    return $tokens;
}

$tokens = [
    "apple" => "T_APPLE",
    "ape" => "T_APE",
    "cape" => "T_CAPE",
    "(" => "T_L_PARENTH",
    ")" => "T_R_PARENTH",
    "is_string" => "T_IS_STRING",
    "is_bool" => "T_IS_BOOL",
    "!is_bool" => "T_NOT_IS_BOOL",
    "&" => "T_AND",
    "|" => "T_OR",
];

$root = buildTrie($tokens);

// "ape" is a valid token, so we expect T_APE
var_dump(
    $root
        ->data['a']
        ->data['p']
        ->data['e']
        ->value
); // T_APE

var_dump(
    $root
        ->data['(']
        ->value
); // T_L_PARENTH

var_dump(
    $root
        ->data['i']
        ->data['s']
        ->data['_']
        ->data['s']
        ->data['t']
        ->data['r']
        ->data['i']
        ->data['n']
        ->data['g']
        ->value
); // T_IS_STRING

var_dump(lex('(is_string)', $root));
var_dump(lex('(is_string)&(is_string|!is_bool)', $root));
Output for 5.4.2 - 5.4.45, 5.5.24 - 5.5.35, 5.6.8 - 5.6.28, 7.0.0 - 7.0.20, 7.1.0 - 7.1.20, 7.2.0 - 7.2.33, 7.3.16 - 7.3.33, 7.4.0 - 7.4.33, 8.0.0 - 8.0.30, 8.1.0 - 8.1.28, 8.2.0 - 8.2.18, 8.3.0 - 8.3.6
string(5) "T_APE" string(11) "T_L_PARENTH" string(11) "T_IS_STRING" array(3) { [0]=> array(2) { [0]=> string(11) "T_L_PARENTH" [1]=> string(1) "(" } [1]=> array(2) { [0]=> string(11) "T_IS_STRING" [1]=> string(9) "is_string" } [2]=> array(2) { [0]=> string(11) "T_R_PARENTH" [1]=> string(1) ")" } } array(9) { [0]=> array(2) { [0]=> string(11) "T_L_PARENTH" [1]=> string(1) "(" } [1]=> array(2) { [0]=> string(11) "T_IS_STRING" [1]=> string(9) "is_string" } [2]=> array(2) { [0]=> string(11) "T_R_PARENTH" [1]=> string(1) ")" } [3]=> array(2) { [0]=> string(5) "T_AND" [1]=> string(1) "&" } [4]=> array(2) { [0]=> string(11) "T_L_PARENTH" [1]=> string(1) "(" } [5]=> array(2) { [0]=> string(11) "T_IS_STRING" [1]=> string(9) "is_string" } [6]=> array(2) { [0]=> string(4) "T_OR" [1]=> string(1) "|" } [7]=> array(2) { [0]=> string(13) "T_NOT_IS_BOOL" [1]=> string(8) "!is_bool" } [8]=> array(2) { [0]=> string(11) "T_R_PARENTH" [1]=> string(1) ")" } }

preferences:
224.87 ms | 406 KiB | 227 Q