3v4l.org

run code in 300+ PHP versions simultaneously
<?php const TK_WHITESPACES = 0, TK_OPENING_PARENTHESIS = 2, // 0b00000010 TK_CLOSING_PARENTHESIS = 4, // 0b00000100 TK_LOGICAL_CONJUNCTION = 8, // 0b00001000 TK_LOGICAL_DISJUNCTION = 16, // 0b00010000 TK_LOGICAL_NEGATION = 32, // etc. TK_STRING = 64, TK_EQUAL = 128, TK_NOT_EQUAL = 256, TK_COLUMN_NAME = 512, TK_END = 1024; const ERR_NONE = 0, ERROR = 1; // 0b00000001 const TOKENIZATION_ERROR = 3, // 0b00000011 ERR_UNEXPECTED_CHAR = 7, // 0b00000111 ERR_UNCLOSED_STRING = 11, // 0b00001011 ERR_BAD_UTF8 = 19, // 0b00010011 ERR_EMPTY_STRING = 35; // TE const LOGIC_ERROR = 129, // 0b10000001 ERR_UNEXPECTED_TOKEN = 133, // 0b10000101 ERR_UNEXPECTED_CLOSING_PARENTHESIS = 137, // 0b10001001 ERR_MISSING_CLOSING_PARENTHESIS = 145, // 0b10010001 ERR_UNKNOW_COLUMN_NAME = 161, // 0b10100001 ERR_EMPTY_QUERY = 193; // L E const RULES = [ TK_COLUMN_NAME => TK_EQUAL | TK_NOT_EQUAL, TK_EQUAL => TK_STRING, TK_NOT_EQUAL => TK_STRING, TK_STRING => TK_END | TK_CLOSING_PARENTHESIS | TK_LOGICAL_DISJUNCTION | TK_LOGICAL_CONJUNCTION, TK_CLOSING_PARENTHESIS => TK_END | TK_CLOSING_PARENTHESIS | TK_LOGICAL_DISJUNCTION | TK_LOGICAL_CONJUNCTION, TK_LOGICAL_DISJUNCTION => TK_LOGICAL_NEGATION | TK_OPENING_PARENTHESIS | TK_COLUMN_NAME, TK_LOGICAL_CONJUNCTION => TK_LOGICAL_NEGATION | TK_OPENING_PARENTHESIS | TK_COLUMN_NAME, TK_LOGICAL_NEGATION => TK_OPENING_PARENTHESIS | TK_COLUMN_NAME, TK_OPENING_PARENTHESIS => TK_OPENING_PARENTHESIS | TK_COLUMN_NAME ]; const COLUMN_NAMES = ['family', 'conditions_of_use', 'taxonomical_group']; const ERROR_MESSAGES = [ ERR_UNEXPECTED_CHAR => 'caractère inattendu à la position %d.', ERR_UNCLOSED_STRING => 'chaîne de caractères non fermée à la position %d.', ERR_BAD_UTF8 => 'erreur d\'encodage UTF-8 à la position %d.', ERR_EMPTY_STRING => 'chaîne vide à la position %d.', // ***** ERR_UNEXPECTED_TOKEN => 'élément inattendu à la position %d.', ERR_UNEXPECTED_CLOSING_PARENTHESIS => 'parenthèse fermante inattendue à la position %d.', ERR_MISSING_CLOSING_PARENTHESIS => 'parenthèse fermante manquante à la position %d.', ERR_UNKNOW_COLUMN_NAME => 'nom de colonne inconnu à la position %d.', ERR_EMPTY_QUERY => 'requête vide à la position %d.' ]; const TOKEN2SQL = [ TK_EQUAL => ' = ', TK_NOT_EQUAL => ' <> ', TK_CLOSING_PARENTHESIS => ')', TK_OPENING_PARENTHESIS => '(', TK_LOGICAL_DISJUNCTION => ' OR ', TK_LOGICAL_CONJUNCTION => ' AND ', TK_LOGICAL_NEGATION => 'NOT ', TK_STRING => '?' // placeholder ]; function tokenize(string $str) { $pattern = <<<'REGEX' ~ \s+ (*:TK_WHITESPACES) | \( (*:TK_OPENING_PARENTHESIS) | \) (*:TK_CLOSING_PARENTHESIS) | \b ET \b (*:TK_LOGICAL_CONJUNCTION) | \b OU \b (*:TK_LOGICAL_DISJUNCTION) | \b NON \b (*:TK_LOGICAL_NEGATION) | \B " [^"\\]* (?s: \\ . [^"\\]* )* (?: " \B (*:TK_STRING) | " (*:ERR_UNEXPECTED_CHAR) | \z (*:ERR_UNCLOSED_STRING) ) | \b \w+ (*:TK_COLUMN_NAME) | = (*:TK_EQUAL) | (?: != | <> ) (*:TK_NOT_EQUAL) | \z (*:TK_END) | (*COMMIT) (*FAIL) # UNEXPECTED CHARACTER ⮕ tokenization aborted ~xu REGEX; $count = preg_match_all($pattern, $str, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE); if ( preg_last_error() === PREG_BAD_UTF8_ERROR ) { preg_match('~(?: # séquences UTF-8 valides [\x00-\x7F] | [\xC2-\xDF][\x80-\xBF] | \xE0[\xA0-\xBF][\x80-\xBF] | \xED[\x80-\x9F][\x80-\xBF] | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} | \xF0[\x90-\xBF][\x80-\xBF]{2} | \xF4[\x80-\x8F][\x80-\xBF]{2} | [\xF1-\xF3][\x80-\xBF]{3} )*~x', $str, $m); return [ 'error' => ['type' => ERR_BAD_UTF8, 'offset' => strlen($m[0])] ]; } if ( $count === 0 ) // (*COMMIT) (*FAIL) a été atteint dés la première position return [ 'error' => ['type' => ERR_UNEXPECTED_CHAR, 'offset' => 0] ]; $errors = array_filter($matches, fn($m) => constant($m['MARK']) & ERROR); if ( !empty($errors) ) { // la fin a été atteinte mais il y a des marqueurs d'erreurs parmi les tokens $error = array_shift($errors); list($match, $offset) = $error[0]; return [ 'error' => ['type' => constant($error['MARK']), 'offset' => $offset + strlen($match)] ]; } $lastMatch = end($matches); if ( constant($lastMatch['MARK']) !== TK_END ) { // (*COMMIT) (*FAIL) a été atteint avant la fin list($match, $offset) = $lastMatch[0]; return [ 'error' => ['type' => ERR_UNEXPECTED_CHAR, 'offset' => $offset + strlen($match)] ]; } if ( $count === 1 ) // l'unique token est TK_END, la chaîne est vide return [ 'error' => ['type' => ERR_EMPTY_STRING, 'offset' => 0] ]; return [ 'error' => ERR_NONE, 'tokens' => array_map( fn($m) => [ 'value' => $m[0][0], 'offset' => $m[0][1], 'type' => constant($m['MARK']) ], $matches ) ]; } function checkLogic($tokens) { $parenthesis = 0; $current = current($tokens); if ( $current['type'] === TK_END ) return [ 'type' => ERR_EMPTY_QUERY, 'offset' => $current['offset'] ]; do { if ( $current['type'] === TK_COLUMN_NAME && !in_array($current['value'], COLUMN_NAMES) ) return [ 'type' => ERR_UNKNOW_COLUMN_NAME, 'offset' => $current['offset'] ]; if ( $current['type'] === TK_OPENING_PARENTHESIS ) $parenthesis++; elseif ( $current['type'] === TK_CLOSING_PARENTHESIS && --$parenthesis < 0 ) return [ 'type' => ERR_UNEXPECTED_CLOSING_PARENTHESIS, 'offset' => $current['offset'] ]; $next = next($tokens); if ( RULES[$current['type']] & $next['type'] ) // le token courant est suivi par un token autorisé $current = $next; else return [ 'type' => ERR_UNEXPECTED_TOKEN, 'offset' => $next['offset'] ]; } while ( $next['type'] !== TK_END ); return $parenthesis ? [ 'type' => ERR_MISSING_CLOSING_PARENTHESIS, 'offset' => $current['offset'] ] : true ; } function displayError($error, $query) { $position = grapheme_strlen(substr($query, 0, $error['offset'])); $format = ERROR_MESSAGES[$error['type']]."\n%s\n" . str_repeat(' ', $position) . '^'; printf($format, $position, $query); } function buildQuery($tokens, $prefix = '') { $SQLQuery = $prefix; $params = []; foreach ($tokens as $token) { switch($token['type']) { case TK_COLUMN_NAME: $SQLQuery .= $token['value']; break; case TK_END: break; case TK_STRING: $param = substr($token['value'], 1, -1); $params[] = strtr($param, ['\\\\' => '\\\\', '\\"' => '"']); default: $SQLQuery .= TOKEN2SQL[$token['type']]; } } return [$SQLQuery, $params]; } $query = "".'family="_\\"bîd'."u\xCC\x8A".'le\\"_" OU (((taxonomical_group="machin")))'; $tokenization = tokenize($query); $error = $tokenization['error']; if ( $error !== ERR_NONE ) { displayError($error, $query); } else { $tokens = array_filter($tokenization['tokens'], fn($token) => $token['type'] !== TK_WHITESPACES); if ( true !== $error = checkLogic($tokens) ) { displayError($error, $query); } else { $prefix = 'SELECT * FROM matable WHERE '; list($SQLQuery, $params) = buildQuery($tokens, $prefix); echo "requête: $query", PHP_EOL, "requête préparée: $SQLQuery", PHP_EOL, 'paramètres: ', print_r($params, true); // $sth = $dbh->prepare($SQLQuery); // $sth->execute($params); } }
Output for 7.4.0 - 7.4.33, 8.0.1 - 8.0.30, 8.1.0 - 8.1.33, 8.2.0 - 8.2.29, 8.3.0 - 8.3.25, 8.4.1 - 8.4.12
requête: family="_\"bîdůle\"_" OU (((taxonomical_group="machin"))) requête préparée: SELECT * FROM matable WHERE family = ? OR (((taxonomical_group = ?))) paramètres: Array ( [0] => _"bîdůle"_ [1] => machin )

preferences:
320.4 ms | 407 KiB | 5 Q