3v4l.org

run code in 300+ PHP versions simultaneously
<?php

/**
 * Encode a single Unicode code point as a UTF-8 byte string.
 *
 * Implemented with chr() only so that it does not depend on the mbstring
 * or intl extensions.
 *
 * @param int $codePoint Unicode scalar value (U+0000 - U+10FFFF, excluding
 *                       the surrogate range U+D800 - U+DFFF).
 *
 * @return string The UTF-8 encoding of $codePoint, or the encoding of
 *                U+FFFD (REPLACEMENT CHARACTER) when $codePoint is not a
 *                valid Unicode scalar value.
 */
function utf8_scrub_encode_code_point($codePoint)
{
    $isSurrogate = $codePoint >= 0xD800 && $codePoint <= 0xDFFF;
    if ($codePoint < 0 || $codePoint > 0x10FFFF || $isSurrogate) {
        // Emitting such a value would itself produce ill-formed UTF-8.
        return "\xEF\xBF\xBD";
    }

    if ($codePoint < 0x80) {
        return chr($codePoint);
    }

    if ($codePoint < 0x800) {
        return chr(0xC0 | ($codePoint >> 6))
            . chr(0x80 | ($codePoint & 0x3F));
    }

    if ($codePoint < 0x10000) {
        return chr(0xE0 | ($codePoint >> 12))
            . chr(0x80 | (($codePoint >> 6) & 0x3F))
            . chr(0x80 | ($codePoint & 0x3F));
    }

    return chr(0xF0 | ($codePoint >> 18))
        . chr(0x80 | (($codePoint >> 12) & 0x3F))
        . chr(0x80 | (($codePoint >> 6) & 0x3F))
        . chr(0x80 | ($codePoint & 0x3F));
}

/**
 * Replace every ill-formed UTF-8 byte sequence in a string with a substitute.
 *
 * The input is scanned byte-wise (no "u" modifier). Well-formed sequences are
 * copied through unchanged. A truncated 3- or 4-byte sequence (valid lead
 * byte followed by some, but not all, of its continuation bytes) is replaced
 * as a whole by ONE substitute; any other stray byte is replaced by one
 * substitute each.
 *
 * @param string     $str        Byte string to clean up.
 * @param int|string $substitute Replacement for each invalid sequence. An
 *                               integer is treated as a Unicode code point
 *                               and encoded as UTF-8 (default U+3013); a
 *                               string is inserted verbatim.
 *
 * @return string|null The scrubbed string, or null if preg_replace_callback()
 *                     reports an error.
 */
function utf8_scrub($str, $substitute = 0x3013)
{
    // An integer returned from the callback would be cast to its decimal
    // representation ("12307"), not to the character it denotes, so turn
    // code points into their UTF-8 byte sequence up front.
    if (is_int($substitute)) {
        $substitute = utf8_scrub_encode_code_point($substitute);
    }

    // Extended mode (x): whitespace is ignored and "#" starts a comment that
    // runs to the end of the line, so the line breaks below are significant.
    // Dot-all mode (s): the final "(.)" must also match a newline byte.
    $regex = '/
        ([\x00-\x7F]                       #   U+0000 -   U+007F
        |[\xC2-\xDF][\x80-\xBF]            #   U+0080 -   U+07FF
        | \xE0[\xA0-\xBF][\x80-\xBF]       #   U+0800 -   U+0FFF
        |[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} #   U+1000 -   U+CFFF
        | \xED[\x80-\x9F][\x80-\xBF]       #   U+D000 -   U+D7FF
        | \xF0[\x90-\xBF][\x80-\xBF]{2}    #  U+10000 -  U+3FFFF
        |[\xF1-\xF3][\x80-\xBF]{3}         #  U+40000 -  U+FFFFF
        | \xF4[\x80-\x8F][\x80-\xBF]{2})   # U+100000 - U+10FFFF
        |(\xE0[\xA0-\xBF]                  #   U+0800 -   U+0FFF (invalid)
        |[\xE1-\xEC\xEE\xEF][\x80-\xBF]    #   U+1000 -   U+CFFF (invalid)
        | \xED[\x80-\x9F]                  #   U+D000 -   U+D7FF (invalid)
        | \xF0[\x90-\xBF][\x80-\xBF]?      #  U+10000 -  U+3FFFF (invalid)
        |[\xF1-\xF3][\x80-\xBF]{1,2}       #  U+40000 -  U+FFFFF (invalid)
        | \xF4[\x80-\x8F][\x80-\xBF]?)     # U+100000 - U+10FFFF (invalid)
        |(.)                               # invalid 1-byte
    /xs';

    // $matches[1]: valid character
    // $matches[2]: invalid 3-byte or 4-byte character (truncated sequence)
    // $matches[3]: invalid 1-byte
    //
    // Trailing groups that did not participate are omitted from $matches, so
    // the presence of index 2 or 3 means an "invalid" alternative matched.
    return preg_replace_callback(
        $regex,
        function ($matches) use ($substitute) {
            if (isset($matches[2]) || isset($matches[3])) {
                return $substitute;
            }

            return $matches[1];
        },
        $str
    );
}

$examples = array(
    'Valid ASCII' => "a",
    'Valid 2 Octet Sequence' => "\xc3\xb1",
    'Invalid 2 Octet Sequence' => "\xc3\x28",
    'Invalid Sequence Identifier' => "\xa0\xa1",
    'Valid 3 Octet Sequence' => "\xe2\x82\xa1",
    'Invalid 3 Octet Sequence (in 2nd Octet)' => "\xe2\x28\xa1",
    'Invalid 3 Octet Sequence (in 3rd Octet)' => "\xe2\x82\x28",
    'Valid 4 Octet Sequence' => "\xf0\x90\x8c\xbc",
    'Invalid 4 Octet Sequence (in 2nd Octet)' => "\xf0\x28\x8c\xbc",
    'Invalid 4 Octet Sequence (in 3rd Octet)' => "\xf0\x90\x28\xbc",
    // Octets 1-3 must be well-formed so that only the 4th octet is at fault;
    // the previous sample ("\xf0\x28\x8c\x28") already broke at the 2nd octet.
    'Invalid 4 Octet Sequence (in 4th Octet)' => "\xf0\x90\x8c\x28",
    'Valid 5 Octet Sequence (but not Unicode!)' => "\xf8\xa1\xa1\xa1\xa1",
    'Valid 6 Octet Sequence (but not Unicode!)' => "\xfc\xa1\xa1\xa1\xa1\xa1",
);

// Print each label followed by its scrubbed sample.
foreach ($examples as $k => $v) {
    echo "{$k}\n";
    echo utf8_scrub($v);
    echo "\n";
}
Finding entry points
Branch analysis from position: 0
2 jumps found. (Code = 77) Position 1 = 2, Position 2 = 13
Branch analysis from position: 2
2 jumps found. (Code = 78) Position 1 = 3, Position 2 = 13
Branch analysis from position: 3
1 jumps found. (Code = 42) Position 1 = 2
Branch analysis from position: 2
Branch analysis from position: 13
1 jumps found. (Code = 62) Position 1 = -2
Branch analysis from position: 13
filename:       /in/NJVXe
function name:  (null)
number of ops:  15
compiled vars:  !0 = $examples, !1 = $v, !2 = $k
line      #* E I O op                           fetch          ext  return  operands
-------------------------------------------------------------------------------------
   40     0  E >   ASSIGN                                                   !0, <array>
   57     1      > FE_RESET_R                                       $4      !0, ->13
          2    > > FE_FETCH_R                                       ~5      $4, !1, ->13
          3    >   ASSIGN                                                   !2, ~5
   59     4        NOP                                                      
          5        FAST_CONCAT                                      ~7      !2, '%0A'
          6        ECHO                                                     ~7
   60     7        INIT_FCALL                                               'utf8_scrub'
          8        SEND_VAR                                                 !1
          9        DO_FCALL                                      0  $8      
         10        ECHO                                                     $8
   61    11        ECHO                                                     '%0A'
   57    12      > JMP                                                      ->2
         13    >   FE_FREE                                                  $4
   63    14      > RETURN                                                   1

Function utf8_scrub:
Finding entry points
Branch analysis from position: 0
1 jumps found. (Code = 62) Position 1 = -2
filename:       /in/NJVXe
function name:  utf8_scrub
number of ops:  13
compiled vars:  !0 = $str, !1 = $substitute, !2 = $regex, !3 = $ret
line      #* E I O op                           fetch          ext  return  operands
-------------------------------------------------------------------------------------
    2     0  E >   RECV                                             !0      
          1        RECV_INIT                                        !1      12307
    4     2        ASSIGN                                                   !2, '%2F%0A++++++++%28%5B%5Cx00-%5Cx7F%5D+++++++++++++++++++++++%23+++U%2B0000+-+++U%2B007F%0A++++++++%7C%5B%5CxC2-%5CxDF%5D%5B%5Cx80-%5CxBF%5D++++++++++++%23+++U%2B0080+-+++U%2B07FF%0A++++++++%7C+%5CxE0%5B%5CxA0-%5CxBF%5D%5B%5Cx80-%5CxBF%5D+++++++%23+++U%2B0800+-+++U%2B0FFF%0A++++++++%7C%5B%5CxE1-%5CxEC%5CxEE%5CxEF%5D%5B%5Cx80-%5CxBF%5D%7B2%7D+%23+++U%2B1000+-+++U%2BCFFF%0A++++++++%7C+%5CxED%5B%5Cx80-%5Cx9F%5D%5B%5Cx80-%5CxBF%5D+++++++%23+++U%2BD000+-+++U%2BD7FF%0A++++++++%7C+%5CxF0%5B%5Cx90-%5CxBF%5D%5B%5Cx80-%5CxBF%5D%7B2%7D++++%23++U%2B10000+-++U%2B3FFFF%0A++++++++%7C%5B%5CxF1-%5CxF3%5D%5B%5Cx80-%5CxBF%5D%7B3%7D+++++++++%23++U%2B40000+-++U%2BFFFFF%0A++++++++%7C+%5CxF4%5B%5Cx80-%5Cx8F%5D%5B%5Cx80-%5CxBF%5D%7B2%7D%29+++%23+U%2B100000+-+U%2B10FFFF%0A++++++++%7C%28%5CxE0%5B%5CxA0-%5CxBF%5D++++++++++++++++++%23+++U%2B0800+-+++U%2B0FFF+%28invalid%29%0A++++++++%7C%5B%5CxE1-%5CxEC%5CxEE%5CxEF%5D%5B%5Cx80-%5CxBF%5D++++%23+++U%2B1000+-+++U%2BCFFF+%28invalid%29%0A++++++++%7C+%5CxED%5B%5Cx80-%5Cx9F%5D++++++++++++++++++%23+++U%2BD000+-+++U%2BD7FF+%28invalid%29%0A++++++++%7C+%5CxF0%5B%5Cx90-%5CxBF%5D%5B%5Cx80-%5CxBF%5D%3F++++++%23++U%2B10000+-++U%2B3FFFF+%28invalid%29%0A++++++++%7C%5B%5CxF1-%5CxF3%5D%5B%5Cx80-%5CxBF%5D%7B1%2C2%7D+++++++%23++U%2B40000+-++U%2BFFFFF+%28invalid%29%0A++++++++%7C+%5CxF4%5B%5Cx80-%5Cx8F%5D%5B%5Cx80-%5CxBF%5D%3F%29+++++%23+U%2B100000+-+U%2B10FFFF+%28invalid%29%0A++++++++%7C%28.%29+++++++++++++++++++++++++++++++%23+invalid+1-byte%0A++++%2Fxs'
   26     3        INIT_FCALL                                               'preg_replace_callback'
   27     4        SEND_VAR                                                 !2
   28     5        DECLARE_LAMBDA_FUNCTION                                  '%00%7Bclosure%7D%2Fin%2FNJVXe%3A28%240'
          6        BIND_LEXICAL                                             ~5, !1
   33     7        SEND_VAL                                                 ~5
   34     8        SEND_VAR                                                 !0
          9        DO_ICALL                                         $6      
   26    10        ASSIGN                                                   !3, $6
   37    11      > RETURN                                                   !3
   38    12*     > RETURN                                                   null

End of function utf8_scrub

Function %00%7Bclosure%7D%2Fin%2FNJVXe%3A28%240:
Finding entry points
Branch analysis from position: 0
2 jumps found. (Code = 47) Position 1 = 4, Position 2 = 6
Branch analysis from position: 4
2 jumps found. (Code = 43) Position 1 = 7, Position 2 = 8
Branch analysis from position: 7
1 jumps found. (Code = 62) Position 1 = -2
Branch analysis from position: 8
1 jumps found. (Code = 62) Position 1 = -2
Branch analysis from position: 6
filename:       /in/NJVXe
function name:  {closure}
number of ops:  11
compiled vars:  !0 = $matches, !1 = $substitute
line      #* E I O op                           fetch          ext  return  operands
-------------------------------------------------------------------------------------
   28     0  E >   RECV                                             !0      
          1        BIND_STATIC                                              !1
   29     2        ISSET_ISEMPTY_DIM_OBJ                         0  ~2      !0, 2
          3      > JMPNZ_EX                                         ~2      ~2, ->6
          4    >   ISSET_ISEMPTY_DIM_OBJ                         0  ~3      !0, 3
          5        BOOL                                             ~2      ~3
          6    > > JMPZ                                                     ~2, ->8
   30     7    > > RETURN                                                   !1
   32     8    >   FETCH_DIM_R                                      ~4      !0, 1
          9      > RETURN                                                   ~4
   33    10*     > RETURN                                                   null

End of function %00%7Bclosure%7D%2Fin%2FNJVXe%3A28%240

Generated using Vulcan Logic Dumper, using php 8.0.0


preferences:
178.39 ms | 1402 KiB | 16 Q