3v4l.org

run code in 300+ PHP versions simultaneously
<?php
/**
 * Strip the HTML of Word documents
 *
 * Reduces HTML pasted or exported from MS Word to a whitelist of tags:
 * typographic entities are replaced by plain characters, all remaining
 * entities are decoded, disallowed tags are removed via strip_tags(),
 * runs of whitespace are collapsed and leftover comments are dropped.
 *
 * @author rene.veldink
 * @author christoph.roensch
 */
class Hgs_Filter_StripWordHtml #implements Zend_Filter_Interface
{
    /**
     * Tags kept by default, in the format expected by strip_tags().
     *
     * @var string
     */
    const ALLOW_DEFAULT = '<b><i><sup><sub><em><strong><u><br><ol><li><ul><span><div><h1><h2><h3><p>';

    /**
     * Tags this instance keeps, in the format expected by strip_tags().
     *
     * @var string
     */
    protected $_allow = self::ALLOW_DEFAULT;

    /**
     * Constructor
     *
     * Accepts either the allowed-tags string itself or an options
     * container whose 'allow' entry holds that string. Anything else
     * leaves the default whitelist in place.
     *
     * @param array|string $options
     * @return void
     */
    public function __construct($options = array())
    {
        if (is_string($options)) {
            $this->_allow = $options;
        } elseif (isset($options['allow'])) {
            $this->_allow = $options['allow'];
        }
    }

    /**
     * Filters $value using the whitelist configured for this instance.
     *
     * @param string $value
     * @return string
     */
    public function filter($value)
    {
        return $this->strip_word_html($value, $this->_allow);
    }

    /**
     * Performs the actual clean-up.
     *
     * Changes relative to the snippet this is based on:
     * - moved $allow default value to class property
     * - fixed regex for 'simplify style tags', <br> were replaced with <b>
     * - fixed bad escape sequence near $num_matches
     * - no debug output is written any more
     * - C style comments are removed with PCRE; the former mb_ereg call was
     *   handed a delimited pattern and therefore never matched a comment
     * - <u>/<em>/<i> simplification no longer matches longer tag names
     *   such as <ul> or <img>
     * - leftover html comments are removed one by one instead of from the
     *   first opener to the last closer
     *
     * @link http://php.net/manual/de/function.strip-tags.php#99643
     *
     * @param string $text         markup to clean
     * @param string $allowed_tags whitelist in strip_tags() format
     * @return string
     */
    private function strip_word_html($text, $allowed_tags = self::ALLOW_DEFAULT)
    {
        //replace MS special characters first
        $text = str_replace(
            array('&lsquo;', '&rsquo;', '&ldquo;', '&rdquo;', '&mdash;'),
            array('\'', '\'', '"', '"', '-'),
            $text
        );

        //make sure _all_ html entities are converted to the plain ascii equivalents - it appears
        //in some MS headers, some html entities are encoded and some aren't
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        //try to strip out any C style comments first, since these, embedded in html comments, seem to
        //prevent strip_tags from removing html comments (MS Word introduced combination)
        if (strpos($text, '/*') !== false) {
            // "s" lets the dot span line breaks, "u" treats the subject as UTF-8
            $text = preg_replace('#/\*.*?\*/#su', '', $text);
        }

        //introduce a space into any arithmetic expressions that could be caught by strip_tags
        //'<1' becomes '< 1' (note: somewhat application specific)
        $text = preg_replace('/<([0-9]+)/', '< $1', $text);

        $text = strip_tags($text, $allowed_tags);

        //eliminate extraneous whitespace from start and end of line, or anywhere there are
        //two or more spaces, convert it to one
        $text = preg_replace(
            array('/^\s\s+/', '/\s\s+$/', '/\s\s+/u'),
            array('', '', ' '),
            $text
        );

        //strip out inline css and simplify style tags
        //the tag name has to be followed by whitespace or '>' so that only the intended
        //elements are rewritten (<u> but not <ul>, <i> but not <img>)
        $search = array(
            '#<(strong|b)\s+[^>]*>(.*?)</(strong|b)>#isu',
            '#<(em|i)(?:\s[^>]*)?>(.*?)</(em|i)>#isu',
            '#<u(?:\s[^>]*)?>(.*?)</u>#isu',
        );
        $replace = array('<b>$2</b>', '<i>$2</i>', '<u>$1</u>');
        $text = preg_replace($search, $replace, $text);

        //on some of the newer MS Word exports, where you get conditionals of the form 'if gte mso 9', etc.,
        //it appears that whatever is in one of the html comments prevents strip_tags from eradicating the
        //html comment that contains some MS Style Definitions - this last bit gets rid of any leftover
        //comments; the match is lazy so text between two separate comments is kept
        $text = preg_replace('/<!--.*?-->/su', '', $text);

        return $text;
    }
}

$a = "<p>Hey<br /><b>Ho</b>,<i>Lets Go!</i></p><ol><li>1</li></ol><ul><li>2</li></ul>";

$filter = new Hgs_Filter_StripWordHtml(Hgs_Filter_StripWordHtml::ALLOW_DEFAULT);
$b = $filter->filter($a);

var_dump($a === $b);

echo "\n$b";
Finding entry points
Branch analysis from position: 0
1 jumps found. (Code = 62) Position 1 = -2
filename:       /in/IYFlu
function name:  (null)
number of ops:  17
compiled vars:  !0 = $a, !1 = $filter, !2 = $b
line      #* E I O op                           fetch          ext  return  operands
-------------------------------------------------------------------------------------
   88     0  E >   ASSIGN                                                   !0, '%3Cp%3EHey%3Cbr+%2F%3E%3Cb%3EHo%3C%2Fb%3E%2C%3Ci%3ELets+Go%21%3C%2Fi%3E%3C%2Fp%3E%3Col%3E%3Cli%3E1%3C%2Fli%3E%3C%2Fol%3E%3Cul%3E%3Cli%3E2%3C%2Fli%3E%3C%2Ful%3E'
   90     1        NEW                                              $4      'Hgs_Filter_StripWordHtml'
          2        SEND_VAL_EX                                              '%3Cb%3E%3Ci%3E%3Csup%3E%3Csub%3E%3Cem%3E%3Cstrong%3E%3Cu%3E%3Cbr%3E%3Col%3E%3Cli%3E%3Cul%3E%3Cspan%3E%3Cdiv%3E%3Ch1%3E%3Ch2%3E%3Ch3%3E%3Cp%3E'
          3        DO_FCALL                                      0          
          4        ASSIGN                                                   !1, $4
   91     5        INIT_METHOD_CALL                                         !1, 'filter'
          6        SEND_VAR_EX                                              !0
          7        DO_FCALL                                      0  $7      
          8        ASSIGN                                                   !2, $7
   93     9        INIT_FCALL                                               'var_dump'
         10        IS_EQUAL                                         ~9      !0, !2
         11        SEND_VAL                                                 ~9
         12        DO_ICALL                                                 
   95    13        NOP                                                      
         14        FAST_CONCAT                                      ~11     '%0A', !2
         15        ECHO                                                     ~11
         16      > RETURN                                                   1

Class Hgs_Filter_StripWordHtml:
Function __construct:
Finding entry points
Branch analysis from position: 0
2 jumps found. (Code = 43) Position 1 = 3, Position 2 = 5
Branch analysis from position: 3
2 jumps found. (Code = 43) Position 1 = 7, Position 2 = 10
Branch analysis from position: 7
1 jumps found. (Code = 62) Position 1 = -2
Branch analysis from position: 10
Branch analysis from position: 5
filename:       /in/IYFlu
function name:  __construct
number of ops:  11
compiled vars:  !0 = $options
line      #* E I O op                           fetch          ext  return  operands
-------------------------------------------------------------------------------------
   26     0  E >   RECV_INIT                                        !0      <array>
   28     1        TYPE_CHECK                                   64          !0
          2      > JMPZ                                                     ~1, ->5
   29     3    >   ASSIGN_OBJ                                               '_allow'
          4        OP_DATA                                                  !0
   31     5    >   ISSET_ISEMPTY_DIM_OBJ                         0          !0, 'allow'
          6      > JMPZ                                                     ~3, ->10
   32     7    >   FETCH_DIM_R                                      ~5      !0, 'allow'
          8        ASSIGN_OBJ                                               '_allow'
          9        OP_DATA                                                  ~5
   34    10    > > RETURN                                                   null

End of function __construct

Function filter:
Finding entry points
Branch analysis from position: 0
1 jumps found. (Code = 62) Position 1 = -2
filename:       /in/IYFlu
function name:  filter
number of ops:  9
compiled vars:  !0 = $value
line      #* E I O op                           fetch          ext  return  operands
-------------------------------------------------------------------------------------
   40     0  E >   RECV                                             !0      
   42     1        INIT_METHOD_CALL                                         'strip_word_html'
          2        SEND_VAR_EX                                              !0
          3        CHECK_FUNC_ARG                                           
          4        FETCH_OBJ_FUNC_ARG                               $1      '_allow'
          5        SEND_FUNC_ARG                                            $1
          6        DO_FCALL                                      0  $2      
          7      > RETURN                                                   $2
   43     8*     > RETURN                                                   null

End of function filter

Function strip_word_html:
Finding entry points
Branch analysis from position: 0
2 jumps found. (Code = 43) Position 1 = 29, Position 2 = 36
Branch analysis from position: 29
2 jumps found. (Code = 43) Position 1 = 68, Position 2 = 74
Branch analysis from position: 68
1 jumps found. (Code = 62) Position 1 = -2
Branch analysis from position: 74
Branch analysis from position: 36
filename:       /in/IYFlu
function name:  strip_word_html
number of ops:  76
compiled vars:  !0 = $text, !1 = $allowed_tags, !2 = $search, !3 = $replace, !4 = $num_matches, !5 = $matches
line      #* E I O op                           fetch          ext  return  operands
-------------------------------------------------------------------------------------
   51     0  E >   RECV                                             !0      
          1        RECV_INIT                                        !1      <const ast>
   53     2        ROPE_INIT                                     3  ~7      'Debug%3A+'
          3        ROPE_ADD                                      1  ~7      ~7, !1
          4        ROPE_END                                      2  ~6      ~7, '%0A'
          5        ECHO                                                     ~6
   54     6        INIT_FCALL                                               'mb_regex_encoding'
          7        SEND_VAL                                                 'UTF-8'
          8        DO_ICALL                                                 
   56     9        ASSIGN                                                   !2, <array>
   57    10        ASSIGN                                                   !3, <array>
   58    11        INIT_FCALL                                               'preg_replace'
         12        SEND_VAR                                                 !2
         13        SEND_VAR                                                 !3
         14        SEND_VAR                                                 !0
         15        DO_ICALL                                         $12     
         16        ASSIGN                                                   !0, $12
   61    17        INIT_FCALL                                               'html_entity_decode'
         18        SEND_VAR                                                 !0
         19        SEND_VAL                                                 3
         20        SEND_VAL                                                 'UTF-8'
         21        DO_ICALL                                         $14     
         22        ASSIGN                                                   !0, $14
   64    23        INIT_FCALL                                               'mb_stripos'
         24        SEND_VAR                                                 !0
         25        SEND_VAL                                                 '%2F%2A'
         26        DO_ICALL                                         $16     
         27        TYPE_CHECK                                  1018          $16
         28      > JMPZ                                                     ~17, ->36
   65    29    >   INIT_FCALL                                               'mb_eregi_replace'
         30        SEND_VAL                                                 '%23%2F%5C%2A.%2A%3F%5C%2A%2F%23s'
         31        SEND_VAL                                                 ''
         32        SEND_VAR                                                 !0
         33        SEND_VAL                                                 'm'
         34        DO_ICALL                                         $18     
         35        ASSIGN                                                   !0, $18
   69    36    >   INIT_FCALL                                               'preg_replace'
         37        SEND_VAL                                                 <array>
         38        SEND_VAL                                                 <array>
         39        SEND_VAR                                                 !0
         40        DO_ICALL                                         $20     
         41        ASSIGN                                                   !0, $20
   70    42        INIT_FCALL                                               'strip_tags'
         43        SEND_VAR                                                 !0
         44        SEND_VAR                                                 !1
         45        DO_ICALL                                         $22     
         46        ASSIGN                                                   !0, $22
   72    47        INIT_FCALL                                               'preg_replace'
         48        SEND_VAL                                                 <array>
         49        SEND_VAL                                                 <array>
         50        SEND_VAR                                                 !0
         51        DO_ICALL                                         $24     
         52        ASSIGN                                                   !0, $24
   74    53        ASSIGN                                                   !2, <array>
   75    54        ASSIGN                                                   !3, <array>
   76    55        INIT_FCALL                                               'preg_replace'
         56        SEND_VAR                                                 !2
         57        SEND_VAR                                                 !3
         58        SEND_VAR                                                 !0
         59        DO_ICALL                                         $28     
         60        ASSIGN                                                   !0, $28
   80    61        INIT_FCALL                                               'preg_match_all'
         62        SEND_VAL                                                 '%2F%5C%3C%21--%2Fu'
         63        SEND_VAR                                                 !0
         64        SEND_REF                                                 !5
         65        DO_ICALL                                         $30     
         66        ASSIGN                                                   !4, $30
   81    67      > JMPZ                                                     !4, ->74
   82    68    >   INIT_FCALL                                               'preg_replace'
         69        SEND_VAL                                                 '%2F%5C%3C%21--%28.%29%2A--%5C%3E%2Fisu'
         70        SEND_VAL                                                 ''
         71        SEND_VAR                                                 !0
         72        DO_ICALL                                         $32     
         73        ASSIGN                                                   !0, $32
   84    74    > > RETURN                                                   !0
   85    75*     > RETURN                                                   null

End of function strip_word_html

End of class Hgs_Filter_StripWordHtml.

Generated using Vulcan Logic Dumper, using php 8.0.0


preferences:
168.18 ms | 1404 KiB | 29 Q