3v4l.org

run code in 300+ PHP versions simultaneously
<?php

/**
 * Strip the HTML of Word documents.
 *
 * Reduces markup pasted from MS Word to a small whitelist of tags, removes
 * inline styling from bold/italic/underline tags and drops leftover comments.
 *
 * @author rene.veldink
 * @author christoph.roensch
 */
class Hgs_Filter_StripWordHtml #implements Zend_Filter_Interface
{
    /**
     * Tags kept by default, in the format expected by strip_tags().
     *
     * @var string
     */
    const ALLOW_DEFAULT = '<b><i><sup><sub><em><strong><u><br><ol><li><ul><span><div><h1><h2><h3><p>';

    /**
     * Tags that survive filtering, in the format expected by strip_tags().
     *
     * @var string
     */
    protected $_allow = self::ALLOW_DEFAULT;

    /**
     * Constructor
     *
     * @param array|string $options Either the allowed-tags string itself, or an
     *                              array-like value whose 'allow' entry holds it.
     * @return void
     */
    public function __construct($options = array())
    {
        if (is_string($options)) {
            $this->_allow = $options;
        } elseif (isset($options['allow'])) {
            $this->_allow = $options['allow'];
        }
    }

    /**
     * Filter the given HTML using the configured tag whitelist.
     *
     * @param string $value
     * @return string
     */
    public function filter($value)
    {
        return $this->strip_word_html($value, $this->_allow);
    }

    /**
     * Clean up Word-generated HTML.
     *
     * Changes relative to the upstream snippet:
     * - moved $allow default value to class property
     * - fixed regex for 'simplify style tags', <br> were replaced with <b>
     * - fixed bad escape sequence near $num_matches
     * - C style comments are removed with PCRE (the former mb_eregi_replace call
     *   received a delimited pattern and therefore never matched)
     * - <u>/<i>/<em> patterns no longer swallow tags that merely start with the
     *   same letters (<ul>, <img>, <embed>, ...)
     * - leftover HTML comments are removed one by one instead of greedily from
     *   the first '<!--' to the last '-->'
     *
     * @link http://php.net/manual/de/function.strip-tags.php#99643
     *
     * @param string $text         HTML to clean
     * @param string $allowed_tags Tags to keep, in strip_tags() format
     * @return string
     */
    private function strip_word_html($text, $allowed_tags = self::ALLOW_DEFAULT)
    {
        // replace MS special characters first
        $text = str_replace(
            array('&lsquo;', '&rsquo;', '&ldquo;', '&rdquo;', '&mdash;'),
            array('\'', '\'', '"', '"', '-'),
            $text
        );

        // make sure _all_ html entities are converted to the plain ascii equivalents - it appears
        // in some MS headers, some html entities are encoded and some aren't
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // try to strip out any C style comments first, since these, embedded in html comments, seem to
        // prevent strip_tags from removing html comments (MS Word introduced combination)
        if (strpos($text, '/*') !== false) {
            // 's' lets '.' span line breaks, so multi-line comments are removed as well
            $text = preg_replace('#/\*.*?\*/#su', '', $text);
        }

        // introduce a space into any arithmetic expressions that could be caught by strip_tags so that they won't be
        // '<1' becomes '< 1' (note: somewhat application specific)
        $text = preg_replace('/<([0-9]+)/', '< $1', $text);

        $text = strip_tags($text, $allowed_tags);

        // eliminate extraneous whitespace from start and end of line, or anywhere there are two or more spaces,
        // convert it to one
        $text = preg_replace(array('/^\s\s+/', '/\s\s+$/', '/\s\s+/u'), array('', '', ' '), $text);

        // strip out inline css and simplify style tags
        $search = array(
            // only bold tags carrying attributes are rewritten; requiring whitespace keeps <br> untouched
            '#<(?:strong|b)\s+[^>]*>(.*?)</(?:strong|b)>#isu',
            // the optional attribute part must start with whitespace so <img>, <embed>, ... are not matched
            '#<(?:em|i)(?:\s[^>]*)?>(.*?)</(?:em|i)>#isu',
            // same guard keeps <ul> from being mistaken for <u>
            '#<u(?:\s[^>]*)?>(.*?)</u>#isu',
        );
        $replace = array('<b>$1</b>', '<i>$1</i>', '<u>$1</u>');
        $text = preg_replace($search, $replace, $text);

        // on some of the ?newer MS Word exports, where you get conditionals of the form 'if gte mso 9', etc., it appears
        // that whatever is in one of the html comments prevents strip_tags from eradicating the html comment that
        // contains some MS Style Definitions - this last bit gets rid of any leftover comments
        if (strpos($text, '<!--') !== false) {
            // non-greedy: each comment is removed on its own, content between two comments is kept
            $text = preg_replace('/<!--.*?-->/su', '', $text);
        }

        return $text;
    }
}

// Demo: markup that only uses allowed, attribute-free tags should pass through unchanged.
$a = "<p>Hey<br /><b>Ho</b>,<i>Lets Go!</i></p><ol><li>1</li></ol><ul><li>2</li></ul>";

$filter = new Hgs_Filter_StripWordHtml();
$b = $filter->filter($a);

var_dump($a === $b);

echo "\n$b";
Finding entry points
Branch analysis from position: 0
1 jumps found. (Code = 62) Position 1 = -2
filename:       /in/eBUmA
function name:  (null)
number of ops:  16
compiled vars:  !0 = $a, !1 = $filter, !2 = $b
line      #* E I O op                           fetch          ext  return  operands
-------------------------------------------------------------------------------------
   87     0  E >   ASSIGN                                                   !0, '%3Cp%3EHey%3Cbr+%2F%3E%3Cb%3EHo%3C%2Fb%3E%2C%3Ci%3ELets+Go%21%3C%2Fi%3E%3C%2Fp%3E%3Col%3E%3Cli%3E1%3C%2Fli%3E%3C%2Fol%3E%3Cul%3E%3Cli%3E2%3C%2Fli%3E%3C%2Ful%3E'
   89     1        NEW                                              $4      'Hgs_Filter_StripWordHtml'
          2        DO_FCALL                                      0          
          3        ASSIGN                                                   !1, $4
   90     4        INIT_METHOD_CALL                                         !1, 'filter'
          5        SEND_VAR_EX                                              !2
          6        DO_FCALL                                      0  $7      
          7        ASSIGN                                                   !2, $7
   92     8        INIT_FCALL                                               'var_dump'
          9        IS_EQUAL                                         ~9      !0, !2
         10        SEND_VAL                                                 ~9
         11        DO_ICALL                                                 
   94    12        NOP                                                      
         13        FAST_CONCAT                                      ~11     '%0A', !2
         14        ECHO                                                     ~11
         15      > RETURN                                                   1

Class Hgs_Filter_StripWordHtml:
Function __construct:
Finding entry points
Branch analysis from position: 0
2 jumps found. (Code = 43) Position 1 = 3, Position 2 = 5
Branch analysis from position: 3
2 jumps found. (Code = 43) Position 1 = 7, Position 2 = 10
Branch analysis from position: 7
1 jumps found. (Code = 62) Position 1 = -2
Branch analysis from position: 10
Branch analysis from position: 5
filename:       /in/eBUmA
function name:  __construct
number of ops:  11
compiled vars:  !0 = $options
line      #* E I O op                           fetch          ext  return  operands
-------------------------------------------------------------------------------------
   26     0  E >   RECV_INIT                                        !0      <array>
   28     1        TYPE_CHECK                                   64          !0
          2      > JMPZ                                                     ~1, ->5
   29     3    >   ASSIGN_OBJ                                               '_allow'
          4        OP_DATA                                                  !0
   31     5    >   ISSET_ISEMPTY_DIM_OBJ                         0          !0, 'allow'
          6      > JMPZ                                                     ~3, ->10
   32     7    >   FETCH_DIM_R                                      ~5      !0, 'allow'
          8        ASSIGN_OBJ                                               '_allow'
          9        OP_DATA                                                  ~5
   34    10    > > RETURN                                                   null

End of function __construct

Function filter:
Finding entry points
Branch analysis from position: 0
1 jumps found. (Code = 62) Position 1 = -2
filename:       /in/eBUmA
function name:  filter
number of ops:  9
compiled vars:  !0 = $value
line      #* E I O op                           fetch          ext  return  operands
-------------------------------------------------------------------------------------
   40     0  E >   RECV                                             !0      
   42     1        INIT_METHOD_CALL                                         'strip_word_html'
          2        SEND_VAR_EX                                              !0
          3        CHECK_FUNC_ARG                                           
          4        FETCH_OBJ_FUNC_ARG                               $1      '_allow'
          5        SEND_FUNC_ARG                                            $1
          6        DO_FCALL                                      0  $2      
          7      > RETURN                                                   $2
   43     8*     > RETURN                                                   null

End of function filter

Function strip_word_html:
Finding entry points
Branch analysis from position: 0
2 jumps found. (Code = 43) Position 1 = 25, Position 2 = 32
Branch analysis from position: 25
2 jumps found. (Code = 43) Position 1 = 64, Position 2 = 70
Branch analysis from position: 64
1 jumps found. (Code = 62) Position 1 = -2
Branch analysis from position: 70
Branch analysis from position: 32
filename:       /in/eBUmA
function name:  strip_word_html
number of ops:  72
compiled vars:  !0 = $text, !1 = $allowed_tags, !2 = $search, !3 = $replace, !4 = $num_matches, !5 = $matches
line      #* E I O op                           fetch          ext  return  operands
-------------------------------------------------------------------------------------
   51     0  E >   RECV                                             !0      
          1        RECV_INIT                                        !1      <const ast>
   53     2        INIT_FCALL                                               'mb_regex_encoding'
          3        SEND_VAL                                                 'UTF-8'
          4        DO_ICALL                                                 
   55     5        ASSIGN                                                   !2, <array>
   56     6        ASSIGN                                                   !3, <array>
   57     7        INIT_FCALL                                               'preg_replace'
          8        SEND_VAR                                                 !2
          9        SEND_VAR                                                 !3
         10        SEND_VAR                                                 !0
         11        DO_ICALL                                         $9      
         12        ASSIGN                                                   !0, $9
   60    13        INIT_FCALL                                               'html_entity_decode'
         14        SEND_VAR                                                 !0
         15        SEND_VAL                                                 3
         16        SEND_VAL                                                 'UTF-8'
         17        DO_ICALL                                         $11     
         18        ASSIGN                                                   !0, $11
   63    19        INIT_FCALL                                               'mb_stripos'
         20        SEND_VAR                                                 !0
         21        SEND_VAL                                                 '%2F%2A'
         22        DO_ICALL                                         $13     
         23        TYPE_CHECK                                  1018          $13
         24      > JMPZ                                                     ~14, ->32
   64    25    >   INIT_FCALL                                               'mb_eregi_replace'
         26        SEND_VAL                                                 '%23%2F%5C%2A.%2A%3F%5C%2A%2F%23s'
         27        SEND_VAL                                                 ''
         28        SEND_VAR                                                 !0
         29        SEND_VAL                                                 'm'
         30        DO_ICALL                                         $15     
         31        ASSIGN                                                   !0, $15
   68    32    >   INIT_FCALL                                               'preg_replace'
         33        SEND_VAL                                                 <array>
         34        SEND_VAL                                                 <array>
         35        SEND_VAR                                                 !0
         36        DO_ICALL                                         $17     
         37        ASSIGN                                                   !0, $17
   69    38        INIT_FCALL                                               'strip_tags'
         39        SEND_VAR                                                 !0
         40        SEND_VAR                                                 !1
         41        DO_ICALL                                         $19     
         42        ASSIGN                                                   !0, $19
   71    43        INIT_FCALL                                               'preg_replace'
         44        SEND_VAL                                                 <array>
         45        SEND_VAL                                                 <array>
         46        SEND_VAR                                                 !0
         47        DO_ICALL                                         $21     
         48        ASSIGN                                                   !0, $21
   73    49        ASSIGN                                                   !2, <array>
   74    50        ASSIGN                                                   !3, <array>
   75    51        INIT_FCALL                                               'preg_replace'
         52        SEND_VAR                                                 !2
         53        SEND_VAR                                                 !3
         54        SEND_VAR                                                 !0
         55        DO_ICALL                                         $25     
         56        ASSIGN                                                   !0, $25
   79    57        INIT_FCALL                                               'preg_match_all'
         58        SEND_VAL                                                 '%2F%5C%3C%21--%2Fu'
         59        SEND_VAR                                                 !0
         60        SEND_REF                                                 !5
         61        DO_ICALL                                         $27     
         62        ASSIGN                                                   !4, $27
   80    63      > JMPZ                                                     !4, ->70
   81    64    >   INIT_FCALL                                               'preg_replace'
         65        SEND_VAL                                                 '%2F%5C%3C%21--%28.%29%2A--%5C%3E%2Fisu'
         66        SEND_VAL                                                 ''
         67        SEND_VAR                                                 !0
         68        DO_ICALL                                         $29     
         69        ASSIGN                                                   !0, $29
   83    70    > > RETURN                                                   !0
   84    71*     > RETURN                                                   null

End of function strip_word_html

End of class Hgs_Filter_StripWordHtml.

Generated using Vulcan Logic Dumper, using php 8.0.0


preferences:
160.7 ms | 1404 KiB | 29 Q