3v4l.org

run code in 500+ PHP versions simultaneously
<?php

/**
 * Over-engineered solution to most capitalisation issues.
 *
 * Version 1.0
 */
class str
{
    /**
     * Words or abbreviations that should always be all uppercase
     */
    const ALL_UPPERCASE = [
        "UK",
        "VAT",
    ];

    /**
     * Words or abbreviations that should always be all lowercase
     */
    const ALL_LOWERCASE = [
        "and",
        "as",
        "by",
        "in",
        "of",
        "or",
        "to",
    ];

    /**
     * Honorifics that only contain consonants.
     *
     * Vowel-less words are otherwise assumed to be abbreviations and fully
     * uppercased; the entries listed here are exempt from that rule.
     */
    const CONSONANT_ONLY_HONORIFICS = [
        # English
        "Mr",
        "Mrs",
        "Ms",
        "Dr",
        "Br",
        "Sr",
        "Fr",
        "Pr",
        "St",

        # Afrikaans
        "Mnr",
    ];

    /**
     * Surname prefixes that should be lowercase,
     * unless not following another word (firstname).
     */
    const SURNAME_PREFIXES = [
        "de la",
        "de las",
        "van de",
        "van der",
        "vit de",
        "von",
        "van",
        "del",
        "der",
    ];

    /**
     * Elided particles (letter + apostrophe) that stay lowercase in front of
     * a name, e.g. "d'Artagnan", as opposed to "O'Connor" or "L'Oreal".
     */
    const LOWERCASE_ELISIONS = [
        "d'",
    ];

    /**
     * Capitalises every (appropriate) word in a given string.
     *
     * The string is first normalised (runs of whitespace collapsed to one
     * space, a space enforced after every comma), then every word is
     * capitalised on its own, and finally a few whole-string passes handle
     * the "Mc" prefix, Roman numerals, surname prefixes and ordinals.
     *
     * @param string|null $string The text to capitalise.
     *
     * @return string|null The capitalised text; falsy input (null, "", "0")
     *                     is returned untouched.
     */
    public static function capitalise(?string $string): ?string
    {
        if (!$string) {
            return $string;
        }

        # Strip away multi-spaces
        $string = preg_replace('/\s{2,}/', ' ', $string);

        # Ensure there is always a space after a comma
        $string = preg_replace('/,([^\s])/', ', $1', $string);

        # A word is anything separated by spaces, a dash or a full stop
        $string = preg_replace_callback(
            '/([^\s\-\.]+)/',
            static function (array $matches): string {
                return self::capitaliseWord($matches[1]);
            },
            $string
        );

        # Cater for the Mc prefix (Mc followed by a consonant)
        $string = preg_replace_callback(
            '/(Mc)([b-df-hj-np-tv-z])/',
            static function (array $matches): string {
                return 'Mc' . ucfirst($matches[2]);
            },
            $string
        );

        # Cater for Roman numerals (need to be in all caps).
        # The pattern is case-insensitive, so a lone elided particle such as
        # the "d" in "d'Artagnan" would be read as the numeral D and
        # uppercased again; the leading guard skips those particles.
        $elisions = implode('|', array_map(
            static function (string $elision): string {
                return preg_quote($elision, '/');
            },
            self::LOWERCASE_ELISIONS
        ));
        $elisionGuard = $elisions === '' ? '' : '(?!(?:' . $elisions . '))';
        $pattern = '/\b' . $elisionGuard
            . '((?<![MDCLXVI])(?=[MDCLXVI])M{0,3}(?:C[MD]|D?C{0,3})(?:X[CL]|L?X{0,3})(?:I[XV]|V?I{0,3}))\b/i';
        $string = preg_replace_callback(
            $pattern,
            static function (array $matches): string {
                return strtoupper($matches[1]);
            },
            $string
        );

        # Cater for surname prefixes (must be after the Roman numerals).
        # A prefix only counts when it is bookended by words.
        $prefixes = implode('|', array_map(
            static function (string $prefix): string {
                return preg_quote($prefix, '/');
            },
            self::SURNAME_PREFIXES
        ));
        $string = preg_replace_callback(
            '/\b (' . $prefixes . ') \b/i',
            static function (array $matches): string {
                return strtolower(" {$matches[1]} ");
            },
            $string
        );

        # Cater for ordinal numbers (a number suffixed with an ordinal)
        $string = preg_replace_callback(
            '/\b(\d+(?:st|nd|rd|th))\b/i',
            static function (array $matches): string {
                return strtolower($matches[1]);
            },
            $string
        );

        # And we're done
        return $string;
    }

    /**
     * Applies the per-word capitalisation rules to a single word.
     *
     * @param string $rawWord A word as found in the input (any casing).
     *
     * @return string The word in its final casing.
     */
    private static function capitaliseWord(string $rawWord): string
    {
        # Work from an all-lowercase version of the word
        $word = mb_strtolower($rawWord, 'UTF-8');

        # If the word needs to be all lowercase
        if (in_array($word, self::ALL_LOWERCASE, true)) {
            return $word;
        }

        # If the word needs to be all uppercase
        $upper = mb_strtoupper($word, 'UTF-8');
        if (in_array($upper, self::ALL_UPPERCASE, true)) {
            return $upper;
        }

        # Create a version without diacritics
        $asciiWord = self::foldToAscii($word);

        # If the word contains non-alpha characters (numbers, &, etc), with
        # exceptions (comma, '), assume it's an abbreviation
        if (preg_match('/[^a-z,\']/i', $asciiWord)) {
            return $upper;
        }

        # If the word doesn't contain any vowels, assume it's an abbreviation,
        # unless the word is an honorific
        if (
            !preg_match('/[aeiouy]/i', $asciiWord)
            && !in_array(self::upperFirst($word), self::CONSONANT_ONLY_HONORIFICS, true)
        ) {
            return $upper;
        }

        # If the word contains two of the same vowel and is 3 characters or
        # fewer, assume it's an abbreviation.
        # The pattern must be single-quoted: inside double quotes PHP reads
        # "\1" as the octal escape for byte 0x01, not as a backreference.
        # NOTE(review): short real words such as "Lee" or "Zoo" also match
        # this heuristic - confirm that is acceptable for your data.
        if (mb_strlen($word, 'UTF-8') <= 3 && preg_match('/([aeiouy])\1/', $word)) {
            return $upper;
        }

        # Ensure O'Connor, L'Oreal, etc, are double capitalised, with
        # exceptions (d'). Only the matched part is rewritten, so anything
        # around it (such as a trailing comma) is kept.
        if (preg_match('/\b([a-z]\')(\w+)\b/iu', $word, $match, PREG_OFFSET_CAPTURE)) {
            # PREG_OFFSET_CAPTURE reports byte offsets, hence substr/strlen
            $start = $match[0][1];
            $end = $start + strlen($match[0][0]);

            $particle = $match[1][0];
            if (!in_array($particle, self::LOWERCASE_ELISIONS, true)) {
                $particle = strtoupper($particle);
            }

            return self::upperFirst(substr($word, 0, $start))
                . $particle
                . self::upperFirst($match[2][0])
                . substr($word, $end);
        }

        # Otherwise, return the word with the first letter (only) capitalised
        return self::upperFirst($word); //The most common outcome
    }

    /**
     * Returns a diacritic-free, ASCII approximation of a lowercase word.
     *
     * @param string $word A lowercase word.
     *
     * @return string The folded word, or the word itself when it cannot be
     *                transliterated (e.g. invalid UTF-8).
     *
     * @throws \RuntimeException When the transliterator cannot be created.
     */
    private static function foldToAscii(string $word): string
    {
        # Pure ASCII lowercase text has nothing to fold
        if (!preg_match('/[^\x00-\x7F]/', $word)) {
            return $word;
        }

        # Build the transliterator once instead of once per word
        static $transliterator = null;
        if ($transliterator === null) {
            $transliterator = \Transliterator::createFromRules(
                ':: Any-Latin; :: Latin-ASCII; :: NFD; :: [:Nonspacing Mark:] Remove; :: Lower(); :: NFC;',
                \Transliterator::FORWARD
            );
            if ($transliterator === null) {
                throw new \RuntimeException(
                    'Unable to create the transliterator: ' . intl_get_error_message()
                );
            }
        }

        $folded = $transliterator->transliterate($word);

        return $folded === false ? $word : $folded;
    }

    /**
     * Multibyte-safe ucfirst(): uppercases the first character only.
     *
     * @param string $text UTF-8 text.
     *
     * @return string The text with its first character uppercased.
     */
    private static function upperFirst(string $text): string
    {
        if ($text === '') {
            return '';
        }

        return mb_strtoupper(mb_substr($text, 0, 1, 'UTF-8'), 'UTF-8')
            . mb_substr($text, 1, null, 'UTF-8');
    }
}

$complicated_names = "
DONALD MCDONALD
SINEAD O'CONNOR
JOHAN VAN ZYL
OSCAR DE LA HOYA
P.F. CHANG
KFC
ST. JOHN
DR ZEUZ
PROF. GREEN
VAN DER BERG THE 3RD
SÃO JOÃO DOS SANTOS
KING HENRY VII
KUJE'S HIGH,ROAD
FLUG-HAFEN
FLUGIG-O'DONNALD
MARY O'CALLAHAN
JOHN O'DONALD
THE O'CALLAHAN-O'DONALD RESIDENCE
2ND NOVEMBER STREET
The 15th king of scotland
FCT
MICHAEL VIVA
GINA C.A. KOTOR
DUTCH NAMES
van der vaart
van vollenhoven
van 't zandt
van het zand
el hamdoie
van der Rooi-van Velzen
Zuidewijn - van rooien
teggelen onder t boven
guido op 't drooge
friso van drooge
Zuidewijn - van rooien
teggelen onder t boven
ZUID-HOLLAND
's hertogen-bosch
De Rooi Van Zuidewijn
van onder
Van Der Wijk-Zeewuster
de Vries-van der Leest
Den Oudsten - van 't Veldt
Hare Koninklijke Hoogheid Alexia Juliana Marcela Laurentien Prinses der Nederlanden, Prinses van Oranje-Nassau
Hare Koninklijke Hoogheid Máxima, Prinses der Nederlanden, Prinses van Oranje-Nassau, Mevrouw van Amsberg
van Lippe-Biesterfeld van Vollenhoven
";

var_dump(str::capitalise($complicated_names));
Finding entry points
Branch analysis from position: 0
1 jumps found. (Code = 62) Position 1 = -2
filename:       /in/SEdf7
function name:  (null)
number of ops:  8
compiled vars:  !0 = $complicated_names
line      #* E I O op                               fetch          ext  return  operands
-----------------------------------------------------------------------------------------
  171     0  E >   ASSIGN                                                       !0, '%0ADONALD+MCDONALD%0ASINEAD+O%27CONNOR%0AJOHAN+VAN+ZYL%0AOSCAR+DE+LA+HOYA%0AP.F.+CHANG%0AKFC%0AST.+JOHN%0ADR+ZEUZ%0APROF.+GREEN%0AVAN+DER+BERG+THE+3RD%0AS%C3%83O+JO%C3%83O+DOS+SANTOS%0AKING+HENRY+VII%0AKUJE%27S+HIGH%2CROAD%0AFLUG-HAFEN%0AFLUGIG-O%27DONNALD%0AMARY+O%27CALLAHAN%0AJOHN+O%27DONALD%0ATHE+O%27CALLAHAN-O%27DONALD+RESIDENCE%0A2ND+NOVEMBER+STREET%0AThe+15th+king+of+scotland%0AFCT%0AMICHAEL+VIVA%0AGINA+C.A.+KOTOR%0ADUTCH+NAMES%0Avan+der+vaart%0Avan+vollenhoven%0Avan+%27t+zandt%0Avan+het+zand%0Ael+hamdoie%0Avan+der+Rooi-van+Velzen%0AZuidewijn+-+van+rooien%0Ateggelen+onder+t+boven%0Aguido+op+%27t+drooge%0Afriso+van+drooge%0AZuidewijn+-+van+rooien%0Ateggelen+onder+t+boven%0AZUID-HOLLAND%0A%27s+hertogen-bosch%0ADe+Rooi+Van+Zuidewijn%0Avan+onder%0AVan+Der+Wijk-Zeewuster%0Ade+Vries-van+der+Leest%0ADen+Oudsten+-+van+%27t+Veldt%0AHare+Koninklijke+Hoogheid+Alexia+Juliana+Marcela+Laurentien+Prinses+der+Nederlanden%2C+Prinses+van+Oranje-Nassau%0AHare+Koninklijke+Hoogheid+M%C3%A1xima%2C+Prinses+der+Nederlanden%2C+Prinses+van+Oranje-Nassau%2C+Mevrouw+van+Amsberg%0Avan+Lippe-Biesterfeld+van+Vollenhoven%0A'
  220     1        INIT_FCALL                                                   'var_dump'
          2        INIT_STATIC_METHOD_CALL                                      'str', 'capitalise'
          3        SEND_VAR                                                     !0
          4        DO_FCALL                                          0  $2      
          5        SEND_VAR                                                     $2
          6        DO_ICALL                                                     
          7      > RETURN                                                       1

Class str:
Function capitalise:
Finding entry points
Branch analysis from position: 0
2 jumps found. (Code = 43) Position 1 = 3, Position 2 = 5
Branch analysis from position: 3
1 jumps found. (Code = 62) Position 1 = -2
Branch analysis from position: 5
1 jumps found. (Code = 62) Position 1 = -2
filename:       /in/SEdf7
function name:  capitalise
number of ops:  57
compiled vars:  !0 = $string, !1 = $pattern
line      #* E I O op                               fetch          ext  return  operands
-----------------------------------------------------------------------------------------
   73     0  E >   RECV                                                 !0      
   75     1        BOOL_NOT                                             ~2      !0
          2      > JMPZ                                                         ~2, ->5
   76     3    >   VERIFY_RETURN_TYPE                                           !0
          4      > RETURN                                                       !0
   80     5    >   FRAMELESS_ICALL_3                preg_replace        ~3      '%2F%5Cs%7B2%2C%7D%2F', '+'
          6        OP_DATA                                                      !0
          7        ASSIGN                                                       !0, ~3
   83     8        FRAMELESS_ICALL_3                preg_replace        ~5      '%2F%2C%28%5B%5E%5Cs%5D%29%2F', '%2C+%241'
          9        OP_DATA                                                      !0
         10        ASSIGN                                                       !0, ~5
   86    11        INIT_FCALL                                                   'preg_replace_callback'
         12        SEND_VAL                                                     '%2F%28%5B%5E%5Cs%5C-%5C.%5D%2B%29%2F'
         13        DECLARE_LAMBDA_FUNCTION                              ~7      [0]
  137    14        SEND_VAL                                                     ~7
         15        SEND_VAR                                                     !0
   86    16        DO_ICALL                                             $8      
         17        ASSIGN                                                       !0, $8
  140    18        ASSIGN                                                       !1, '%2F%28Mc%29%28%5Bb-df-hj-np-tv-z%5D%29%2F'
  142    19        INIT_FCALL                                                   'preg_replace_callback'
         20        SEND_VAR                                                     !1
         21        DECLARE_LAMBDA_FUNCTION                              ~11     [1]
  144    22        SEND_VAL                                                     ~11
         23        SEND_VAR                                                     !0
  142    24        DO_ICALL                                             $12     
         25        ASSIGN                                                       !0, $12
  147    26        ASSIGN                                                       !1, '%2F%5Cb%28%28%3F%3C%21%5BMDCLXVI%5D%29%28%3F%3D%5BMDCLXVI%5D%29M%7B0%2C3%7D%28%3F%3AC%5BMD%5D%7CD%3FC%7B0%2C3%7D%29%28%3F%3AX%5BCL%5D%7CL%3FX%7B0%2C3%7D%29%28%3F%3AI%5BXV%5D%7CV%3FI%7B0%2C3%7D%29%29%5Cb%2Fi'
  148    27        INIT_FCALL                                                   'preg_replace_callback'
         28        SEND_VAR                                                     !1
         29        DECLARE_LAMBDA_FUNCTION                              ~15     [2]
  150    30        SEND_VAL                                                     ~15
         31        SEND_VAR                                                     !0
  148    32        DO_ICALL                                             $16     
         33        ASSIGN                                                       !0, $16
  153    34        FRAMELESS_ICALL_2                implode             ~18     '%7C', <array>
         35        CONCAT                                               ~19     '%2F%5Cb+%28', ~18
         36        CONCAT                                               ~20     ~19, '%29+%5Cb%2Fi'
         37        ASSIGN                                                       !1, ~20
  155    38        INIT_FCALL                                                   'preg_replace_callback'
         39        SEND_VAR                                                     !1
         40        DECLARE_LAMBDA_FUNCTION                              ~22     [3]
  157    41        SEND_VAL                                                     ~22
         42        SEND_VAR                                                     !0
  155    43        DO_ICALL                                             $23     
         44        ASSIGN                                                       !0, $23
  160    45        ASSIGN                                                       !1, '%2F%5Cb%28%5Cd%2B%28%3F%3Ast%7Cnd%7Crd%7Cth%29%29%5Cb%2Fi'
  162    46        INIT_FCALL                                                   'preg_replace_callback'
         47        SEND_VAR                                                     !1
         48        DECLARE_LAMBDA_FUNCTION                              ~26     [4]
  164    49        SEND_VAL                                                     ~26
         50        SEND_VAR                                                     !0
  162    51        DO_ICALL                                             $27     
         52        ASSIGN                                                       !0, $27
  167    53        VERIFY_RETURN_TYPE                                           !0
         54      > RETURN                                                       !0
  168    55*       VERIFY_RETURN_TYPE                                           
         56*     > RETURN                                                       null


Dynamic Functions:
Dynamic Function 0
Finding entry points
Branch analysis from position: 0
2 jumps found. (Code = 43) Position 1 = 9, Position 2 = 13
Branch analysis from position: 9
1 jumps found. (Code = 62) Position 1 = -2
Branch analysis from position: 13
2 jumps found. (Code = 43) Position 1 = 19, Position 2 = 23
Branch analysis from position: 19
1 jumps found. (Code = 62) Position 1 = -2
Branch analysis from position: 23
2 jumps found. (Code = 43) Position 1 = 35, Position 2 = 39
Branch analysis from position: 35
1 jumps found. (Code = 62) Position 1 = -2
Branch analysis from position: 39
2 jumps found. (Code = 43) Position 1 = 42, Position 2 = 53
Branch analysis from position: 42
2 jumps found. (Code = 43) Position 1 = 49, Position 2 = 53
Branch analysis from position: 49
1 jumps found. (Code = 62) Position 1 = -2
Branch analysis from position: 53
2 jumps found. (Code = 46) Position 1 = 56, Position 2 = 58
Branch analysis from position: 56
2 jumps found. (Code = 43) Position 1 = 59, Position 2 = 63
Branch analysis from position: 59
1 jumps found. (Code = 62) Position 1 = -2
Branch analysis from position: 63
2 jumps found. (Code = 43) Position 1 = 69, Position 2 = 89
Branch analysis from position: 69
2 jumps found. (Code = 43) Position 1 = 72, Position 2 = 79
Branch analysis from position: 72
1 jumps found. (Code = 62) Position 1 = -2
Branch analysis from position: 79
1 jumps found. (Code = 62) Position 1 = -2
Branch analysis from position: 89
1 jumps found. (Code = 62) Position 1 = -2
Branch analysis from position: 58
Branch analysis from position: 53
filename:       /in/SEdf7
function name:  {closure:str::capitalise():86}
number of ops:  94
compiled vars:  !0 = $matches, !1 = $word, !2 = $transliterator, !3 = $ascii_word, !4 = $match
line      #* E I O op                               fetch          ext  return  operands
-----------------------------------------------------------------------------------------
   86     0  E >   RECV                                                 !0      
   88     1        INIT_FCALL                                                   'mb_strtolower'
          2        FETCH_DIM_R                                          ~5      !0, 1
          3        SEND_VAL                                                     ~5
          4        DO_ICALL                                             $6      
          5        ASSIGN                                                       !1, $6
   91     6        FETCH_CLASS_CONSTANT                                 ~8      'ALL_LOWERCASE'
          7        FRAMELESS_ICALL_2                in_array            ~9      !1, ~8
          8      > JMPZ                                                         ~9, ->13
   92     9    >   INIT_FCALL                                                   'strtolower'
         10        SEND_VAR                                                     !1
         11        DO_ICALL                                             $10     
         12      > RETURN                                                       $10
   96    13    >   INIT_FCALL                                                   'mb_strtoupper'
         14        SEND_VAR                                                     !1
         15        DO_ICALL                                             $11     
         16        FETCH_CLASS_CONSTANT                                 ~12     'ALL_UPPERCASE'
         17        FRAMELESS_ICALL_2                in_array            ~13     $11, ~12
         18      > JMPZ                                                         ~13, ->23
   97    19    >   INIT_FCALL                                                   'strtoupper'
         20        SEND_VAR                                                     !1
         21        DO_ICALL                                             $14     
         22      > RETURN                                                       $14
  101    23    >   INIT_STATIC_METHOD_CALL                                      'Transliterator', 'createFromRules'
         24        SEND_VAL_EX                                                  '%3A%3A+Any-Latin%3B+%3A%3A+Latin-ASCII%3B+%3A%3A+NFD%3B+%3A%3A+%5B%3ANonspacing+Mark%3A%5D+Remove%3B+%3A%3A+Lower%28%29%3B+%3A%3A+NFC%3B'
         25        FETCH_CLASS_CONSTANT                                 ~15     'Transliterator', 'FORWARD'
         26        SEND_VAL_EX                                                  ~15
         27        DO_FCALL                                          0  $16     
         28        ASSIGN                                                       !2, $16
  102    29        INIT_METHOD_CALL                                             !2, 'transliterate'
         30        SEND_VAR_EX                                                  !1
         31        DO_FCALL                                          0  $18     
         32        ASSIGN                                                       !3, $18
  106    33        FRAMELESS_ICALL_2                preg_match          ~20     '%2F%5B%5Ea-z%2C%27%5D%2Fi', !3
         34      > JMPZ                                                         ~20, ->39
  107    35    >   INIT_FCALL                                                   'strtoupper'
         36        SEND_VAR                                                     !1
         37        DO_ICALL                                             $21     
         38      > RETURN                                                       $21
  111    39    >   FRAMELESS_ICALL_2                preg_match          ~22     '%2F%5Baeiouy%5D%2Fi', !3
         40        BOOL_NOT                                             ~23     ~22
         41      > JMPZ                                                         ~23, ->53
  113    42    >   INIT_FCALL                                                   'ucfirst'
         43        SEND_VAR                                                     !1
         44        DO_ICALL                                             $24     
         45        FETCH_CLASS_CONSTANT                                 ~25     'CONSONANT_ONLY_HONORIFICS'
         46        FRAMELESS_ICALL_2                in_array            ~26     $24, ~25
         47        BOOL_NOT                                             ~27     ~26
         48      > JMPZ                                                         ~27, ->53
  114    49    >   INIT_FCALL                                                   'strtoupper'
         50        SEND_VAR                                                     !1
         51        DO_ICALL                                             $28     
         52      > RETURN                                                       $28
  119    53    >   STRLEN                                               ~29     !1
         54        IS_SMALLER_OR_EQUAL                                  ~30     ~29, 3
         55      > JMPZ_EX                                              ~30     ~30, ->58
         56    >   FRAMELESS_ICALL_2                preg_match          ~31     '%2F%28%5Baeiouy%5D%29%01%2F', !1
         57        BOOL                                                 ~30     ~31
         58    > > JMPZ                                                         ~30, ->63
  120    59    >   INIT_FCALL                                                   'strtoupper'
         60        SEND_VAR                                                     !1
         61        DO_ICALL                                             $32     
         62      > RETURN                                                       $32
  124    63    >   INIT_FCALL                                                   'preg_match'
         64        SEND_VAL                                                     '%2F%5Cb%28%5Ba-z%5D%27%29%28%5Cw%2B%29%5Cb%2Fi'
         65        SEND_VAR                                                     !1
         66        SEND_REF                                                     !4
         67        DO_ICALL                                             $33     
         68      > JMPZ                                                         $33, ->89
  126    69    >   FETCH_DIM_R                                          ~34     !4, 1
         70        IN_ARRAY                                                     ~34, <array>
         71      > JMPZ                                                         ~35, ->79
  127    72    >   FETCH_DIM_R                                          ~36     !4, 1
         73        INIT_FCALL                                                   'ucfirst'
         74        FETCH_DIM_R                                          ~37     !4, 2
         75        SEND_VAL                                                     ~37
         76        DO_ICALL                                             $38     
         77        CONCAT                                               ~39     ~36, $38
         78      > RETURN                                                       ~39
  131    79    >   INIT_FCALL                                                   'strtoupper'
         80        FETCH_DIM_R                                          ~40     !4, 1
         81        SEND_VAL                                                     ~40
         82        DO_ICALL                                             $41     
         83        INIT_FCALL                                                   'ucfirst'
         84        FETCH_DIM_R                                          ~42     !4, 2
         85        SEND_VAL                                                     ~42
         86        DO_ICALL                                             $43     
         87        CONCAT                                               ~44     $41, $43
         88      > RETURN                                                       ~44
  135    89    >   INIT_FCALL                                                   'ucfirst'
         90        SEND_VAR                                                     !1
         91        DO_ICALL                                             $45     
         92      > RETURN                                                       $45
  137    93*     > RETURN                                                       null

End of Dynamic Function 0

Dynamic Function 1
Finding entry points
Branch analysis from position: 0
1 jumps found. (Code = 62) Position 1 = -2
filename:       /in/SEdf7
function name:  {closure:str::capitalise():142}
number of ops:  8
compiled vars:  !0 = $matches
line      #* E I O op                               fetch          ext  return  operands
-----------------------------------------------------------------------------------------
  142     0  E >   RECV                                                 !0      
  143     1        INIT_FCALL                                                   'ucfirst'
          2        FETCH_DIM_R                                          ~1      !0, 2
          3        SEND_VAL                                                     ~1
          4        DO_ICALL                                             $2      
          5        CONCAT                                               ~3      'Mc', $2
          6      > RETURN                                                       ~3
  144     7*     > RETURN                                                       null

End of Dynamic Function 1

Dynamic Function 2
Finding entry points
Branch analysis from position: 0
1 jumps found. (Code = 62) Position 1 = -2
filename:       /in/SEdf7
function name:  {closure:str::capitalise():148}
number of ops:  7
compiled vars:  !0 = $matches
line      #* E I O op                               fetch          ext  return  operands
-----------------------------------------------------------------------------------------
  148     0  E >   RECV                                                 !0      
  149     1        INIT_FCALL                                                   'strtoupper'
          2        FETCH_DIM_R                                          ~1      !0, 1
          3        SEND_VAL                                                     ~1
          4        DO_ICALL                                             $2      
          5      > RETURN                                                       $2
  150     6*     > RETURN                                                       null

End of Dynamic Function 2

Dynamic Function 3
Finding entry points
Branch analysis from position: 0
1 jumps found. (Code = 62) Position 1 = -2
filename:       /in/SEdf7
function name:  {closure:str::capitalise():155}
number of ops:  10
compiled vars:  !0 = $matches
line      #* E I O op                               fetch          ext  return  operands
-----------------------------------------------------------------------------------------
  155     0  E >   RECV                                                 !0      
  156     1        INIT_FCALL                                                   'strtolower'
          2        ROPE_INIT                                         3  ~3      '+'
          3        FETCH_DIM_R                                          ~1      !0, 1
          4        ROPE_ADD                                          1  ~3      ~3, ~1
          5        ROPE_END                                          2  ~2      ~3, '+'
          6        SEND_VAL                                                     ~2
          7        DO_ICALL                                             $5      
          8      > RETURN                                                       $5
  157     9*     > RETURN                                                       null

End of Dynamic Function 3

Dynamic Function 4
Finding entry points
Branch analysis from position: 0
1 jumps found. (Code = 62) Position 1 = -2
filename:       /in/SEdf7
function name:  {closure:str::capitalise():162}
number of ops:  7
compiled vars:  !0 = $matches
line      #* E I O op                               fetch          ext  return  operands
-----------------------------------------------------------------------------------------
  162     0  E >   RECV                                                 !0      
  163     1        INIT_FCALL                                                   'strtolower'
          2        FETCH_DIM_R                                          ~1      !0, 1
          3        SEND_VAL                                                     ~1
          4        DO_ICALL                                             $2      
          5      > RETURN                                                       $2
  164     6*     > RETURN                                                       null

End of Dynamic Function 4

End of function capitalise

End of class str.

Generated using Vulcan Logic Dumper, using php 8.5.0


preferences:
162.09 ms | 1596 KiB | 21 Q