<?php
// Whitespace-like code points under test, keyed by a human-readable name.
// Every value is a single code point written as a "\u{...}" escape; the
// insertion order below is the row order of the rendered table.
$whitespaces = [
    // U+0009 - U+000D and U+0020: ASCII range
    'character tabulation' => "\u{0009}",
    'line feed' => "\u{000A}",
    'line tabulation' => "\u{000B}",
    'form feed' => "\u{000C}",
    'carriage return' => "\u{000D}",
    'space' => "\u{0020}",

    // U+0085 and U+00A0: Latin-1 range
    'next line' => "\u{0085}",
    'no-break space' => "\u{00A0}",

    // U+1680 and U+180E: script-specific characters
    'ogham space mark' => "\u{1680}",
    'mongolian vowel separator' => "\u{180E}",

    // U+2000 - U+200A: the run of typographic spaces
    'en quad' => "\u{2000}",
    'em quad' => "\u{2001}",
    'en space' => "\u{2002}",
    'em space' => "\u{2003}",
    'three-per-em space' => "\u{2004}",
    'four-per-em space' => "\u{2005}",
    'six-per-em space' => "\u{2006}",
    'figure space' => "\u{2007}",
    'punctuation space' => "\u{2008}",
    'thin space' => "\u{2009}",
    'hair space' => "\u{200A}",

    // U+200B - U+200D: zero-width characters
    'zero width space' => "\u{200B}",
    'zero width non-joiner' => "\u{200C}",
    'zero width joiner' => "\u{200D}",

    // U+2028 - U+2060: line/paragraph separators and remaining spaces
    'line separator' => "\u{2028}",
    'paragraph separator' => "\u{2029}",
    'narrow no-break space' => "\u{202F}",
    'medium mathematical space' => "\u{205F}",
    'word joiner' => "\u{2060}",

    // U+3000 and U+FEFF
    'ideographic space' => "\u{3000}",
    'zero width non-breaking space' => "\u{FEFF}",
];
// Candidate regular expressions to compare. Each one is applied to every
// entry of the whitespace list, and the pattern text itself is printed as
// the column title, so the strings must stay exactly as written.
$patterns = [
    // \s without the `u` modifier (subject is not treated as UTF-8)
    '/\s+/',
    // \s with the `u` modifier (pattern and subject treated as UTF-8)
    '/\s+/u',
    // Unicode property Z (separators)
    '/\pZ+/u',
    // Unicode property C ("other")
    '/\pC+/u',
    // Either of the two properties above
    '/[\pZ\pC]+/u',
    // Explicit list of code points and code point ranges
    '/[\x{0009}-\x{000D}\x{0020}\x{0085}\x{00A0}\x{1680}\x{180E}\x{2000}-\x{200D}\x{2028}-\x{202F}\x{205F}\x{2060}\x{3000}\x{FEFF}]+/ux',
];
// Render a Markdown table: one row per whitespace character, one column per
// pattern. Output is written piece by piece, each row starting with "\n".

// Header row: left-aligned label column, then every pattern as a column title.
echo sprintf("\n|%-30s |", 'whitespace characters');
foreach ($patterns as $regex) {
    echo sprintf(' %-12s |', $regex);
}

// Delimiter row: 31 dashes under the label column, then one centred
// (":---:") delimiter cell per pattern, closed by a final pipe.
$delimiterCell = '|:' . str_repeat('-', 12) . ':';
echo "\n|" . str_repeat('-', 31) . str_repeat($delimiterCell, count($patterns)) . '|';

// Body rows: right-aligned name, then a mark per pattern.
foreach ($whitespaces as $label => $character) {
    echo sprintf("\n|%30s |", $label);

    foreach ($patterns as $regex) {
        // A pattern "matches" when stripping every match leaves nothing behind.
        $leftover = preg_replace($regex, '', $character);

        if (mb_strlen($leftover) === 0) {
            $mark = '✅';
        } else {
            $mark = '❌';
        }

        // Width 13 here versus 12 in the header.
        // NOTE(review): presumably compensates for sprintf padding by bytes
        // while the mark is a multi-byte, double-width glyph — confirm.
        echo sprintf(' %-13s |', $mark);
    }
}
Output for 8.0.1 - 8.0.30, 8.1.0 - 8.1.28, 8.2.0 - 8.2.19, 8.3.0 - 8.3.7:

|whitespace characters | /\s+/ | /\s+/u | /\pZ+/u | /\pC+/u | /[\pZ\pC]+/u | /[\x{0009}-\x{000D}\x{0020}\x{0085}\x{00A0}\x{1680}\x{180E}\x{2000}-\x{200D}\x{2028}-\x{202F}\x{205F}\x{2060}\x{3000}\x{FEFF}]+/ux |
|-------------------------------|:------------:|:------------:|:------------:|:------------:|:------------:|:------------:|
| character tabulation | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ |
| line feed | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ |
| line tabulation | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ |
| form feed | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ |
| carriage return | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ |
| space | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ |
| next line | ❌ | ✅ | ❌ | ✅ | ✅ | ✅ |
| no-break space | ❌ | ✅ | ✅ | ❌ | ✅ | ✅ |
| ogham space mark | ❌ | ✅ | ✅ | ❌ | ✅ | ✅ |
| mongolian vowel separator | ❌ | ✅ | ❌ | ✅ | ✅ | ✅ |
| en quad | ❌ | ✅ | ✅ | ❌ | ✅ | ✅ |
| em quad | ❌ | ✅ | ✅ | ❌ | ✅ | ✅ |
| en space | ❌ | ✅ | ✅ | ❌ | ✅ | ✅ |
| em space | ❌ | ✅ | ✅ | ❌ | ✅ | ✅ |
| three-per-em space | ❌ | ✅ | ✅ | ❌ | ✅ | ✅ |
| four-per-em space | ❌ | ✅ | ✅ | ❌ | ✅ | ✅ |
| six-per-em space | ❌ | ✅ | ✅ | ❌ | ✅ | ✅ |
| figure space | ❌ | ✅ | ✅ | ❌ | ✅ | ✅ |
| punctuation space | ❌ | ✅ | ✅ | ❌ | ✅ | ✅ |
| thin space | ❌ | ✅ | ✅ | ❌ | ✅ | ✅ |
| hair space | ❌ | ✅ | ✅ | ❌ | ✅ | ✅ |
| zero width space | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ |
| zero width non-joiner | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ |
| zero width joiner | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ |
| line separator | ❌ | ✅ | ✅ | ❌ | ✅ | ✅ |
| paragraph separator | ❌ | ✅ | ✅ | ❌ | ✅ | ✅ |
| narrow no-break space | ❌ | ✅ | ✅ | ❌ | ✅ | ✅ |
| medium mathematical space | ❌ | ✅ | ✅ | ❌ | ✅ | ✅ |
| word joiner | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ |
| ideographic space | ❌ | ✅ | ✅ | ❌ | ✅ | ✅ |
| zero width non-breaking space | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ |
preferences:
108.97 ms | 414 KiB | 91 Q