<?php
$string = '#️⃣';
echo '=== UTF-8 =============================', PHP_EOL;
echo 'STRING: ', $string, PHP_EOL;
echo 'CHARACTERS: ', mb_strlen($string), PHP_EOL; // 3
echo 'BYTE COUNT: ', strlen($string), PHP_EOL; // 7
foreach(range(0, mb_strlen($string) - 1) as $index) {
echo PHP_EOL;
echo 'CHARACTER #', $index + 1, ' CODE POINT: ', '0x', dechex(mb_ord(mb_substr($string, $index, 1))), PHP_EOL; // [0x23, 0xfe0f, 0x20e3]
echo 'CHARACTER #', $index + 1, ' BYTE COUNT: ', mb_strlen(mb_substr($string, $index, 1), '8bit'), PHP_EOL; // [1, 3, 3]
}
echo '=======================================', PHP_EOL, PHP_EOL;
$string = mb_convert_encoding($string, 'UTF-16', 'UTF-8');
echo '=== UTF-16 ============================', PHP_EOL;
echo 'STRING: ', $string, PHP_EOL;
echo 'CHARACTERS: ', mb_strlen($string), PHP_EOL; // should be 3, is 6
echo 'BYTE COUNT: ', strlen($string), PHP_EOL; // should 6, is 6
foreach(range(0, mb_strlen($string) - 1) as $index) {
echo PHP_EOL;
echo 'CHARACTER #', $index + 1, ' CODE POINT: ', '0x', dechex(mb_ord(mb_substr($string, $index, 1))), PHP_EOL; // should be [0x23, 0xfe0f, 0x20e3], is [0x0, 0x23, 0x0, 0xf, 0x20, 0x0]; it actually gets kinda close here, if you recompose two by two bytes, but no cigar
echo 'CHARACTER #', $index + 1, ' BYTE COUNT: ', mb_strlen(mb_substr($string, $index, 1), '8bit'), PHP_EOL; // should be [2, 2, 2], is [1, 1, 1, 1, 1, 1]
}
echo '=======================================', PHP_EOL;
- Output for 8.3.2 - 8.3.4, 8.3.6
- === UTF-8 =============================
STRING: #️⃣
CHARACTERS: 3
BYTE COUNT: 7
CHARACTER #1 CODE POINT: 0x23
CHARACTER #1 BYTE COUNT: 1
CHARACTER #2 CODE POINT: 0xfe0f
CHARACTER #2 BYTE COUNT: 3
CHARACTER #3 CODE POINT: 0x20e3
CHARACTER #3 BYTE COUNT: 3
=======================================
=== UTF-16 ============================
STRING: #� �
CHARACTERS: 6
BYTE COUNT: 6
CHARACTER #1 CODE POINT: 0x0
CHARACTER #1 BYTE COUNT: 1
CHARACTER #2 CODE POINT: 0x23
CHARACTER #2 BYTE COUNT: 1
CHARACTER #3 CODE POINT: 0x3f
CHARACTER #3 BYTE COUNT: 1
CHARACTER #4 CODE POINT: 0xf
CHARACTER #4 BYTE COUNT: 1
CHARACTER #5 CODE POINT: 0x20
CHARACTER #5 BYTE COUNT: 1
CHARACTER #6 CODE POINT: 0x3f
CHARACTER #6 BYTE COUNT: 1
=======================================
- Output for 8.3.5
- Warning: PHP Startup: Unable to load dynamic library 'sodium.so' (tried: /usr/lib/php/8.3.5/modules/sodium.so (libsodium.so.23: cannot open shared object file: No such file or directory), /usr/lib/php/8.3.5/modules/sodium.so.so (/usr/lib/php/8.3.5/modules/sodium.so.so: cannot open shared object file: No such file or directory)) in Unknown on line 0
=== UTF-8 =============================
STRING: #️⃣
CHARACTERS: 3
BYTE COUNT: 7
CHARACTER #1 CODE POINT: 0x23
CHARACTER #1 BYTE COUNT: 1
CHARACTER #2 CODE POINT: 0xfe0f
CHARACTER #2 BYTE COUNT: 3
CHARACTER #3 CODE POINT: 0x20e3
CHARACTER #3 BYTE COUNT: 3
=======================================
=== UTF-16 ============================
STRING: #� �
CHARACTERS: 6
BYTE COUNT: 6
CHARACTER #1 CODE POINT: 0x0
CHARACTER #1 BYTE COUNT: 1
CHARACTER #2 CODE POINT: 0x23
CHARACTER #2 BYTE COUNT: 1
CHARACTER #3 CODE POINT: 0x3f
CHARACTER #3 BYTE COUNT: 1
CHARACTER #4 CODE POINT: 0xf
CHARACTER #4 BYTE COUNT: 1
CHARACTER #5 CODE POINT: 0x20
CHARACTER #5 BYTE COUNT: 1
CHARACTER #6 CODE POINT: 0x3f
CHARACTER #6 BYTE COUNT: 1
=======================================
- Output for 7.2.0 - 7.2.33, 7.3.0 - 7.3.33, 7.4.0 - 7.4.33, 8.0.0 - 8.0.30, 8.1.0 - 8.1.28, 8.2.0 - 8.2.18, 8.3.0 - 8.3.1
- === UTF-8 =============================
STRING: #️⃣
CHARACTERS: 3
BYTE COUNT: 7
CHARACTER #1 CODE POINT: 0x23
CHARACTER #1 BYTE COUNT: 1
CHARACTER #2 CODE POINT: 0xfe0f
CHARACTER #2 BYTE COUNT: 3
CHARACTER #3 CODE POINT: 0x20e3
CHARACTER #3 BYTE COUNT: 3
=======================================
=== UTF-16 ============================
STRING: #� �
CHARACTERS: 6
BYTE COUNT: 6
CHARACTER #1 CODE POINT: 0x0
CHARACTER #1 BYTE COUNT: 1
CHARACTER #2 CODE POINT: 0x23
CHARACTER #2 BYTE COUNT: 1
CHARACTER #3 CODE POINT: 0x0
CHARACTER #3 BYTE COUNT: 1
CHARACTER #4 CODE POINT: 0xf
CHARACTER #4 BYTE COUNT: 1
CHARACTER #5 CODE POINT: 0x20
CHARACTER #5 BYTE COUNT: 1
CHARACTER #6 CODE POINT: 0x0
CHARACTER #6 BYTE COUNT: 1
=======================================
preferences:
148.22 ms | 404 KiB | 176 Q