- var_dump: documentation (source)
- mb_split: documentation (source)
- mb_regex_encoding: documentation (source)
- mb_internal_encoding: documentation (source)
<?php
// Tokenise a UTF-8 string with the multibyte regex engine, dump the
// resulting tokens, then print the PHP configuration report.

// Both the general mbstring functions and the mb_ereg/mb_split regex
// functions must be told to treat input as UTF-8.
mb_internal_encoding('UTF-8');
mb_regex_encoding('UTF-8');

$string = "Some long tokenisable string";

// Split on any character that is neither a Unicode number (\p{N}) nor a
// Unicode letter (\p{L}). The trailing `|\p{Z}` alternative (separators)
// is already covered by the negated class; it is kept as-is so the
// pattern stays identical to the original.
$splitTokens = mb_split('[^\p{N}\p{L}]|\p{Z}', $string);
var_dump($splitTokens);

// The built-in is phpinfo(), not php_info(); the latter is undefined and
// aborts the script with a fatal "Call to undefined function" error.
phpinfo();