run code in 150+ php & hhvm versions
Bugs & Features
<?php

// Demo: both words are remapped through the SAME map so that identical
// characters receive identical single-byte codes; levenshtein() then yields
// a distance counted in characters instead of UTF-8 bytes.
$map = array();
var_dump(levenshtein(
    utf8_to_extended_ascii('прив', $map),
    utf8_to_extended_ascii('привет', $map)
));

/**
 * Replace every multibyte UTF-8 sequence in a string with a single byte in
 * the range 0x80-0xFF, so that byte-oriented functions such as levenshtein()
 * treat each original character as one unit.
 *
 * The mapping is arbitrary (codes are handed out in order of first
 * appearance) and is only meaningful for comparing strings converted with
 * the same $map; the result is not a real "extended ASCII" code page.
 *
 * @param string $str  Input string; sequences are detected byte-wise as a
 *                     lead byte 0xC0-0xF7 followed by one or more
 *                     continuation bytes 0x80-0xBF.
 * @param array  $map  Shared map "multibyte sequence => one-byte string".
 *                     Passed by reference and extended with sequences not
 *                     seen before. Pass the same array for every string
 *                     that takes part in a comparison.
 *
 * @return string The remapped string, or $str unchanged when it contains no
 *                multibyte sequence.
 *
 * @throws OverflowException When more than 128 distinct multibyte sequences
 *                           would be needed; $map is left unmodified.
 */
function utf8_to_extended_ascii($str, &$map)
{
    // find all multibyte characters (cf. utf-8 encoding specs)
    $matches = array();
    if (!preg_match_all('/[\xC0-\xF7][\x80-\xBF]+/', $str, $matches)) {
        return $str; // plain ascii string
    }

    // update the encoding map with the characters not already met;
    // work on a copy so a failure does not leave $map half-updated
    $updated = $map;
    foreach ($matches[0] as $mbc) {
        if (isset($updated[$mbc])) {
            continue;
        }
        // Only codes 128..255 are free. Past that, chr() wraps around and
        // would hand out codes that collide with plain ASCII bytes.
        if (count($updated) >= 128) {
            throw new OverflowException(
                'utf8_to_extended_ascii(): more than 128 distinct multibyte characters'
            );
        }
        $updated[$mbc] = chr(128 + count($updated));
    }
    $map = $updated;

    // finally remap non-ascii characters
    return strtr($str, $map);
}
Output for 4.3.0 - 7.1.0