<?php
/**
 * Replace whole words in a UTF-8 text using a translation map, mirroring the
 * letter case of every matched word onto its replacement.
 *
 * Casing rules applied to each match:
 *  - all lowercase                   -> replacement exactly as declared in the map
 *  - all uppercase (2+ characters)   -> replacement fully uppercased
 *  - anything else (e.g. "Is", "I")  -> replacement with only its first character uppercased
 *
 * @param string               $text         UTF-8 text to translate.
 * @param array<string,string> $translations Map of source word => replacement. Keys are matched
 *                                           case-insensitively and are expected to be plain
 *                                           words (they are anchored with \b on both sides).
 *
 * @return string The translated text; $text unchanged when the map is empty.
 *
 * @throws RuntimeException When PCRE fails (for example, $text is not valid UTF-8).
 */
function translateWordsPreservingCase($text, array $translations)
{
    $encoding = 'UTF-8';

    // Lowercase every key so the lookup inside the callback cannot miss because of
    // how the map was declared (e.g. "I" instead of "i").
    $lookup = [];
    foreach ($translations as $word => $replacement) {
        $lookup[mb_strtolower((string) $word, $encoding)] = (string) $replacement;
    }
    // An empty alternative would match at every word boundary.
    unset($lookup['']);

    if ($lookup === []) {
        return $text;
    }

    $alternatives = [];
    foreach (array_keys($lookup) as $word) {
        // Array keys that look like integers come back as int; preg_quote() needs a string.
        $alternatives[] = preg_quote((string) $word, '/');
    }

    // A plain non-capturing group is required here: with an atomic group (?>...) the
    // engine cannot backtrack into the alternation, so "my" listed before "myth"
    // would make "myth" unmatchable. The "u" modifier makes \b and case folding
    // Unicode-aware, so keys are not replaced inside accented words such as "Léon".
    $pattern = '/\b(?:' . implode('|', $alternatives) . ')\b/iu';

    $result = preg_replace_callback(
        $pattern,
        static function (array $match) use ($lookup, $encoding) {
            $found = $match[0];
            $key = mb_strtolower($found, $encoding);

            // PCRE case folding and mb_strtolower() can disagree on exotic characters;
            // leave the word untouched rather than reading an undefined index.
            if (!isset($lookup[$key])) {
                return $found;
            }
            $replacement = $lookup[$key];

            if ($found === $key) { // matched word is entirely lowercase
                return $replacement;
            }

            // A single uppercase character ("I") is treated as capitalised, not shouted.
            if (mb_strlen($found, $encoding) > 1 && $found === mb_strtoupper($found, $encoding)) {
                return mb_strtoupper($replacement, $encoding);
            }

            // Multibyte-safe ucfirst: uppercase the first character, keep the rest as declared.
            return mb_strtoupper(mb_substr($replacement, 0, 1, $encoding), $encoding)
                . mb_substr($replacement, 1, null, $encoding);
        },
        $text
    );

    if ($result === null) {
        throw new RuntimeException('Word translation failed, PCRE error code ' . preg_last_error());
    }

    return $result;
}

$myVar = "My sister alannis Is not That blonde, here is a good place. I know Ariane is not MY SISTER!";
echo "$myVar\n";

// Source word / translation pairs; keys are matched case-insensitively.
$myWords = [
    ["is", "é"],
    ["on", "no"],
    ["that", "aquela"],
    ["sister", "irmã"],
    ["my", "minha"],
    ["myth", "mito"],
    ["he", "ele"],
    ["good", "bom"],
    ["ace", "perito"],
    ["i", "eu"],
];

// Re-key the pairs as source word => translation.
$translations = array_column($myWords, 1, 0);

$translated = translateWordsPreservingCase($myVar, $translations);
echo $translated;