3v4l.org

run code in 150+ php & hhvm versions
Bugs & Features
<?php

/**
 * Convert a UTF-8 string to ISO/IEC 8859-1 (Latin-1).
 *
 * The conversion is lossy by design and proceeds in three steps:
 *   1. malformed UTF-8 is stripped by utf8_clean();
 *   2. well-formed characters that have no Latin-1 equivalent are deleted:
 *      the C1 controls U+0080-U+009F and everything above U+00FF;
 *   3. the remaining two-byte sequences (U+00A0-U+00FF) are translated to
 *      single bytes using utf8_latin1_table().
 *
 * Note: utf8_decode() and utf8_encode() treat the code points 128 to 159 as
 * valid Latin-1 characters. This is not standard practice, which is why those
 * code points are dropped here instead of being converted.
 * http://en.wikipedia.org/wiki/ISO/IEC_8859-1#Similar_character_sets
 *
 * @param string $str UTF-8 encoded input (may contain invalid bytes).
 * @return string Latin-1 encoded output.
 */
function utf8_to_latin1( $str )
{
    // Removing invalid UTF-8.
    $str = utf8_clean( $str );

    // Removing UTF-8 characters that cannot be accurately translated to
    // Latin-1. The match is made on complete sequences: stripping the bare
    // bytes 0x80-0x9F at this stage would also eat the continuation byte of
    // valid characters such as "\xC3\x80" (U+00C0).
    $unmappable = '/\xC2[\x80-\x9F]|[\xC4-\xDF][\x80-\xBF]|[\xE0-\xEF][\x80-\xBF]{2}|[\xF0-\xF7][\x80-\xBF]{3}/';
    $str = preg_replace( $unmappable , '' , $str );

    // Translating UTF-8 to Latin-1.
    return strtr( $str , utf8_latin1_table( ) );
}

/**
 * Build the UTF-8 => Latin-1 translation table used with strtr().
 *
 * Keys are UTF-8 byte sequences, values are the matching Latin-1 bytes:
 *   - U+0000-U+007F map to themselves;
 *   - U+00A0-U+00FF ("\xC2\xA0".."\xC3\xBF") map to "\xA0".."\xFF".
 * U+0080-U+009F ("\xC2\x80".."\xC2\x9F") are deliberately absent: they are
 * commonly used, but are not valid Latin-1 characters.
 *
 * The table is built once and cached in a static variable.
 *
 * @return array Map of UTF-8 sequence => Latin-1 byte (224 entries).
 */
function utf8_latin1_table( )
{
    static $tbl = null;

    if ( $tbl === null ) {
        $tbl = array();

        // ASCII is identical in both encodings.
        for ( $cp = 0x00; $cp <= 0x7F; $cp++ ) {
            $tbl[ chr( $cp ) ] = chr( $cp );
        }

        // Two-byte UTF-8 form of code point cp: 110xxxxx 10xxxxxx.
        for ( $cp = 0xA0; $cp <= 0xFF; $cp++ ) {
            $utf8 = chr( 0xC0 | ( $cp >> 6 ) ) . chr( 0x80 | ( $cp & 0x3F ) );
            $tbl[ $utf8 ] = chr( $cp );
        }
    }

    return $tbl;
}

/**
 * Remove every byte that is not part of a well-formed UTF-8 sequence.
 *
 * Runs of well-formed sequences are captured in group 1 and kept; any other
 * byte is matched by the trailing "." alternative and replaced by the (then
 * empty) group 1, i.e. deleted. Overlong encodings, UTF-16 surrogates and
 * code points above U+10FFFF are treated as malformed.
 *
 * Based on
 * http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
 *
 * @param string $str Input bytes.
 * @return string The input with all malformed bytes removed.
 */
function utf8_clean( $str )
{
    $wellFormed =
          '[\x00-\x7F]'                        // ASCII
        . '|[\xC2-\xDF][\x80-\xBF]'            // 2 bytes (C0/C1 would be overlong)
        . '|\xE0[\xA0-\xBF][\x80-\xBF]'        // 3 bytes, excluding overlongs
        . '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}' // 3 bytes, straight
        . '|\xED[\x80-\x9F][\x80-\xBF]'        // 3 bytes, excluding surrogates
        . '|\xF0[\x90-\xBF][\x80-\xBF]{2}'     // 4 bytes, planes 1-3
        . '|[\xF1-\xF3][\x80-\xBF]{3}'         // 4 bytes, planes 4-15
        . '|\xF4[\x80-\x8F][\x80-\xBF]{2}';    // 4 bytes, plane 16

    $regx = '/((?:' . $wellFormed . ')+)|./';

    return preg_replace( $regx , '$1' , $str );
}

echo utf8_to_latin1( 'ISO/IEC 8859-1 is missing some characters for French and Finnish text and the euro sign. In order to provide some of these characters, ISO/IEC 8859-15 was developed as an update of ISO/IEC 8859-1. This required, however, the removal of some infrequently used characters from ISO/IEC 8859-1, including fraction symbols and letter-free diacritics: ¤, ¦, ¨, ´, ¸, ¼, ½, and ¾.' );
Output for 4.3.0 - 5.6.28, hhvm-3.10.0 - 3.12.0, 7.0.0 - 7.1.0
ISO/IEC 8859-1 is missing some characters for French and Finnish text and the euro sign. In order to provide some of these characters, ISO/IEC 8859-15 was developed as an update of ISO/IEC 8859-1. This required, however, the removal of some infrequently used characters from ISO/IEC 8859-1, including fraction symbols and letter-free diacritics: ¤, ¦, ¨, ´, ¸, ¼, ½, and ¾.