@ 2018-05-01T22:44:14Z <?php
/*
Copyright (c) 2008 Sebastián Grignoli
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of copyright holders nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @author "Sebastián Grignoli" <grignoli@gmail.com>
* @package Encoding
* @version 2.0
* @link https://github.com/neitanod/forceutf8
* @example https://github.com/neitanod/forceutf8
* @license Revised BSD
*/
namespace ForceUTF8;
/**
 * Byte-level helpers for converting text between Windows-1252 / ISO 8859-1
 * and UTF-8, including repair of text that was UTF-8 encoded more than once.
 *
 * All methods are static. Strings are treated as raw byte sequences; the
 * conversion code indexes bytes directly and avoids substr()/strlen() in the
 * hot paths so that it keeps working when mbstring.func_overload is active.
 */
class Encoding {
    /** iconv() mode for utf8_decode(): append "//TRANSLIT" to the target charset. */
    const ICONV_TRANSLIT = "TRANSLIT";
    /** iconv() mode for utf8_decode(): append "//IGNORE" to the target charset. */
    const ICONV_IGNORE = "IGNORE";
    /** Do not use iconv(); use the built-in table-driven decoder instead. */
    const WITHOUT_ICONV = "";

    /** Windows-1252 byte value (0x80-0x9F range) => UTF-8 encoding of the character it represents. */
    protected static $win1252ToUtf8 = array(
        128 => "\xe2\x82\xac",
        130 => "\xe2\x80\x9a",
        131 => "\xc6\x92",
        132 => "\xe2\x80\x9e",
        133 => "\xe2\x80\xa6",
        134 => "\xe2\x80\xa0",
        135 => "\xe2\x80\xa1",
        136 => "\xcb\x86",
        137 => "\xe2\x80\xb0",
        138 => "\xc5\xa0",
        139 => "\xe2\x80\xb9",
        140 => "\xc5\x92",
        142 => "\xc5\xbd",
        145 => "\xe2\x80\x98",
        146 => "\xe2\x80\x99",
        147 => "\xe2\x80\x9c",
        148 => "\xe2\x80\x9d",
        149 => "\xe2\x80\xa2",
        150 => "\xe2\x80\x93",
        151 => "\xe2\x80\x94",
        152 => "\xcb\x9c",
        153 => "\xe2\x84\xa2",
        154 => "\xc5\xa1",
        155 => "\xe2\x80\xba",
        156 => "\xc5\x93",
        158 => "\xc5\xbe",
        159 => "\xc5\xb8"
    );

    /**
     * UTF-8 encoding of a C1 control code point (what a Windows-1252 byte
     * becomes when it is converted as if it were ISO 8859-1) => UTF-8 encoding
     * of the intended Windows-1252 character.
     */
    protected static $brokenUtf8ToUtf8 = array(
        "\xc2\x80" => "\xe2\x82\xac",
        "\xc2\x82" => "\xe2\x80\x9a",
        "\xc2\x83" => "\xc6\x92",
        "\xc2\x84" => "\xe2\x80\x9e",
        "\xc2\x85" => "\xe2\x80\xa6",
        "\xc2\x86" => "\xe2\x80\xa0",
        "\xc2\x87" => "\xe2\x80\xa1",
        "\xc2\x88" => "\xcb\x86",
        "\xc2\x89" => "\xe2\x80\xb0",
        "\xc2\x8a" => "\xc5\xa0",
        "\xc2\x8b" => "\xe2\x80\xb9",
        "\xc2\x8c" => "\xc5\x92",
        "\xc2\x8e" => "\xc5\xbd",
        "\xc2\x91" => "\xe2\x80\x98",
        "\xc2\x92" => "\xe2\x80\x99",
        "\xc2\x93" => "\xe2\x80\x9c",
        "\xc2\x94" => "\xe2\x80\x9d",
        "\xc2\x95" => "\xe2\x80\xa2",
        "\xc2\x96" => "\xe2\x80\x93",
        "\xc2\x97" => "\xe2\x80\x94",
        "\xc2\x98" => "\xcb\x9c",
        "\xc2\x99" => "\xe2\x84\xa2",
        "\xc2\x9a" => "\xc5\xa1",
        "\xc2\x9b" => "\xe2\x80\xba",
        "\xc2\x9c" => "\xc5\x93",
        "\xc2\x9e" => "\xc5\xbe",
        "\xc2\x9f" => "\xc5\xb8"
    );

    /** UTF-8 encoding of a Windows-1252 special character => its single Windows-1252 byte. */
    protected static $utf8ToWin1252 = array(
        "\xe2\x82\xac" => "\x80",
        "\xe2\x80\x9a" => "\x82",
        "\xc6\x92" => "\x83",
        "\xe2\x80\x9e" => "\x84",
        "\xe2\x80\xa6" => "\x85",
        "\xe2\x80\xa0" => "\x86",
        "\xe2\x80\xa1" => "\x87",
        "\xcb\x86" => "\x88",
        "\xe2\x80\xb0" => "\x89",
        "\xc5\xa0" => "\x8a",
        "\xe2\x80\xb9" => "\x8b",
        "\xc5\x92" => "\x8c",
        "\xc5\xbd" => "\x8e",
        "\xe2\x80\x98" => "\x91",
        "\xe2\x80\x99" => "\x92",
        "\xe2\x80\x9c" => "\x93",
        "\xe2\x80\x9d" => "\x94",
        "\xe2\x80\xa2" => "\x95",
        "\xe2\x80\x93" => "\x96",
        "\xe2\x80\x94" => "\x97",
        "\xcb\x9c" => "\x98",
        "\xe2\x84\xa2" => "\x99",
        "\xc5\xa1" => "\x9a",
        "\xe2\x80\xba" => "\x9b",
        "\xc5\x93" => "\x9c",
        "\xc5\xbe" => "\x9e",
        "\xc5\xb8" => "\x9f"
    );

    /**
     * Leaves UTF-8 sequences alone while converting every other high byte to UTF-8.
     *
     * It assumes that the encoding of the original string is either
     * Windows-1252 or ISO 8859-1.
     *
     * Because the check is purely structural, it may fail to convert
     * characters in these scenarios:
     *
     * 1) when any of these characters: ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
     *    is followed by any of these ("group B"):
     *    ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿
     *    For example: %ABREPRESENT%C9%BB. «REPRESENTÉ»
     *    The "«" (%AB) character will be converted, but the "É" followed by
     *    "»" (%C9%BB) is also a well-formed UTF-8 sequence and is left unchanged.
     * 2) when any of these: àáâãäåæçèéêëìíîï is followed by TWO chars from group B,
     * 3) when any of these: ðñòó is followed by THREE chars from group B.
     *
     * @param mixed $text A string, or an array whose values are converted
     *                    recursively. Any other type is returned untouched.
     * @return mixed The same value with every string UTF-8 encoded.
     */
    public static function toUTF8($text)
    {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::toUTF8($v);
            }
            return $text;
        }
        if (!is_string($text)) {
            return $text;
        }

        $max = self::strlen($text);
        $buf = "";
        for ($i = 0; $i < $max; $i++) {
            $c1 = $text[$i];
            $code = ord($c1);

            if ($code < 0x80) {
                // Plain ASCII needs no conversion.
                $buf .= $c1;
                continue;
            }

            if ($code < 0xc0) {
                // A continuation-range byte with no lead byte in front of it:
                // it must be a single-byte character.
                $buf .= isset(self::$win1252ToUtf8[$code])
                    ? self::$win1252ToUtf8[$code]
                    : self::singleByteToUtf8($c1);
                continue;
            }

            // Number of continuation bytes this byte announces if it is a
            // UTF-8 lead byte; 0xF8-0xFF never start a sequence.
            if ($code <= 0xdf) {
                $tail = 1;
            } elseif ($code <= 0xef) {
                $tail = 2;
            } elseif ($code <= 0xf7) {
                $tail = 3;
            } else {
                $tail = 0;
            }

            if ($tail > 0 && self::hasContinuationBytes($text, $i + 1, $tail, $max)) {
                // Almost certainly UTF-8 already: copy the whole sequence verbatim.
                for ($j = 0; $j <= $tail; $j++) {
                    $buf .= $text[$i + $j];
                }
                $i += $tail;
            } else {
                // Not a well-formed sequence: treat the byte as ISO 8859-1.
                $buf .= self::singleByteToUtf8($c1);
            }
        }
        return $buf;
    }

    /**
     * Converts UTF-8 text to Windows-1252.
     *
     * @param mixed  $text   A string, or an array whose values are converted
     *                       recursively. Any other type is returned untouched.
     * @param string $option One of WITHOUT_ICONV, ICONV_TRANSLIT or ICONV_IGNORE.
     * @return mixed The converted value.
     */
    public static function toWin1252($text, $option = self::WITHOUT_ICONV)
    {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::toWin1252($v, $option);
            }
            return $text;
        }
        if (is_string($text)) {
            return static::utf8_decode($text, $option);
        }
        return $text;
    }

    /**
     * Alias of toWin1252() using the default (non-iconv) decoder.
     *
     * @param mixed $text See toWin1252().
     * @return mixed
     */
    public static function toISO8859($text)
    {
        return self::toWin1252($text);
    }

    /**
     * Alias of toWin1252() using the default (non-iconv) decoder.
     *
     * @param mixed $text See toWin1252().
     * @return mixed
     */
    public static function toLatin1($text)
    {
        return self::toWin1252($text);
    }

    /**
     * Repairs text that was UTF-8 encoded more than once by repeatedly
     * decoding to Windows-1252 and re-running toUTF8() until the result
     * stops changing.
     *
     * Characters that have no Windows-1252 equivalent do not survive the
     * decode step (the built-in decoder turns them into "?").
     *
     * @param mixed  $text   A string, or an array whose values are fixed
     *                       recursively. Any other type is returned untouched.
     * @param string $option One of WITHOUT_ICONV, ICONV_TRANSLIT or ICONV_IGNORE.
     * @return mixed The repaired value.
     */
    public static function fixUTF8($text, $option = self::WITHOUT_ICONV)
    {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::fixUTF8($v, $option);
            }
            return $text;
        }
        if (!is_string($text)) {
            // Same contract as toUTF8()/toWin1252(): non-strings pass through.
            return $text;
        }

        do {
            $previous = $text;
            $decoded = static::utf8_decode($text, $option);
            if (!is_string($decoded)) {
                // iconv() reported a failure; keep the last good text rather
                // than collapsing the value to an empty string.
                return $previous;
            }
            $text = self::toUTF8($decoded);
        } while ($text !== $previous);

        return $text;
    }

    /**
     * Fixes UTF-8 text that was produced by converting Windows-1252 input as
     * if it were ISO 8859-1, which leaves the 0x80-0x9F characters encoded as
     * C1 control code points.
     *
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * @param string|array $text Subject(s), passed straight to str_replace().
     * @return string|array The subject(s) with the affected sequences replaced.
     */
    public static function UTF8FixWin1252Chars($text)
    {
        return str_replace(array_keys(self::$brokenUtf8ToUtf8), array_values(self::$brokenUtf8ToUtf8), $text);
    }

    /**
     * Strips a leading UTF-8 byte order mark, if present.
     *
     * @param string $str
     * @return string
     */
    public static function removeBOM($str = "")
    {
        if (substr($str, 0, 3) === "\xef\xbb\xbf") {
            $str = substr($str, 3);
        }
        return $str;
    }

    /**
     * Byte length of a string, even when mbstring.func_overload replaces strlen().
     *
     * @param string $text
     * @return int
     */
    protected static function strlen($text)
    {
        $overloaded = function_exists('mb_strlen') && (((int) ini_get('mbstring.func_overload')) & 2);
        return $overloaded ? mb_strlen($text, '8bit') : strlen($text);
    }

    /**
     * Maps a loosely written encoding label to 'ISO-8859-1' or 'UTF-8'.
     *
     * The label is upper-cased and stripped of everything except letters,
     * digits and whitespace before lookup; unknown labels yield 'UTF-8'.
     *
     * @param string $encodingLabel
     * @return string 'ISO-8859-1' or 'UTF-8'
     */
    public static function normalizeEncoding($encodingLabel)
    {
        $encoding = strtoupper($encodingLabel);
        $encoding = preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);
        $equivalences = array(
            'ISO88591' => 'ISO-8859-1',
            'ISO8859' => 'ISO-8859-1',
            'ISO' => 'ISO-8859-1',
            'LATIN1' => 'ISO-8859-1',
            'LATIN' => 'ISO-8859-1',
            'UTF8' => 'UTF-8',
            'UTF' => 'UTF-8',
            'WIN1252' => 'ISO-8859-1',
            'WINDOWS1252' => 'ISO-8859-1'
        );
        return isset($equivalences[$encoding]) ? $equivalences[$encoding] : 'UTF-8';
    }

    /**
     * Converts text to the encoding named by a label.
     *
     * @param string $encodingLabel Any label understood by normalizeEncoding().
     * @param mixed  $text          See toUTF8() / toLatin1().
     * @return mixed
     */
    public static function encode($encodingLabel, $text)
    {
        $encodingLabel = self::normalizeEncoding($encodingLabel);
        if ($encodingLabel === 'ISO-8859-1') {
            return self::toLatin1($text);
        }
        return self::toUTF8($text);
    }

    /**
     * Decodes UTF-8 to Windows-1252.
     *
     * Without iconv (or when iconv is unavailable) the input is first passed
     * through toUTF8() and then decoded by a table-driven decoder. This
     * replaces the PHP built-in utf8_decode(), which is deprecated as of
     * PHP 8.2. With an iconv option the raw input is handed to iconv().
     *
     * @param string $text
     * @param string $option One of WITHOUT_ICONV, ICONV_TRANSLIT or ICONV_IGNORE.
     * @return string|false The decoded bytes; false when iconv() fails.
     */
    protected static function utf8_decode($text, $option)
    {
        // Loose comparisons are kept on purpose so that null/false options
        // keep selecting the non-iconv path.
        if ($option == self::WITHOUT_ICONV || !function_exists('iconv')) {
            $utf8 = self::toUTF8($text);
            if (!is_string($utf8)) {
                $utf8 = is_scalar($utf8) ? (string) $utf8 : "";
            }
            return self::decodeUtf8ToWin1252($utf8);
        }

        if ($option == self::ICONV_TRANSLIT) {
            $suffix = '//TRANSLIT';
        } elseif ($option == self::ICONV_IGNORE) {
            $suffix = '//IGNORE';
        } else {
            $suffix = '';
        }
        return iconv("UTF-8", "Windows-1252" . $suffix, $text);
    }

    /**
     * Encodes one byte in the 0x80-0xFF range as the two-byte UTF-8 sequence
     * of the ISO 8859-1 character with the same value.
     *
     * @param string $byte A single byte >= 0x80.
     * @return string Two bytes.
     */
    private static function singleByteToUtf8($byte)
    {
        $code = ord($byte);
        // Integer shift instead of "/ 64": chr() must not be given a float.
        return chr(0xc0 | ($code >> 6)) . chr(0x80 | ($code & 0x3f));
    }

    /**
     * Tells whether $count UTF-8 continuation bytes (0x80-0xBF) start at $start.
     *
     * @param string $text
     * @param int    $start Offset of the first byte to inspect.
     * @param int    $count Number of continuation bytes required.
     * @param int    $max   Byte length of $text.
     * @return bool False when the string ends too early or a byte is out of range.
     */
    private static function hasContinuationBytes($text, $start, $count, $max)
    {
        if ($start + $count > $max) {
            return false;
        }
        for ($j = $start; $j < $start + $count; $j++) {
            $code = ord($text[$j]);
            if ($code < 0x80 || $code > 0xbf) {
                return false;
            }
        }
        return true;
    }

    /**
     * Single-pass UTF-8 to Windows-1252 decoder.
     *
     * Sequences listed in $utf8ToWin1252 become their Windows-1252 byte, code
     * points U+0080-U+00FF become the byte of the same value, and everything
     * else (other code points, malformed or truncated sequences) becomes "?".
     *
     * @param string $utf8
     * @return string
     */
    private static function decodeUtf8ToWin1252($utf8)
    {
        $length = self::strlen($utf8);
        $out = "";
        $i = 0;
        while ($i < $length) {
            $lead = ord($utf8[$i]);
            if ($lead < 0x80) {
                $out .= $utf8[$i];
                $i++;
                continue;
            }

            // 0x80-0xC1 and 0xF8-0xFF cannot start a well-formed sequence.
            if ($lead >= 0xc2 && $lead <= 0xdf) {
                $tail = 1;
            } elseif ($lead >= 0xe0 && $lead <= 0xef) {
                $tail = 2;
            } elseif ($lead >= 0xf0 && $lead <= 0xf7) {
                $tail = 3;
            } else {
                $tail = 0;
            }

            if ($tail === 0 || !self::hasContinuationBytes($utf8, $i + 1, $tail, $length)) {
                $out .= "?";
                $i++;
                continue;
            }

            $sequence = "";
            for ($j = 0; $j <= $tail; $j++) {
                $sequence .= $utf8[$i + $j];
            }

            if (isset(self::$utf8ToWin1252[$sequence])) {
                $out .= self::$utf8ToWin1252[$sequence];
            } elseif ($tail === 1 && $lead <= 0xc3) {
                // U+0080-U+00FF: the code point is the output byte.
                $out .= chr((($lead & 0x1f) << 6) | (ord($utf8[$i + 1]) & 0x3f));
            } else {
                $out .= "?";
            }
            $i += $tail + 1;
        }
        return $out;
    }
}
// Script result: run a short ASCII-only sample through the converter and
// hand the converted string back to whatever included this file.
return \ForceUTF8\Encoding::toUTF8('!@#wer');
Enable javascript to submit You have javascript disabled. You will not be able to edit any code.
Here you find the average performance (time & memory) of each version. A grayed out version indicates it didn't complete successfully (based on exit-code).
Version System time (s) User time (s) Memory (MiB) 8.3.6 0.012 0.003 18.68 8.3.5 0.009 0.006 18.04 8.3.4 0.007 0.011 18.71 8.3.3 0.010 0.010 18.51 8.3.2 0.002 0.005 21.76 8.3.1 0.006 0.003 21.66 8.3.0 0.004 0.004 23.48 8.2.18 0.012 0.008 18.29 8.2.17 0.006 0.009 18.61 8.2.16 0.007 0.007 22.96 8.2.15 0.005 0.002 24.18 8.2.14 0.009 0.000 24.66 8.2.13 0.005 0.003 20.89 8.2.12 0.008 0.000 26.35 8.2.11 0.010 0.000 21.04 8.2.10 0.008 0.004 17.78 8.2.9 0.003 0.006 18.09 8.2.8 0.004 0.004 19.76 8.2.7 0.000 0.009 17.91 8.2.6 0.004 0.004 17.78 8.2.5 0.006 0.003 18.10 8.2.4 0.000 0.008 19.09 8.2.3 0.006 0.003 20.82 8.2.2 0.000 0.008 17.96 8.2.1 0.004 0.004 18.02 8.2.0 0.003 0.005 17.87 8.1.28 0.011 0.004 25.92 8.1.27 0.007 0.000 23.87 8.1.26 0.008 0.000 26.35 8.1.25 0.004 0.004 28.09 8.1.24 0.008 0.000 23.92 8.1.23 0.011 0.000 22.82 8.1.22 0.006 0.003 17.74 8.1.21 0.008 0.000 18.77 8.1.20 0.003 0.006 17.25 8.1.19 0.004 0.004 17.10 8.1.18 0.006 0.003 18.10 8.1.17 0.006 0.003 18.46 8.1.16 0.000 0.007 18.71 8.1.15 0.000 0.007 18.96 8.1.14 0.004 0.004 17.61 8.1.13 0.007 0.000 19.10 8.1.12 0.006 0.003 17.41 8.1.11 0.003 0.005 17.37 8.1.10 0.000 0.008 17.27 8.1.9 0.006 0.003 17.37 8.1.8 0.003 0.007 17.36 8.1.7 0.003 0.009 17.44 8.1.6 0.005 0.003 17.55 8.1.5 0.005 0.003 17.47 8.1.4 0.004 0.004 17.40 8.1.3 0.000 0.008 17.41 8.1.2 0.004 0.004 17.41 8.1.1 0.005 0.003 17.30 8.1.0 0.004 0.004 17.26 8.0.30 0.000 0.007 19.76 8.0.29 0.006 0.003 16.75 8.0.28 0.003 0.003 18.27 8.0.27 0.003 0.003 17.10 8.0.26 0.004 0.004 17.13 8.0.25 0.003 0.003 16.84 8.0.24 0.000 0.008 16.98 8.0.23 0.003 0.003 16.97 8.0.22 0.008 0.003 16.88 8.0.21 0.004 0.004 16.77 8.0.20 0.003 0.003 16.85 8.0.19 0.003 0.006 16.80 8.0.18 0.000 0.007 16.88 8.0.17 0.000 0.008 16.91 8.0.16 0.007 0.000 16.87 8.0.15 0.007 0.000 16.77 8.0.14 0.003 0.006 16.71 8.0.13 0.000 0.007 13.31 8.0.12 0.003 0.005 16.84 8.0.11 0.000 0.008 16.71 8.0.10 0.004 0.004 16.81 8.0.9 0.000 0.007 16.75 8.0.8 0.000 0.014 16.86 8.0.7 0.004 0.004 16.75 
8.0.6 0.004 0.004 16.99 8.0.5 0.004 0.004 16.70 8.0.3 0.010 0.008 16.97 8.0.2 0.013 0.009 17.40 8.0.1 0.000 0.007 17.02 8.0.0 0.008 0.010 16.54 7.4.33 0.000 0.005 15.55 7.4.32 0.000 0.007 16.74 7.4.30 0.004 0.004 16.63 7.4.29 0.003 0.003 16.69 7.4.28 0.004 0.004 16.54 7.4.27 0.000 0.007 16.68 7.4.26 0.003 0.007 16.72 7.4.25 0.004 0.004 16.70 7.4.24 0.004 0.004 16.64 7.4.23 0.004 0.004 16.80 7.4.22 0.004 0.004 16.54 7.4.21 0.010 0.006 16.63 7.4.20 0.007 0.000 16.81 7.4.16 0.013 0.004 16.54 7.4.14 0.011 0.007 17.86 7.4.13 0.014 0.006 16.46 7.4.12 0.010 0.009 16.68 7.4.11 0.005 0.014 16.85 7.4.10 0.010 0.013 16.68 7.4.9 0.009 0.009 16.70 7.4.8 0.006 0.016 19.39 7.4.7 0.011 0.007 16.73 7.4.6 0.003 0.013 16.69 7.4.5 0.004 0.015 16.61 7.4.4 0.007 0.010 16.47 7.4.0 0.008 0.008 15.10 7.3.33 0.000 0.005 13.53 7.3.32 0.003 0.003 13.44 7.3.31 0.007 0.000 16.26 7.3.30 0.003 0.003 16.50 7.3.29 0.003 0.003 16.36 7.3.28 0.008 0.010 16.41 7.3.26 0.015 0.003 16.67 7.3.24 0.010 0.008 16.66 7.3.23 0.016 0.003 16.54 7.3.21 0.003 0.013 16.45 7.3.20 0.012 0.006 16.76 7.3.19 0.012 0.006 16.56 7.3.18 0.010 0.010 16.45 7.3.17 0.012 0.009 16.68 7.3.16 0.011 0.012 16.55 7.3.12 0.000 0.017 14.82 7.3.11 0.009 0.009 15.08 7.3.10 0.009 0.006 15.13 7.3.9 0.006 0.003 14.94 7.3.8 0.006 0.003 14.80 7.3.7 0.007 0.004 15.02 7.3.6 0.000 0.009 15.14 7.3.5 0.004 0.007 14.76 7.3.4 0.000 0.015 15.02 7.3.3 0.006 0.010 15.08 7.3.2 0.010 0.007 16.51 7.3.1 0.005 0.009 16.57 7.3.0 0.008 0.003 16.64 7.2.33 0.010 0.010 16.70 7.2.32 0.003 0.016 16.83 7.2.31 0.006 0.019 16.77 7.2.30 0.010 0.007 16.81 7.2.29 0.009 0.009 16.69 7.2.25 0.007 0.014 15.45 7.2.24 0.004 0.015 15.40 7.2.23 0.011 0.004 15.15 7.2.22 0.003 0.010 15.15 7.2.21 0.004 0.008 15.29 7.2.20 0.010 0.003 15.13 7.2.19 0.013 0.000 15.33 7.2.18 0.006 0.009 14.98 7.2.17 0.007 0.010 15.34 7.2.13 0.009 0.006 17.03 7.2.12 0.007 0.007 16.93 7.2.11 0.008 0.003 16.73 7.2.10 0.010 0.007 17.04 7.2.9 0.003 0.013 16.99 7.2.8 0.003 0.010 16.95 7.2.7 0.003 0.007 16.80 
7.2.6 0.004 0.012 16.90 7.2.5 0.003 0.010 17.11 7.2.4 0.007 0.008 17.43 7.2.3 0.039 0.012 17.41 7.2.2 0.095 0.014 17.59 7.2.1 0.032 0.009 17.70 7.2.0 0.183 0.007 17.66 7.1.33 0.003 0.010 15.61 7.1.32 0.007 0.007 15.57 7.1.31 0.003 0.006 15.88 7.1.30 0.003 0.010 15.76 7.1.29 0.011 0.004 15.82 7.1.28 0.003 0.010 15.96 7.1.27 0.011 0.004 15.73 7.1.26 0.007 0.007 15.83 7.1.25 0.006 0.006 15.72 7.1.20 0.006 0.009 15.50 7.1.16 0.064 0.010 17.02 7.1.15 0.076 0.013 17.04 7.1.14 0.091 0.013 16.87 7.1.13 0.074 0.007 17.17 7.1.12 0.053 0.012 17.00 7.1.11 0.052 0.010 16.23 7.1.10 0.071 0.009 16.12 7.1.9 0.122 0.018 16.40 7.1.8 0.079 0.009 16.31 7.1.7 0.050 0.012 15.54 7.1.6 0.074 0.007 33.29 7.1.5 0.075 0.012 33.06 7.1.4 0.079 0.012 32.82 7.1.3 0.078 0.010 32.67 7.1.2 0.077 0.010 32.99 7.1.1 0.077 0.009 14.97 7.1.0 0.075 0.014 14.88
preferences:dark mode live preview
65.9 ms | 400 KiB | 5 Q