<?php
/**
 * Punycode (RFC 3492) encoder/decoder for dot-separated host names.
 *
 * encode() accepts labels written in UTF-8 (or, as a fallback, ISO-8859-1)
 * and produces their ASCII "xn--" form; decode() turns "xn--" labels back
 * into UTF-8. Both public methods return false when a label is malformed.
 * Only ASCII letters are lower-cased; non-ASCII letters are left untouched.
 */
class Punycode
{
    // Bootstring parameters for Punycode (RFC 3492, section 5).
    const BASE = 36;
    const TMIN = 1;
    const TMAX = 26;
    const SKEW = 38;
    const DAMP = 700;
    const INITIAL_BIAS = 72;
    const INITIAL_N = 128;

    // Upper bound used for the "fail on overflow" checks demanded by the RFC.
    // A 31-bit limit keeps every intermediate value exact on 32- and 64-bit PHP.
    const MAX_INT = 0x7FFFFFFF;

    /**
     * Integer division for non-negative operands (the RFC's "div").
     *
     * @param int $dividend
     * @param int $divisor must be greater than zero
     * @return int
     */
    private function intDiv($dividend, $divisor)
    {
        // Subtracting the remainder first makes the division exact, so no
        // floating point rounding can leak into the result.
        return (int) (($dividend - ($dividend % $divisor)) / $divisor);
    }

    /**
     * Lower-cases ASCII letters only, leaving every byte >= 0x80 untouched.
     *
     * strtolower() is locale dependent before PHP 8 and may corrupt
     * multi-byte sequences, hence the explicit translation table.
     *
     * @param string $input
     * @return string
     */
    private function toLowerAscii($input)
    {
        return strtr($input, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz');
    }

    /**
     * Decodes the UTF-8 sequence that starts at byte offset $i of $input.
     *
     * Only multi-byte sequences (2 to 4 bytes) are recognised. Overlong
     * encodings, surrogates and values above U+10FFFF are rejected.
     *
     * @param string   $input     raw bytes
     * @param int      $i         byte offset of the lead byte
     * @param int|null $codePoint receives the decoded code point on success
     * @return int number of bytes consumed, or 0 if there is no valid sequence
     */
    private function decodeUtf8Character($input, $i, &$codePoint = null)
    {
        $chunk = (string) substr($input, $i, 4);
        if ($chunk === '') {
            return 0;
        }

        $bytes = array_values(unpack('C*', $chunk));
        $lead = $bytes[0];

        if (($lead & 0xE0) === 0xC0) {
            $length = 2;
            $value = $lead & 0x1F;
            $minimum = 0x80;
        } elseif (($lead & 0xF0) === 0xE0) {
            $length = 3;
            $value = $lead & 0x0F;
            $minimum = 0x0800;
        } elseif (($lead & 0xF8) === 0xF0) {
            $length = 4;
            $value = $lead & 0x07;
            $minimum = 0x010000;
        } else {
            return 0;
        }

        if (count($bytes) < $length) {
            return 0;
        }

        for ($k = 1; $k < $length; $k++) {
            // every continuation byte must look like 10xxxxxx
            if (($bytes[$k] & 0xC0) !== 0x80) {
                return 0;
            }
            $value = ($value << 6) | ($bytes[$k] & 0x3F);
        }

        // Reject overlong forms, UTF-16 surrogates and out-of-range values.
        if ($value < $minimum || $value > 0x10FFFF || ($value >= 0xD800 && $value <= 0xDFFF)) {
            return 0;
        }

        $codePoint = $value;

        return $length;
    }

    /**
     * Encodes a Unicode code point as a UTF-8 byte string.
     *
     * @param int $codePoint
     * @return string|false the UTF-8 bytes, or false if the value is negative,
     *                      a surrogate, or above U+10FFFF
     */
    private function encodeUtf8CodePoint($codePoint)
    {
        if ($codePoint < 0 || ($codePoint >= 0xD800 && $codePoint <= 0xDFFF)) {
            return false;
        }
        if ($codePoint < 0x80) {
            return chr($codePoint);
        }
        if ($codePoint < 0x0800) {
            return chr(0xC0 | ($codePoint >> 6))
                . chr(0x80 | ($codePoint & 0x3F));
        }
        if ($codePoint < 0x010000) {
            return chr(0xE0 | ($codePoint >> 12))
                . chr(0x80 | (($codePoint >> 6) & 0x3F))
                . chr(0x80 | ($codePoint & 0x3F));
        }
        if ($codePoint < 0x110000) {
            return chr(0xF0 | ($codePoint >> 18))
                . chr(0x80 | (($codePoint >> 12) & 0x3F))
                . chr(0x80 | (($codePoint >> 6) & 0x3F))
                . chr(0x80 | ($codePoint & 0x3F));
        }

        return false;
    }

    /**
     * Tells whether a byte value is allowed unencoded in a label
     * (ASCII letter, digit or hyphen).
     *
     * @param int $charCode
     * @return bool
     */
    private function isDnsLabelChar($charCode)
    {
        return ($charCode >= 0x61 && $charCode <= 0x7A) // lower-case letter
            || ($charCode >= 0x30 && $charCode <= 0x39) // digit
            || $charCode === 0x2D                       // -
            || ($charCode >= 0x41 && $charCode <= 0x5A); // upper-case letter
    }

    /**
     * Tells whether a high byte is in the ISO-8859-1 printable range (>= 0xA0).
     *
     * @param int $charCode
     * @return bool
     */
    private function isPrintableLatin1Char($charCode)
    {
        return $charCode >= 0xA0;
    }

    /**
     * Splits a label into its basic (ASCII) characters and its non-basic
     * code points.
     *
     * The label is read as UTF-8 first. If any high byte is not part of a
     * valid UTF-8 sequence, the whole label is re-read as ISO-8859-1, where
     * every high byte must be printable.
     *
     * @param string $input         one label, without dots
     * @param array  $output        receives the basic characters, in order
     * @param array  $nonBasicChars receives [position, codePoint] pairs sorted
     *                              by position (index within the label, counted
     *                              in characters)
     * @return bool false if the label contains a character that is not allowed
     */
    private function getEncodingParts($input, &$output = [], &$nonBasicChars = [])
    {
        $length = strlen($input);
        $basic = [];
        $nonBasic = [];
        $isUtf8 = true;

        // First attempt: interpret the label as UTF-8.
        for ($i = 0, $position = 0; $i < $length; $position++) {
            $charCode = ord($input[$i]);
            if ($charCode & 0x80) {
                $width = $this->decodeUtf8Character($input, $i, $codePoint);
                if ($width === 0) {
                    $isUtf8 = false;
                    break;
                }
                $nonBasic[] = [$position, $codePoint];
                $i += $width;
            } elseif ($this->isDnsLabelChar($charCode)) {
                $basic[] = $input[$i++];
            } else {
                return false;
            }
        }

        if (!$isUtf8) {
            // Fallback: one byte per character, so position equals byte offset.
            $basic = [];
            $nonBasic = [];
            for ($i = 0; $i < $length; $i++) {
                $charCode = ord($input[$i]);
                if ($charCode & 0x80) {
                    if (!$this->isPrintableLatin1Char($charCode)) {
                        return false;
                    }
                    $nonBasic[] = [$i, $charCode];
                } elseif ($this->isDnsLabelChar($charCode)) {
                    $basic[] = $input[$i];
                } else {
                    return false;
                }
            }
        }

        $output = $basic;
        $nonBasicChars = $nonBasic;

        return true;
    }

    /**
     * Threshold t for digit position $k (RFC 3492, sections 6.2 and 6.3).
     *
     * @param int $k
     * @param int $bias
     * @return int
     */
    private function threshold($k, $bias)
    {
        if ($k <= $bias) {
            return self::TMIN;
        }
        if ($k >= $bias + self::TMAX) {
            return self::TMAX;
        }

        return $k - $bias;
    }

    /**
     * Maps a digit value (0..35) to its lower-case Punycode character.
     *
     * @param int $digit
     * @return string
     */
    private function encodeDigit($digit)
    {
        // 0..25 => 'a'..'z', 26..35 => '0'..'9'
        return chr($digit < 26 ? $digit + 0x61 : $digit + 22);
    }

    /**
     * Maps a Punycode character code to its digit value.
     *
     * @param int $charCode
     * @return int|false digit value 0..35, or false if it is not a digit
     */
    private function decodeDigit($charCode)
    {
        if ($charCode >= 0x61 && $charCode <= 0x7A) {
            return $charCode - 0x61;
        }
        if ($charCode >= 0x41 && $charCode <= 0x5A) {
            return $charCode - 0x41;
        }
        if ($charCode >= 0x30 && $charCode <= 0x39) {
            return $charCode - 22;
        }

        return false;
    }

    /**
     * Bias adaptation function (RFC 3492, section 6.1).
     *
     * All divisions are integer divisions; using float division here makes
     * the resulting bias, and therefore every later digit, wrong.
     *
     * @param int  $delta
     * @param int  $numPoints number of code points handled so far, plus one
     * @param bool $first     true for the very first delta of a label
     * @return int
     */
    private function adaptBias($delta, $numPoints, $first)
    {
        $delta = $first ? $this->intDiv($delta, self::DAMP) : $this->intDiv($delta, 2);
        $delta += $this->intDiv($delta, $numPoints);

        $range = self::BASE - self::TMIN;
        $limit = $this->intDiv($range * self::TMAX, 2); // 455

        for ($k = 0; $delta > $limit; $k += self::BASE) {
            $delta = $this->intDiv($delta, $range);
        }

        return $k + $this->intDiv(($range + 1) * $delta, $delta + self::SKEW);
    }

    /**
     * Decodes a single label. Labels without the "xn--" prefix are returned
     * unchanged.
     *
     * @param string $input one label, without dots
     * @return string|false the UTF-8 label, or false if it is not valid Punycode
     */
    private function decodeLabel($input)
    {
        if (strncmp($input, 'xn--', 4) !== 0) {
            return $input;
        }

        // substr() returns false instead of '' before PHP 8 when nothing is left.
        $encoded = (string) substr($input, 4);
        $output = [];

        // Everything before the last delimiter is literal basic text. A
        // delimiter at offset 0 does not count: per the RFC it is then read
        // as (invalid) encoded data.
        $delimiterPos = strrpos($encoded, '-');
        if ($delimiterPos !== false && $delimiterPos > 0) {
            $output = str_split(substr($encoded, 0, $delimiterPos), 1);
            foreach ($output as $char) {
                if (ord($char) >= 0x80) {
                    return false;
                }
            }
            $encoded = (string) substr($encoded, $delimiterPos + 1);
        }

        $n = self::INITIAL_N;
        $i = 0;
        $bias = self::INITIAL_BIAS;
        $length = strlen($encoded);
        $pos = 0;

        while ($pos < $length) {
            $oldi = $i;
            $w = 1;

            // Read one generalized variable-length integer into $i.
            for ($k = self::BASE; ; $k += self::BASE) {
                if ($pos >= $length) {
                    return false; // input ends in the middle of a number
                }
                $digit = $this->decodeDigit(ord($encoded[$pos++]));
                if ($digit === false) {
                    return false;
                }
                if ($digit > $this->intDiv(self::MAX_INT - $i, $w)) {
                    return false; // overflow
                }
                $i += $digit * $w;

                $t = $this->threshold($k, $bias);
                if ($digit < $t) {
                    break;
                }
                if ($w > $this->intDiv(self::MAX_INT, self::BASE - $t)) {
                    return false; // overflow
                }
                $w *= self::BASE - $t;
            }

            $count = count($output) + 1;
            $bias = $this->adaptBias($i - $oldi, $count, $oldi === 0);
            $n += $this->intDiv($i, $count);
            $i %= $count;

            $char = $this->encodeUtf8CodePoint($n);
            if ($char === false) {
                return false;
            }
            array_splice($output, $i, 0, [$char]);
            $i++;
        }

        return implode('', $output);
    }

    /**
     * Encodes a single label. Labels made only of basic characters are
     * returned unchanged.
     *
     * @param string $input one label, without dots
     * @return string|false the ASCII label, or false if it cannot be encoded
     */
    private function encodeLabel($input)
    {
        if (!$this->getEncodingParts($input, $output, $nonBasicChars)) {
            return false;
        }
        if (empty($nonBasicChars)) {
            return $input;
        }

        // Rebuild the full code point sequence from the two partial lists.
        $positions = [];
        foreach ($nonBasicChars as $entry) {
            $positions[$entry[0]] = $entry[1];
        }
        $basicCount = count($output);
        $total = $basicCount + count($nonBasicChars);
        $codePoints = [];
        for ($p = 0, $b = 0; $p < $total; $p++) {
            $codePoints[] = isset($positions[$p]) ? $positions[$p] : ord($output[$b++]);
        }

        $encoded = implode('', $output);
        if ($basicCount > 0) {
            $encoded .= '-';
        }

        $n = self::INITIAL_N;
        $delta = 0;
        $bias = self::INITIAL_BIAS;
        $handled = $basicCount;

        while ($handled < $total) {
            // Smallest code point not handled yet.
            $m = self::MAX_INT;
            foreach ($codePoints as $c) {
                if ($c >= $n && $c < $m) {
                    $m = $c;
                }
            }

            if ($m - $n > $this->intDiv(self::MAX_INT - $delta, $handled + 1)) {
                return false; // overflow
            }
            $delta += ($m - $n) * ($handled + 1);
            $n = $m;

            foreach ($codePoints as $c) {
                if ($c < $n) {
                    if ($delta >= self::MAX_INT) {
                        return false; // overflow
                    }
                    $delta++;
                } elseif ($c === $n) {
                    // Emit $delta as a generalized variable-length integer.
                    $q = $delta;
                    for ($k = self::BASE; ; $k += self::BASE) {
                        $t = $this->threshold($k, $bias);
                        if ($q < $t) {
                            break;
                        }
                        $encoded .= $this->encodeDigit($t + ($q - $t) % (self::BASE - $t));
                        $q = $this->intDiv($q - $t, self::BASE - $t);
                    }
                    $encoded .= $this->encodeDigit($q);

                    $bias = $this->adaptBias($delta, $handled + 1, $handled === $basicCount);
                    $delta = 0;
                    $handled++;
                }
            }

            $delta++;
            $n++;
        }

        return 'xn--' . $encoded;
    }

    /**
     * Converts every "xn--" label of a host name to UTF-8.
     *
     * @param string $input dot-separated host name; ASCII letters are lower-cased
     * @return string|false the decoded name, or false if any label is invalid
     */
    public function decode($input)
    {
        $output = [];
        foreach (explode('.', $this->toLowerAscii($input)) as $label) {
            if (false === $label = $this->decodeLabel($label)) {
                return false;
            }
            $output[] = $label;
        }

        return implode('.', $output);
    }

    /**
     * Converts every label of a host name that contains non-ASCII characters
     * to its "xn--" form.
     *
     * @param string $input dot-separated host name in UTF-8 or ISO-8859-1;
     *                      ASCII letters are lower-cased
     * @return string|false the encoded name, or false if any label is invalid
     */
    public function encode($input)
    {
        $output = [];
        foreach (explode('.', $this->toLowerAscii($input)) as $label) {
            if (false === $label = $this->encodeLabel($label)) {
                return false;
            }
            $output[] = $label;
        }

        return implode('.', $output);
    }
}
// Demo: decode RFC 3492 sample (N) and write the result to standard output.
print (new Punycode())->decode('xn--Hello-Another-Way--fc4qua05auwb3674vfr0b');
// preferences:
// 39.71 ms | 402 KiB | 5 Q