<?php
// Demo script: the file name 'test image with Ümlaäute und spaäe.jpg' arrives
// percent-encoded in two different ways. Each variant is decoded with both
// rawurldecode() and urldecode(), then the decoded text, its length and the
// result of cleanImageName() are printed for comparison.

// Variant labelled "GWT": the umlauts are decomposed, i.e. the base letter is
// followed by %CC%88 (the UTF-8 bytes of U+0308 COMBINING DIAERESIS).
$nameEncodedFromGWT = 'test%20image%20with%20U%CC%88mlaa%CC%88ute%20und%20spaa%CC%88e.jpg';
// Variant labelled "PHP": the umlauts are precomposed (%C3%9C = U+00DC,
// %C3%A4 = U+00E4). Spaces are written as %20, which is what rawurlencode()
// produces; urlencode() would have written them as '+'.
$nameEncodedFromPHP = 'test%20image%20with%20%C3%9Cmla%C3%A4ute%20und%20spa%C3%A4e.jpg';

// Decode with rawurldecode(). Only the GWT variant is additionally passed
// through to_utf8().
$nameDecodedFromGWTRaw = to_utf8(rawurldecode($nameEncodedFromGWT));
$nameDecodedFromPHPRaw = rawurldecode($nameEncodedFromPHP);

// Decode with urldecode(), which additionally turns '+' into a space. Neither
// sample contains a '+', so both decoders see the same input here.
$nameDecodedFromGWTSimple = urldecode($nameEncodedFromGWT);
$nameDecodedFromPHPSimple = urldecode($nameEncodedFromPHP);

// Report for the GWT variant. strlen() counts bytes, not characters, so the
// decomposed form reports a larger length than the precomposed one.
echo 'Name from GWT: ' . $nameEncodedFromGWT . PHP_EOL .
'Decoded using rawurldecode(): ' . $nameDecodedFromGWTRaw . PHP_EOL .
'Decoded using urldecode(): ' . $nameDecodedFromGWTSimple . PHP_EOL .
'String length for rawurldecode(): ' . strlen($nameDecodedFromGWTRaw) . PHP_EOL .
'String length for urldecode(): ' . strlen($nameDecodedFromGWTSimple) . PHP_EOL .
'Cleaned name for rawurldecode(): ' . cleanImageName($nameDecodedFromGWTRaw) . PHP_EOL .
'Cleaned name for urldecode(): ' . cleanImageName($nameDecodedFromGWTSimple);

echo PHP_EOL . '===============================================' . PHP_EOL;

// Same report for the PHP variant.
echo 'Name from PHP: ' . $nameEncodedFromPHP . PHP_EOL .
'Decoded using rawurldecode(): ' . $nameDecodedFromPHPRaw . PHP_EOL .
'Decoded using urldecode(): ' . $nameDecodedFromPHPSimple . PHP_EOL .
'String length for rawurldecode(): ' . strlen($nameDecodedFromPHPRaw) . PHP_EOL .
'String length for urldecode(): ' . strlen($nameDecodedFromPHPSimple) . PHP_EOL .
'Cleaned name for rawurldecode(): ' . cleanImageName($nameDecodedFromPHPRaw) . PHP_EOL .
'Cleaned name for urldecode(): ' . cleanImageName($nameDecodedFromPHPSimple);
/**
 * Returns the given text as UTF-8.
 *
 * Input that is already well-formed UTF-8 is returned unchanged; anything
 * else is assumed to be Windows-1252 (CP1252) and converted with iconv().
 *
 * Validation is delegated to PCRE: matching the empty pattern with the "u"
 * modifier succeeds exactly when the whole subject is valid UTF-8. Unlike a
 * hand-written byte-range alternation this accepts control characters such as
 * NUL, ESC or DEL inside otherwise valid UTF-8 (so such text is no longer
 * double-encoded) and it cannot run into backtracking/recursion limits on
 * long input.
 *
 * @param string $string Text in UTF-8 or Windows-1252.
 * @return string|false UTF-8 text, or false when iconv() cannot convert the
 *                      input (for example a byte that CP1252 leaves undefined).
 */
function to_utf8( $string ) {
    $string = (string) $string;

    // preg_match() returns 1 for valid UTF-8 and false for malformed input.
    if ( preg_match('//u', $string) === 1 ) {
        return $string;
    }

    return iconv( 'CP1252', 'UTF-8', $string);
}
/**
 * Turns an image file name into an ASCII, dash-separated name.
 *
 * - German umlauts and sharp s are transliterated: ä/Ä => ae, ö/Ö => oe,
 *   ü/Ü => ue, ß => ss (upper-case letters yield lower-case replacements).
 * - a and e with a grave or acute accent lose the accent (always lower-case).
 * - "&" becomes "und".
 * - "|", "-", "_", the listed punctuation (including the "." in front of a
 *   file extension), tab/CR/LF and every remaining byte >= 0x7C become a
 *   separator.
 * - Finally every space is replaced by "-".
 *
 * Accented letters are recognised both precomposed (NFC, e.g. U+00FC) and
 * decomposed (NFD, base letter followed by a combining mark such as U+0308),
 * so both spellings of the same name give the same result. The
 * transliteration works on byte sequences, therefore input that is not valid
 * UTF-8 does not abort the cleaning; its non-ASCII bytes simply become
 * separators.
 *
 * @param string $name File name, normally UTF-8 text.
 * @return string Cleaned name.
 */
function cleanImageName($name)
{
    $name = (string) $name;

    // UTF-8 byte sequences of the combining marks used by decomposed input.
    $diaeresis = "\xCC\x88"; // U+0308 COMBINING DIAERESIS
    $grave     = "\xCC\x80"; // U+0300 COMBINING GRAVE ACCENT
    $acute     = "\xCC\x81"; // U+0301 COMBINING ACUTE ACCENT

    // Precomposed characters.
    $transliterations = array(
        'ä' => 'ae', 'Ä' => 'ae',
        'ö' => 'oe', 'Ö' => 'oe',
        'ü' => 'ue', 'Ü' => 'ue',
        'ß' => 'ss',
        'á' => 'a', 'à' => 'a', 'Á' => 'a', 'À' => 'a',
        'é' => 'e', 'è' => 'e', 'É' => 'e', 'È' => 'e',
    );

    // Decomposed umlauts: base letter + combining diaeresis.
    foreach (array('a' => 'ae', 'o' => 'oe', 'u' => 'ue') as $base => $replacement) {
        $transliterations[$base . $diaeresis] = $replacement;
        $transliterations[strtoupper($base) . $diaeresis] = $replacement;
    }

    // Decomposed accented a/e: base letter + combining grave or acute.
    foreach (array('a', 'e') as $base) {
        foreach (array($grave, $acute) as $mark) {
            $transliterations[$base . $mark] = $base;
            $transliterations[strtoupper($base) . $mark] = $base;
        }
    }

    // strtr() prefers the longest matching key and never rescans its own
    // output, so "a" + U+0308 is consumed as one unit.
    $clean = strtr($name, $transliterations);

    // Byte-mode patterns, applied in order. ' - ' must be collapsed before
    // the lone '-' rule, otherwise "a - b" would end up with three separators.
    $clean = preg_replace(
        array(
            '/\|/',
            '/ - /',
            '/-/',
            '/&/',
            '/[_\x7c-\xff\$\%\=\?\@\(\)§!\|`´\*#;,:\.<>\/"\'#\t\r\n]/'
        ),
        array(
            ' ',
            ' ',
            ' ',
            'und',
            ' ',
        ),
        $clean
    );

    return str_replace(' ', '-', $clean);
}
?>
preferences:
37.16 ms | 402 KiB | 5 Q