<?php
/**
 * Parse a URL, optionally validating it against a stricter grammar than parse_url().
 *
 * In non-strict mode this is a thin wrapper around the built-in parse_url().
 * In strict mode the URL must have a scheme followed by ":" and, when an
 * authority ("//...") is present, the authority must be terminated by "/",
 * "?", "#" or the end of the string. The strict result mirrors the shape of
 * parse_url(): an associative array keyed by component name, containing only
 * the components that are present and non-empty, with the port as an integer.
 *
 * @param string $url       The URL to parse.
 * @param int    $component One of the PHP_URL_* constants to fetch a single
 *                          component, or -1 (default) for all components.
 * @param bool   $strict    When true, use the strict grammar instead of parse_url().
 *
 * @return array|string|int|null|false In strict mode: false when the URL does
 *         not match the grammar; null when a single requested component is
 *         absent or empty; the component value (int for the port, string
 *         otherwise) when it is present; otherwise the associative array of
 *         components. In non-strict mode: whatever parse_url() returns.
 */
function new_parse_url($url, $component = -1, $strict = false)
{
    if (!$strict) {
        return parse_url($url, $component);
    }

    // PHP_URL_* constant => name of the matching capture group / result key.
    static $map = [
        PHP_URL_SCHEME => 'scheme',
        PHP_URL_USER => 'user',
        PHP_URL_PASS => 'pass',
        PHP_URL_HOST => 'host',
        PHP_URL_PORT => 'port',
        PHP_URL_PATH => 'path',
        PHP_URL_QUERY => 'query',
        PHP_URL_FRAGMENT => 'fragment',
    ];

    // Nowdoc so that "$" and "\" reach PCRE untouched. The pattern uses the
    // x flag, so the indentation inside it is insignificant. The closing
    // marker stays at column 0 to remain valid on PHP versions before 7.3.
    static $pattern = <<<'REGEXP'
    !
    ^
    (?P<scheme> [a-z][a-z0-9+.\-]* ) [:]   # scheme is mandatory, and followed by a :
    (?: [/][/]                             # // indicates that this component is the authority
        (?:
            (?P<user> [^:@/?#]+ )
            (?: [:] (?P<pass> [^:@/?#]+ ) )?
            [@]
        )?                                 # auth details are optional
        (?P<host> [^:/?#]* )               # host is mandatory. Technically it should be at least 1 char, but PHP has
                                           # internal schemes that violate this rule and as much as I dislike it, I
                                           # feel that we should pass this
        (?: [:] (?P<port> [0-9]+ ) )?      # port is simply a sequence of decimal digits
        (?= [/?#]|$ )                      # path must be missing or begin with / if authority is present
    )?
    (?P<path> [^?#]+ )?                    # path is everything up to the query/fragment
    (?: [?] (?P<query> [^#]+ ) )?          # query is optional
    (?: [#] (?P<fragment> .+ ) )?          # fragment is optional
    $
    !xi
REGEXP;

    if (!preg_match($pattern, $url, $matches)) {
        return false;
    }

    // Collect the components that actually matched. PCRE leaves unmatched
    // groups either as '' (when a later group matched) or out of $matches
    // entirely (when they trail the last matched group), so both cases are
    // treated as "absent".
    $result = [];
    foreach ($map as $name) {
        if (isset($matches[$name]) && $matches[$name] !== '') {
            // parse_url() reports the port as an integer; do the same here.
            $result[$name] = ($name === 'port') ? (int) $matches[$name] : $matches[$name];
        }
    }

    if (isset($map[$component])) {
        $name = $map[$component];

        // Like parse_url(), yield null when the requested component is absent.
        return isset($result[$name]) ? $result[$name] : null;
    }

    return $result;
}
// Demo: run the strict parser against a sample URL and dump whatever it returns.
$url = 'http://user:pass@foo.com/test?thing=stuff#fragment';
echo print_r(new_parse_url($url, -1, true), true);