<?php
/**
 * URI helpers: a PCRE pattern for URI references, dot-segment removal and a
 * parser that splits a URI reference into its components.
 */
class URI
{
    /**
     * Builds (once per request) and returns a PCRE fragment matching a URI reference.
     *
     * <p>The fragment carries no delimiters, anchors or modifiers. It only
     * uses hex escapes for punctuation, so it contains no literal
     * <code>!</code>, <code>/</code> or <code>#</code>. Because
     * <code>\p{L}</code> is part of the character classes, callers matching
     * UTF-8 text should compile it with the <code>u</code> modifier.</p>
     *
     * <p>Named groups, each occurring exactly once: <code>URI</code>,
     * <code>Scheme</code>, <code>Userinfo</code>, <code>Host</code>,
     * <code>IPv6Address</code>, <code>IPv4Address</code>, <code>Port</code>,
     * <code>Path</code>, <code>Query</code>, <code>Fragment</code>.
     * <code>IPv4Address</code> is only captured when the host itself is an
     * IPv4 address, not for an IPv4 tail embedded in an IPv6 literal.</p>
     *
     * @return string Regular expression fragment.
     */
    public static function getRegex()
    {
        static $cache = null;

        if ($cache !== null) {
            return $cache;
        }

        # Every piece below is a single-quoted string, so "\x2f" etc. reach
        # PCRE as hex escapes instead of being expanded by PHP.

        ###############################################################################
        #
        # pct-encoded := "%" HEXDIG HEXDIG
        # unreserved  := ALPHA / DIGIT / "-" / "." / "_" / "~"
        # sub-delims  := "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
        # pchar       := unreserved / pct-encoded / sub-delims / ":" / "@"
        #
        # \p{L} additionally admits any Unicode letter.
        $pct_encoded = '\x25[0-9A-Fa-f]{2}';
        $unreserved  = '[\x2d\x2e\x30-\x39\x41-\x5a\x5f\x61-\x7a\x7e\p{L}]';
        $sub_delims  = '[\x21\x24\x26-\x2c\x3b\x3d]';
        $pchar       = '(?:[\x21\x24\x26-\x2e\x30-\x3b\x3d\x40-\x5a\x5f\x61-\x7a\x7e\p{L}]|' . $pct_encoded . ')';

        ###############################################################################
        #
        # segment       := *pchar
        # segment-nz    := 1*pchar
        # segment-nz-nc := 1*(unreserved / pct-encoded / sub-delims / "@")
        #                  ; non-zero-length segment without any colon ":"
        $segment       = $pchar . '*';
        $segment_nz    = $pchar . '+';
        $segment_nz_nc = '(?:' . $unreserved . '|' . $pct_encoded . '|' . $sub_delims . '|\x40)+';

        ###############################################################################
        #
        # path-abempty  := *( "/" segment )
        # path-absolute := "/" [ segment-nz *( "/" segment ) ]
        # path-noscheme := segment-nz-nc *( "/" segment )
        # path-rootless := segment-nz *( "/" segment )
        # path-empty    := 0<pchar>
        #
        # These are plain (uncaptured) pieces; the single Path group is
        # assembled further down.
        $path_tail     = '(?:\x2f' . $segment . ')*';
        $path_abempty  = $path_tail;
        $path_absolute = '\x2f(?:' . $segment_nz . $path_tail . ')?';
        $path_noscheme = $segment_nz_nc . $path_tail;
        $path_rootless = $segment_nz . $path_tail;

        ###############################################################################
        #
        # IP-literal  := "[" (IPv6address / IPvFuture) "]"
        # IPvFuture   := "v" 1*HEXDIG "." 1*(unreserved / sub-delims / ":")
        # IPv6address :=                            6(h16 ":") ls32
        #              /                       "::" 5(h16 ":") ls32
        #              / [               h16 ] "::" 4(h16 ":") ls32
        #              / [ *1(h16 ":") h16 ] "::" 3(h16 ":") ls32
        #              / [ *2(h16 ":") h16 ] "::" 2(h16 ":") ls32
        #              / [ *3(h16 ":") h16 ] "::"    h16 ":"   ls32
        #              / [ *4(h16 ":") h16 ] "::"              ls32
        #              / [ *5(h16 ":") h16 ] "::"              h16
        #              / [ *6(h16 ":") h16 ] "::"
        # IPv4address := dec-octet "." dec-octet "." dec-octet "." dec-octet
        # h16         := 1*4HEXDIG
        # ls32        := (h16 ":" h16) / IPv4address
        # dec-octet   := 0-9 / 10-99 / 100-199 / 200-249 / 250-255
        #
        # dec-octet lists the longest forms first so "255" is not cut short to "2".
        $dec_octet   = '(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])';
        $IPv4Address = $dec_octet . '(?:\x2e' . $dec_octet . '){3}';
        $h16         = '[0-9A-Fa-f]{1,4}';
        $h16_colon   = $h16 . '\x3a';
        $ls32        = '(?:' . $h16_colon . $h16 . '|' . $IPv4Address . ')';
        $dbl_colon   = '\x3a\x3a';
        $IPv6Address = '(?:'
            . '(?:' . $h16_colon . '){6}' . $ls32
            . '|' . $dbl_colon . '(?:' . $h16_colon . '){5}' . $ls32
            . '|(?:' . $h16 . ')?' . $dbl_colon . '(?:' . $h16_colon . '){4}' . $ls32
            . '|(?:(?:' . $h16_colon . '){0,1}' . $h16 . ')?' . $dbl_colon . '(?:' . $h16_colon . '){3}' . $ls32
            . '|(?:(?:' . $h16_colon . '){0,2}' . $h16 . ')?' . $dbl_colon . '(?:' . $h16_colon . '){2}' . $ls32
            . '|(?:(?:' . $h16_colon . '){0,3}' . $h16 . ')?' . $dbl_colon . $h16_colon . $ls32
            . '|(?:(?:' . $h16_colon . '){0,4}' . $h16 . ')?' . $dbl_colon . $ls32
            . '|(?:(?:' . $h16_colon . '){0,5}' . $h16 . ')?' . $dbl_colon . $h16
            . '|(?:(?:' . $h16_colon . '){0,6}' . $h16 . ')?' . $dbl_colon
            . ')';
        $IPvFuture   = '[Vv][0-9A-Fa-f]+\x2e(?:' . $unreserved . '|' . $sub_delims . '|\x3a)+';
        $IP_literal  = '\x5b(?:(?P<IPv6Address>' . $IPv6Address . ')|' . $IPvFuture . ')\x5d';

        ###############################################################################
        #
        # reg-name := *(unreserved / pct-encoded / sub-delims)
        $reg_name = '(?:' . $unreserved . '|' . $pct_encoded . '|' . $sub_delims . ')*';

        ###############################################################################
        #
        # authority := [userinfo "@"] host [":" port]
        # userinfo  := *(unreserved / pct-encoded / sub-delims / ":")
        # host      := IP-literal / IPv4address / reg-name
        # port      := *DIGIT
        #
        # reg-name also matches dotted decimals, so it has to be the last
        # host alternative or IPv4Address would never be captured.
        $userinfo  = '(?P<Userinfo>(?:' . $unreserved . '|' . $pct_encoded . '|' . $sub_delims . '|\x3a)*)';
        $host      = '(?P<Host>' . $IP_literal . '|(?P<IPv4Address>' . $IPv4Address . ')|' . $reg_name . ')';
        $port      = '(?P<Port>[0-9]*)';
        $authority = '(?:' . $userinfo . '\x40)?' . $host . '(?:\x3a' . $port . ')?';

        ###############################################################################
        #
        # scheme   := ALPHA *(ALPHA / DIGIT / "+" / "-" / ".")
        # query    := *(pchar / "/" / "?")
        # fragment := *(pchar / "/" / "?")
        $scheme   = '(?P<Scheme>[A-Za-z][\x2b\x2d\x2e0-9A-Za-z]*)';
        $query    = '(?P<Query>(?:' . $pchar . '|\x2f|\x3f)*)';
        $fragment = '(?P<Fragment>(?:' . $pchar . '|\x2f|\x3f)*)';

        ###############################################################################
        #
        # URI-reference := URI / relative-ref
        # URI           := scheme ":" hier-part ["?" query] ["#" fragment]
        # relative-ref  := relative-part ["?" query] ["#" fragment]
        # hier-part     := "//" authority path-abempty
        #                / path-absolute / path-rootless / path-empty
        # relative-part := "//" authority path-abempty
        #                / path-absolute / path-noscheme / path-empty
        #
        # hier-part and relative-part differ only in path-rootless versus
        # path-noscheme, so both rules are folded into one sequence:
        #
        #   [scheme ":"] ["//" authority] path ["?" query] ["#" fragment]
        #
        # The admissible path form is picked with PCRE conditionals:
        #   - authority matched (Host group set) -> path-abempty
        #   - otherwise                          -> path-absolute, or
        #       path-rootless when Scheme is set / path-noscheme when it is
        #       not, or path-empty
        # Folding keeps every group name unique; PCRE rejects a repeated
        # group name unless duplicate names are explicitly enabled.
        $path = '(?P<Path>(?(<Host>)' . $path_abempty
            . '|(?:' . $path_absolute
            . '|(?(<Scheme>)' . $path_rootless . '|' . $path_noscheme . '))?))';

        $URI_reference = '(?P<URI>'
            . '(?:' . $scheme . '\x3a)?'
            . '(?:\x2f\x2f' . $authority . ')?'
            . $path
            . '(?:\x3f' . $query . ')?'
            . '(?:\x23' . $fragment . ')?'
            . ')';

        return $cache = $URI_reference;
    }

    /**
     * Resolves dot segments in a path.
     *
     * <h3>Introduction</h3>
     *
     * <p>This function takes a url path and normalizes it: <code>.</code>
     * segments are dropped, each <code>..</code> segment cancels the closest
     * preceding ordinary segment, and empty segments (repeated slashes) are
     * removed. A leading slash is always kept. A trailing slash is kept only
     * when the input itself ended with one and the result is not empty.</p>
     *
     * <p><code>..</code> segments that have nothing left to cancel are
     * discarded, unless <code>$isRelative</code> is true, in which case they
     * are emitted as leading <code>../</code> prefixes.</p>
     *
     * <hr>
     *
     * @throws InvalidArgumentException If <code>$Path</code> is not a string or is the empty string.
     *
     * @param string $Path Path to normalize.
     * @param bool $isRelative Whether unresolved ".." segments are kept as "../" prefixes.
     * @return string Normalized path.
     */
    public static function removeDotSegments($Path, $isRelative = false)
    {
        # Compare against '' explicitly: empty() would also reject the valid path "0"
        if (!is_string($Path) || $Path === '') {
            # The argument 0 is kept from the original call; presumably the
            # index of the offending parameter - TODO confirm against the
            # exception class actually in use.
            throw new InvalidArgumentException(0);
        }

        # Does path start with "/"
        $isAbsolute = $Path[0] === '/';
        # Does path end with "/"
        $isTrailingSlash = substr($Path, -1) === '/';

        # Walk the segments right to left so that every ".." cancels the
        # nearest ordinary segment on its left.
        $Kept = array();
        $up   = 0;

        foreach (array_reverse(explode('/', $Path)) as $Segment) {
            # Empty segments and single dots carry no information
            if ($Segment === '' || $Segment === '.') {
                continue;
            }

            # Double dot: remember that one more segment must be dropped
            if ($Segment === '..') {
                $up++;
                continue;
            }

            # Ordinary segment cancelled by a pending ".."
            if ($up > 0) {
                $up--;
                continue;
            }

            $Kept[] = $Segment;
        }

        # Recreate path (segments were collected in reverse order)
        $Result = implode('/', array_reverse($Kept));

        if ($isRelative) {
            $Result = str_repeat('../', $up) . $Result;
        }

        # Restore trailing slash, without doubling one that a "../" prefix already supplies
        if ($isTrailingSlash && $Result !== '' && substr($Result, -1) !== '/') {
            $Result .= '/';
        }

        # Restore starting slash
        if ($isAbsolute) {
            $Result = '/' . $Result;
        }

        return $Result;
    }

    /**
     * Splits a URI reference into its components.
     *
     * <p>The input is matched as a whole against {@see URI::getRegex()}. On a
     * match, every component that took part in the match is copied into the
     * result; everything else stays an empty string. When the input does not
     * match at all (or is not valid UTF-8), the result contains only empty
     * strings.</p>
     *
     * @throws InvalidArgumentException If <code>$URI</code> is neither a string nor convertible through <code>__toString()</code>.
     *
     * @param string|object $URI URI reference to parse.
     * @return array Map with the keys Scheme, Userinfo, Host, Port, Path, Query, Fragment, IPv4Address and IPv6Address.
     */
    public static function parse($URI)
    {
        # Default return value
        $return = array(
            'Scheme'      => '',
            'Userinfo'    => '',
            'Host'        => '',
            'Port'        => '',
            'Path'        => '',
            'Query'       => '',
            'Fragment'    => '',
            'IPv4Address' => '',
            'IPv6Address' => '',
        );

        # Is URI a string (or something that can become one)?
        if (!is_string($URI) && !is_callable(array($URI, '__toString'))) {
            # Argument 0 kept from the original call - see removeDotSegments()
            throw new InvalidArgumentException(0);
        }

        # D: "$" must not accept a trailing newline; u: \p{L} works on UTF-8 characters
        if (preg_match('!^' . self::getRegex() . '$!Du', (string) $URI, $m)) {
            foreach (array_keys($return) as $Component) {
                # Groups after the last participating one are absent from $m
                if (isset($m[$Component])) {
                    $return[$Component] = $m[$Component];
                }
            }
        }

        return $return;
    }
}
# Sample URIs covering several schemes (ftp, http, ldap, mailto, news, tel, telnet, urn)
$Test = array(
    'ftp://ftp.is.co.za/rfc/rfc1808.txt',
    'http://www.ietf.org/rfc/rfc2396.txt',
    'ldap://[2001:db8::7]/c=GB?objectClass?one',
    'mailto:John.Doe@example.com',
    'news:comp.infosystems.www.servers.unix',
    'tel:+1-816-555-1212',
    'telnet://192.0.2.16:80/',
    'urn:oasis:names:specification:docbook:dtd:xml:4.1.2',
);

# Feed every sample to the parser; the returned array is discarded here
foreach ($Test as $URI) {
    URI::parse($URI);
}
# preferences:
# 42.35 ms | 402 KiB | 5 Q