<?php
// Sample input. The original author left `$_GET['input']` commented out here as
// the alternative source for this value.
$string = "Timothy Gentet-O'Brien";

// Stage 1: one token (\s, \d, \w or \W) per input character.
$simpleBuilder = simpleBuilder($string);
// Stage 2: collapse repeated tokens into quantifiers and anchor the result.
$advancedBuilder = advancedBuilder($simpleBuilder);

// Embed the PHP result as a JavaScript string literal. The previous markup
// referenced a JavaScript variable named `advancedBuilder`, which is never
// defined in the page, so the log call threw instead of printing the value.
// The JSON_HEX_* flags keep the literal safe inside a <script> element.
$encodedForJs = json_encode(
    $advancedBuilder,
    JSON_HEX_TAG | JSON_HEX_AMP | JSON_HEX_APOS | JSON_HEX_QUOT
);
echo "<script>console.log('advancedBuilder: ' + " . $encodedForJs . ");</script>";
echo "\r\n";
echo $advancedBuilder;
/**
 * Converts each input string into a "simple" regular-expression fragment made
 * of one two-character token per input character.
 *
 * Characters are classified in this order:
 *   - whitespace (ASCII and the listed Unicode spaces) => \s
 *   - ASCII digit 0-9                                   => \d
 *   - ASCII letter or underscore                        => \w
 *   - anything else                                     => \W
 *
 * @param array|string $input An array of strings, or a single comma separated string.
 * @return array A list of fragments, e.g. ['\w\w\w\w\d\d\d', '\w\w\w'].
 */
function simpleBuilder($input) {
    // A comma separated string is split into its individual values.
    $input = is_array($input) ? $input : explode(',', (string) $input);

    // PCRE has no \uXXXX escape; code points are written as \x{XXXX} and the
    // `u` modifier makes the pattern and subject be treated as UTF-8.
    $whitespace = '/[ \f\n\r\t\x0b\x{00a0}\x{1680}\x{180e}\x{2000}-\x{200a}'
        . '\x{2028}\x{2029}\x{202f}\x{205f}\x{3000}\x{feff}]/u';

    $simpleRegEx = [];
    foreach ($input as $value) {
        // Strip leading and trailing white space so it does not become tokens.
        $value = trim((string) $value);

        // Split into whole UTF-8 characters so a multi-byte character yields a
        // single token rather than one token per byte.
        $chars = preg_split('//u', $value, -1, PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            // preg_split() fails on invalid UTF-8; fall back to single bytes.
            $chars = str_split($value);
        }

        $pattern = '';
        foreach ($chars as $char) {
            // Strings are joined with `.`; `+` is arithmetic in PHP.
            if (preg_match($whitespace, $char) === 1) {
                $pattern .= '\\s';
            } elseif (preg_match('/[0-9]/', $char) === 1) {
                $pattern .= '\\d';
            } elseif (preg_match('/[A-Za-z_]/', $char) === 1) {
                $pattern .= '\\w';
            } else {
                // Everything that is not a word character. The former trailing
                // \S branch could never be reached and has been dropped.
                $pattern .= '\\W';
            }
        }
        $simpleRegEx[] = $pattern;
    }

    return $simpleRegEx;
}
/**
 * Compresses the output of simpleBuilder() into a single anchored regular
 * expression.
 *
 * Each fragment is read as a sequence of two-character tokens (\w, \d, ...).
 * A run of identical tokens is collapsed into the token followed by a
 * quantifier, e.g. '\d\d\d\d' becomes '\d{4}'; a token that occurs once is
 * kept as is. The compressed fragments are joined with '|' and wrapped in
 * '^' ... '$'.
 *
 * @param array $simpleRegEx List of fragments as returned by simpleBuilder().
 * @return string The combined expression, e.g. '^\w{7}\d{3}$' for one fragment
 *                or '^(?:\w{7}\d{3}|\w{3})$' for several.
 */
function advancedBuilder($simpleRegEx) {
    $advancedRegex = [];

    foreach ($simpleRegEx as $fragment) {
        $fragment = (string) $fragment;
        // Cut the fragment into its two-character tokens. The empty string is
        // handled explicitly so it always yields no tokens.
        $tokens = $fragment === '' ? [] : str_split($fragment, 2);
        $total = count($tokens);

        $compressed = '';
        $i = 0;
        while ($i < $total) {
            // Measure the run of identical tokens starting at $i. Looking only
            // forward, and only inside the array bounds, avoids the previous
            // out-of-range reads (negative string offsets wrap to the end of
            // the string in PHP).
            $run = 1;
            while ($i + $run < $total && $tokens[$i + $run] === $tokens[$i]) {
                $run++;
            }
            // Strings are joined with `.`; `+` is arithmetic in PHP.
            $compressed .= $run > 1
                ? $tokens[$i] . '{' . $run . '}'
                : $tokens[$i];
            $i += $run;
        }

        $advancedRegex[] = $compressed;
    }

    $alternatives = join('|', $advancedRegex);
    // '|' binds more loosely than the anchors, so several alternatives must be
    // grouped; otherwise '^' would apply only to the first alternative and '$'
    // only to the last.
    if (count($advancedRegex) > 1) {
        $alternatives = '(?:' . $alternatives . ')';
    }

    return '^' . $alternatives . '$';
}
?>