<?php
/**
 * Splits an identifier-like string into its component words.
 *
 * Words are runs of ASCII letters or digits; every other byte acts as a
 * separator and is dropped. Three kinds of word are recognised:
 *   - an all-uppercase run ("FOO"); when such a run is directly followed by a
 *     lowercase letter, its last capital starts the next word instead
 *     ("FOOBar" => "FOO", "Bar"),
 *   - an optional single capital followed by lowercase letters ("Bar", "baz"),
 *   - a run of digits ("123").
 *
 * Matching is byte-wise: only A-Z, a-z and 0-9 count as word characters.
 *
 * @param string $in Text to split.
 * @return array List of word strings in order of appearance; empty when the
 *               input contains no letters or digits.
 */
function find_words($in) {
    // Alternatives are tried left to right at every position:
    //   [A-Z]+(?![a-z])  capitals not followed by a lowercase letter; the
    //                    lookahead makes the run give back its last capital
    //                    when a lowercase letter comes next
    //   [A-Z]?[a-z]+     capitalized or plain lowercase word
    //   [0-9]+           number
    $pattern = '/[A-Z]+(?![a-z])|[A-Z]?[a-z]+|[0-9]+/';

    // preg_match_all() yields 0 when nothing matches and false on a PCRE
    // error; both cases produce an empty word list.
    if (!preg_match_all($pattern, $in, $matches)) {
        return array();
    }

    return $matches[0];
}
// Demo driver: run find_words() over a set of sample identifiers and dump
// each input next to the words extracted from it.
print_r(array_map(
    function ($sample) {
        $row = array();
        $row['in'] = $sample;
        $row['out'] = find_words($sample);

        return $row;
    },
    array(
        'FooBar123',
        'FooBAR123',
        'FOOBar123',
        'foo_bar_123_Baz',
        'FOO_bar_123BAZ',
        'FOO_Bar_123baz',
        'foo_BAR_123_baz',
        'foo_Bar_123_Baz',
        'Foo_bar_123_BAZ',
    )
));
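// NOTE(review): the two lines below do not look like PHP (presumably page
// residue such as run statistics); kept as comments so the file parses.
// preferences:
// 37.49 ms | 402 KiB | 5 Q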