';
// Corpus-wide tallies, filled in by the callback below:
//   $totalN - number of word occurrences seen so far
//   $totalW - number of distinct words seen so far
//   $words  - map of word => occurrence count
$totalN = 0;
$totalW = 0;
$words = array();

// explode_each() is defined outside this section; presumably it splits $ff on
// ' ' and calls the callback once per piece (TODO confirm against its
// definition). Its return value is kept in $arr as before.
// The tallies are captured by reference so the callback updates the very
// variables declared above, whatever scope this script happens to run in.
$arr = explode_each(' ', $ff, function($f) use (&$words, &$totalN, &$totalW) {
    $pos = 0;
    while (true)
    {
        // Find the next run of a-z / U+00C0..U+017F characters, searching from
        // byte offset $pos. PREG_OFFSET_CAPTURE reports byte offsets, so
        // advancing by strlen() (also bytes) keeps the cursor consistent.
        $b = preg_match("/\b[a-z\x{00C0}-\x{017F}]+\b/mui", $f, $m, PREG_OFFSET_CAPTURE, $pos);
        // 0 = no further match, false = PCRE error; in both cases stop
        // scanning this piece.
        if (!$b) break;
        $w = $m[0][0];
        $pos = $m[0][1] + strlen($w);
        // isset() avoids an "undefined array key" notice/warning the first
        // time a word is seen.
        if (!isset($words[$w])) { $words[$w] = 0; $totalW++; }
        $words[$w]++;
        $totalN++;
        // Progress marker every million occurrences. Checked per word (not
        // per piece) so a milestone cannot be stepped over when one piece
        // contains several words, and is not rewritten while the count idles.
        if (!($totalN%1000000)) file_put_contents("total-$totalN-$totalW.txt", '1');
    }
});
// Completion marker carrying the final totals in its file name.
file_put_contents("total-$totalN-$totalW-finished.txt", '1');
// Report the time spent in the counting pass; timer() is defined outside this section.
$matchTime = timer();
echo 'mathing took '.$matchTime.'s
';

// Order the tally by count, highest first, preserving the word keys.
arsort($words);
$sortTime = timer();
echo 'sorting took '.$sortTime.'s
';

// Number of distinct words collected.
echo 'Total words: '.count($words).'
';

// Write one "word<TAB>count" line per entry, in sorted order.
$lines = array();
foreach ($words as $word => $count)
{
    $lines[] = "$word\t$count\n";
}
file_put_contents('wiki-words.tab', implode('', $lines));
?>