';

// ---------------------------------------------------------------------------
// Count how often each word occurs in $ff, then write the frequency table to
// 'wiki-words.tab' (one "word<TAB>count" line per word, highest count first).
// ---------------------------------------------------------------------------

$totalN = 0;       // total number of word occurrences counted so far
$totalW = 0;       // number of distinct words counted so far
$words  = array(); // word => number of occurrences

// explode_each() is defined elsewhere. It is given the ' ' delimiter, the
// input $ff and a callback that is invoked with one piece of text ($f).
// NOTE(review): presumably it calls the callback once per space-separated
// piece of $ff - confirm against its definition.
//
// The counters are captured by reference instead of via `global`, so the
// script also works when it is included from inside a function.
$arr = explode_each(' ', $ff, function ($f) use (&$words, &$totalN, &$totalW) {
    // A word is a run of ASCII letters and/or characters in the range
    // U+00C0..U+017F, delimited by word boundaries. The `i` flag only affects
    // matching: the matched text is used as the key unchanged, so differently
    // cased spellings are counted as separate words.
    $found = preg_match_all("/\b[a-z\x{00C0}-\x{017F}]+\b/mui", $f, $m);
    if (!$found) {
        // No match (0) or a PCRE error (false): nothing to count in this piece.
        return;
    }

    foreach ($m[0] as $w) {
        // isset() avoids the "undefined array key" diagnostic that a plain
        // read of $words[$w] raises the first time a word is seen.
        if (!isset($words[$w])) {
            $words[$w] = 0;
            $totalW++;
        }
        $words[$w]++;
        $totalN++;

        // Drop a progress marker file at every 1,000,000th occurrence.
        // Checking right after the increment means a milestone is never
        // skipped when one piece contains several words, and no marker is
        // written while the count is still 0.
        if (!($totalN % 1000000)) {
            file_put_contents("total-$totalN-$totalW.txt", '1');
        }
    }
});

// Marker file signalling that counting has completed, with the final totals.
file_put_contents("total-$totalN-$totalW-finished.txt", '1');
// timer() is defined elsewhere; NOTE(review): presumably returns the seconds
// elapsed since its previous call - confirm.
echo 'mathing took '.timer()."s\n";

// Sort by count, descending, keeping the word keys attached to their counts.
arsort($words);
echo 'sorting took '.timer()."s\n";

echo 'Total words: '.count($words)."\n";

// Serialise the table as tab-separated "word<TAB>count" lines.
$out = '';
foreach ($words as $w => $n) {
    $out .= "$w\t$n\n";
}
file_put_contents('wiki-words.tab', $out);
?>