Fun with mb strlen/code
Appearance
<?php
// Set mbstring's internal encoding to UTF-8: the benchmark below calls
// mb_strlen() with a single argument, i.e. without an explicit encoding.
mb_internal_encoding('UTF-8');
/**
 * Regex-based character counter.
 *
 * Matches every single character of the subject in UTF-8 mode (the "s"
 * modifier lets "." match newlines as well) and reports how many captures
 * were collected for the first group.
 *
 * @param string $str Text to measure.
 * @return int Number of captures in group 1.
 */
function old_mb_strlen( $str ) {
	preg_match_all( '/(.)/us', $str, $captures );

	// Index 1 holds the captures of the first (and only) group; one entry
	// is recorded per matched character.
	$perCharacter = $captures[1];

	return count( $perCharacter );
}
/**
 * Byte-histogram character counter.
 *
 * Builds a per-byte-value frequency table of the string and adds up the
 * frequencies of two byte ranges:
 *  - 0x00..0x7f (ASCII bytes), and
 *  - 0xc0..0xfe (multibyte sequence heads).
 * Bytes in 0x80..0xbf and the byte 0xff contribute nothing to the total.
 *
 * @param string $str Text to measure.
 * @return int Sum of the frequencies of the two byte ranges above.
 */
function new_mb_strlen( $str ) {
	// Mode 0 (the default) yields an array indexed by byte value 0..255.
	$histogram = count_chars( $str );

	// ASCII bytes: the first 0x80 entries.
	$asciiBytes = array_sum( array_slice( $histogram, 0, 0x80 ) );

	// Multibyte sequence heads: entries 0xc0 up to, but not including, 0xff.
	$leadBytes = array_sum( array_slice( $histogram, 0xc0, 0xff - 0xc0 ) );

	return $asciiBytes + $leadBytes;
}
// Benchmark driver: for every test file, call each listed function on the
// file's contents $rounds times and print the function name, the value it
// returned and the mean wall-clock time per call in milliseconds.

// Callables under test; each is invoked with the file contents as its only
// argument.
$benchme = array(
	'strlen',
	'mb_strlen',
	'old_mb_strlen',
	'new_mb_strlen' );

// Input files, resolved relative to the current working directory.
$testfiles = array(
	'washington.txt',
	'berlin.txt',
	'bulgakov.txt',
	'tokyo.txt',
	'young.txt' );

// Number of timed calls per function and file.
$rounds = 5;

foreach( $testfiles as $filename ) {
	$data = file_get_contents( $filename );
	if ( $data === false ) {
		// file_get_contents() returns false when the file cannot be read.
		// Benchmarking that value would only produce a misleading
		// "0 chars" row, so report the problem and move on.
		print "Skipping $filename: unable to read file\n\n";
		continue;
	}
	print "Testing $filename:\n";
	foreach( $benchme as $function ) {
		$start = microtime( true );
		for( $i = 0; $i < $rounds; $i++ ) {
			$result = $function( $data );
		}
		// Mean seconds per call, converted to milliseconds.
		$delta = ((microtime( true ) - $start) / $rounds) * 1000.0;
		// $result holds the value returned by the last round.
		printf( "%20s %10d chars %8.3fms\n", $function, $result, $delta );
	}
	print "\n";
}
?>