- 2009/1/15 木曜日 2:22:59
- javascript | mysql | perl | vimperator
前回のエントリで書いたように、ucs2をcharsetにするとascii部分がutf8に比べ2倍になってしまう。これを一部解決するために、asciiのみで構成されることが保障されているフィールドはそのフィールドだけasciiにしてしまえばいい。そうすることによって、asciiのみで構成されたフィールドをkeyに検索といった場合でもutf8に比べ遜色のないパフォーマンスを得ることができる。
まずは、ベンチコードを例の如くはりつけ。環境等は前エントリと同一。
ベンチコード
insert用
#!/usr/bin/perl use strict; use warnings; use feature qw/say/; use utf8; use DBI; use Encode; use List::MoreUtils qw/apply/; use Benchmark; use Digest::MD5; my @engines = qw/myisam innodb/; my @charsets = qw/utf8 ucs2/; my $bench_count = 100000; my $client_dsn = 'DBI:mysql:database=sandbox;host=localhost;mysql_read_default_file=/etc/mysql/my.cnf'; my $server_dsn = $client_dsn . ';mysql_server_prepare=1'; my @idpass = qw/sandbox sandbox/; my @init_query = ( 'drop table if exists %table', q{ create table %table( id int unsigned not null auto_increment, name char(3) not null, b64digest char(22) %field_charset not null, created datetime not null, primary key(id), index(b64digest) ) ENGINE=%engine ROW_FORMAT=%row_format DEFAULT CHARSET=%charset }, ); my $query = Encode::encode('utf8', q{insert into %table values(null, 'たなか', ?, now())}); my %target = (); for my $engine ( @engines ) { for my $charset ( @charsets ) { for my $field_charset ( 'ascii', '' ) { for my $row_format ( 'DEFAULT', 'REDUNDANT') { if ( $engine eq 'myisam' && $row_format ne 'DEFAULT' ) { next; } my $key = join '_', ($engine, $charset, $field_charset || 'non', $row_format); $target{$key} = { dbh => DBI->connect( $server_dsn, @idpass, { AutoCommit => 0, }, ), engine => $engine, charset => $charset, field_charset => $field_charset, row_format => $row_format, table => 'sandbox_' . $key, }; for my $query ( @init_query ) { $target{$key}->{dbh}->do(apply { s/%(table|charset|engine|field_charset|row_format)/$target{$key}->{$1}/g } $query); } $target{$key}->{sth} = $target{$key}->{dbh}->prepare(apply { s/%(table|charset|engine)/$target{$key}->{$1}/g } $query); } } } } my $md5 = Digest::MD5->new(); Benchmark::cmpthese($bench_count,{ map { my $target_sth = $target{$_}->{sth}; $_ => sub { $md5->reset; $md5->add(int(rand(10000))); $target_sth->execute($md5->b64digest); } } keys %target }); $_->{dbh}->commit for values %target; $_->{dbh}->disconnect for values %target;
select用
#!/usr/bin/perl use strict; use warnings; use feature qw/say/; use utf8; use DBI; use List::MoreUtils qw/apply/; use Benchmark; use Digest::MD5; my @engines = qw/myisam innodb/; my @charsets = qw/utf8 ucs2/; my $bench_count = 30000; my $client_dsn = 'DBI:mysql:database=sandbox;host=localhost;mysql_read_default_file=/etc/mysql/my.cnf'; my $server_dsn = $client_dsn . ';mysql_server_prepare=1'; my @idpass = qw/sandbox sandbox/; my $query = q{select * from %table where b64digest = ?}; my %target = (); for my $engine ( @engines ) { for my $charset ( @charsets ) { for my $field_charset ( 'ascii', '' ) { for my $row_format ( 'DEFAULT', 'REDUNDANT') { if ( $engine eq 'myisam' && $row_format ne 'DEFAULT' ) { next; } my $key = join '_', ($engine, $charset, $field_charset || 'non', $row_format); $target{$key} = { dbh => DBI->connect( $server_dsn, @idpass, ), engine => $engine, charset => $charset, field_charset => $field_charset, row_format => $row_format, table => 'sandbox_' . $key, }; $target{$key}->{sth} = $target{$key}->{dbh}->prepare(apply { s/%(table|charset|engine)/$target{$key}->{$1}/g } $query); } } } } my $md5 = Digest::MD5->new(); Benchmark::cmpthese($bench_count,{ map { my $target_sth = $target{$_}->{sth}; $_ => sub { $md5->reset; $md5->add(int(rand(10000))); $target_sth->execute($md5->b64digest); $target_sth->fetch; } } keys %target });
結果
b64digest char(22)
insert
Rate innodb_utf8_ascii_DEFAULT innodb_ucs2_non_DEFAULT myisam_ucs2_ascii_DEFAULT innodb_utf8_non_REDUNDANT innodb_ucs2_non_REDUNDANT innodb_ucs2_ascii_DEFAULT myisam_utf8_non_DEFAULT myisam_ucs2_non_DEFAULT myisam_utf8_ascii_DEFAULT innodb_utf8_ascii_REDUNDANT innodb_ucs2_ascii_REDUNDANT innodb_utf8_non_DEFAULT innodb_utf8_ascii_DEFAULT 32895/s -- -1% -2% -2% -2% -8% -10% -10% -13% -14% -14% -15% innodb_ucs2_non_DEFAULT 33113/s 1% -- -1% -1% -1% -8% -9% -10% -12% -13% -13% -15% myisam_ucs2_ascii_DEFAULT 33445/s 2% 1% -- -0% -0% -7% -8% -9% -11% -12% -12% -14% innodb_utf8_non_REDUNDANT 33445/s 2% 1% 0% -- -0% -7% -8% -9% -11% -12% -12% -14% innodb_ucs2_non_REDUNDANT 33557/s 2% 1% 0% 0% -- -6% -8% -8% -11% -12% -12% -13% innodb_ucs2_ascii_DEFAULT 35842/s 9% 8% 7% 7% 7% -- -1% -2% -5% -6% -6% -8% myisam_utf8_non_DEFAULT 36364/s 11% 10% 9% 9% 8% 1% -- -1% -4% -5% -5% -6% myisam_ucs2_non_DEFAULT 36630/s 11% 11% 10% 10% 9% 2% 1% -- -3% -4% -4% -5% myisam_utf8_ascii_DEFAULT 37736/s 15% 14% 13% 13% 12% 5% 4% 3% -- -1% -1% -3% innodb_utf8_ascii_REDUNDANT 38168/s 16% 15% 14% 14% 14% 6% 5% 4% 1% -- -0% -2% innodb_ucs2_ascii_REDUNDANT 38168/s 16% 15% 14% 14% 14% 6% 5% 4% 1% 0% -- -2% innodb_utf8_non_DEFAULT 38760/s 18% 17% 16% 16% 16% 8% 7% 6% 3% 2% 2% --
まーinsertはたいして差なし。変換コストなんかも気にしないでいいかなぁ?
select
innodb_utf8_non_REDUNDANT 8333/s -- -11% -19% -32% -35% -35% -38% -40% -59% -64% -65% -65% innodb_ucs2_non_REDUNDANT 9317/s 12% -- -10% -24% -27% -28% -30% -33% -55% -60% -61% -61% innodb_ucs2_non_DEFAULT 10309/s 24% 11% -- -16% -20% -20% -23% -26% -50% -55% -57% -57% innodb_ucs2_ascii_DEFAULT 12295/s 48% 32% 19% -- -4% -5% -8% -12% -40% -47% -48% -48% innodb_ucs2_ascii_REDUNDANT 12821/s 54% 38% 24% 4% -- -0% -4% -8% -38% -44% -46% -46% innodb_utf8_non_DEFAULT 12876/s 55% 38% 25% 5% 0% -- -4% -8% -37% -44% -46% -46% innodb_utf8_ascii_DEFAULT 13393/s 61% 44% 30% 9% 4% 4% -- -4% -35% -42% -44% -44% innodb_utf8_ascii_REDUNDANT 13953/s 67% 50% 35% 13% 9% 8% 4% -- -32% -40% -41% -41% myisam_utf8_non_DEFAULT 20548/s 147% 121% 99% 67% 60% 60% 53% 47% -- -11% -14% -14% myisam_ucs2_non_DEFAULT 23077/s 177% 148% 124% 88% 80% 79% 72% 65% 12% -- -3% -3% myisam_ucs2_ascii_DEFAULT 23810/s 186% 156% 131% 94% 86% 85% 78% 71% 16% 3% -- -0% myisam_utf8_ascii_DEFAULT 23810/s 186% 156% 131% 94% 86% 85% 78% 71% 16% 3% 0% --
5.x以降のinnodbのデフォルトROW_FORMATはCOMPACTなんですが、やはりutf8のchar型の場合においてREDUNDANTに比べうまいこと動いていることがわかるベンチ結果ですね。でもって問題のasciiですが基本常にascii指定のほうがパフォーマンスが上になりましたね。
b64digest varchar(22)
フィールドをvarcharにしてみる。
insert
Rate innodb_ucs2_non_REDUNDANT innodb_ucs2_ascii_DEFAULT innodb_ucs2_non_DEFAULT myisam_ucs2_ascii_DEFAULT innodb_utf8_ascii_DEFAULT myisam_utf8_non_DEFAULT myisam_ucs2_non_DEFAULT innodb_utf8_ascii_REDUNDANT innodb_utf8_non_REDUNDANT innodb_utf8_non_DEFAULT myisam_utf8_ascii_DEFAULT innodb_ucs2_ascii_REDUNDANT innodb_ucs2_non_REDUNDANT 34247/s -- -3% -5% -5% -6% -7% -8% -9% -9% -9% -10% -11% innodb_ucs2_ascii_DEFAULT 35336/s 3% -- -2% -2% -3% -4% -5% -6% -6% -6% -7% -8% innodb_ucs2_non_DEFAULT 35971/s 5% 2% -- -1% -1% -3% -4% -4% -4% -5% -5% -6% myisam_ucs2_ascii_DEFAULT 36232/s 6% 3% 1% -- -0% -2% -3% -4% -4% -4% -5% -5% innodb_utf8_ascii_DEFAULT 36364/s 6% 3% 1% 0% -- -1% -3% -3% -3% -4% -4% -5% myisam_utf8_non_DEFAULT 36900/s 8% 4% 3% 2% 1% -- -1% -2% -2% -2% -3% -4% myisam_ucs2_non_DEFAULT 37313/s 9% 6% 4% 3% 3% 1% -- -1% -1% -1% -2% -3% innodb_utf8_ascii_REDUNDANT 37594/s 10% 6% 5% 4% 3% 2% 1% -- -0% -0% -1% -2% innodb_utf8_non_REDUNDANT 37594/s 10% 6% 5% 4% 3% 2% 1% 0% -- -0% -1% -2% innodb_utf8_non_DEFAULT 37736/s 10% 7% 5% 4% 4% 2% 1% 0% 0% -- -1% -2% myisam_utf8_ascii_DEFAULT 38023/s 11% 8% 6% 5% 5% 3% 2% 1% 1% 1% -- -1% innodb_ucs2_ascii_REDUNDANT 38314/s 12% 8% 7% 6% 5% 4% 3% 2% 2% 2% 1% --
ほぼ変化なし
select
Rate innodb_ucs2_non_REDUNDANT innodb_ucs2_non_DEFAULT innodb_ucs2_ascii_REDUNDANT innodb_utf8_ascii_REDUNDANT innodb_utf8_ascii_DEFAULT innodb_utf8_non_DEFAULT innodb_ucs2_ascii_DEFAULT innodb_utf8_non_REDUNDANT myisam_utf8_non_DEFAULT myisam_ucs2_non_DEFAULT myisam_utf8_ascii_DEFAULT myisam_ucs2_ascii_DEFAULT innodb_ucs2_non_REDUNDANT 8902/s -- -3% -24% -25% -27% -27% -28% -32% -58% -60% -60% -62% innodb_ucs2_non_DEFAULT 9202/s 3% -- -21% -23% -24% -25% -25% -30% -57% -58% -59% -60% innodb_utf8_non_REDUNDANT 11719/s 32% 27% -- -2% -4% -4% -5% -11% -45% -47% -47% -50% innodb_utf8_ascii_REDUNDANT 11905/s 34% 29% 2% -- -2% -2% -4% -9% -44% -46% -46% -49% innodb_ucs2_ascii_DEFAULT 12146/s 36% 32% 4% 2% -- -0% -2% -7% -43% -45% -45% -48% innodb_ucs2_ascii_REDUNDANT 12195/s 37% 33% 4% 2% 0% -- -1% -7% -43% -45% -45% -48% innodb_utf8_ascii_DEFAULT 12346/s 39% 34% 5% 4% 2% 1% -- -6% -42% -44% -44% -47% innodb_utf8_non_DEFAULT 13100/s 47% 42% 12% 10% 8% 7% 6% -- -39% -41% -41% -44% myisam_utf8_ascii_DEFAULT 21429/s 141% 133% 83% 80% 76% 76% 74% 64% -- -3% -4% -8% myisam_utf8_non_DEFAULT 22059/s 148% 140% 88% 85% 82% 81% 79% 68% 3% -- -1% -5% myisam_ucs2_ascii_DEFAULT 22222/s 150% 141% 90% 87% 83% 82% 80% 70% 4% 1% -- -4% myisam_ucs2_non_DEFAULT 23256/s 161% 153% 98% 95% 91% 91% 88% 78% 9% 5% 5% --
varcharだとutf8のままが若干早い。変換コストかな。
まとめ
innodb (defaultが)utf8 varchar ROW_FORMAT=COMPACT以外の組み合わせの場合は、できるだけasciiを指定しておくといいかんじになりそう。
参考
[mysql 14237] フィールドごとのキャラクタセットの異なる指定
コメント:0
トラックバック:0
- この記事のトラックバック URL
- https://blog.everqueue.com/chiba/2009/01/15/108/trackback/
- トラックバックの送信元リスト
- mysqlでcharsetをucs2にした場合のasciiのみのフィールド - へぼい日記 より