mysqlの保存用のcharsetをucs2にすると、utf8の場合に比べて速くなる状況があるんじゃないのだろうかとふと思い立ちいろいろとベンチマークとってみた。視点は以下の三つ。
・myisam || innodb
・ucs2 || utf8
・client-prepare || serverside-prepare
mysqlのバージョンはmysql-server-5.0.51a-20(debian lenny)
perl v5.10.0
DBI 1.607
DBD::mysqlは4.0.10にBug #41815のパッチあてたもの。
ベンチコード
insert用
#!/usr/bin/perl use strict; use warnings; use feature qw/say/; use utf8; use DBI; use List::MoreUtils qw/apply/; use Benchmark; my @engines = qw/myisam innodb/; my @charsets = qw/utf8 ucs2/; my $bench_count = 100000; my $insert_str = 'ほげ' x 100; my $client_dsn = 'DBI:mysql:database=sandbox;host=localhost;mysql_read_default_file=/etc/mysql/my.cnf'; my $server_dsn = $client_dsn . ';mysql_server_prepare=1'; my @idpass = qw/sandbox sandbox/; my @init_query = ( 'drop table if exists %table', q{ create table %table( id int unsigned not null auto_increment, name varchar(1024) not null, created datetime not null, primary key(id) ) ENGINE=%engine DEFAULT CHARSET=%charset }, ); my $query = q{insert into %table values(null, ?, now())}; my %target = (); for my $engine ( @engines ) { for my $charset ( @charsets ) { for my $prepare_type ( qw/client server/ ) { my $key = join '_', ($engine, $charset, $prepare_type); $target{$key} = { dbh => DBI->connect( $prepare_type eq 'client' ? $client_dsn : $server_dsn, @idpass, { AutoCommit => 0, }, ), engine => $engine, charset => $charset, table => 'sandbox_' . $key, }; for my $query ( @init_query ) { $target{$key}->{dbh}->do(apply { s/%(table|charset|engine)/$target{$key}->{$1}/g } $query); } $target{$key}->{sth} = $target{$key}->{dbh}->prepare(apply { s/%(table|charset|engine)/$target{$key}->{$1}/g } $query); } } } Benchmark::cmpthese($bench_count,{ map { my $target_sth = $target{$_}->{sth}; $_ => sub { $target_sth->execute($insert_str); } } keys %target }); $_->{dbh}->commit for values %target; $_->{dbh}->disconnect for values %target;
select用
#!/usr/bin/perl use strict; use warnings; use feature qw/say/; use utf8; use DBI; use List::MoreUtils qw/apply/; use Benchmark; my @engines = qw/myisam innodb/; my @charsets = qw/utf8 ucs2/; my $bench_count = 100000; my $client_dsn = 'DBI:mysql:database=sandbox;host=localhost;mysql_read_default_file=/etc/mysql/my.cnf'; my $server_dsn = $client_dsn . ';mysql_server_prepare=1'; my @idpass = qw/sandbox sandbox/; my $query = q{select * from %table where id = ?}; my %target = (); for my $engine ( @engines ) { for my $charset ( @charsets ) { for my $prepare_type ( qw/client server/ ) { my $key = join '_', ($engine, $charset, $prepare_type); $target{$key} = { dbh => DBI->connect( $prepare_type eq 'client' ? $client_dsn : $server_dsn, @idpass ), engine => $engine, charset => $charset, table => 'sandbox_' . $key, }; $target{$key}->{sth} = $target{$key}->{dbh}->prepare(apply { s/%(table|charset|engine)/$target{$key}->{$1}/g } $query); } } } Benchmark::cmpthese($bench_count,{ map { my $target_sth = $target{$_}->{sth}; $_ => sub { $target_sth->execute(150000); $target_sth->fetch; } } keys %target });
/etc/mysql/my.cnfには
[client] default-character-set=utf8 [mysqld] query_cache_size = 0
がかいてある。
で、ベンチ結果を以下にはっつけて少し解釈してみますが、すべての解釈には「今回の環境でこのコードにおいては」という前提付きです。
insert
my $insert_str = ‘ほげ’ x 10;
Rate myisam_utf8_client innodb_ucs2_client myisam_ucs2_client innodb_utf8_client innodb_utf8_server myisam_utf8_server innodb_ucs2_server myisam_ucs2_server myisam_utf8_client 36101/s -- -0% -6% -7% -10% -15% -16% -16% innodb_ucs2_client 36232/s 0% -- -5% -7% -9% -15% -16% -16% myisam_ucs2_client 38314/s 6% 6% -- -2% -4% -10% -11% -11% innodb_utf8_client 38911/s 8% 7% 2% -- -3% -9% -9% -10% innodb_utf8_server 40000/s 11% 10% 4% 3% -- -6% -7% -7% myisam_utf8_server 42553/s 18% 17% 11% 9% 6% -- -1% -1% innodb_ucs2_server 42918/s 19% 18% 12% 10% 7% 1% -- -0% myisam_ucs2_server 43103/s 19% 19% 13% 11% 8% 1% 0% --
my $insert_str = ‘ほげ’ x 100;
Rate innodb_ucs2_client innodb_utf8_client myisam_ucs2_client myisam_utf8_client innodb_utf8_server myisam_utf8_server innodb_ucs2_server myisam_ucs2_server innodb_ucs2_client 29326/s -- -5% -8% -9% -12% -25% -28% -30% innodb_utf8_client 30769/s 5% -- -3% -5% -7% -21% -24% -27% myisam_ucs2_client 31746/s 8% 3% -- -2% -4% -19% -22% -25% myisam_utf8_client 32362/s 10% 5% 2% -- -3% -17% -20% -23% innodb_utf8_server 33223/s 13% 8% 5% 3% -- -15% -18% -21% myisam_utf8_server 39063/s 33% 27% 23% 21% 18% -- -4% -7% innodb_ucs2_server 40486/s 38% 32% 28% 25% 22% 4% -- -4% myisam_ucs2_server 42194/s 44% 37% 33% 30% 27% 8% 4% --
my $insert_str = ‘ほげ’ x 500;
Rate innodb_utf8_client innodb_ucs2_client myisam_utf8_client myisam_ucs2_client innodb_utf8_server innodb_ucs2_server myisam_utf8_server myisam_ucs2_server innodb_utf8_client 14948/s -- -4% -22% -26% -44% -47% -55% -58% innodb_ucs2_client 15552/s 4% -- -19% -23% -42% -45% -53% -56% myisam_utf8_client 19084/s 28% 23% -- -6% -29% -32% -43% -46% myisam_ucs2_client 20202/s 35% 30% 6% -- -25% -28% -39% -43% innodb_utf8_server 26810/s 79% 72% 40% 33% -- -5% -19% -25% innodb_ucs2_server 28249/s 89% 82% 48% 40% 5% -- -15% -21% myisam_utf8_server 33223/s 122% 114% 74% 64% 24% 18% -- -7% myisam_ucs2_server 35587/s 138% 129% 86% 76% 33% 26% 7% --
・myisam || innodb
当然myisamのほうが早い
・ucs2 || utf8
ucs2が若干早い気がするけど誤差の範囲
・client-prepare || serverside-prepare
serversideはデータ量が増えるにつれ明らかにclientに比べ早くなる。
select
上記の’ほげ’ x 500のinsert後に実行
Rate myisam_utf8_client innodb_ucs2_client myisam_utf8_server myisam_ucs2_client innodb_utf8_client myisam_ucs2_server innodb_utf8_server innodb_ucs2_server myisam_utf8_client 25381/s -- -0% -4% -6% -13% -16% -16% -17% innodb_ucs2_client 25381/s 0% -- -4% -6% -13% -16% -16% -17% myisam_utf8_server 26455/s 4% 4% -- -2% -10% -12% -12% -14% myisam_ucs2_client 27027/s 6% 6% 2% -- -8% -11% -11% -12% innodb_utf8_client 29240/s 15% 15% 11% 8% -- -3% -3% -5% myisam_ucs2_server 30211/s 19% 19% 14% 12% 3% -- -0% -2% innodb_utf8_server 30211/s 19% 19% 14% 12% 3% 0% -- -2% innodb_ucs2_server 30675/s 21% 21% 16% 13% 5% 2% 2% --
うーん・・・。たいした差はなし。
思いつくutf8に対するucs2のメリット
・日本語だけで埋め尽くされているフィールドの実容量が約2/3になる(=onメモリ可能なレコード数やページあたりのレコード数が増える)
・バイト数 / 2 = 文字数になる。よって下記のようにchar_lengthをとるものだとかなり早くなる。まぁそんなにうれしいってほどではないけど。
mysql> select max(char_length(name)) from sandbox_innodb_utf8_client; +------------------------+ | max(char_length(name)) | +------------------------+ | 1000 | +------------------------+ 1 row in set (18.02 sec) mysql> select max(char_length(name)) from sandbox_innodb_ucs2_client; +------------------------+ | max(char_length(name)) | +------------------------+ | 1000 | +------------------------+ 1 row in set (12.46 sec) mysql> select max(char_length(name)) from sandbox_myisam_utf8_client; +------------------------+ | max(char_length(name)) | +------------------------+ | 1000 | +------------------------+ 1 row in set (14.99 sec) mysql> select max(char_length(name)) from sandbox_myisam_ucs2_client; +------------------------+ | max(char_length(name)) | +------------------------+ | 1000 | +------------------------+ 1 row in set (7.51 sec)
ucs2のデメリット
・ASCII範囲の容量が2倍になる(myisamのCHAR型とinnodb(ROW_FORMAT=REDUNDANT)のCHAR型を除く)
ぐらい?結果的にucs2に決定的なメリットがあるわけでもないけれど、デメリットもないならばutf8にしてる人はみんなucs2にすればいいよ!とかおもうのだけどどうなんだろう。誰かデメリットが分かる人がいればおしえてください。ただ、mysql6.0.4からはutf8がサロゲートペアの範囲もサポートする(5.xはBMPのみのサポート)ので互換性がなくなるというデメリットは発生しそう。その場合は同様にサポートされたutf32を使うといいんだろうけど容量的なメリットはなくなってしまう。utf16にすると容量的なメリットは維持できるけど、バイト数と文字数の固定関係は得られない。悩みどころ。
クライアントキャラクタセットにucs2が使えないので、utf8からの変換コストが問題になりそうだなぁとおもったけどベンチとってみるとそうでもなかったのが少し意外だった。
番外編:myisamのfixed
myisamはすべてnot nullで固定長のフィールドだと、ROW_FORMATがFIXEDになって早くなる。この話とucs2のバイト数固定性能が関連するんじゃないかと少し探ったけど今のところ関連は見いだせず。
insert
my $insert_str = ‘ほげ’ x 100;
char(255)
Rate innodb_ucs2_client myisam_ucs2_client myisam_utf8_client innodb_utf8_client innodb_utf8_server innodb_ucs2_server myisam_utf8_server myisam_ucs2_server innodb_ucs2_client 29499/s -- -2% -6% -6% -8% -21% -26% -28% myisam_ucs2_client 30211/s 2% -- -4% -4% -6% -19% -24% -27% myisam_utf8_client 31348/s 6% 4% -- -0% -2% -16% -21% -24% innodb_utf8_client 31447/s 7% 4% 0% -- -2% -16% -21% -24% innodb_utf8_server 32051/s 9% 6% 2% 2% -- -14% -19% -22% innodb_ucs2_server 37313/s 26% 24% 19% 19% 16% -- -6% -9% myisam_utf8_server 39683/s 35% 31% 27% 26% 24% 6% -- -4% myisam_ucs2_server 41152/s 40% 36% 31% 31% 28% 10% 4% --
my $insert_str = ‘ほげ’ x 100;
varchar(255)
Rate innodb_ucs2_client myisam_ucs2_client innodb_utf8_client myisam_utf8_client innodb_utf8_server myisam_utf8_server innodb_ucs2_server myisam_ucs2_server innodb_ucs2_client 30395/s -- -8% -12% -13% -19% -21% -24% -25% myisam_ucs2_client 33003/s 9% -- -4% -5% -12% -14% -18% -18% innodb_utf8_client 34483/s 13% 4% -- -1% -8% -10% -14% -15% myisam_utf8_client 34843/s 15% 6% 1% -- -7% -9% -13% -14% innodb_utf8_server 37453/s 23% 13% 9% 7% -- -3% -7% -7% myisam_utf8_server 38462/s 27% 17% 12% 10% 3% -- -4% -5% innodb_ucs2_server 40161/s 32% 22% 16% 15% 7% 4% -- -1% myisam_ucs2_server 40486/s 33% 23% 17% 16% 8% 5% 1% --
select
char(255)
Rate innodb_ucs2_client innodb_utf8_server myisam_utf8_client innodb_utf8_client innodb_ucs2_server myisam_ucs2_client myisam_utf8_server myisam_ucs2_server innodb_ucs2_client 26385/s -- -3% -6% -6% -9% -11% -15% -18% innodb_utf8_server 27174/s 3% -- -3% -3% -7% -8% -13% -15% myisam_utf8_client 28011/s 6% 3% -- -0% -4% -6% -10% -13% innodb_utf8_client 28090/s 6% 3% 0% -- -4% -5% -10% -12% innodb_ucs2_server 29155/s 10% 7% 4% 4% -- -2% -6% -9% myisam_ucs2_client 29674/s 12% 9% 6% 6% 2% -- -4% -7% myisam_utf8_server 31056/s 18% 14% 11% 11% 7% 5% -- -3% myisam_ucs2_server 32051/s 21% 18% 14% 14% 10% 8% 3% --
varchar(255)
Rate myisam_ucs2_server myisam_utf8_server innodb_ucs2_client innodb_ucs2_server myisam_utf8_client innodb_utf8_server myisam_ucs2_client innodb_utf8_client myisam_ucs2_server 27855/s -- -3% -3% -4% -8% -8% -12% -14% myisam_utf8_server 28736/s 3% -- -0% -1% -5% -5% -9% -11% innodb_ucs2_client 28736/s 3% 0% -- -1% -5% -5% -9% -11% innodb_ucs2_server 29070/s 4% 1% 1% -- -4% -4% -8% -10% myisam_utf8_client 30211/s 8% 5% 5% 4% -- -1% -5% -7% innodb_utf8_server 30395/s 9% 6% 6% 5% 1% -- -4% -6% myisam_ucs2_client 31646/s 14% 10% 10% 9% 5% 4% -- -3% innodb_utf8_client 32468/s 17% 13% 13% 12% 7% 7% 3% --
コメント:0
トラックバック:0
- この記事のトラックバック URL
- https://blog.everqueue.com/chiba/2009/01/12/85/trackback/
- トラックバックの送信元リスト
- mysqlの内部キャラセットはucs2にするといいんじゃないだろうか - へぼい日記 より