#! /usr/bin/perl use strict; use warnings; use Benchmark qw(:all); use DBI; use Readonly; use List::Util qw/shuffle/; Readonly my $benchdb => 'bench'; Readonly my $nrecs => 50000; Readonly my $nquery => { # No of queries to run => No of iterations 1 => 50000, 3 => 50000, 10 => 50000, 30 => 20000, 100 => 1000, 300 => 1000, 1000 => 100, 3000 => 100, 10000 => 20, 30000 => 10, 100000 => 5, }; run_bench(); sub dbconnect { my $dbname = shift; my $dbh = DBI->connect("dbi:mysql:", "root", "", { PrintError => 1, RaiseError => 1, AutoCommit => 1, }) or die "Failed to connect: " . $DBI::errstr; $dbh->do("USE $dbname") if $dbname; $dbh->do("SET NAMES utf8"); return $dbh; } sub setup { my $dbh = dbconnect(); $dbh->do("DROP DATABASE IF EXISTS $benchdb"); $dbh->do("CREATE DATABASE $benchdb"); $dbh->do("USE $benchdb"); $dbh->do(q{ CREATE TABLE test1 ( id INT(11) auto_increment, data text character set utf8 collate utf8_bin NOT NULL, PRIMARY KEY (`id`) ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci }); $dbh->begin_work; my $sth = $dbh->prepare_cached(q{ INSERT INTO test1 (data) VALUES (?) }); for (1 .. $nrecs) { $sth->execute("Record #" . $_); } $sth->finish; $dbh->commit; } sub use_single_selects { my ($dbh, $recs) = @_; my $sth = $dbh->prepare_cached(q{ SELECT data FROM test1 WHERE id=? }); for my $id (@$recs) { $sth->execute($id); my @row = $sth->fetchrow_array; $sth->finish; } } sub use_union { my ($dbh, $recs) = @_; my $max_allowed_packet = get_max_allowed_packet($dbh); my $max_open_files = 0.8 * get_open_files_limit($dbh); # guestimate my @data = map { 'SELECT data FROM test1 WHERE id=' . $dbh->quote($_) } @$recs; my $len = 0; my $i = 0; for my $j (0 .. @data - 1) { $len += length($data[$j]) + length(' UNION '); if ($len >= $max_allowed_packet || ($i - $j) >= $max_open_files) { _do_union($dbh, join(' UNION ', @data[$i .. $j - 1] )); $len = 0; $i = $j; } } _do_union($dbh, join(' UNION ', @data[$i .. @data - 1] )); } sub _do_union { my ($dbh, $statement) = @_; my $sth = $dbh->prepare($statement); $sth->execute; while (my @row = $sth->fetchrow_array) { } } sub use_temp_table { my ($dbh, $recs, $with_transaction, $with_index) = @_; $dbh->begin_work if $with_transaction; my $tmp = q{CREATE TEMPORARY TABLE tmp (id INT(11))}; if ($with_index) { $tmp = q{CREATE TEMPORARY TABLE tmp (id INT(11), KEY id_key (`id`))}; } my $sth = $dbh->prepare_cached($tmp); $sth->execute; $sth = $dbh->prepare_cached(q{ INSERT INTO tmp (id) VALUES (?) }); for my $id (@$recs) { $sth->execute($id); } $sth->finish; $dbh->commit if $with_transaction; $sth = $dbh->prepare_cached(q{ SELECT id, data FROM test1 JOIN tmp USING (id) }); $sth->execute; while (my @row = $sth->fetchrow_array) { } $dbh->do(q{DROP TABLE tmp}); } sub use_temp_table_transaction { my ($dbh, $recs) = @_; use_temp_table($dbh, $recs, 1); } sub use_temp_table_index { my ($dbh, $recs) = @_; use_temp_table($dbh, $recs, 0, 1); } sub use_array_insert { my ($dbh, $recs, $with_transaction) = @_; $dbh->begin_work if $with_transaction; my $sth = $dbh->prepare_cached(q{ CREATE TEMPORARY TABLE tmp (id INT(11)) }); $sth->execute; $sth = $dbh->prepare_cached(q{ INSERT INTO tmp (id) VALUES (?) }); $sth->execute_array({ ArrayTupleStatus => \my @tuple_status }, $recs); $dbh->commit if $with_transaction; $sth = $dbh->prepare_cached(q{ SELECT id, data FROM test1 JOIN tmp USING (id) }); $sth->execute; while (my @row = $sth->fetchrow_array) { } $dbh->do(q{DROP TABLE tmp}); } sub use_array_insert_transaction { my ($dbh, $recs) = @_; use_array_insert($dbh, $recs, 1); } sub use_long_insert { my ($dbh, $recs) = @_; my $max_allowed_packet = get_max_allowed_packet($dbh); my $sth = $dbh->prepare_cached(q{ CREATE TEMPORARY TABLE tmp (id INT(11)) }); $sth->execute; my $statement = 'INSERT INTO tmp (id) VALUES '; my @data = map { '(' . $dbh->quote($_) . ')' } @$recs; my $len = length($statement); my $i = 0; for my $j (0 .. @data - 1) { $len += length($data[$j]) + 1; # +1 for comma if ($len >= $max_allowed_packet) { _do_insert($dbh, $statement, [ @data[$i .. $j - 1] ]); $len = length($statement); # reset $i = $j; } } _do_insert($dbh, $statement, [ @data[$i .. @data - 1] ]); $sth = $dbh->prepare_cached(q{ SELECT id, data FROM test1 JOIN tmp USING (id) }); $sth->execute; while (my @row = $sth->fetchrow_array) { } $dbh->do(q{DROP TABLE tmp}); } sub _do_insert { my ($dbh, $statement, $recs) = @_; my $sth = $dbh->prepare($statement . join(",", @$recs)); $sth->execute(); } sub use_in { my ($dbh, $recs) = @_; my $max_allowed_packet = get_max_allowed_packet($dbh); # Haven't re-coded for max_allowed_packet size as it's only an issue # for very large queries, where it ends up representing a small percentage of the time. my $sth = $dbh->prepare_cached( 'SELECT id, data FROM test1 WHERE id IN (' . join(',', map { $dbh->quote($_) } @$recs) . ')' ); $sth->execute; while (my @row = $sth->fetchrow_array) { } } sub use_or { my ($dbh, $recs) = @_; # Haven't re-coded for max_allowed_packet size as it's only an issue # for very large queries, where it ends up representing a small percentage of the time. my $sth = $dbh->prepare_cached( 'SELECT id, data FROM test1 WHERE ' . join(' OR ', map { 'id=' . $dbh->quote($_) } @$recs) ); $sth->execute; while (my @row = $sth->fetchrow_array) { } } sub get_variable { my $dbh = shift; my $varname = shift; my $sth = $dbh->prepare_cached(qq{ SHOW VARIABLES LIKE '$varname' }); $sth->execute; my ($name, $value) = $sth->fetchrow_array; $sth->finish; return $value; } sub get_max_allowed_packet { my $dbh = shift; return get_variable($dbh, 'max_allowed_packet'); } sub get_open_files_limit { my $dbh = shift; return get_variable($dbh, 'open_files_limit'); } sub run_bench { setup(); my $dbh = dbconnect($benchdb); for my $n (sort { $a <=> $b } keys %$nquery) { my @recs = shuffle(1 .. $n); print "\n\nRunning benchmark over $n queries, $nquery->{$n} iterations\n"; timethese($nquery->{$n}, { 'Single Selects' => sub { use_single_selects($dbh, \@recs) }, 'Temp Table' => sub { use_temp_table($dbh, \@recs) }, 'Temp Table + Transaction' => sub { use_temp_table_transaction($dbh, \@recs) }, 'Temp Table + Index' => sub { use_temp_table_index($dbh, \@recs) }, 'Temp Table + Array Insert' => sub { use_array_insert($dbh, \@recs) }, 'Temp Table + Array Insert + Transaction' => sub { use_array_insert_transaction($dbh, \@recs) }, 'Temp Table + Long Insert' => sub { use_long_insert($dbh, \@recs) }, 'Select + IN(...)' => sub { use_in($dbh, \@recs) }, 'Select + OR' => sub { use_or($dbh, \@recs) }, # This crashes at 10000 # 'Select Union' => sub { use_union($dbh, \@recs) }, } ); } } =head1 AUTHOR Jon Schutz =head1 COPYRIGHT & LICENSE Copyright 2008 Jon Schutz, all rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License. =cut