Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
devel:languages:perl
request-tracker
rt-mysql2pg
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File rt-mysql2pg of Package request-tracker
#!/usr/bin/perl # # http://wiki-archive.bestpractical.com/view/rt-mysql2pg # use strict; use DBI; use Data::Dumper; use DBD::Pg qw(PG_BYTEA); use Encode qw(is_utf8); use Getopt::Long; use MIME::Base64 qw(encode_base64); use MIME::QuotedPrint; my $version='1.0'; my ($verbose, $dryrun); my $tsvector_column = 'trigrams'; sub usage { my ($exitcode) = @_; print STDERR <<EOF; rt-mysql2pg [options] Version: $version Converts Request Tracker database from MySQL to PostgreSQL Do a copy of every record in user tables from a source database to a destination database. A destination schema must exists (same as source schema), so inserts can be done without error. Tables in destination database are deleted before doing a copy! options: -c,--copy copy data, --src-dsn & --dst-dsn must be present --[no]fulltext setup/remove fulltext support, --dst-dsn must be present, can be combined with --copy --src-dsn dsn perl DBI data source name (e.g. dbi:mysql:dbname=rt3) --src-user user perl DBI user name --src-password pass perl DBI password --dst-dsn dsn perl DBI data source name (e.g. dbi:Pg:dbname=rt3) --dst-user user perl DBI user name --dst-password pass perl DBI password -n,--dry-run dry run (no db modifications) -v,--verbose run verbosly (incremental) -h,--help help usage EOF exit($exitcode) if defined $exitcode; } sub user_tables { my ($dbh) = @_; my $sth = $dbh->table_info(undef, undef, undef, 'TABLE'); $sth->execute(); my @user_tables; while ( my $r = $sth->fetchrow_arrayref() ) { my ($table_cat, $table_schem, $table_name, $table_type, $remarks) = @$r; next unless $table_type eq 'TABLE'; next if $dbh->{Driver}->{Name} eq 'Pg' && $table_schem ne 'public'; push @user_tables, $table_name; } return \@user_tables; } sub user_seqs { my ($dbh) = @_; return $dbh->selectcol_arrayref('SELECT sequence_name FROM information_schema.sequences'); } sub column_info { my ($dbh, $table) = @_; my $schema = $dbh->{Driver}->{Name} eq 'Pg' ? 'public' : undef; my $sth = $dbh->column_info(undef, $schema, $table, undef); $sth->execute(); my (@coln, %colt); while ( my $r = $sth->fetchrow_arrayref() ) { my ($table_cat, $table_schem, $table_name, $column_name, $data_type, $type_name, $column_size, $buffer_length, $decimal_digits, $num_prec_radix, $nullable, $remarks, $column_def, $sql_data_type, $sql_datetime_sub, $char_octet_length, $ordinal_position, $is_nullable, $type_name_and_size) = @$r; push @coln, lc($column_name); $colt{lc($column_name)} = { 'data_type' => $data_type, 'type_name' => lc($type_name), 'type_name_and_size' => lc($type_name_and_size), }; } return (\@coln, \%colt); } sub copy_table { my ($table, $dbh_src, $dbh_dst) = @_; my ($scoln, $scolt) = column_info($dbh_src, $table); my ($dcoln, $dcolt) = column_info($dbh_dst, lc($table)); my ($s_nrows) = $dbh_src->selectrow_array("SELECT count(*) FROM $table"); my $ncols = @$dcoln; my $slist = join(',', sort keys %$scolt); my $dlist = join(',', sort keys %$dcolt); if ( $slist ne $dlist ) { die qq|\nerror: columns of "$table" on source and destination differs!\n| .qq|src: $slist\ndst: $dlist\n|; } my $attachments = lc($table) eq 'attachments'; my ($att_cont_idx, $att_contenc_idx, $att_conttype_idx); if ( $attachments ) { for(my $i = 0; $i < @$dcoln; $i++) { $att_cont_idx = $i if $dcoln->[$i] eq 'content'; $att_contenc_idx = $i if $dcoln->[$i] eq 'contentencoding'; $att_conttype_idx = $i if $dcoln->[$i] eq 'contenttype'; } } my $chunked = $scolt->{'id'}{'type_name'} eq 'int' ? 1 : 0; my ($id_min, $id_max) = (0, 1); my ($id_step) = 100; if ( $chunked ) { ($id_min, $id_max) = $dbh_src->selectrow_array("SELECT min(id), max(id) FROM $table"); } my $c_sel = "SELECT " . join(',', @$dcoln) . " FROM $table"; $c_sel .= " WHERE id >= ? AND id <= ?" if $chunked; $verbose < 2 || print "\n\texecuting on source:\n\t$c_sel\n"; my @colattr = map { $dcolt->{$_}{'type_name'} eq 'bytea' ? { pg_type => PG_BYTEA } : undef; } @$dcoln; my $c_ins = "INSERT INTO $table (" . join(',', @$dcoln) . ") VALUES (" . ( join(',', ('?') x @$dcoln) ) . ")"; my $sth_dst = $dbh_dst->prepare($c_ins); $verbose < 2 || print "\n\texecuting on dest:\n\t$c_ins\n\n"; my $sth_src = $dbh_src->prepare($c_sel); my $n = 0; for(my $id_low = $id_min; $id_low <= $id_max; $id_low+=$id_step) { $sth_src->execute($chunked ? ($id_low, $id_low + $id_step -1) : ()); while ( my $ar = $sth_src->fetchrow_arrayref() ) { $verbose < 3 || print Data::Dumper->Dump([$ar], [qw(values)]); unless ( $dryrun ) { if ( $attachments && $ar->[$att_contenc_idx] eq 'none' ) { if ( $ar->[$att_conttype_idx] =~ m/^text\b/ && !is_utf8($ar->[$att_cont_idx], 1) ) { $ar->[$att_contenc_idx] = 'quoted-printable'; $ar->[$att_cont_idx] = encode_qp($ar->[$att_cont_idx]); } elsif ( $ar->[$att_conttype_idx] =~ m/^(application|image)/ || $ar->[$att_cont_idx] =~ m/\x00/ || !is_utf8($ar->[$att_cont_idx], 1) ) { $ar->[$att_contenc_idx] = 'base64'; $ar->[$att_cont_idx] = encode_base64($ar->[$att_cont_idx]); } } for(my $i = 0; $i < $ncols; $i++) { $sth_dst->bind_param($i +1, $ar->[$i], $colattr[$i]); } $sth_dst->execute(); }; $n++; } if ( $verbose ) { my $msg = "; $n/$s_nrows rec"; print($msg, ("\b" x length($msg))); } } $n == $s_nrows || die qq|error: $n rows copied, but source contains $s_nrows!\n|; return $n; } sub copy_everything { my ($dbh_src, $dbh_dst) = @_; my @stabs = @{user_tables($dbh_src)}; my @dtabs = @{user_tables($dbh_dst)}; my @tabs = @stabs; foreach my $t (@dtabs) { push @tabs, $t unless grep(lc($_) eq lc($t), @stabs); } foreach my $table (@tabs) { if ( !grep(lc($_) eq lc($table), @dtabs) ) { warn qq|warn: skipping table "$table" not existing on destination\n|; next; } if ( !grep(lc($_) eq lc($table), @stabs) ) { warn qq|warn: skipping table "$table" not existing on source\n|; next; } $verbose && print "tab: $table: "; $dryrun || $dbh_dst->do("DELETE FROM $table"); $verbose && print "del"; my $n = copy_table($table, $dbh_src, $dbh_dst); $verbose && print "; $n records copied"; $verbose && print "\n"; } my $user_seqs = user_seqs($dbh_dst); foreach my $seq (@$user_seqs) { $verbose && print "seq: $seq: "; my $table = $seq; $table =~ s/_id_s(?:eq)?$//; $dbh_dst->do("SELECT setval('$seq', (SELECT max(id) FROM $table)+1)"); $verbose && print "updated\n"; } } sub rm_fulltext { my ($dbh_dst) = @_; my ($attidx_exists) = $dbh_dst->selectrow_array(q{SELECT * FROM pg_indexes WHERE schemaname='public' AND tablename='attachments' AND indexname='attachments_textsearch'}); if ( $attidx_exists ) { $verbose && print "index attachments_textsearch: "; $dryrun || $dbh_dst->do('DROP INDEX attachments_textsearch'); $verbose && print "dropped\n"; } my ($ocfvidx_exists) = $dbh_dst->selectrow_array(q{SELECT * FROM pg_indexes WHERE schemaname='public' AND tablename='objectcustomfieldvalues' AND indexname='largecontent_textsearch'}); if ( $ocfvidx_exists ) { $verbose && print "index largecontent_textsearch: "; $dryrun || $dbh_dst->do('DROP INDEX largecontent_textsearch'); $verbose && print "dropped\n"; } my ($rel_att_id) = $dbh_dst->selectrow_array( q{SELECT relfilenode FROM pg_class WHERE relname='attachments'}); my ($rel_ocfv_id) = $dbh_dst->selectrow_array( q{SELECT relfilenode FROM pg_class WHERE relname='objectcustomfieldvalues'}); my ($trig_att_exist) = $dbh_dst->selectrow_array(qq{SELECT EXISTS ( SELECT * FROM pg_trigger WHERE tgrelid=$rel_att_id AND tgname='tsvectorupdate')}); if ( $trig_att_exist ) { $verbose && print "trigger tsvectorupdate on attachments: "; $dryrun || $dbh_dst->do('DROP TRIGGER tsvectorupdate ON attachments'); $verbose && print "dropped\n"; } my ($trig_ocfv_exist) = $dbh_dst->selectrow_array(qq{SELECT EXISTS ( SELECT * FROM pg_trigger WHERE tgrelid=$rel_ocfv_id AND tgname='tsvectorupdate')}); if ( $trig_ocfv_exist ) { $verbose && print "trigger tsvectorupdate on objectcustomfieldvalues: "; $dryrun || $dbh_dst->do('DROP TRIGGER tsvectorupdate ON objectcustomfieldvalues'); $verbose && print "dropped\n"; } my ($dcoln, $dcolt) = column_info($dbh_dst, 'attachments'); if ( grep( $_ eq $tsvector_column, @$dcoln ) ) { $verbose && print "column attachments.$tsvector_column: "; $dryrun || $dbh_dst->do("ALTER TABLE attachments DROP COLUMN $tsvector_column"); $verbose && print "dropped\n"; } my ($dcoln, $dcolt) = column_info($dbh_dst, 'objectcustomfieldvalues'); if ( grep( $_ eq $tsvector_column, @$dcoln ) ) { $verbose && print "column objectcustomfieldvalues.$tsvector_column: "; $dryrun || $dbh_dst->do("ALTER TABLE objectcustomfieldvalues DROP COLUMN $tsvector_column"); $verbose && print "dropped\n"; } } sub create_lang { my ($dbh_dst, $lanname) = @_; my ($lang_exists) = $dbh_dst->selectrow_array( qq|SELECT EXISTS (SELECT * FROM pg_language WHERE lanname='$lanname')|); $verbose && print "language $lanname: "; if ( $lang_exists ) { $verbose && print "already exists\n"; } else { $dryrun || $dbh_dst->do("CREATE LANGUAGE $lanname"); $verbose && print "created\n"; } } sub create_func { my ($dbh_dst, $stmt) = @_; $stmt =~ m/^ \s* CREATE \s+ (?: OR \s+ REPLACE \s+ )? FUNCTION \s+ ([^()[:space:]]+) /xi || die qq|create_func: called with strange statement: `$stmt'\n|; my $funcname = $1; $verbose && print "function $funcname: "; $dryrun || $dbh_dst->do($stmt); $verbose && print "redefined\n"; } sub create_trig { my ($dbh_dst, $stmt) = @_; $stmt =~ m/^ \s* CREATE \s+ TRIGGER \s+ (\S+) \s+ (?: BEFORE | AFTER ) \s+ \S+ \s+ (?: OR \s+ \S+ \s+ )* ON \s+ (\S+) /xi || die qq|create_trig: called with strange statement: `$stmt'\n|; my $trig_name = $1; my $tabl_name = $2; $verbose && print "trigger $trig_name on $tabl_name: "; $dryrun || $dbh_dst->do($stmt); $verbose && print "created\n"; } sub create_index { my ($dbh_dst, $stmt) = @_; $stmt =~ m/^ \s* CREATE \s+ INDEX \s+ (\S+) /xi || die qq|create_index: called with strange statement: `$stmt'\n|; my $index_name = $1; $verbose && print "index $index_name: "; $dryrun || $dbh_dst->do($stmt); $verbose && print "created\n"; } sub setup_fulltext { my ($dbh_dst) = @_; my $dbname = $dbh_dst->{'Name'}; $dbname =~ s/.*\bdbname=([^;]+).*/$1/; create_lang($dbh_dst, 'plpgsql'); create_lang($dbh_dst, 'plperl'); create_lang($dbh_dst, 'plperlu'); create_func($dbh_dst, <<'EOF'); CREATE OR REPLACE FUNCTION text_to_trigrams(text) RETURNS text[] AS $$ $_ = lc($_[0]); my %trgms; pos = 0; while ( ! m/\G$/sgc ) { next if m/\G\s+/sgc; next if m/\G\n/sgc; my ($token) = m/\G(\S+)/sgc; for(my $i = 0; $i < length($token) -2; $i++) { $trgms{substr($token, $i, 3)} = 1; } } return [ map { unless ( m/^[[:word:]]+$/ ) { s/\\/\\\\/g; s/'/''/g; $_ = "'$_'"; } $_; } keys %trgms ]; $$ LANGUAGE plperlu IMMUTABLE; EOF create_func($dbh_dst, <<'EOF'); CREATE OR REPLACE FUNCTION array_to_trigrams(text[]) RETURNS text[] AS $$ use utf8; my ($t) = lc($_[0]); my %trgms; my @ta; $t =~ m/^\{/gc || die q|text_to_trigrams: input invalid at | . pos($t) . ': `' . substr($t, pos($t), 16) . q|'\n|; while ( 1 ) { if ( $t =~ m/\G"/gc ) { $t =~ m/\G((?:(?:\\\\)*(?:\\")*[^"\\]*)*)/gc; push @ta, $1; $t =~ m/\G"/gc || die q|text_to_trigrams: input invalid at | . pos($t) . ': `' . substr($t, pos($t), 16) . q|'\n|; } elsif ( $t =~ m/\G([^,}]*)/gc ) { push @ta, $1; } $t =~ m/\G,/gc && next; $t =~ m/\G\}/gc && last; die q|text_to_trigrams: input invalid at | . pos($t) . ': `' . substr($t, pos($t), 16) . q|'\n|; } foreach ( @ta ) { next if $_ eq 'NULL'; s/\\(\d{1,3})/chr(oct($1))/ge; s/\\x([[:xdigit:]]{1,2})/chr(hex($1))/ge; s/\\b/\b/ge; s/\\f/\f/ge; s/\\n/\n/ge; s/\\r/\r/ge; s/\\t/\t/ge; s/\\(.)/$1/ge; for(my $i = 0; $i < length($_) -2; $i++) { $trgms{substr($_, $i, 3)} = 1; } } return [ map { unless ( m/^[[:word:]]+$/ ) { s/\\/\\\\/g; s/'/''/g; $_ = "'$_'"; } $_; } keys %trgms ]; $$ LANGUAGE plperlu IMMUTABLE; EOF create_func($dbh_dst, <<'EOF'); CREATE OR REPLACE FUNCTION array_to_tsvector(text[]) RETURNS tsvector AS $$ SELECT array_to_string($1, ' ')::tsvector; $$ LANGUAGE SQL IMMUTABLE; EOF create_func($dbh_dst, <<'EOF'); CREATE OR REPLACE FUNCTION array_to_tsquery(text[]) RETURNS tsquery AS $$ SELECT array_to_string($1, ' & ')::tsquery; $$ LANGUAGE SQL IMMUTABLE; EOF create_func($dbh_dst, <<'EOF'); CREATE OR REPLACE FUNCTION text_to_trgm_tsvector(text) RETURNS tsvector AS $$ SELECT array_to_tsvector( text_to_trigrams( $1 ) ) $$ LANGUAGE SQL IMMUTABLE; EOF create_func($dbh_dst, <<'EOF'); CREATE OR REPLACE FUNCTION text_to_trgm_tsquery(text) RETURNS tsquery AS $$ SELECT array_to_tsquery( text_to_trigrams( $1 ) ) $$ LANGUAGE SQL IMMUTABLE; EOF create_func($dbh_dst, <<EOF); CREATE OR REPLACE FUNCTION attachments_trigger() RETURNS trigger AS \$\$ BEGIN IF new.contenttype LIKE 'text/%' THEN new.$tsvector_column := text_to_trgm_tsvector( coalesce(new.subject,'') || ' ' || coalesce(new.content,'')); ELSE new.$tsvector_column := text_to_trgm_tsvector( coalesce(new.subject,'') ); END IF; RETURN new; END \$\$ LANGUAGE plpgsql EOF create_func($dbh_dst, <<EOF); CREATE OR REPLACE FUNCTION objectcustomfieldvalues_trigger() RETURNS trigger AS \$\$ BEGIN new.$tsvector_column := text_to_trgm_tsvector(coalesce(new.largecontent, '')); RETURN new; END \$\$ LANGUAGE plpgsql EOF my ($dcoln, $dcolt) = column_info($dbh_dst, 'attachments'); $verbose && print "column attachments.$tsvector_column: "; if ( grep( $_ eq $tsvector_column, @$dcoln ) ) { $verbose && print "already exists\n"; } else { $dryrun || $dbh_dst->do("ALTER TABLE attachments ADD COLUMN $tsvector_column tsvector"); $verbose && print "added\n"; } $verbose && print "column attachments.$tsvector_column: "; $dryrun || $dbh_dst->do(qq{UPDATE attachments SET $tsvector_column = text_to_trgm_tsvector( coalesce(subject,'') || ' ' || coalesce(content,'')) WHERE contenttype LIKE 'text/%'}); $dryrun || $dbh_dst->do(qq{UPDATE attachments SET $tsvector_column = text_to_trgm_tsvector( coalesce(subject,'')) WHERE contenttype NOT LIKE 'text/%'}); $verbose && print "values updated\n"; my ($dcoln, $dcolt) = column_info($dbh_dst, 'objectcustomfieldvalues'); $verbose && print "column objectcustomfieldvalues.$tsvector_column: "; if ( grep( $_ eq $tsvector_column, @$dcoln ) ) { $verbose && print "already exists\n"; } else { $dryrun || $dbh_dst->do("ALTER TABLE objectcustomfieldvalues ADD COLUMN $tsvector_column tsvector"); $verbose && print "added\n"; } $verbose && print "column objectcustomfieldvalues.$tsvector_column: "; $dryrun || $dbh_dst->do(qq{UPDATE objectcustomfieldvalues SET $tsvector_column = text_to_trgm_tsvector( coalesce(largecontent,''))}); $verbose && print "values updated\n"; create_index($dbh_dst, <<EOF); CREATE INDEX attachments_textsearch ON attachments USING GIN($tsvector_column) EOF create_index($dbh_dst, <<EOF); CREATE INDEX largecontent_textsearch ON objectcustomfieldvalues USING GIN($tsvector_column) EOF create_trig($dbh_dst, <<'EOF'); CREATE TRIGGER tsvectorupdate BEFORE INSERT OR UPDATE ON attachments FOR EACH ROW EXECUTE PROCEDURE attachments_trigger() EOF create_trig($dbh_dst, <<'EOF'); CREATE TRIGGER tsvectorupdate BEFORE INSERT OR UPDATE ON objectcustomfieldvalues FOR EACH ROW EXECUTE PROCEDURE objectcustomfieldvalues_trigger() EOF } MAIN: { $| = 1; my ($usage, $copy, $fulltext, $vacuum); my ($dbi_src_dsn, $dbi_src_user, $dbi_src_password); my ($dbi_dst_dsn, $dbi_dst_user, $dbi_dst_password); usage(1) unless Getopt::Long::GetOptions( 'c|copy' => \$copy, 'h|help' => \$usage, 'v|verbose+' => \$verbose, 'n|dry-run' => \$dryrun, 'src-dsn=s' => \$dbi_src_dsn, 'src-user=s' => \$dbi_src_user, 'src-password=s' => \$dbi_src_password, 'dst-dsn=s' => \$dbi_dst_dsn, 'dst-user=s' => \$dbi_dst_user, 'dst-password=s' => \$dbi_dst_password, 'fulltext!' => \$fulltext, 'vacuum' => \$vacuum); usage(0) if $usage; if ( @ARGV == 2 ) { ($dbi_src_dsn, $dbi_dst_dsn) = @ARGV; } elsif ( @ARGV ) { usage(1); } my ($dbh_dst, $dbh_src); if ( $dbi_dst_dsn ) { $dbh_dst = DBI->connect($dbi_dst_dsn, $dbi_dst_user, $dbi_dst_password, { 'RaiseError' => 1, 'AutoCommit' => 1, } ); } if ( $dbi_src_dsn ) { $dbh_src = DBI->connect($dbi_src_dsn, $dbi_src_user, $dbi_src_password, { 'RaiseError' => 1, 'AutoCommit' => 1, }); $dbh_src->{'mysql_enable_utf8'} = 1; $dbh_src->do("SET NAMES utf8"); } $SIG{'__WARN__'} = sub { warn @_ unless $_[0] =~ m/^NOTICE: word is too long to be indexed/; }; SWITCH: { $copy && do { die "error: destination DSN not set! (see usage)\n" unless $dbh_dst; die "error: source DSN not set! (see usage)\n" unless $dbh_src; rm_fulltext($dbh_dst); copy_everything($dbh_src, $dbh_dst); setup_fulltext($dbh_dst) if $fulltext; last SWITCH; }; defined $fulltext && do { die "error: destination DSN not set! (see usage)\n" unless $dbh_dst; if ( $fulltext ) { rm_fulltext($dbh_dst); setup_fulltext($dbh_dst); } else { rm_fulltext($dbh_dst); } last SWITCH; }; warn "Nothing to do; see usage (-h)\n"; } if ( $vacuum ) { $verbose && print "vacuum full: "; $dryrun || $dbh_dst->do("VACUUM FULL"); $verbose && print "done\n"; } $dbh_dst->disconnect() if $dbh_dst; $dbh_src->disconnect() if $dbh_src; }
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor