#!/usr/bin/env perl

# $Id: dump_tables.pl,v 1.11 2003/01/28 21:02:24 mokrejs Exp $

=head1 NAME

dump_tables.pl - a perl program to dump specific list of databases/tables

=head1 SYNOPSIS

dump_tables.pl --options --arguments

=head1 DESCRIPTION

This script allows you to dump databases from one host and directly
upload the data back to mysql on another host.

You can omit specified tables or databases
to work on. If you do not specify any --db option, a list of databases
will be fetched from source database host (--from-host). If you do not specify any --table ,
list of tables will be fetched for each database from mysql host.


The strong advantage of this program is to selectively include or exclude
some databases or tables which you do or do not want to modify. Hence
we offer --omit-(table|database) and --table and --db arguments.
The program is clever enough to figure out what tables or databases
are present on the system and operate on all of them, if no --db
or --table was specified.

Note that this program will take the advantage of ~/.my.cnf file, so you
do not have to specify username, password, hostname, port on a commandline.

=head1 OPTIONS of dump_tables.pl

--db database name, this option can be specified multiple times on a command-line

--table table name, this option can be specified multiple times on a command-line

--omit-table do not copy this table

--omit-database do not copy this database

--from-host name of sql server, *from* which we dump data

--to-host name of sql server, *to* which we import data

--user sql username for both sql servers

--password sql password for both sql servers

--port sql port for both sql server

--stdin read list of databases from STDIN

--all-on-from-host copy all databases available on source sql host

--missing-on-to-host copy databases missing on target sql host (does NOT check for table missing in databases present on both servers).

--all-on-to-host copy those databases from source host, which are already present on target sql server

--dry-run do not copy anything, just print messages instead. Use for testing.


=head1 OPTIONS PASSED TO mysqldump

--add-locks

--delayed-insert

--add-drop-table

--force


=head1 EXAMPLES

dump_tables.pl --db=db1 --db=db2 --from-host=foo --to-host=bar --dry-run

dump_tables.pl --db=db1 --db=db2 --from-host=foo --to-host=bar --omit-table=table3

cat list.txt | dump_tables.pl --db=db1 --db=db2 --from-host=foo --to-host=bar --omit-table=table3 --stdin

dump_tables.pl --db=db1 --db=db2 --from-host=foo --to-host=bar --add-drop-table --add-locks

dump_tables.pl --missing-on-to-host --from-host=foo --to-host=bar --omit-database=db3

--missing-on-to-host will figure out which *databases* are missing on target host

dump_tables.pl --all-on-from-host --from-host=foo --to-host=bar --omit-database=db3

=head1 AUTHOR

Martin Mokrejs <mmokrejs@natur.cuni.cz> http://www.natur.cuni.cz/~mmokrejs/mysql-tools

=cut


use strict;
use Getopt::Long;
use DBI;

$| = 1;

my $db;
my @db;
my $stdin;
my $table;
my @table;
my $cmd;
my $fromhost;
my $tohost;
my $user;
my $password;
my $port;
my $verbose;
my $force;
my $adddroptable;
my $addlocks;
my $allonfromhost;
my $missingontohost;
my $missingonfromhost;
my $allontohost;
my @omittable;
my @omitdatabase;
my @onfromhost;
my @ontohost;
my $delayedinsert = 0;
my $dryrun = 0;

#
# The username and password is read from $HOME/.my.cnf , but can be overriden here or by --user and --password options
#
$user = "";
$password = "";
$port = "3306";
$verbose = 0;
$force = 0;
$adddroptable = 0;
$stdin = 0 ;


GetOptions ('db=s' => \@db, 'table=s' => \@table, 'from-host=s' => \$fromhost, 'to-host=s' => \$tohost, 'user=s' => \$user, 'password=s' => \$password, 'port=i' => \$port, 'verbose+' => \$verbose, 'force!' => \$force, 'add-drop-table!' => \$adddroptable, 'add-locks!' => \$addlocks, 'stdin!' => \$stdin, 'all-on-from-host!' => \$allonfromhost, 'missing-on-to-host!' => \$missingontohost, 'all-on-to-host!' => \$allontohost, 'omit-table=s' => \@omittable, 'omit-database=s' => \@omitdatabase, 'delayed-insert!' => \$delayedinsert, 'dry-run!' => \$dryrun);

die "Error: No --from-host option specified!\n" if ( !scalar($fromhost) );
die "Error: No --to-host option specified!\n" if ( !scalar($tohost) );
die "Error: Specify either --stdin or --db\n" if ( $stdin == 1 && scalar(@db) != 0 );

my @specifiedtables = @table;

if ( $force == 1 ) {
  $force = "-f";
} elsif ( $force == 0 ) {
  $force = '';
}

# pass to mysqldump --add-drop-table option
if ( $adddroptable == 1 ) {
  $adddroptable = "--add-drop-table"
} elsif ( $adddroptable == 0 ) {
  $adddroptable = "";
}

# pass to mysqldump --add-locks option
if ( $addlocks == 1 ) {
     $addlocks = "--add-locks"
} elsif ( $addlocks == 0 ) {
     $addlocks = "";
}

# pass to mysqldump --delayed-insert
if ( $delayedinsert == 1 ) {
  $delayedinsert = "--delayed-insert";
} else {
  $delayedinsert = "";
}

# prepare list of tables not to be copied per each dump
if ( scalar (@omittable) or scalar (@omitdatabase) ) {
  print "Info: Omitting ", scalar(@omitdatabase), " databases: @omitdatabase\n" if ( $verbose >= 1 );
  print "Info: Omitting ", scalar(@omittable), " tables: @omittable\n" if ( $verbose >= 1 );
  chomp(@omittable) if ( scalar (@omittable) );
  chomp(@omitdatabase) if ( scalar (@omitdatabase) );
}


# we need --from-host to read list of databases
die "Error: No --from-host option specified!\n" if ( !scalar($fromhost) );

# get list of databases on source machine (--from-host)
my @onfromhostdb = ShowDatabasesOrTables("","",$fromhost,$port,$user,$password) or die "Error: Cannot get a list of databases on --from-host $user\@$fromhost:$port\n";

# Get list of databases on target machine (--to-host)
# We will use this information while checking for existence of a given database on target
my @ontohostdb = ShowDatabasesOrTables("","",$tohost,$port,$user,$password) or die "Error: Cannot get a list of databases on --to-host $user\@$fromhost:$port\n";

if ( !@db && $stdin == 1 ) {
  # read list of databases from STDIN as supplied by user
  @db = <STDIN>;
  chomp(@db);
} elsif ( !@db && $stdin == 0 ) {
  # if we should copy only databases missing on either of them or just those on fromhost (as the --db= wasn't specified)
  if ( scalar($missingontohost) and $missingontohost == 1 ) {
    my @missingontohostdb = FindMissingDatabasesOrTables(@onfromhostdb, @ontohostdb);
    @db = @missingontohostdb;
  } else {
    @db = @onfromhostdb;
  }
} else {
  # user has listed databases on a commandline
}

foreach $db ( @db ) {
  chomp($db);
  if ( scalar (@omitdatabase) ) {
    next if ( ExactMatch($db,@omitdatabase) == 1 );
  }

  if ( CheckIfDbExists($fromhost,$port,$db,@onfromhostdb) == 0 ) {
    warn "Warning: *Source* database $db does not exist on $fromhost:$port, nothing to copy! Skipping.\n";
    next;
  }

  if ( CheckIfDbExists($tohost,$port,$db,@ontohostdb) == 0 ) {
    print "Info: Creating database $db on *target* host $tohost:$port.\n" if ($verbose >= 1);
    if ( $dryrun == 0 ) {
      if ( CreateDb($tohost,$port,$db,$user,$password) == 1 ) {
        next;
      }
    }
  }

  @table = @specifiedtables;
  if ( !scalar (@table) ) {
    die "Error: No --fromhost option specified!\n" if ( !scalar($fromhost) );
    @onfromhost = ShowDatabasesOrTables($db, "",$fromhost,$port,$user,$password) or die "Error: Cannot fetch a list of tables in database $db from $user\@$fromhost:$port. Is $db really empty?\n";

    if ( scalar($allontohost) or scalar($missingontohost) ) {
      @ontohost = ShowDatabasesOrTables($db,"",$tohost,$port,$user,$password);
    }

    if ( scalar($missingontohost) ) {
      @table = FindMissingDatabasesOrTables(@onfromhost,@ontohost) or warn "Error: Cannot get a list of tables missing\n";
    } elsif ( scalar($allontohost) ) {
      @table = @ontohost;
    } else {
      @table = @onfromhost;
    }
  }

  if ( scalar(@table) == 0 ) {
     warn "Warning: No tables to be copied within $db\n" if ($verbose >= 1 );
     next;
  }

  if ( scalar(@omittable) or scalar(@omitdatabase) ) {
    @table = RemoveOmittables(\@table,\@omittable);
  }

  print "Info: DB=$db TABLES=", scalar(@table), " OMITTED=", scalar(@omittable), "\n" if ( $verbose >= 2 );
  foreach $table ( @table ) {
    chomp($table);

    $cmd = "mysqldump --quote -C --extended-insert --host=$fromhost $adddroptable $addlocks $force $delayedinsert ";
    $cmd = $cmd . "$db $table | mysql --host=$tohost $db";
    $cmd =~ s/\ +/\ /g;



    if ( $dryrun == 0 ) {
      print "Info: Executing '$cmd' ...";
      if (system($cmd)) {
         print "failed.\n";
	 warn "Error: Failed to dump $db.$table: $!\n";
      } else {
        print "succeeded.\n";
      }
    } else {
      print "DRY-RUN: $cmd\n";
    }

  }
  @table = @specifiedtables;
}

sub ShowDatabasesOrTables {
  my ($db,$table,$hostname,$port,$user,$password) = @_;
  my $db1 = "mysql";
  my $cmd;
  if ( $db ne "" ) {
    $db1 = $db;
    $cmd = $table if ( $table ne "" );
    $cmd = "tables" if ( $table eq "" );
  } elsif ( $db eq "" ) {
    $cmd = "databases";
  }
  my $dbh = DBI->connect("DBI:mysql:database=$db1;mysql_read_default_file=$ENV{HOME}/.my.cnf;host=$hostname;port=$port","$user","$password") || die "\nError: Unable to open initial connection to database $db1 on $hostname: $DBI::errstr";
  my $statement = "show $cmd;";
  my $sth = $dbh->prepare($statement);
  if ( $dbh->errstr ) {
     die "Error: cannot prepare command '$statement': $dbh->errstr\n";
  }
  $sth->execute;
  if ( $dbh->errstr ) {
     $dbh->disconnect;
     die "Error: execute of command '$statement' failed: $statement\n";
  }
  my @row_ary = ();
  my @dbb;
  while (1) {
    @row_ary = $sth->fetchrow_array;
    if ( $dbh->errstr ) {
       warn "Error: fetchrow_array failed after command '$statement' was executed: $dbh->errstr\n";
    }
    last if !scalar @row_ary;
    # push list of databases (one per row in SQL output)
    push(@dbb, $row_ary[0]);
  }
  $dbh->disconnect;
  return @dbb;
}

sub RemoveOmittables {
  my ($table, $omittable) = @_;
  my %unique;
  my @unique;
  foreach ( @$table ) {
    chomp($_);
    $unique{$_}++;
  }
  foreach ( @$omittable ) {
    chomp($_);
    $unique{$_}++;
    $unique{$_}++;
  }
  foreach ( sort keys %unique ) {
    push (@unique, $_) if ( $unique{$_} == 1 );
  }
  @unique = sort @unique;
  return @unique;
}

sub FindMissingDatabasesOrTables {
  my (@fromhost, @tohost) = @_;
  my %all;
  foreach ( @fromhost, @tohost ) {
    $all{$_}++;
  }
  my @unique;
  foreach ( sort keys %all ) {
    push (@unique, $_) if ( $all{$_} == 1 );
  }
  my @tmp;
  if ( scalar($missingonfromhost) and $missingonfromhost == 1 ) {
    my @missingonfromhost;
    foreach my $key ( @unique ) {
      foreach ( @tohost ) {
        push (@missingonfromhost, $_) if ( $key eq $_ );
      }
    }
  @tmp = sort @missingonfromhost;
  return @tmp;
  } elsif ( scalar($missingontohost) and $missingontohost == 1 ) {
    my @missingontohost;
    foreach my $key ( @unique ) {
      foreach ( @fromhost ) {
        push (@missingontohost, $_) if ( $key eq $_ );
      }
    }
  @tmp = sort @missingontohost;
  return @tmp;
  }
}

# because `grep /string/, @array' matches partial strings, we need exact match here
sub ExactMatch {
  my ($string, @array) = @_;
  my $match = 0;
  foreach my $entry ( @array ) {
    $match = $match + 1 if ( $entry eq $string );
  }
  print "String $string matched $match times.\n" if ( $verbose >=3 );
  return $match;
}

# check if database exist in supplied array
sub CheckIfDbExists {
  my ($hostname,$portnumber,$databasename,@array) = @_;
  print "Fetched " . @array . " databasenames from $hostname:$portnumber.\n" if ( $verbose >=2 );

  if ( ExactMatch($databasename,@array) >= 1 ) {
     print "$databasename exists on $hostname:$portnumber.\n" if ( $verbose >=1 );
     return 1;
  } else {
     warn "$databasename does not exist on $hostname:$portnumber.\n" if ( $verbose >=1 );
     return 0;
  }
}

sub CreateDb {
  my ($hostname,$port,$db,$user,$password) = @_;

  my $dbh = DBI->connect("DBI:mysql:;host=$hostname;mysql_read_default_file=$ENV{HOME}/.my.cnf;port=$port","$user","$password",{ RaiseError => 0, AutoCommit => 1, PrintError => 0 });
  if ( $DBI::errstr ) {
    print STDERR "[$user\@$hostname:$port]: ERROR $DBI::errstr\n";
    return 1;
  }

  my $rc;
  $rc = $dbh->func('createdb', $db, $hostname, $user, $password, 'admin');

  if ( $dbh->errstr ) {
    warn "While creating database $db on $hostname we got from sql server $rc = ERROR.\n$dbh->errstr\n";
  } elsif ( ($rc == 1) and ($verbose >=3) ) {
    print "While creating database $db on $hostname we got from sql server $rc = OK.\n";
  }

  $dbh->disconnect;
  return 0;
}
