#!/usr/bin/perl -w

# This script reads a list of email addresses from STDIN, removes leading
# and trailing whitespace, removes comments in parentheses and in single
# or double quotes, splits lines containing several comma-separated email
# addresses into separate entries, and prints each address once.
#
# Run this program several times on any mailbox you wish, as the email
# addresses are *appended* to the logfile. Then run a command like this
# once:
# perl extract-emails.pl < extract-emails.log > emails.txt
# to get them out.
#
# Written by: <mmokrejs@natur.cuni.cz>
#
# File-scoped state.
#   $line    - working copy of the input line currently being processed
#   $newline - not referenced anywhere in the visible code; kept so that
#              nothing depending on the declaration breaks
#   %addr    - set of collected addresses (keys); filled by cleanup()
my $line;
my $newline;
my %addr;

# Read STDIN line by line and hand every candidate address to cleanup().
# The implicit read into $_ is deliberate: the raw, unmodified input line
# stays available in $_ for the whole iteration, while all editing is
# done on the copy in $line.
while (<STDIN>) {
  $line = $_;
  # Strip the line terminator once, up front; the comma test below does
  # not depend on it.
  chomp($line);
  if ( $line =~ m/,/ ) {
    # Several addresses on one line: process each comma-separated piece
    # on its own. Pieces without an `@' (for example the first half of
    # a quoted "Last, First" display name) are ignored.
    foreach my $piece ( split( /,/, $line ) ) {
      next if ( $piece !~ m/@/ );
      cleanup(\$piece);
    }
  } else {
    # Lines without a comma are passed on as they are, even if they
    # contain no `@'.
    cleanup(\$line);
  }
}

# Print the collected addresses, one per line, on STDOUT. Each address is
# a key of %addr and therefore appears only once. Entries that do not
# look like a single plain address are reported on STDERR and left out
# of the output.
for (keys %addr) {
  # Anything containing whitespace is not a single address.
  if ( m/[\s\t]/ ) {
    warn "Not a single word, skipped: \`$_\'\n";
    next;
  }
  # A semicolon or a slash marks the entry as suspicious.
  if ( m/;/ or m/\// ) {
    warn "Weird address, skipped: \`$_\'\n";
    next;
  }
  print "$_\n";
}


# cleanup(\$line)
#
# Normalizes one candidate email address IN PLACE (the argument is a
# reference to the string) and records the result as a key of %addr.
#
# Steps, in order:
#   1. remove comments in parentheses
#   2. if the text contains `<', keep only what is inside <...>
#   3. remove single- and double-quoted comments
#   4. convert CRLF to LF and any remaining commas to newlines
#   5. trim leading and trailing whitespace
#   6. lowercase ASCII letters
#
# The result is stored unless it is empty or contains the placeholder
# text "recipient list suppressed". The return value is not meaningful.
sub cleanup {
  my ($line) = @_;
  #print "OLD: $$line\n";
  $$line =~ s/\([^)]*\)//g;
  if ( $$line =~ m/</ ) {
    # Drop everything before the first `<' and after the last `>'.
    # If there is no closing `>', the second substitution removes the
    # whole string and the entry ends up being discarded below.
    $$line =~ s/^[^<]*//;
    $$line =~ s/[^>]*$//;
    # Then peel off the angle brackets themselves.
    $$line =~ s/^<//;
    $$line =~ s/>$//;
  }
  $$line =~ s/"[^"]*"//g;
  $$line =~ s/'[^']*'//g;
  $$line =~ s/\r\n/\n/g;
  # A comma left at this point becomes a newline, i.e. whitespace inside
  # the entry.
  $$line =~ s/,/\n/g;
  $$line =~ s/\s+$//;
  $$line =~ s/^\s+//;
  # tr/// takes plain ranges; square brackets would be literal characters.
  $$line =~ tr/A-Z/a-z/;
  # Test the cleaned, already lowercased text itself rather than the
  # global $_, so that the placeholder is recognized in any letter case
  # and does not suppress other addresses taken from the same input line.
  if ( ( $$line !~ m/recipient list suppressed/ ) and ( $$line ne "" ) ) {
    # store addresses in a hash so that we make them effectively unique
    $addr{$$line} = 1;
  }
}
