#!/usr/local/bin/perl -w
#
# eud2mbox.pl, Eudora to mbox mail converter
#
# Copyright (C) 1999 Jonathan J. Miner
# Portions Copyright (C) 1999 Dave Lorand <davel@src.uchicago.edu>
# 
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# eud2mbox.pl,v 1.35 2000/08/08 22:42:05 miner Exp
# Jon Miner <miner@doit.wisc.edu>
# 
# TODO: Use HTML::Parser?  Parse "HTML" messages in to chunks and wrap the
# lines. Possibly do this with all messages? Use Text::Wrap to do the actual
# wrapping.
# 

# Patched by Ward Vandewege (ward@pong.be) on 2001/05/27
# Just a couple of bugfixes:
#		1. Deal with CR/LF style files properly
#		2. Deal with mbox-files that contain some lines in CR/LF format and others in LF format
#		3. Cleared out the parsing of the TOC files with some information found in the toc.pl script by Christopher Russo <crusso@mit.edu>, Feb. 1997
# This version of the eud2mbox.pl script lives online at http://patch.be/eud2mbox/

my $RCSiD = 'eud2mbox.pl,v 1.35 2000/08/08 22:42:05 miner Exp';

use Getopt::Long;
use strict;
use diagnostics;
require 'print_hash.pl';

my $VERSION_MAJ = 1;
my $VERSION_MIN = 2;
my $VERSION_ALPHA = "b";
my $VERSION = "$VERSION_MAJ.$VERSION_MIN".$VERSION_ALPHA;

my $HEADER = "a6 a2 a32 c a61 l l C*";
my $MESSAGE = "lll C C*";

sub get_toc($);
sub get_messages($);

printf("Eud2mbox %s (build %s), Copyright (C) 1999 Jonathan J. Miner\n\n",
       $VERSION, ($RCSiD =~ /,v ([\d.]+)/));

my %opts = ("toc",1);
GetOptions(\%opts,"v","version","help","prefix=s","out=s","toc!","d");

my $verbose = (defined($opts{'v'}) || defined($opts{'d'})) ? 1 : 0;
my $debug = defined($opts{'d'}) ? 1 : 0;

if ( $debug ) {
    print "Options: \n";
    print_hash(\%opts,2);
    print "Args: @ARGV\n";
    print "Debug: $debug\n";
    print "Verbose: $verbose\n";
}

CopyRight(0) if ($opts{'version'});
Error(0) if ($opts{'help'});
Error(-1) if (!defined($ARGV[0]));
Error(-1, "Prefix as argument usage deprecated: See --prefix") 
    if (defined($ARGV[1]));

my $mboxName = defined ($opts{'prefix'}) ? "$opts{prefix}.$ARGV[0]" : $ARGV[0];
my $outMboxName = defined($opts{'prefix'}) ? "$opts{'prefix'}.$ARGV[0]" : 
                                             $ARGV[0];
my $out = "out/$outMboxName";

my @statuses = ("N","RO","RO","RO","RO","N","N","N","RO","N");

if (! -f "$mboxName.mbx" and -f $mboxName) {         # a mac file
    system "perl -015l12pe 5 $mboxName > $mboxName.mbx";
}


if (defined $opts{'out'}) {
    if ( -d $opts{'out'} ) {
        $out = $opts{'out'}."/$outMboxName";
    } else {
        $out = defined($opts{'prefix'}) ? "$opts{'prefix'}.$opts{'out'}" : 
                                          $opts{'out'};
    }
}

open OUTMBOX, ">$out" or Error(-1, "error opening '$out' for writing: $!");

my %toc;
my @messages;

open MBOX, "<$mboxName.mbx" or Error(-1, "error opening '$mboxName.mbx': $!");
if ($opts{'toc'}) {
    open TOC, "<$mboxName.toc" or 
        Error(-1, "error opening '$mboxName.toc': $!");

    binmode TOC; 

    %toc = get_toc($mboxName);
    @messages = get_messages($toc{'messages'});

    close TOC;

} else {
    print "Processing without a TOC file is currently not implemented.\n";
}

close MBOX;

foreach my $message (@messages) {
    my @body = @{$message->{'body'}};
    my @header = @{$message->{'header'}};
    my %toc = %{$message->{'toc'}};

    push @header, "Status: $toc{status}";
    push @header, "X-Status: $toc{'x-status'}"
        if (defined($toc{'x-status'}));

    foreach (@header) {
        print OUTMBOX "$_\n";
    }

# Separate header and body
    print OUTMBOX "\n";

    my $html = 0;
    my $div = 0;
    my $blockquote = 0;
    
    foreach (@body) {
        s/^(>+)([^ >])/$1 $2/g;
        if ($html == 0) {
            if (/^<html>$/) {
                $html = 1;
                next;
            } else {
                s/<\/?x-flowed>//gi;
                print OUTMBOX "$_\n";
            }
        } else {
            $div = 1 if (s/^<DIV>//);
            $div = 0 if (s#^</DIV>##);
            if (s/^<blockquote [^>]*>// || $blockquote) {
                $blockquote = 1;
                $_ = "> $_";
            }
            s/<br>/\n/;
            s/<\/html>/\n/;
            s/&nbsp\;/ /gi;
            s/&quot\;/"/gi;
            s/&amp\;/&/gi;
            $blockquote = 0 if (s#</blockquote>##);

            print OUTMBOX "$_";
        }
    }
}

close OUTMBOX;

sub Error {
    my ($exit, $Error_Message) = @_;

    CopyRight();
    
    print "Error: $Error_Message\n\n" if (defined $Error_Message);

    (undef, my $name) = ($0 =~ /(.*[\\\/])?(.*)/);
    my $namelen = length($name) - 2;
    printf("Usage: %s <mailbox name> [-v] [-d] [--out|-o] [--prefix|-p] ".
           "[--notoc]\n".
           " "x$namelen.
           "                         [--help|-h] [--version]\n", 
           $name);

    print "     <mailbox name> is the name of the mailbox\n";
    print "                    (to which .toc and/or .mbx is appended.)\n";
    print "     [-v] Run in verbose mode.\n";
    print "     [-d] Run in debug mode.\n";
    print "     [-o | --out] <path/filename>\n";
    print "                  Output filename and/or path.\n";
    print "                   o If set to an existing directory, files will be\n";
    print "                     written it to the directory.\n";
    print "                   o If set to a file name (with/without full path)\n";
    print "                     mailbox will be written to that file.\n";
    print "     [-p | --prefix] <prefix string> \n";
    print "                     Prefix to prepend to the output FILE name.\n";
    print "     [--notoc] Ignores the .toc file and just runs over the mbx\n";
    print "               Use with a corrupt or missing toc file\n";
    print "                * For Macintosh Eudora, see \n";
    print "                  \"Use old-style \".toc\" files\" in Misc. Settings.\n";
    exit $exit;
}

sub CopyRight {
    my ($exit) = @_;
    print "This program is free software and may be freely distributed and".
          "modified \n";
    print "under the terms set forth in the GNU General Public License.\n";
    print "This progam is distributed with ABSOLUTELY NO WARRANTY.\n";
    print "See gpl.txt and/or http://www.gnu.org/ for more information.\n\n";
    if (defined $exit) {
        print "\nUse --help for usage information.\n";
        exit $exit;
    }
}

sub get_toc($) {
    my $mboxName = shift;
		my ($buf,$d1,$d2,$nm1,$nm2,@x);
		my $count = read(TOC, $buf, 104);
		($toc{'version'}, $d1, $toc{'mboxname'}, $toc{'mboxtype'}, $d2, $nm1, $nm2, @x) = unpack($HEADER, $buf);

		$toc{'messages'} = $nm1 + $nm2*256;

    if ($verbose) {
        printf "Name: %s\n", $toc{'mboxname'};
        printf "Messages: %d\n", $toc{'messages'};

        if ($debug) {
            printf "version: 0x%lx\n", $toc{'version'};
            printf "Type: %d\n", $toc{'mboxtype'};
        }
        print "------\n";
    }

    %toc;
}

sub get_messages($) {

    my $num_msg = shift;

    for (my $i = 0; $i < $num_msg; $i++) {
        my %message = ();
        my %tempmsg = ();
				my $msgbuf;
			
				seek(TOC, 104+218*$i, 0);
				my $count = read(TOC, $msgbuf, 218);
				my ($msgoffset,$msgsize,$secs,$status,$read,$x) = unpack($MESSAGE,$msgbuf);
				$message{'offset'} = $msgoffset;
				$message{'length'} = $msgsize;
				$message{'date'} = $secs;

        $message{'status'} = $statuses[$status];
        $message{'x-status'} = ($status == 2) ? 'A' : undef;

        if ($verbose) {
            print "Message $i\n";
            printf "Length: %d\n", $message{'length'};
            printf "Date: %d (%s)\n",$message{'date'}, scalar localtime($message{'date'});
            printf("Status: %d (%s) [%s]\n",$status, $message{'status'},
                   defined($message{'x-status'}) ? $message{'x-status'} : "");
            if ($debug) {
                printf "Offset: %d\n", $message{'offset'};
            }
            print "------\n";
        }
        $tempmsg{'toc'} = \%message;
        push @messages, \%tempmsg;
    }

    foreach my $entry (@messages) {
        my $buf = "";
        my $in = "";
        my @lines = ();
        seek MBOX, $entry->{'toc'}->{'offset'}, 0;
        for (my $i = 1; $i <= $entry->{'toc'}->{'length'}; $i++) {
            read MBOX, $in, 1;
            if ((ord $in) == 10) {
                my $chop = chop($buf);	#Throw away last character (probably CR)
								$buf .= $chop if ((ord $chop) != 13);	#But append the character again if it is not a CR!
                push @lines, $buf;
                $buf = "";
            } else {
                $buf .= $in;
            }
        }
        my $body = 0;
        my @body = ();
        my @header = ();
        foreach (@lines) {
            if ($body == 0) {
                if (/^$/) {
                    $body = 1;
                    next;
                }
                push @header, $_;
            } else {
                push @body, $_;
            }
        }
        $entry->{'body'} = \@body;
        $entry->{'header'} = \@header;
    }

    @messages;
}
