#!/bin/sh -u
#    $0 ...list of mboxes ...
# -Ian! D. Allen - idallen@idallen.ca - www.idallen.com

# SAFECAT=' safecat tmp new '
# rm -f $OUT/* tmp/* new/*
# rm idcache

OUT=out

if [ -t 2 -a -s idcache ] ; then
    while : ; do
	echo 1>&2 -n "$0: idcache exists - keep ? [Yn] "
	read ans </dev/tty || exit $?
	case "$ans" in
	"" | y | Y | yes | YES ) break ;;
	n | N | no | NO )
	    echo 1>&2 "$0: Removing idcache and $OUT/* ..."
	    if [ -e $OUT ] ; then
		(
		    cd $OUT || exit $?
		    e=$( find . -maxdepth 0 -empty )
		    if [ ${#e} = 0 ] ; then
			mkdir -p .old$$ || exit $?
			mv * .old$$/
			rm -rf .old$$ &
		    fi
		) || exit $?
	    fi
	    rm -rf idcache mutt.cache
	    break
	    ;;
	esac
    done
fi
mkdir -p $OUT || exit $?

### SEP=$( echo "\237\227\217\377 " )  # something very unlikely
SEP='X-IDALLEN-Envl-Frm'  # something very unlikely
MAXPERDIR=10000 # this many files, max, per output directory
MAXMSGSPERFILE=10000 # this many msgs, max, per output file
FORMAIL=" formail -D 2000000 idcache -c -z -p '' -R 'From ' '$SEP: ' -s "

## MAXMSGSPERFILE=1000 # this many msgs, max, per output file
## echo "DEBUG using smaller MAXMSGSPERFILE of $MAXMSGSPERFILE"

# pass the file name and date through to Perl using a fake first msg
for f do
    datesec=$( stat -c '%Y' "$f" ) || exit !?
    basename=$( basename "$f" )
    echo "$SEP: $SEP: $basename $datesec"
    eval $FORMAIL <$f
done | /usr/bin/time perl -we "
    use strict;
    use warnings;
    use integer;
    use Date::Parse;
    use IO::Handle;
    use POSIX qw(strftime);
    use Fcntl qw( :seek );

    my \$SEP = '$SEP';
    my \$MAXPERDIR = $MAXPERDIR;
    my \$MAXMSGPERFILE = $MAXMSGSPERFILE;
    my \$FORMAIL = \"$FORMAIL\";
    my \$OUT =\"$OUT\";
"'
    my $sep = lc $SEP;
    my $SEPHDR = "$SEP: ";
    my $SEPLEN = length($SEPHDR);
    my $file;
    my $auxinput;
    my $filename;
    my $prevfilename;
    my $prevdatesec;
    my %headers = ();
    my $in_headers = 0;
    my $headerlinecount = 0;
    my $bodylinecount = 0;
    my $prevbodylinecount = 0;
    my $msgcount = 0;
    my $filemsgcount = 0;
    my $skipmsg = 0;
    my $fromsec; # from From_ line
    my $fromstr; # from From_ line
    my $datesec; # from Date: line
    my $recvsec; # from Received: lines
    my $fixedformattell;
    my $prevfixedformattell;
    my $starthdrtell;
    my $prevstarthdrtell;
    my $startbodytell;
    my $prevstartbodytell;
    my $contentlength;
    my $internalfromtell;
    my $internalbodylinecount;
    my $lines; # from Lines: header
    my $prevlines;
    my $FILENAME;
    my $NOW = time;
    my $FIXEDFORMAT = "Lines: %-9d\nContent-Length: %-9d\n";
    # 2,000,000 messages into 200 mailboxes means 10,000 msgs per box
    ## my $MAXFILESIZE = 1024 * 1024; # not used
    # dates for "idallen" domains
    my $D1980 = str2time("1 jan 1980"); # @cgl.uwaterloo.ca to Feb 19 1993
    my $D1993 = str2time("19 feb 1993"); # @ncf.ca (end of CGL)
    my $D2002 = str2time("2002/05/29"); # @idallen.ca
    my $DATESEC;
    my $DATESTR;
    my @tmplist = ( $sep,
	qw( from to apparently-to resent-to cc bcc date ) );
    my %want;
    foreach my $k ( @tmplist ) {
	$want{$k} = undef;  # only the key has to exist
    }
    @tmplist = ( qw(
	Action Mailer Organization Personal_Name
	Precedence Status Subject Thread-Topic To
	X-ASG-Orig-Subj X-FreePort-Flags X-HELO-From X-Keywords
	X-MS-Has-Attach X-MS-TNEF-Correlator X-Mailer
	X-Originating-IP X-PMFLAGS X-Spam-Level X-UIDL
	) );
    my %reject;
    foreach my $k ( @tmplist ) {
	$reject{$k} = undef;  # only the key has to exist
    }

    die "$0: found \"cur\" subirectory - QUIT\n" if -d "cur";

    my $save = undef;
    my $aux;
    for(;;){
	# we may need to read some aux input before continuing with STDIN
	if ( $auxinput ) {
	    if ( eof($aux) || ! defined($_=<$aux>) ) {
		close($aux);
		wait;
	    	$aux = $auxinput = undef;
		$_ = $save;
		$save = undef;
		goto CONTINUE;
	    }
	} else {
	    $_ = <>;
	    last if ! defined($_);
	}
	if ( substr($_,0,$SEPLEN) eq $SEPHDR ){
	    $skipmsg = 0; # if we were skipping a message, stop
	    if ( defined($file) ){
		# Flush prints $_ and returns false if not starting new msg
		&Flush($_) or next;
		# if we found a hidden message, do it before the next msg
		if ( $auxinput ) {
		    die if $save; # can not do more than one aux input
		    # save our state; when done hidden msg, return to CONTINUE
		    $save = $_;
		    my $pid = open($aux, "-|");
		    die "$0: cannot fork: $!\n  " unless defined $pid;
		    if ( $pid ) {
			$_ = <$aux>; # read first header line of aux input
			if ( ! defined($_) ) {
			    print "  Empty first line of input from formail\n";
			    $auxinput =~ s/\n\n.*/\n/s;
			    $auxinput =~ s/\n/\n  AUX /g;
			    $auxinput =~ s/  AUX $//;
			    print "  AUX $auxinput";
			    next; # will see eof and restore $save
			}
		    } else {
			# child sends aux input into $FORMAIL
			# $SIG{PIPE} = "IGNORE";
			open(FM, "|$FORMAIL")
			    or die "$0: Cannot open $FORMAIL: $!\n  ";
		        print FM $auxinput;
		        close FM or die "$0: status $! $FORMAIL\n  ";
		        exit; # forked() child also flushes buffers!
		    }
		}
	    }
	    CONTINUE: # resume here after restoring $_ from $save
	    # pick up a fake first msg containing input filename and date
	    if ( substr($_,$SEPLEN,$SEPLEN) eq $SEPHDR ){
		($FILENAME,$DATESEC) = /^$SEP: $SEP: (\S+) (\d+)$/;
		die "$0: Cannot parse special header: $_"
		    unless $DATESEC;
		$DATESTR = &MailDate($DATESEC);
		print "=== Input $FILENAME dated $DATESTR\n";
		$skipmsg = 1;
		$prevdatesec = $DATESEC; # for msgs with bad Date: lines
		next;
	    }
	    &StartNewMsg($_); # emit From_ line and start msg header
	    next;
	}
	next if $skipmsg;
	if ( $in_headers ) {
	    if ( $_ eq "\n" ) {
		$in_headers = 0;
		&CheckHeaders();
	    } else {
		++$headerlinecount;
		# replace tabs and remove trailing whitespace in the headers
		s/ *\t+ */ /g;
		s/[ \t]+$//;  # do not use \s because it removes trailing \n
		my($header,$char) = /^(\S+):\s*(\S)?/;
		if ( ! defined($char) ) {
		    if ( ! defined($header) ) {
			die "$0: Cannot parse header: $_  ";
		    }
		    next if exists($reject{$header});
		    print "  Empty header: $_";
		} else {
		    $header = lc $header;
		    if ( $header eq "received" ) {
			# find the earliest "received" date
			# UCletter/digit after semicolon up to EOL or paren
			# insist on a time zone, to skip uncertain times
			my ($rcvdate) = /;\s*([0-9A-Z][^;()\n]{5,50})/;
			my ($fussy)   = /;\s*([0-9A-Z][^;()\n]{5,50}\s+(?:[-+]\d{3,4}|UT|[-+\w]{3,9}))(?:\s*$|\s)/;
			# my ($fussy,$f2)   = /;\s*([0-9A-Z][^;()\n]{5,50}\s+(([-+]\d{3,4}|[-+\w]{3,9})))\s*(?:$|\s)/;
			# if ( defined($f2) ){
			#     print "DEBUG \"$fussy\" and \"$f2\"\n";
			# }
			my $sec;
			# some machines are just wrong
			# go back one week and forward one day, max
			# 604800 sec = 7 days
			if ( $rcvdate && length($rcvdate) >= 6
				    && ($sec=str2time($rcvdate))
				    && $sec > ($fromsec-604800)
				    && $sec < ($fromsec+86400) ){
			    if ( ! defined($fussy) ) {
				my $fix = $rcvdate;
				$fix =~ s/\sGMT([-+]\d+):/$1/;
				$fix =~ s/(\d)\s([+-]\d)/$1 ${2}000/;
				# XXX ($fussy) = /;\s*([0-9A-Z][^;()\n]{5,50}\s+(?:[-+]\d{3,4}|UT|[-+\w]{3,9}))(?:\s*$|\s)/;
				print "  No time zone: \"$rcvdate\"\n";
			    } elsif ( ! defined($recvsec) ) {
				$recvsec = $sec;
			    } elsif ( $sec < $recvsec ) {
				if ( ($recvsec-$sec) > (2*86400) ){
				    my $d1 = &MailDate($sec);
				    my $d2 = &MailDate($recvsec);
				    print "  Received: $d1 <++ $d2\n";
				}
				$recvsec = $sec;
			    }
			}
		    } elsif ( $header eq "date" ) {
			my ($userdate) = /^\S+:\s*(.+)$/;
			my $str;
			$datesec = &FixDate($userdate);
			# find the earliest date to check against
			my $minsec = $fromsec;
			if ( $recvsec && $recvsec < $minsec ) {
			    $minsec = $recvsec;
			}
			# some people do not set their computer clocks well
			# allow 1/2 hour into future and less than a day past
			# 86400 sec == 1 day   1800 sec = 1/2 hr
			if ( $datesec > ($minsec+1800)
				    || $datesec < ($minsec-80000) ){
			    $str = &MailDate($minsec);
			    my $d2 = &MailDate($datesec);
			    print "  Header $str used instead of Date: $d2\n";
			    $datesec = $minsec;
			} else {
			    $str = &MailDate($datesec);
			}
			if ( &SameDate($str,$userdate) ) {
			    $_ = "Date: $str\n";
			} else {
			    print $file "Date: $str\n";
			    $_ = "X-$_";
			}
		    } elsif ( $header eq "from" ) {
			my ($from) = /^\S+:[ \t]*(.+)$/;
			my $new = &FixFrom($from);
			# only put out X-From if $new is really different
			if ( substr($new,0,length($from)) ne $from ) {
			    print $file "From: $new\n";
			    $_ = "X-$_";
			}
		    } elsif ( $header eq "content-length" ) {
			($contentlength) = /^\S+:[ \t]*(\d+)/;
			if ( ! defined($contentlength) || $contentlength < 0 ) {
			    print "  Invalid $_";
			}
			next;
		    } elsif ( $header eq "lines" ) {
			($lines) = /^\S+:[ \t]*(\d+)/;
			if ( ! defined($lines) || $lines < 0 ) {
			    print "  Invalid $_";
			}
			next;
		    } elsif ( $header eq "status" || $header eq "x-uid" ) {
		    	next;
		    }
		    if ( exists($want{$header}) ) {
			($headers{$header}) = /^\S+:[ \t]*(.+)$/;
		    }
		}
	    }
	} else {
	    if ( $contentlength > 100 && substr($_,0,5) eq "From " ) {
		$internalfromtell = tell($file);
		$internalbodylinecount = $bodylinecount;
	    }
	    ++$bodylinecount;
	}
	print $file $_;
    }
    if ( defined($file) ){
	&Flush(undef);
    }
    print "Found $msgcount unique messages\n";
    exit;

    # turn seconds into an RFC822/2822/5322 date format
    sub MailDate {
	return strftime("%a, %d %b %Y %H:%M:%S %z",localtime($_[0]));
    }
    sub SameDate {
	my ($d1,$d2) = @_;
	$d1 =~ s/^\s+|\s+$|,//g;
	$d1 =~ s/ 0/ /g;
	$d1 =~ s/  +/ /g;
	$d2 =~ s/^\s+|\s+$|,//g;
	$d2 =~ s/ 0/ /g;
	$d2 =~ s/  +/ /g;
	return $d1 eq $d2;
    }
    # called at the end of the headers to tidy things up before the body
    sub CheckHeaders {
	# see if we can guess a missing To: line based on date
	if ( ! $headers{"to"} && ! $headers{"cc"} ) {
	    my $to;
	    if ( ! ($to=$headers{"apparently-to"}) ) {
		if ( ! ($to=$headers{"resent-to"}) ) {
		    if ( ! ($to=$headers{"bcc"}) ) {
			if ( $FILENAME !~ /outgoing/i ) {
			    if ( $fromsec <= $D1993 ) {
				$to = "idallen\@cgl.uwaterloo.ca";
			    } elsif ( $fromsec <= $D2002 ) {
				$to = "idallen\@ncf.ca";
			    } else {
				$to = "idallen\@idallen.ca";
			    }
			} else {
			    # Cannot find a valid "To:" in the outgoing header;
			    #   assume this message is part of previous message
			    # flush headers, rewind, append them to prev message
			    # (may require re-opening previous output file)
			    CopyAppend();
			    return;
			}
		    }
		}
	    }
	    print $file "To: $to\n";
	    print "  To: $to\n";
	}
	if ( ! $headers{"date"} ) {
	    # use the From_ or Received: line date
	    my $str;
	    if ( $recvsec && $recvsec < $fromsec ) {
		$datesec = $recvsec;
		$str = &MailDate($datesec);
	    } else {
		$datesec = $fromsec;
		$str = $fromstr;
	    }
	    print $file "Date: $str\n";
	    # print "  Date: $str\n";
	}
	if ( ! $headers{"from"} ) {
	    # use the envelope from line
	    my ($from) = $headers{$sep} =~ /^(\S+)/;
	    die "$0: cannot parse envelope From_: $headers{$sep}\n  "
	    	unless $from;
	    my $new = &FixFrom($from);
	    print $file "From: $new\n";
	    # print "  From: $new\n";
	    # if ( $new ne $from ) {
	 	# print $file "X-From: $from\n";
	    # }
	}
	# remember this place to rewrite later when we know the true size
	$fixedformattell = tell($file);
	$contentlength = -1 unless $contentlength;
	$lines = -1 unless $lines;
	printf $file $FIXEDFORMAT, $lines, $contentlength;
	$startbodytell = tell($file) + 2; # body starts here
    }
    sub Flush {
	my $from = $_[0];
	if ( $headerlinecount == 0 ) {
	    print "  Redundant Flush skipped for msg $msgcount\n";
	    return 1; # successful flush was already done
	}
	--$bodylinecount if $bodylinecount > 0; # do not count last newline
	# calculate the body length by diff between tell() positions
	# if the calculated length or lines differs from stated, fix them
	my $endbodytell = tell($file);
	my $bodylength = $endbodytell - $startbodytell;
	my $conlendiff = ($contentlength > 0) 
	    ? $bodylength - $contentlength : -1; # actual vs stated
	my $linediff = ($lines > 0)
	    ? $bodylinecount - $lines : -1; # actual vs stated
	if ( $conlendiff > 1 || $conlendiff < 0
	    || $linediff > 1 || $linediff < 0 ) {
	    if ( $conlendiff > 10 && $internalfromtell ) {
		# if the actual vs stated diff is not small, something is wrong
		# mis-calculated content length? we have a hidden message!
		my $internalbodylength = $internalfromtell - $startbodytell;
		my $diff = $contentlength - $internalbodylength;
		print "  Internal From_ found with Content-Length diff $diff\n";
		# read the hidden message into $auxinput
		if ( ! $file->flush ) {
		    die "$0: Unable to flush \"$filename\": $!\n  ";
		}
		seek($file,$internalfromtell,SEEK_SET)
		    or die "$0: Cannot seek to $internalfromtell: $!\n  ";
		read($file,$auxinput,$bodylength-$internalbodylength+10)
		    or die "$0: cannot read $bodylength-$internalbodylength+10 from $filename: $!\n  ";

		truncate $file,$internalfromtell; # truncate to end of this msg

		# fix the lines and length for the current msg
		$bodylinecount = $internalbodylinecount;
		$bodylength = $internalbodylength;

		$linediff = $bodylinecount - $lines; # actual vs stated
		$conlendiff = $bodylength - $contentlength; # actual vs stated

		## my $tmp = $auxinput;
		## $tmp =~ s/\n/\n  DEBUG /g;
		## $tmp =~ s/  DEBUG $//;
		## print "  DEBUGAUX $tmp";
	    } elsif ( $conlendiff < -200 ) {
		# this msg continues on to next msg; can happen if the
		# previous msg was "eaten" by formail by a too-large
		# Content-Length, and was found and resurrected via $auxinput,
		# and the resurrected msg has a Content-Length that should
		# have eaten this message; do not start a new msg
		$from =~ s/^$SEPHDR/From /; # restore the From_ line
		print "  Continues: Content-Length vs FTell $contentlength $bodylength $conlendiff\n";
		print "  Continues: $from";
		print $file $from;
		return 0; # not a real flush
	    }
	    print "  Lines vs Count $lines $bodylinecount $linediff\n"
		if $lines >= 0 && $linediff;
	    print "  Content-Length vs FTell $contentlength $bodylength $conlendiff\n"
		if $contentlength >= 0 && $conlendiff;
	    seek($file,$fixedformattell,SEEK_SET)
		or die "$0: Cannot seek to $fixedformattell: $!\n  ";
	    printf $file $FIXEDFORMAT, $bodylinecount, $bodylength;
	    seek($file,0,SEEK_END)
		or die "$0: Cannot seek back to 0: $!\n  ";
	}
	print "$msgcount $datesec hdr $headerlinecount body $bodylinecount len $bodylength\n";
	$prevdatesec = $datesec;
	$prevstartbodytell = $startbodytell;
	$prevstarthdrtell = $starthdrtell;
	$prevfixedformattell = $fixedformattell;
	$prevbodylinecount = $bodylinecount;
	$prevlines = $lines;
	%headers = ();
	$headerlinecount = 0;
	$internalfromtell = undef;
	$internalbodylinecount = undef;
	# if we exceed our output file size, close off this file
	# if ( $endbodytell >= $MAXFILESIZE ) {
	if ( $filemsgcount >= $MAXMSGPERFILE ) {
	    if ( ! $file->flush ) {
		die "$0: Unable to flush \"$filename\": $!\n  ";
	    }
	    utime($datesec,$datesec,$file);
	    if ( ! close($file) ) {
		die "$0: Unable to close \"$filename\": $!\n  ";
	    }
	    print "=== Output Close $filename tell $endbodytell\n";
	    $prevfilename = $filename;
	    $file = undef;
	    $filename = undef;
	    $filemsgcount = undef;
	}
	return 1; # successful flush
    }
    sub FixFrom {
	local $_ = $_[0];
	return $_ if /[^ ]\@[^ ]+\.[^ ][^ ]/; 
	return "$_\@ncf.ca" if /^[a-z][a-z][0-9][0-9][0-9]$/;
	if ( /^\w+$/ ) {
	    # bare word: add a domain to a bare userid e.g. "idallen"
	    my $to = $headers{"to"} || $headers{"apparently-to"} 
	    	|| $headers{"resent-to"};
	    if ( defined($to) && $to =~ /waterloo/ ) {
		return "$_\@cgl.uwaterloo.ca";
	    } elsif ( defined($to) && $to =~ /freenet|ncf\.ca/ ) {
		return "$_\@ncf.ca";
	    } elsif ( $fromsec <= $D1993 ) {
		return "$_\@cgl.uwaterloo.ca";
	    } elsif ( $fromsec <= $D2002 ) {
		return "$_\@ncf.ca";
	    } else {
		return "$_\@idallen.ca";
	    }
	}
	# (?=pattern) - man perlre - zero width positive look-ahead assertion
	if ( /\bidallen\@(?:janlinux|oak|elm)(?:$|[^\w.])/ ) {
	    s/\bidallen\@(?:janlinux|oak|elm)(?=$|[^\w.])/idallen\@ncf.ca/ if /NCFreeNet/;
	    s/\bidallen\@(?:janlinux|oak|elm)(?=$|[^\w.])/idallen\@idallen.ca/;
	}
	return $_;
    }
    # take a string and return reasonable seconds
    sub FixDate {
	local $_ = my $str = $_[0];
	my $parsed;
	while ( ! defined($parsed=str2time($_)) ) {
	    next if s/Atlantic Daylight Time/-0300/;
	    next if s/Atlantic Standard Time/-0400/;
	    next if s/Eastern Daylight Time/-0400/;
	    next if s/Eastern Standard Time/-0500/;
	    next if s/Central Daylight Time/-0500/;
	    next if s/Central Standard Time/-0600/;
	    next if s/Mountain Daylight Time/-0600/;
	    next if s/Mountain Standard Time/-0700/;
	    next if s/Pacific Daylight Time/-0700/;
	    next if s/Pacific Standard Time/-0800/;
	    next if s/E\. Europe Daylight Time/+0300/;
	    next if s/E\. Europe Standard Time/+0200/;
	    next if s/GMT Standard Time/+0000/;
	    next if s/ PM P/ P/;
	    next if s/\bIST\b/+0100/;
	    next if s/\bNOVT\b//;
	    next if s/\bH0600\b/-0300/;
	    next if s/:(\d\d)-(\d\d\d\d)$/:$1 -$2/;
	    # chop alphanumeric junk off the right side and try again
	    next if s/ +[\w]+ *$// && length >= 6;

	    # give up and use the date of the Received: or previous msg
	    $parsed = $recvsec ? $recvsec : $prevdatesec;
	    last;
	} continue {
	    print "  Parsing \"$str\" failed - trying \"$_\"\n";
	}
	# if the date is unreasonable, use the date of the previous msg
	if ( $parsed < $D1980 ) {
	    my $sec = $recvsec ? $recvsec :
		( $prevdatesec ? $prevdatesec : $DATESEC );
	    my $pd = &MailDate($sec);
	    print "  Parsed date $str < Jan 1980 - fixed to $pd\n";
	    $parsed = $sec;
	} elsif ( $parsed > ($DATESEC+86400) ) {
	    # allow one day (60*60*24=86400) of slop
	    my $sec = $recvsec ? $recvsec :
		( $prevdatesec ? $prevdatesec : $DATESEC );
	    my $pd = &MailDate($sec);
	    print "  Parsed date $str > file $DATESTR - fixed to $pd\n";
	    $parsed = $sec;
	}
	return $parsed;
    }
    # Copy/Append the headers from the open $file to the previous message
    # - flush output into current message
    # - rewind to start of current message
    # - if current message starts at top of file:
    #   - open old file for appending
    #   - copy most headers onto end of old file
    #   - close new file and unlink it
    #   - put old file descriptor in to new file descriptor $file
    #   - copy old file name to new file $filename
    #   - continue on writing $file as if nothing had happened
    # - else
    sub CopyAppend {
	local $_; # this is necessary for the while(<>) handle
	my $linecount = 0;

	if ( ! $file->flush ) {
	    die "$0: Unable to flush \"$filename\": $!\n  ";
	}
	seek($file,$starthdrtell,SEEK_SET)
	    or die "$0: Cannot seek to $starthdrtell in $filename: $!\n  ";
	if ( $starthdrtell == 0 ) {
	    # we just started a new file and have to add this msg to
	    # the end of the previous file and previous msg, fixing
	    # the Lines: and Content-Length: fields there
	    print "  Rewinding and moving content to $prevfilename\n";
	    my $oldfd;
	    open($oldfd,">>",$prevfilename)
	    	or die "$0: Cannot append to \"$prevfilename\": $!\n  ";
	    while ( <$file> ) {
		s/^$SEPHDR/From /; # restore the From_ line
		next if /^X-IDALLEN/; # remove my added headers
		print $oldfd $_;
		++$linecount;
	    }
	    # get rid of the file we just read (now empty)
	    close($file) or die "$0: $!\n  ";
	    unlink($filename);
	    # now pretend we were writing the old file all along
	    $filename = $prevfilename;
	    $file = $oldfd;
	    $filemsgcount = $MAXMSGPERFILE; # but start a new file right away
	} else {
	    # have to add this msg to the end of the previous msg, fixing
	    # the Lines: and Content-Length: fields there
	    print "  Seeking and moving content to previous msg\n";
	    my $tmp = "";
	    open(MEMORY, ">", \$tmp)
		or die "$0: Cannot open in-memory fd: $!\n  ";
	    while ( <$file> ) {
		s/^$SEPHDR/From /; # restore the From_ line
		next if /^X-IDALLEN/; # remove my added headers
		print MEMORY $_;
		++$linecount;
	    }
	    close(MEMORY) or die "$0: $!\n  ";
	    seek($file,$starthdrtell,SEEK_SET)
		or die "$0: Cannot seek in $filename: $!\n  ";
	    print $file $tmp;
	    truncate $file,tell($file); # truncate to this length
	    $tmp =~ s/\n/\n  DEBUG /g;
	    $tmp =~ s/  DEBUG $//;
	    print "  DEBUGCPY $tmp";
	}
	$fromsec = $datesec = $recvsec = $prevdatesec;
	$fixedformattell = $prevfixedformattell;
	$startbodytell = $prevstartbodytell;
	$starthdrtell = $prevstarthdrtell;
	$lines = $prevlines;
	$bodylinecount = $prevbodylinecount + $linecount;
	$contentlength = -1; # ignore any previous Content-Length header
	--$msgcount; # we did not really start a new message
	--$filemsgcount; # we did not really start a new message
    }
    sub StartNewMsg {
	local $_ = $_[0];
	# my $filename = sprintf( "%010d.%08d.%s", $NOW, ++$msgcount, $FILENAME );
	++$msgcount;
	if ( ! defined($file) ) {
	    my $upper = int($msgcount / ( $MAXPERDIR * $MAXMSGPERFILE ) );
	    my $lower = $msgcount % ( $MAXPERDIR * $MAXMSGPERFILE);
	    my $dirname = sprintf("%s/%04d", $OUT, $upper);
	    $filename = sprintf("%s/%06d", $dirname, $lower);
	    # truncate the file then open in read/write mode
	    while ( ! open($file,"+>",$filename) ) {
		# if the directory is missing, create it
		next if mkdir("$dirname");
		die "$0: Unable to open new \"$filename\": $!\n  ";
	    }
	    print "=== Output Open $filename\n";
	    $filemsgcount = 0;
	}
	++$filemsgcount;
	#  "bounce-MusicNewsletter 5-idallen=freenet.carleton.ca"@announce2.mp3.com  Tue Sep 28 14:11:55 1999
	#  idallen  Sun Feb 18 00:10:49 1990 remote from watcgl
	#  "\"janes56314@texas.net\" <janes56314@texas.net> Bcc":  Sat Oct  2 16:30:16 1999
	my ($date) = /^\S+\s+(?:"(?:\\"|[^"])+"\S+|[^\s"]+)\s+(.*?)(?: remote from .*)?$/;
	$fromsec = &FixDate($date);
	$fromstr = &MailDate($fromsec);
	$starthdrtell = tell($file);
	$in_headers = 1;
	$headerlinecount = 1;
	$bodylinecount = 0;
	$datesec = undef;
	$recvsec = undef;
	$fixedformattell = undef;
	$startbodytell = undef;
	$contentlength = undef;
	$lines = undef;

	($headers{$sep}) = /^\S+:[ \t]*(.+)$/;
	substr($_,0,$SEPLEN) = "From ";

	print $file "$_";
	print $file "X-IDALLEN-Mbox-File: $FILENAME\n";
	print $file "X-IDALLEN-Mbox-Date: $DATESEC $DATESTR\n";
	print $file "X-IDALLEN-Envl-File: $filename $msgcount $filemsgcount\n";
	print $file "X-IDALLEN-Envl-Date: $fromsec $fromstr\n";
    }
'