#!/usr/bin/perl
#
# DISCLAIMER:
#
# I make no claims about the ability of this script to maintain
# the integrity of your email attachments.  If they become desperately
# and horribly corrupted when they are stripped, and you delete the
# original mbox file, it's your problem, not mine.  Otherwise have
# fun.
#
# mimeStrip.pl Version 0.8
#
#   - if the boundary contained a ++ it would be misinterpreted
#     perl interprets the plus signs as nested regexps (rickythesk8r)
#
# mimeStrip.pl Version 0.7
#
#   - the boundary would not be found if the boundary keyword were 
#     given in uppercase 
#
# mimeStrip.pl Version 0.6
#
#   - fixed a problem with perl 5.8 precompilation failing to
#     lock what was being interpreted as stdin
#
# mimeStrip.pl Version 0.5
#
#   - if no "filename" is given in the Content-Disposition, check
#     for a "name" in the Content-Type.
#   - boundary match regex changed
#
# mimeStrip.pl Version 0.4
#
#   - The content type boundary was not being found
#     if it was not lower-case
#
# mimeStrip.pl Version 0.3
#
# 03.04.2003
#
#   - some filenames were not being found
#   - should run with no args
#
# mimeStrip.pl Version 0.2
#
# 19.02.2003
#   - some 'boundaries' were not being matched if they contained
#     characters that were being interpolated during the match,
#     using 'index' instead of m//
#   - the wrong 'envelope' was being assigned to a message, not a
#     huge problem, but not good either, D'Oh!
#

require 5.002;                # for SUPER
use MIME::Base64;
use File::Basename;
use File::stat;
use Fcntl ':flock';
use Getopt::Long;
use Time::ParseDate;   

# Usage is: $me --in folder --out folder.out --dir output-directory

#if ($#ARGV == -1) {
#   Usage();
#}

$result = GetOptions qw( --in=s --out=s --swap! --dir=s --cat! --help! );

if ( "$opt_help" ne "") {
   Usage();
}

$stdin = STDIN;
if ( "$opt_in" ne "" ) {
   $folder=$opt_in;
   $fs = stat $folder;

   open($stdin,"+<$folder") || die "Error: opening input folder $folder\n";
   if (!lock($stdin)) {
      close($stdin);
      printf STDERR "Error: could not lock folder $folder\n";
   }
}

$cat=("$opt_cat" eq "") ? ">" : ">>";

if ( "$opt_out" ne "" ) {
   $output=$opt_out;
   # redirect STDOUT to $output
   open(STDOUT,"$cat$output") || die "Error: opening output folder $output\n";
}

if ( "$opt_dir" eq "" ) {
   $opt_dir=".";
}

@header = ();
@body = ();
$last=0;
$date=0;
while(<$stdin>) {

   chomp;

   if (/^From /) {
      $env = $_;	# this is for the next header!
      $inheader = 1;

      if (@header == NULL) {
         $envelope = $env;
         next;
      }

      processMessage();

      $envelope = $env;
      @header = ();
      @body = ();
      $last = 0;
      $date = 0;

      next;
   }
   if ($inheader) {
      if (/^$/) {
         $inheader = 0;
         @body = ();
      }
      elsif (/^Date:(.*)/) {
         push @header, $_;
         $last++;
         $date=$1;
      }
      elsif (/^(\s+)(.*)/) {
         $header[$last-1] .= "\n$1$2";
         next;
      }
      elsif (/^(\S+):(.*)/) {
         push @header, $_;
         $last++;
         next;
      }
      else {
         printf STDERR "-----\nUnexpected header entry\n";
         printf STDERR "$_-----\n";
         next;
      }
   }

   push @body,$_;

};

processMessage();

if ($stdin != STDIN) {
   unlock($stdin);
   close($stdin);
}

if ($fs) {
   chmod $fs->mode, $output;
   chown $fs->uid,$fs->gid, $output;
}

if ( "$opt_swap" ne "" && "$opt_in" ne "" && "$opt_out" ne "" ) {
	printf STDERR "swapping $opt_in $opt_out\n";
	rename("$opt_in", "$opt_in".swap);
	rename("$opt_out", "$opt_in");
	rename("$opt_in".swap, "$opt_out");
}

sub aprint {
   $out=$_[0]; shift;
   foreach $e (@_) {
      print $out "$e\n";
   }
}

sub processMessage {

   my $boundary="";

   # look for multipart in Content-Type header
   foreach $h (@header) {
      if ($h =~ /^Content-Type:(.*)/i) {
         $_ = $h;
         if (/multipart\/mixed/i) {
            #if (/boundary="(\S+)"/i) << replace with the regex below to deal with
	    #boundaries without quotes
            if (/boundary\s*=[\s"]*([\S]+[^"]+)/i) {
               $boundary=$1;
	       $boundary =~ s/\+/\\+/g ;
               last;
            }
         }
      }
   }

   print STDOUT "$envelope\n";
   print STDERR "$envelope\n";
#  print STDERR "."; # a bit of feedback to stderr

   aprint(STDOUT,@header);
   if ($boundary) {

      my @mimepart=();
      my $nbody=$#body+1;
      for ($i=0; $i < $nbody; $i++) {
         $_ = $body[$i];
         
         if (-1 != index $_,"--$boundary") {

            next if $#mimepart == -1;

            # process mimepart
            if ($trencode =~ /base64/i) {

               if ($cdisp =~ /filename="(.*)"/i) {
                  # get rid of any path specs
                  $filename= basename "$1";
               }
               elsif ($cdisp =~ /filename=(.*)/i) {
                  # get rid of any path specs
                  $filename= basename "$1";
               }
	       elsif ($ctype =~ /name="(.*)"/i) {
                  $filename= basename "$1";
	       }
	       elsif ($ctype =~ /name=(.*)/i) {
                  $filename= basename "$1";
	       }
               else {
                  print STDERR "\n Warning: no filename given\n";
                  $filename="noname.bin";
               }

               # account for duplicates
               $filename = uniqueName("$opt_dir/$filename");

               if (open(FILE,">$filename")) {

                  print STDERR " Writing $filename\n";

                  binmode(FILE);
                  $go=0;
                  foreach $mp (@mimepart) {
                     $_ = $mp;
                     if ($go == 0) {
                        # start processing after reaching a blank line in @mimepart
                        $go = 1 if (/^$/);
                        next;
                     }
                     next if ( /^$/ );         # skip blank lines
                     last if (/--$boundary/);  # stop at boundary

                     $decoded = decode_base64($mp);
                     print FILE $decoded;
                  }
                  close(FILE);

                  if ($fs) {
                     chmod $fs->mode, $filename;
                     chown $fs->uid,$fs->gid, $filename;
                  }
                  if ($date) {
                     my $mtime=parsedate($date);
                     utime $mtime, $mtime, $filename if $mtime;
                  }

                  # tell the L^Huser where their attachment is
                  print STDOUT "Content-Type: text/plain; charset=us-ascii\n";
                  print STDOUT "Content-Transfer-Encoding: 7bit\n\n";
                  print STDOUT "*****\n";
                  print STDOUT "***** Content-Type: $ctype\n";
                  print STDOUT "***** Content-Transfer-Encoding: $trencode\n";
                  print STDOUT "***** Content-Description: $cdesc\n";
                  print STDOUT "***** Content-Disposition: $cdisp\n";
                  print STDOUT "*****\n\n";
                  print STDOUT "***** Attached file saved to disk: $filename\n\n";
               }
               else {
                  printf STDERR "\n Error: could not open attachment file $filename\n";
                  aprint(STDOUT,@mimepart);
               }
            }
            else {
               aprint(STDOUT,@mimepart);
            }

            print STDOUT "$body[$i]\n"; # print the boundary marker

            @mimepart = ();
            $filename="";
            $trencode="";
            $ctype="";
            $cdisp="";
            $cdesc="";
         }
         else {
            if ( /^Content-Transfer-Encoding:(.*)/i ) {
               $trencode=$1;
            }
            elsif( /^Content-Type:(.*)/i ) {
               $ctype=$1;
               while ( /;$/ ) {
                  $ctype .= $body[++$i];
                  $_ .= $body[$i];
               }
            }
            elsif( /^Content-Disposition:(.*)/i ) {
               $cdisp=$1;
               while(/;$/) {
                  $cdisp .= $body[++$i];
                  $_ .= $body[$i];
               }
            }
            elsif( /^Content-Description:(.*)/i ) {
                $cdesc=$1;
            }
            push @mimepart,$_;
         }
      }
      aprint(STDOUT,@mimepart);
   }
   else {
      aprint(STDOUT,@body);
   }
   print STDOUT "\n";
}

sub uniqueName {
   #$filename = uniqueName("$opt_dir/$filename");

   my $f, $p, $g, $x;
   
   ($f,$p) = fileparse($_[0]);

   @chunks = split( /\./, $f );
   $x = ".bin";
   if ($#chunks > 0) {
      $f = $chunks[0];
      $x = ".$chunks[$#chunks]";
      for (my $i=1; $i<$#chunks;$i++) {
         $f .= ".$chunks[$i]";
      }
   }

   my $g = "$p$f$x";
   while( -f "$g" ) {
      ++$n;
      $g="$p$f-$n$x";
   }

   return $g;
}

sub lock {
   # true on success
   $rval = flock($_[0],LOCK_EX | LOCK_NB); # exclusive lock, non blocking
   return $rval;
}

sub unlock {
   $rval = flock($_[0],LOCK_UN);
   return $rval;
}

sub Usage() {
   my $usage;
   my $me=basename $0;

$usage=<<USAGE;
$me ver. 0.8 Copyleft 2003, Steeve McCauley

 Usage is: $me --in folder --out folder.out --dir output-directory

   --in   input folder (default: stdin )
   --out  output folder (default: stdout)
   --swap rename in folder to out, and vice versa
   --dir  directory for decoded attachments (default: current)
   --cat  concatonate output to output folder (default: truncate)
   --help this help message

   Example,

      $me --dir=~user/Mail/mime < user > user.stripped

USAGE
   print $usage;
   exit(1);
}

