#!/usr/bin/perl -w
use strict qw(refs subs vars);

# NoWS = no whitespace
#
# Given a patch (as a unified diff, as from git), remove all 
# whitespace-only changes. Reads from stdin, writes a new patch
# to stdout.
#
# Copyright 2013 Phil Carmody (pc+nows@asdf.org)
# Released under the "Creative Commons Attribution-ShareAlike" licence
# (legalese) http://creativecommons.org/licenses/by-sa/3.0/legalcode
# (readable) http://creativecommons.org/licenses/by-sa/3.0/
#
# As this is clearly a million miles from being a finished product, caveat 
# downloader, users are encouraged to try to fix its shortcomings, and to
# pass those those modifications back to the author.
#
# BUGS: Oh come on, this is v0.1!
# E.g. currently the line numbers in the patches produced are all b0rked.

# currently act on just one file

my $context=3;
my $verbose=1; # 1 documents each file, 2 each chunk, 3 each sliver, 4 each line
my $minimal=1; # try extra hard to minimise output

sub processsliver($$$)
{
  my ($m, $p, $i)=@_;
  my ($ms,$ps)=('','');
  print STDERR ("Process sliver:\n") if($verbose>2);
  if(!scalar(@$m) != !scalar(@$p)) { return 1; }
  map { my $o=substr($_,1); push(@$i, " $o"); $ms.=$o; } @$m;
  $ms=~s/\s+//g;
  map { $ps.=substr($_,1); } @$p;
  $ps=~s/\s+//g;
  print STDERR ("Comparing\n\t'$ms'\n\t'$ps'\n = ", ($ms ne $ps)?1:0, "\n") if($verbose>2);
  return ($ms ne $ps);
  #if($ms eq $ps) {
  #  # If there are ws differences only, then @undo is used
  #  # to return plain context rather than the +/- lines.
  #  return 0;
  #} else {
  #  @$i=();
  #  return 1;
  #}
}
sub minimisesliver_crude($$$$)
{
  my ($m, $p, $pr, $po)=@_;
  my ($nm, $np)=(scalar(@$m), scalar(@$p));
  print STDERR ("ms_c entry: nm=$nm np=$np\n") if($verbose>2);
  if(!$nm && !$np) { return 0; } # both empty = identical (imposs)
  if(!$nm || !$np) { return 1; } # one full, one empty - can't share anything
  my $min = $nm>$np ? $np : $nm;
  for(my $n=0; $n<$min; ++$n) {
    my $l=substr($m->[$n],1);
    my $ml=$l;
    my $pl=substr($p->[$n],1);
    $ml=~s/\s+//g;
    $pl=~s/\s+//g;
    if($ml eq $pl) { print STDERR ("common line $n\n") if($verbose>3); push(@$pr, " $l"); }
    else { last; }
  }
  if(scalar(@$pr)) {
    splice(@$m, 0, scalar(@$pr));
    splice(@$p, 0, scalar(@$pr));
    ($nm, $np)=(scalar(@$m), scalar(@$p));
    print STDERR ("ms_c ttrim: nm=$nm np=$np\n") if($verbose>2);
    if(!$nm && !$np) { return 0; } # both empty = identical - yay!
    if(!$nm || !$np) { return 1; } # one full, one empty - can't share anything
    $min = $nm>$np ? $np : $nm;
  }
  # and now search back from the end
  for(my $n=0; $n<$min; ++$n) {
    my $l=substr($m->[$nm-1-$n],1);
    my $ml=$l;
    my $pl=substr($p->[$np-1-$n],1);
    $ml=~s/\s+//g;
    $pl=~s/\s+//g;
    if($ml eq $pl) { print STDERR ("common line -$n\n") if($verbose>3); push(@$po, " $l"); }
    else { last; }
  }
  if(scalar(@$po)) {
    splice(@$m, -scalar(@$po));
    splice(@$p, -scalar(@$po));
    ($nm, $np)=(scalar(@$m), scalar(@$p));
    print STDERR ("ms_c btrim: nm=$nm np=$np\n") if($verbose>2);
    if(!$nm && !$np) { return 0; } # both empty = identical (imposs)
    if(!$nm || !$np) { return 1; } # one full, one empty - can't share anything
  }
  return 1; # some difference remains
}

sub minimisesliver($$$$)
{
  my ($m, $p, $pr, $po)=@_;
  my ($nm, $np)=(scalar(@$m), scalar(@$p));
  print STDERR ("ms entry: nm=$nm np=$np\n") if($verbose>2);
  if(!$nm && !$np) { return 0; } # both empty = identical (imposs)
  if(!$nm || !$np) { return 1; } # one full, one empty - can't share anything
  my $mscrude=minimisesliver_crude($m, $p, $pr, $po);
  print STDERR ("ms mscrude=$mscrude\n") if($verbose>2);
  if(!$mscrude) { return 0; }
  ($nm, $np)=(scalar(@$m), scalar(@$p));
  print STDERR ("ms postcrude: nm=$nm np=$np\n") if($verbose>2);
  if(!$nm && !$np) { return 0; } # both empty = identical (imposs)
  if(!$nm || !$np) { return 1; } # one full, one empty - can't share anything
  # not just indentation changes, some lines have been split perhaps?
  my %m=(''=>0);
  my $mb='';
  for(my $n=0; $n<$nm; ++$n) {
    my $nows=substr($m->[$n],1);
    $nows=~s/\s+//g;
    $mb.=$nows;
    $m{$mb}=$n+1;
  }
  my ($matchm, $matchp)=(-1, -1);
  my $pb='';
  for(my $n=0; $n<$np; ++$n) {
    my $nows=substr($p->[$n],1);
    $nows=~s/\s+//g;
    $pb.=$nows;
    if(defined($m{$pb})) {
      $matchm=$m{$pb}; $matchp=$n+1;
    }
  }
  if($matchm>-1) {
    my @removed=$matchm ? splice(@$m, 0, $matchm) : ();
    foreach(@removed) { s/^-/ /; push(@$pr, $_); }
    splice(@$p, 0, $matchp) if($matchp);
    ($nm, $np)=(scalar(@$m), scalar(@$p));
    print STDERR ("ms ttrim: nm=$nm np=$np\n") if($verbose>2);
    if(!$nm && !$np) { return 0; } # both empty = identical - yay!
    if(!$nm || !$np) { return 1; } # one full, one empty - can't share anything
  }
  # and now search back from the end
  %m=(''=>0);
  $mb='';
  for(my $n=0; $n<$nm; ++$n) {
    my $nows=substr($m->[$nm-1-$n],1);
    $nows=~s/\s+//g;
    $mb=$nows.$mb;
    $m{$mb}=$n+1;
  }
  ($matchm, $matchp)=(-1, -1);
  $pb='';
  for(my $n=0; $n<scalar(@$p); ++$n) {
    my $nows=substr($p->[$np-1-$n],1);
    $nows=~s/\s+//g;
    $pb=$nows.$pb;
    if(defined($m{$pb})) {
      $matchm=$m{$pb}; $matchp=$n+1;
    }
  }
  if($matchm>-1) {
    my @removed=$matchm ? splice(@$m, -$matchm) : ();
    foreach(@removed) { s/^-/ /; unshift(@$pr, $_); }
    splice(@$p, -$matchp) if($matchp);
    ($nm, $np)=(scalar(@$m), scalar(@$p));
    print STDERR ("ms rtrim: nm=$nm np=$np\n") if($verbose>2);
    if(!$nm && !$np) { return 0; } # both empty = identical (imposs)
    if(!$nm || !$np) { return 1; } # one full, one empty - can't share anything
  }
  return 1;
}

sub clean_chunk(@)
{
  my $header=shift(@_);
  my ($subfrom,$sub,$addto,$add)=($header=~m/^\@\@ -(\d+),(\d+) \+(\d+),(\d+).*/);
  my $seek=0;
  while($seek<scalar(@_) and $_[$seek] =~ /^ /) { ++$seek; }
  while($seek>$context) {
    shift(@_); ++$subfrom; --$sub; ++$addto; --$add;
    --$seek;
  }
  $seek=0;
  while($seek<scalar(@_) and $_[$#_ - $seek] =~ /^ /) { ++$seek; }
  while($seek>$context) {
    pop(@_); --$sub; --$add;
    --$seek;
  }
  $header =~ s/^\@\@ -(\d+),(\d+) \+(\d+),(\d+)/@@ -$subfrom,$sub +$addto,$add/;
  unshift @_,$header;
  return @_;
}

sub processchunk(@)
{
  my $header=shift(@_);
  print STDERR (scalar(@_), " lines follow $header") if($verbose>1);
  my @build=();
  my @context=();
  my @minus=();
  my @plus=();
  my $realdiffs=0;
  my $outputdrift=0;
  foreach(@_) {
    if(m/^-/) {
      print STDERR ("Got MINUS $_") if($verbose>3);
      push(@minus, $_);
    } elsif(m/^\+/) {
      print STDERR ("Got PLUS $_") if($verbose>3);
      push(@plus, $_);
    } else {
      if(@minus||@plus) {
	my $haddiffs;
	my @pre=();
	my @post=();
	if($minimal) {
	  $haddiffs = minimisesliver(\@minus,\@plus,\@pre,\@post);
	} else {
	  $haddiffs = processsliver(\@minus,\@plus,\@pre);
	}
	if($haddiffs) {
	  ++$realdiffs;
	  push(@build, @context, @pre, @minus, @plus, @post);
	  ((@pre || @post) && print STDERR ("trimmed ".scalar(@pre)." from top and ".scalar(@post)." from bottom\n")) if($verbose>2);
	} else {
	  print STDERR ("minimised whole sliver\n") if($verbose>2);
	  die("post nonempty") if(@post);
	  push(@build, @context, @pre);
	  $outputdrift+=scalar(@pre)-scalar(@plus);
	}
	@context=@minus=@plus=();
      }
      print STDERR ("CONTEXT $_") if($verbose>3);
      push(@context, $_);
    }
  }
  if(@minus||@plus) {
    my $haddiffs;
    my @pre=();
    my @post=();
    if($minimal) {
      $haddiffs = minimisesliver(\@minus,\@plus,\@pre,\@post);
    } else {
      $haddiffs = processsliver(\@minus,\@plus,\@pre);
    }
    if($haddiffs) {
      ++$realdiffs;
      push(@build, @context, @pre, @minus, @plus, @post);
    } else {
      die("post nonempty") if(@post);
      push(@build, @context, @pre);
      $outputdrift+=scalar(@pre)-scalar(@plus);
      print STDERR ("minimised whole sliver.\n") if($verbose>2);
    }
  } elsif(@context) {
    push(@build, @context);
  }
  if($realdiffs) {
    # Firstly, have we changed thenumber of lines in the target?
    print STDERR ("With outputdrift=$outputdrift, $header becomes ") if($verbose>2);
    $header=~m/^\@\@ -\d+,\d+ \+\d+,(\d+).*/;
    my $newsize=$1+$outputdrift;
    $header=~s/^(\@\@ -\d+,\d+ \+\d+,)\d+(.*)/$1$newsize$2/;
    print STDERR ($header) if($verbose>2);

    # Secondly - are we leaving too much trailing context?
    # Not needed for git apply - only plain old patch!
    #my $ix=@build;
    #while(--$ix>=0) {
    #  if($build[$ix] !~ m/^ /) { last; }
    #}

    unshift(@build, $header);
    @build = clean_chunk(@build);
    @build;
  } else {
    ();
  }
}

sub processchunk_crude(@)
{
  my $header=shift(@_);
  my ($sub,$add)=($header=~m/^\@\@ -\d+,(\d+) \+\d+,(\d+).*/);
  print STDERR (scalar(@_), " lines ($sub sub, $add add) follow $header") if($verbose>1);
  my @build=();
  my @context=();
  my @minus=();
  my @plus=();
  my $realdiffs=0;
  my $outputdrift=0;
  foreach(@_) {
    if(m/^-/ and $sub) {
      print STDERR ("CGot($sub,$add) MINUS $_") if($verbose>3);
      push(@minus, $_); $sub--;
    } elsif(m/^\+/ and $add) {
      print STDERR ("CGot($sub,$add) PLUS $_") if($verbose>3);
      push(@plus, $_); $add--;
    } elsif($add and $sub) {
      print STDERR ("CGot($sub,$add) context $_") if($verbose>3);
      push(@minus, '-'.substr($_,1)); $sub--;
      push(@plus, '+'.substr($_,1)); $add--;
    } elsif(!$add && !$sub) {
      last;
    } else {
      print STDERR ("ERROR: state($sub,$add), unexpected $_");
    }
  }
  my @undo=();
  if(processsliver(\@minus,\@plus,\@undo)) {
    # can't process in one chunk - look at individual slivers
    unshift(@_, $header);
    return processchunk(@_);
  } else {
    ();
  }
}

sub parsefile(@)
{
  my $inchunks=0;
  my @build=();
  my @chunk=();
  my $realchunks=0;
  foreach(@_) {
    if(m/^@@ /) {
      if($inchunks) {
	my @newchunk=processchunk_crude(@chunk);
	if(@newchunk) { ++$realchunks; push(@build, @newchunk); }
	@chunk=();
      } else {
	$inchunks=1;
      }
      push(@chunk,$_);
    } else {
      if($inchunks) {
	push(@chunk, $_);
      } else {
	push(@build, $_);
      }
    }
  }
  if($inchunks) {
    my @newchunk=processchunk_crude(@chunk);
    if(@newchunk) { ++$realchunks; push(@build, @newchunk); }
    @chunk=();
  }
  return $realchunks ? @build : ();
}

my @file=();
while(<>) {
  if(m/^diff --git/)
    {
      if(@file) {
	my @newfile=parsefile(@file);
	if(@newfile) { print(@newfile); }
	@file=();
      }
      print STDERR ("New file $_") if($verbose>0);
    }
  push(@file, $_);
}
if(@file) {
  my @newfile=parsefile(@file);
  if(@newfile) { print(@newfile); }
}


