# Copyright (C) 2004-2008 Daniel Vrit

# This file is part of Manitou-Mail (see http://www.manitou-mail.org)

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.

# Filtering code for manitou-mdx 0.9.11

use strict;

package Manitou::Filters;

use Data::Dumper;
use MIME::Head;
use MIME::Parser;
use MIME::Words qw(:all);
use Manitou::Encoding;
use DBI;

my $filter_exprs;

# ctxt->evp: position in expression during left->right traversal
# ctxt->expr: current expression
# ctxt->len: equal to length(ctxt->expr)
# ctxt->npar: current level of parenthesis
# ctxt->errstr: error string
# ctxt->evstack: list of operand values handled as a stack
# ctxt->mailobj: mime object for current mail
# ctxt->mail_id: mail_id of current mail if available

my $PRI_DOT=10;
my $PRI_AND=24;
my $PRI_OR=22;
my $PRI_UNARY_NOT=30;
my $PRI_EQ=40;
my $PRI_CONTAINS=40;
my $PRI_REGEXP=40;

my $TYPE_STRING=1;
my $TYPE_FUNC=2;
my $TYPE_NUMBER=3;
my $TYPE_SUBEXPR=4;
my $TYPE_PSTRING=5;
my $TYPE_MAILID=6;

my %eval_binary_ops =
  (
   "and" => {"func" => sub { return $_[0] && $_[1]; },
	     "pri" => $PRI_AND},
   "or"  => {"func" => sub { return $_[0] || $_[1]; },
	     "pri" => $PRI_OR},
   "contains"  => {"func" => sub { return index($_[0],$_[1])>=0; },
	     "pri" => $PRI_CONTAINS},
   # alias for 'contains'
   "contain"  => {"func" => sub { return index($_[0],$_[1])>=0; },
	     "pri" => $PRI_CONTAINS},
   "equals"  => {"func" => sub { return $_[0] eq $_[1]; },
	     "pri" => $PRI_EQ},
   "eq"  => {"func" => sub { return $_[0] eq $_[1]; },
	     "pri" => $PRI_EQ},
   "ne"  => {"func" => sub { return $_[0] ne $_[1]; },
	     "pri" => $PRI_EQ},
   "regmatches"  => {"func" => sub { return $_[0] =~ /$_[1]/ },
	     "pri" => $PRI_REGEXP}
  );

my %eval_unary_ops =
  (
   "not" => {"func" => sub { return !$_[0]; },
	     "pri" => $PRI_UNARY_NOT}
  );


sub get_parent {
  my ($mailobj,$dbh) = @_;
  my $in_reply_to=$mailobj->head->get('In-Reply-To');
  my $sth=$dbh->prepare("SELECT mail_id FROM mail WHERE message_id=?");
  if ($in_reply_to =~ /.*\<(.*)\>/) {
    $in_reply_to=$1;
    $sth->execute($in_reply_to);
    my @r=$sth->fetchrow_array;
    return $r[0] if (@r);
  }

  my $other_mails=$mailobj->head->get('References');
  my @thread_msgs=split / /, $other_mails;
  for my $m (reverse(@thread_msgs)) {
    $m=$1 if ($m =~ /.*\<(.*)\>/);
    $sth->execute($m);
    my @r=$sth->fetchrow_array;
    return $r[0] if (@r);
  }
  undef;
}

sub func_parent {
  my ($ctxt)=@_;
  my $mail_id=$ctxt->{mail_id};
  if ($mail_id) {
    my $sth=$ctxt->{dbh}->prepare("SELECT in_reply_to FROM mail WHERE mail_id=?");
    $sth->execute($mail_id);
    my @r=$sth->fetchrow_array;
    return $r[0];		# may be undef
  }
  elsif ($ctxt->{mime_obj}) {
    return get_parent($ctxt->{mime_obj}, $ctxt->{dbh});
  }
  undef;
}

sub func_has_parent {
  return (func_parent(@_)!=0);
}

my %eval_funcs =
  (
   "rawheader" =>
   {"func" => sub {
      my $ctxt=$_[0];
      my $o=$ctxt->{mime_obj};
      my $v = $o->head->get($_[1]);
      chomp $v;
      return $v;
    },
    "args" => 1,
    "return_type" => $TYPE_STRING
   },

   "header" =>
   {"func" => sub {
      my $ctxt=$_[0];
      my $o=$ctxt->{mime_obj};
      my $v = $o->head->get($_[1]);
      chomp $v;
      return Manitou::Encoding::header_decode($v);
    },
    "args" => 1,
    "return_type" => $TYPE_STRING
   },

   "headers" =>
   {"func" => sub {
      my $ctxt=$_[0];
      my $o=$ctxt->{mime_obj};
      if (!exists $ctxt->{cache}->{headers}) {
	$ctxt->{cache}->{headers}=Manitou::Encoding::header_decode($o->head->as_string);
      }
      return $ctxt->{cache}->{headers};
    },
    "args" => 0,
    "return_type" => $TYPE_STRING
   },

   "rawheaders" =>
   {"func" => sub {
      my $ctxt=$_[0];
      my $o=$ctxt->{mime_obj};
      return $o->head->as_string;
    },
    "args" => 0,
    "return_type" => $TYPE_STRING
   },

   "recipients" =>   # Returns all recipients (To, Cc, Bcc) separated by commas
   {"func" => sub {
      my $ctxt=$_[0];
      my $o=$ctxt->{mime_obj};
      if (exists $ctxt->{cache}->{recipients}) {
	return $ctxt->{cache}->{recipients};
      }
      my $r;
      for my $h ("To", "Cc", "Bcc") {
	my $v = Manitou::Encoding::header_decode($o->head->get($h));
	$r = defined $r ? "$r,$v" : $v;
      }
      return $ctxt->{cache}->{recipients}=$r;
    },
    "args" => 0,
    "return_type" => $TYPE_STRING
   },

   "body" =>
   {"func" => sub {
      my $ctxt=$_[0];
      my $o=$ctxt->{mime_obj};
      return $o->bodyhandle->as_string if ($o->bodyhandle);
    },
    "args" => 0,
    "return_type" => $TYPE_STRING
   },

   "mailbox" =>
   {"func" => sub {
      my $ctxt=$_[0];
      return $ctxt->{mailbox};
    },
    "args" => 0,
    "return_type" => $TYPE_STRING
   },

   "has_parent" =>
   {"func" => sub { return func_has_parent(@_); },
    "args" => 0,
    "return_type" => $TYPE_NUMBER
   },

   "parent" =>
   {"func" => sub { return func_parent(@_); },
    "args" => 0,
    "return_type" => $TYPE_MAILID
   },

   "address_part" =>
   {"func" => sub {
      my @a=Mail::Address->parse($_[0]);
      return $a[0]->address;
    },
    "args" => 1,
    "return_type" => $TYPE_STRING
   },

   "name_part" =>
   {"func" => sub {
      my @a=Mail::Address->parse($_[0]);
      return $a[0]->name;
    },
    "args" => 1,
    "return_type" => $TYPE_STRING
   }
  );

sub nxchar {
  my ($ctxt)=@_;
  return substr($ctxt->{expr},$ctxt->{evp},1);
}

sub skip_blanks {
  my ($ctxt)=@_;
  my $c;
  my $p=$ctxt->{evp};
  do {
    $c=substr($ctxt->{expr}, $p, 1);
    $p++;
  } while ($c eq " " || $c eq "\t");
  $ctxt->{evp}=$p-1;
}

sub getnum {
  my ($ctxt)=@_;
  my $v;
  my $end=0;
  my $c;
  my $p=$ctxt->{evp};
  do {
    $c=substr($ctxt->{expr}, $p, 1);
    $p++;
    if ($c =~ /[0-9]/) { $v += $c; }
    else { $end=1; }
  } while (!$end);
  $ctxt->{evp}=$p-1;
  push @{$ctxt->{evstack}}, [$v, $TYPE_NUMBER];
}

sub eval_subexpr {
  my ($ctxt,$exprname)=@_;
  if (defined($ctxt->{cache_subexpr}->{$exprname})) {
    return @{$ctxt->{cache_subexpr}->{$exprname}};
  }
  elsif (defined($filter_exprs->{$exprname})) {
    my $evstr=$filter_exprs->{$exprname}->{expr};
    my %subctxt;
    $subctxt{mime_obj}=$ctxt->{mime_obj};
    $subctxt{evp}=0;
    $subctxt{cache} = $ctxt->{cache};
    $subctxt{expr}=$evstr;
    $subctxt{len}=length($evstr);
    if (eval_expr(0, \%subctxt)) {
      my ($pv2) = pop(@{$subctxt{evstack}});
      $ctxt->{cache_subexpr}->{$exprname}=@{$pv2};
      return @{$pv2};
    }
    else {
      print "Error $exprname: ", $subctxt{errstr}, " at character ", $subctxt{evp}+1, "\n";
      exit 1;
    }
  }
  else {
    $ctxt->{errstr}="Unknown expression $exprname";
    return undef;
  }
}

sub getsym {
  my ($ctxt)=@_;
  my $v;
  my $end=0;
  my $c;
  my $p=$ctxt->{evp};
  do {
    $c=lc(substr($ctxt->{expr}, $p, 1));
    $p++;
    if ($c =~ /[a-z_]/) { $v .= $c; }
    else { $end=1; }
  } while (!$end);
  $ctxt->{evp}=$p-1;
  return $v;
}

sub get_op_name {
  my ($ctxt)=@_;
  my $v;
  my $end=0;
  my $c;
  my $p=$ctxt->{evp};
  do {
    $c=substr($ctxt->{expr}, $p++, 1);
    if ($c =~ /[A-Za-z_0-9]/) { $v .= lc($c); }
    else { $end=1; }
  } while (!$end);
  $ctxt->{evp}=$p-1;
  return $v;
}

# parse a string that should be ended by the $endc character
# interpret backslash-style quoting
sub getstring {
  my ($ctxt, $endc)=@_;
  my $end=0;
  my $c;
  my $v;
  my $p=$ctxt->{evp};
  my $l=$ctxt->{len};
  while (!$end) {
    if ($p==$l) {
      $ctxt->{errstr}="Premature end of string";
      $ctxt->{evp}=$p-1;
      return undef;
    }
    else {
      $c=substr($ctxt->{expr}, $p++, 1);
      if ($c eq "\\") {
	if ($p<$l) {
	  $c = substr($ctxt->{expr}, $p++, 1);
	  $v .= $c;
	}
	else {
	  next; # and error
	}
      }
      elsif ($c eq $endc) {
	$end=1;
      }
      else {
	$v .= $c;
      }
    }
  }
  $ctxt->{evp}=$p;
  push @{$ctxt->{evstack}}, [$v,$TYPE_STRING];
  1;
}

sub eval_expr {
  my ($level, $ctxt)=@_;
  my $endexpr=0;

  skip_blanks($ctxt);

  #### operand or unary operator followed by operand ####
  my $c=nxchar($ctxt);
  if ($c eq "(") {
    $ctxt->{npar}++;
    $ctxt->{evp}++;
    if (eval_expr(0, $ctxt) && nxchar($ctxt) ne ")") {
      $ctxt->{errstr}="Unmatched parenthesis";
    }
    $ctxt->{npar}--;
    $ctxt->{evp}++;		# eat ')'
  }
  elsif ($c eq ")") {
    # We're accepting parentheses around an empty content, but only
    # in the context of evaluating function arguments
    my $last_elt=@{$ctxt->{evstack}};
    if ($last_elt>0 && $ctxt->{npar}>0) {
      my $pv=@{$ctxt->{evstack}}[$last_elt-1];
      if (@{$pv}[1] == $TYPE_FUNC) {
	$endexpr=1;
      }
    }
    if (!$endexpr) {
      $ctxt->{errstr}="Unmatched closing parenthesis";
    }
  }
  elsif ($c eq '"' || $c eq "'") {
    $ctxt->{evp}++;
    return undef if (!getstring($ctxt, $c));
  }
  elsif ($c eq "!") {		# logical not
    if ($level <= $PRI_UNARY_NOT) {
      $ctxt->{evp}++;
      if (eval_expr($PRI_UNARY_NOT, $ctxt)) {
	my $pv=pop(@{$ctxt->{evstack}});
	my ($v,$t)=@{$pv};
	push(@{$ctxt->{evstack}}, [!$v,$t]);
      }
    }
    else { $endexpr=1; }
  }
  elsif ($c =~ /^[0-9]$/) {
    getnum($ctxt);
  }
  elsif (lc($c) =~ /^[a-z]$/) {
    my $startp=$ctxt->{evp};
    my $sym=getsym($ctxt);
    if (defined($eval_funcs{$sym})) {
      # function, evaluate later when args are known
      push @{$ctxt->{evstack}}, [$sym, $TYPE_FUNC];
    }
    elsif (defined($eval_unary_ops{$sym})) {
      my $f=$eval_unary_ops{$sym};
      if ($level <= $f->{pri}) {
	if (eval_expr($f->{pri}, $ctxt)) {
	  my $pv=pop(@{$ctxt->{evstack}});
	  my ($v,$t)=@{$pv};
	  my $fn=$f->{func};
	  push(@{$ctxt->{evstack}}, [&$fn($v),$t]);
	}
      }
      else {
	$ctxt->{evp}=$startp;
	$endexpr=1;
      }
    }
    else {
      # sub-expr, evaluate now
      my ($v,$t)=eval_subexpr($ctxt, $sym);
      if (!defined($v)) {
	$ctxt->{evp}=$startp;		# step back to the error
	return undef;
      }
      push @{$ctxt->{evstack}}, [$v,$t] ;
    }
  }
  else {
    # default
    $ctxt->{errstr} = "Parse error";
  }
  return undef if ($ctxt->{errstr});

  #### binary operator and 2nd operand ####
  while (!$endexpr && !defined($ctxt->{errstr})) {
    if ($ctxt->{evp} >= length($ctxt->{expr})) {
      $endexpr=1;
      last;
    }
    skip_blanks($ctxt);
    $c=nxchar($ctxt);
    if ($c eq ")") {
      if ($ctxt->{npar}==0) {
	$ctxt->{errstr}="Unmatched closing parenthesis";
      }
      $endexpr=1;
    }
    elsif ($c =~ /[A-Za-z]/) {
      my $p=$ctxt->{evp};
      my $o=get_op_name($ctxt);
      my $f=$eval_binary_ops{$o};
      if (defined($f)) {
#	print Dumper($f);
	if ($level <= $f->{pri}) {
	  if (eval_expr($f->{pri}, $ctxt)) {
	    my $pv2=pop(@{$ctxt->{evstack}});
	    my $pv1=pop(@{$ctxt->{evstack}});
	    my ($v2,$t2)=@{$pv2};
	    my ($v1,$t1)=@{$pv1};
	    my $fn=$f->{func};
	    my $v=&$fn($v1,$v2);
	    push(@{$ctxt->{evstack}}, [$v, $TYPE_NUMBER]);
	  }
	}
	else {
	  $ctxt->{evp}=$p;
	  $endexpr=1;
	}
      }
      else {
	$ctxt->{evp}=$p;
	$ctxt->{errstr} = "Unknown operator";
      }
    }
    elsif ($c eq "=") {
      if ($level <= $PRI_EQ) {
	$ctxt->{evp}++;
	if (eval_expr($PRI_EQ, $ctxt)) {
#	  print Dumper($ctxt);
	  my $pv2=pop(@{$ctxt->{evstack}});
	  my $pv1=pop(@{$ctxt->{evstack}});
	  my ($v2,$t2)=@{$pv2};
	  my ($v1,$t1)=@{$pv1};
	  push(@{$ctxt->{evstack}}, [($v1 eq $v2)?1:0, $TYPE_NUMBER]);
	}
      }
      else { $endexpr=1; }
    }
    elsif ($c eq "(") {
      # functions
      my $last_elt=@{$ctxt->{evstack}};
      my $pv=@{$ctxt->{evstack}}[$last_elt-1];
      if (@{$pv}[1] == $TYPE_FUNC) {
	$ctxt->{evp}++;
	$ctxt->{npar}++;
	# zero or one-arg only functions until now
	if (eval_expr(0, $ctxt)) {
	  $c=nxchar($ctxt);
	  if ($c ne ")") {
	    $ctxt->{errstr}="Closing parenthesis expected";
	    $endexpr=1;
	  }
	  else {
	    $ctxt->{npar}--;
	    $ctxt->{evp}++;
	    my ($v,$t);
	    if ($last_elt != @{$ctxt->{evstack}}) {
	      # if there is an argument (the stack is deeper than before evaluating
	      # arguments), then pop it
	      my $pv=pop(@{$ctxt->{evstack}});
	      ($v,$t)=@{$pv};
	    }
	    $pv=pop(@{$ctxt->{evstack}});
	    my ($func_name,$tfn)=@{$pv};
	    my $f=$eval_funcs{$func_name};
	    if (defined($f)) {
	      my $fn=$f->{func};
	      $v=&$fn($ctxt,$v,$t);
	      # functions are all prototyped func(string) at the moment
	      push(@{$ctxt->{evstack}}, [$v, $f->{return_type}]);
	    }
	  }
	}
      }
      else {
	$ctxt->{errstr} = "Unexpected open parenthesis";
      }
    }
    elsif ($c eq ".") {
      my $stack_depth=@{$ctxt->{evstack}};
      if ($stack_depth==0) {
	$ctxt->{errstr} = "missing operand at left of composition operator '.'";
	last;
      }
      my $pv=@{$ctxt->{evstack}}[$stack_depth-1];
      if ($level <= $PRI_DOT) {
	$ctxt->{evp}++;
	my $f=getsym($ctxt);
	skip_blanks($ctxt);
	$c=next_char($ctxt);
      }
      else { $endexpr=1; }
    }
    else {
      $ctxt->{errstr} = "Unexpected operator";
    }
  }

  return undef if (defined($ctxt->{errstr}));
  1;
}

sub process_filter_mimeobj {
  my ($filter_expr,$top,$res,$mail_id,$dbh,$exprs)=@_;
  my %h;
  $filter_exprs=$exprs;
  $h{mime_obj} = $top;
  $h{evp}=0;
  $h{errstr}=undef;
  $h{npar}=0;
  $h{expr}=$filter_expr;
  $h{len}=length($filter_expr);
  $h{mail_id}=$mail_id;
  if (eval_expr(0, \%h)) {
    my ($pv2) = pop(@{$h{evstack}});
    my ($v2,$t2)=@{$pv2};
    if (!$v2) {
      $v2=0;
    }
    $$res=$v2;
  }
  else {
    $$res=undef;
    print STDERR "Error $filter_expr: ", $h{errstr}, " at character ", $h{evp}+1, "\n";
    return undef;
  }
  1;
}

sub mimeobj_from_db {
  my ($mail_id,$ctxt)=@_;

  my $sth=$ctxt->{dbh}->prepare("SELECT bodytext FROM body WHERE mail_id=?");
  $sth->execute($mail_id);
  my ($body)=$sth->fetchrow_array;

  $sth=$ctxt->{dbh}->prepare("SELECT lines FROM header WHERE mail_id=?");
  $sth->execute($mail_id);
  my ($headers)=$sth->fetchrow_array;

  my $top = MIME::Entity->build(Encoding => '-SUGGEST',
				Charset => 'iso-8859-1',
				Data => $body);

  for my $hl (split (/\n/, $headers)) {
    $top->head->replace($1, $hl);
  }
  return $top;
}

sub log_filter_hit {
  my ($dbh, $ctxt, $expr_id)=@_;
  if (!exists $ctxt->{filters_hit_count}) {
    my $sth=$dbh->prepare("INSERT INTO filter_log(expr_id, mail_id, hit_date) VALUES(?,?,now())");
    $sth->execute($expr_id, $ctxt->{mail_id});
    $ctxt->{filters_hit_count}=1;
  }
  else {
    # When storing several filter hits for the same context (which
    # means for the same message), each hit_date is one microsecond after
    # the previous one (now() being constant during the transaction).
    # This is a kludge that will allow us later to order by hit_date and get the
    # hits in the order in which they happened instead of having an additional
    # int column that would waste space.
    my $sth=$dbh->prepare("INSERT INTO filter_log(expr_id, mail_id, hit_date) VALUES(?,?,now()+?*cast('0.000001s' as interval))");
    $sth->execute($expr_id, $ctxt->{mail_id}, $ctxt->{filters_hit_count});
    $ctxt->{filters_hit_count}++;
  }
}

1;
