#!/usr/bin/perl -w
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 
# scan_watch -- 
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 



=head1 NAME

slowscan -- Script to extract slow scans from archive argus logs

=head1 SYNOPSIS

    slowscan [<options>]

  -D <directory> -- Directory where data is held
  -F <filename>  -- File containing config (including filter)
  -s <filename>  -- Store history to this filename
  -S             -- only print summary to stdout
  -l <filename>  -- Load history from this filename
  -o <filename>  -- Output to filename
  -d <days>      -- Discard records that are Days old
  -c <date>      -- date of Current batch (today, yesterday, 31/12/99)
                    defaults to 'yesterday'
  -q             -- quiet, don't print the input file names

=head1 Examples

  I have a crontab entry:
  0 1 * * *       cd sw;scan_watch -q  -s history -l history  -d 7

=head1  DESCRIPTION

Slowscan is an experimental script designed to pick up very slow scans
from archived argus logs.  It uses Argus::Archive to access the data,
so that module must be configured to match your archive layout for
slowscan to work properly.

=cut

use strict;
use Data::Dumper;
use Getopt::Std;
use Date::Manip;

#^# use lib "$Argus::Home/lib";
use lib "/home/argus/lib";

use Argus;
use Argus::SlowScan;
use Argus::Archive;

#
# we use the times in the file names for two purposes, firstly to
# identify the file from which observations were drawn and secondly as
# a crude time stamp for pruning records from the history.  If this
# causes problems then it is easy enough to carry both the file name
# and the timestamp from the first (or last) record in the file.
#

# Option variables filled in by Getopt::Std.  $VAR1 is the variable the
# Data::Dumper history file assigns to when it is loaded with -l.
use vars qw(
    $opt_d $opt_s $opt_l $opt_t $opt_o $opt_c $opt_D $opt_q
    $opt_F $opt_S $VAR1
);

getopts("F:D:s:d:l:c:o:qS") || die "Invalid options found";

# With -F the named ra config file is used and the filter is left empty
# (the config file is expected to supply it); otherwise use the stock
# config together with the filter defined by Argus::SlowScan.
my $config = defined $opt_F ? $opt_F : "$Argus::Home/lib/ra.conf";
my $filter = defined $opt_F ? ''     : $Argus::SlowScan::RA_Filter;

########################################################## 


# -c selects the batch to process (today, yesterday, 31/12/99, ...);
# the default is yesterday's data.
$opt_c = 'yesterday' if( ! defined $opt_c );

my $date =ParseDate($opt_c) or die "Invalid date '$opt_c'";

# Day stamp of the batch: names the default report file and is compared
# with the newest timestamp of each source when reporting.
my $today = UnixDate($date, '%Y.%m.%d');
my ( $err);

# Cut-off for pruning the history (-d).  The variable is declared
# unconditionally because "my ... if COND" has undefined behaviour in
# Perl; only the assignment depends on the option.
my $discard;
if( defined $opt_d ) {
    $discard = UnixDate(DateCalc($date, "- $opt_d days", \$err),
                        '%Y.%m.%d.%H.%M');
    # an unusable -d value would otherwise silently disable pruning
    die "Invalid number of days '$opt_d' for -d" if ! defined $discard;
}

# Sources whose newest record is older than this are dropped from the
# history when the report is produced.
my $idle = UnixDate(DateCalc($date, "- $Argus::SlowScan::Drop_int days",
                             \$err), '%Y.%m.%d.%H.%M');

Argus::Archive::Set_Archive_Directory( $opt_D ) if defined $opt_D;

my @rec;
# %src:   source IP => { file timestamp => { destination key => count } }
#         (a timestamp may instead map to a summary string, see the report)
# %proto: protocol that is not analysed => number of records seen
my %src;
my %proto;

if( defined $opt_l) {  # load history file
    # The history file is Perl source that assigns the stored structure
    # to $VAR1.  require() resolves relative names through @INC, which no
    # longer contains "." since perl 5.26, so a plain name like "history"
    # would not be found.  Anchor relative names to the current directory
    # and evaluate the file with do() instead.
    # NOTE(review): this executes the file as code -- only load history
    # files that this script wrote itself.
    my $history = $opt_l =~ m{^\.{0,2}/} ? $opt_l : "./$opt_l";

    undef $VAR1;
    my $loaded = do $history;
    if( ! defined $loaded ) {
        die "Can't parse history '$opt_l': $@" if $@;
        die "Can't read history '$opt_l': $!";
    }
    die "History '$opt_l' does not contain a hash" if ref($VAR1) ne 'HASH';

    foreach my $ip ( keys %$VAR1 ) { # populate the %src structure
        $src{$ip} = $VAR1->{$ip};
        next if ! defined $discard;   # and prune it if necessary
        foreach my $time ( keys %{$src{$ip}} ) {
            # relies on the stored timestamps comparing as strings in the
            # same layout as $discard
            delete $src{$ip}->{$time} if $time lt $discard;   # trim records
        }
    }
}

# Files of the selected batch, as located by Argus::Archive.
my @files = Argus::Archive::Get_File_List($date);


foreach my $fn ( @files ) {   # process each file

    print "$fn\n" if ! defined $opt_q;

    die "Can't read $fn:$!" if( ! -r $fn);

    # The timestamp embedded in the file name keys this file's records in
    # %src.  Without one the records could be neither reported nor aged
    # out of the history, so such a file is skipped instead of being
    # filed under an undefined key.
    my ($time) = $fn =~ /([.m0-9]+\d)\.gz/;  # extract time from fn
    if( ! defined $time ) {
        warn "Can't extract time from file name '$fn' -- skipped\n";
        next;
    }

    # NOTE(review): this command line is run by the shell, so $config, $fn
    # and $filter must not contain untrusted text.  $filter is presumably
    # a multi-word filter expression, which is why this has not been
    # turned into a list-form pipe open -- confirm before changing.
    open(SW, "$Argus::Client_path/ra -F $config -I -AZs -r $fn -c $filter |")
        || die "can't open ra:$!";

    while (<SW>) {
        chomp;

        # one tab-separated flow record per line
        my ( $timestmp, $usec, $flags, $proto, $src,  $srcp, $sym, $dst,
             $dstp, $top, $frp, $tbytes, $fbytes, $state) = split(/\t/, $_);

        # 'F' in the flags (presumably a fragment): treat both ports as 0
        $dstp = $srcp = 0 if $flags =~ /F/;

        my $local_src = $src =~ /$Argus::Local_IP_re/o;

        # source/destination pairs that are explicitly ignored
        next if( defined $Argus::SlowScan::Ignore{$src} &&
                 $Argus::SlowScan::Ignore{$src} eq $dst );

        # Decide whether the record is of interest and, if so, tag the
        # destination with protocol and port so that one host probed on
        # several services is counted once per service.
        if( $proto eq 'icmp' ) {
            # ignore non ping and local
            next if $state !~ /^ECR/ or $local_src;
            $dst .= ".icmp-$state";
        }elsif( $proto eq 'udp' or $proto eq 'rtp' ) {
            next if ( $frp != 0  or # bidirectional traffic
                      # unidirectional flows, reasonable number of packets
                      ($top > 10 and !$frp) or
                      ($dstp > 33400 and $dstp < 33600 ));  # traceroutes
            next if $Argus::SlowScan::NoNB_Name and $dstp == 137;
            $dst .=  ".udp-$dstp";
        }elsif( $proto eq 'tcp' ) {
            next if ( $state =~ /^R/ ||  # stray RSTs
                      # detached FINs from a low port to a high port
                      ( $dstp >= 1024 and  $srcp < 1024 and
                        ($state eq 'F' or $state eq 'FR')) ||
                      $tbytes != 0         # received data
                    );
            # NOTE(review): this skips when NoGnuttela is false, whereas
            # the NoNB_Name test above skips when its flag is true --
            # confirm the sense of the flag in Argus::SlowScan.
            next if ( ! $Argus::SlowScan::NoGnuttela  and
                      $local_src and $dstp == 6346 );  # gnutella
            $dst .= $dstp ? ".tcp-$dstp" : '.tcp-frag';
        }elsif( $proto eq 'ip' ) { # frags
            $dst = '0-frag';
        } else {
            $proto{$proto}++;   # not analysed, only counted for the summary
            next;
        }

        # source -> time interval -> destination; the intermediate hashes
        # spring into existence on first use
        $src{$src}->{$time}->{$dst}++;
    }

    # close() on a pipe waits for ra and yields its status; without it a
    # missing or failing ra is indistinguishable from an empty file.
    close(SW)
        or warn("ra failed on $fn: ", ($! ? $! : "wait status $?"), "\n");
}

# Report on what we found

# Name of the report file: an explicit -o wins, then the batch date, then
# a fixed default.  Both variables are shared with report(), which opens
# the file the first time something is written.
my $report_open = 0;
my $reportf = defined $opt_o ? $opt_o
            : defined $today ? "report-$today"
            :                  'report';

# records of protocols that were only counted, not analysed
print "Protocol $_ -- count $proto{$_}\n" for keys %proto;

# number of source addresses held (history plus this batch)
print scalar(keys %src), "\n";

# Walk every source address held in %src: drop the ones that have gone
# idle, decide whether the rest deserve a report, and print a one-line
# summary followed by per-file detail.  Large per-file entries are
# replaced in %src by a summary string so the stored history stays small.
HOST:
foreach my $ip ( sort Argus::cmp_ip  keys %src ) { # each host

    my @times = sort {$b cmp $a} keys %{$src{$ip}};   # newest first
    my $t = scalar @times;
    my $f = 0;        # observations recorded as fragments
    my %dst_count;    # number of times for each dst.
    my $summ = 0;     # host counts taken from already summarised periods
    my %ports;        # distinct "proto-port" tags seen for this source

# has this site 'timed out' ?

    if( defined $idle && ($t == 0 || $times[0] lt $idle ))
    { # no activity for time out period
        delete $src{$ip};
        next;
    }

    foreach my $time (@times) {  # count number of local host involved
        my $seen = $src{$ip}->{$time};
        if( ref($seen) eq 'HASH') {
            foreach my $dst (keys %$seen ) {
                $dst_count{$dst} += $seen->{$dst};

                # Fragments are stored under keys ending in "-frag"
                # (".tcp-frag", "0-frag"); the old test for ".0" alone
                # could never match those.  ".0" is still accepted,
                # presumably an older key format found in history files.
                $f += $seen->{$dst} if $dst =~ /(?:\.0|-frag)$/;

                $ports{$1} = 1 if $dst =~ /\.(\w+-\w+)$/;
            }
        }
        else {
            # summarised period: the string starts with the host count
            $summ += $1 if defined $seen && $seen =~ /^(\d+)/;
        }
    }

    # list the ports only when the source sticks to one or two of them;
    # sorted so that the report is stable from run to run
    my $ports = (scalar keys %ports) <= 2 ? join(',', sort keys %ports) : '';

    my $n = scalar(keys %dst_count) + $summ;

# this controls the reporting threshold

    next unless Argus::SlowScan::report_this( $ip, $t, $n );

    next if( defined $today && $times[0] lt $today);  # no new data

    # reverse lookup; 2 is the address family passed to gethostbyaddr
    # (AF_INET on common platforms -- TODO confirm for the target system)
    my $dn = gethostbyaddr(pack("C4",split(/\./,$ip)),2) || '';

# print a summary
    report( "\n$dn","[$ip] -- hosts $n, times $t, frags $f $ports\n");

# some 'noisy' host just get the one line summary above...
# (defined(@array) is a fatal error from perl 5.22 on; an empty or unset
# list simply runs this loop zero times)

    foreach my $pat  (@Argus::SlowScan::Summary) {
        next HOST if "$dn $ip" =~ /$pat/;
    }

# print detail for each time period...

    foreach my $time (@times) {
        my ($day) = $time =~ /^(\d{4}\.\d\d\.\d\d)\./ ;
        report( "\tfile: data/$day/argus-$time.gz\n" );

        if( ref($src{$ip}->{$time}) ne 'HASH') { # has been summarised...
            report("\t\t****$src{$ip}->{$time}\n");
            next;
        }

        my $hosts = scalar keys %{$src{$ip}->{$time}};
        my $Desc;
        my $lines = Argus::list_ips (undef, \$Desc,
                          [sort Argus::cmp_ip keys %{$src{$ip}->{$time}}]);

        if ( (scalar @$lines) < $Argus::SlowScan::Maxlines ) {
            report( "\t\t", join("\n\t\t", @$lines), "\n");
        }
        else { # Don't try and print big scans...
            report( "\t\t**** $hosts hosts $Desc\n" );
            # keep only the summary in the history from now on
            $src{$ip}->{$time} = "$hosts hosts $Desc" ;
        }
    }
}

# Use Data::Dumper to write out the history data structure ready for
# the next run, where it is read back with -l.

if(defined $opt_s) {   # Store data
    $Data::Dumper::Indent = 0;

    # keep the previous history as a backup; carry on if that fails, but
    # say so because the old file is about to be overwritten
    if( -f $opt_s ) {
        rename($opt_s, "$opt_s.old")
            || warn "Can't rename $opt_s to $opt_s.old:$!\n";
    }

    # three-argument open so that the -s value is always taken as a plain
    # file name
    open(my $dump, '>', $opt_s) || die "Can't open $opt_s:$!";
    print {$dump} Dumper(\%src) or die "Can't write $opt_s:$!";
    # buffered write errors (full disk, quota) only show up at close
    close($dump) || die "Can't write $opt_s:$!";
}

exit (0);


# report(LIST) -- write LIST to the report file and, usually, to stdout.
#
# The report file ($reportf) is opened on the first call, so no file is
# created when there is nothing to report.  When -S was given, a call
# whose first argument starts with a tab (the per-file detail lines) is
# kept off stdout; everything else is echoed there as well.
sub report {

    if(! $report_open) {
        # three-argument open: the name is used literally, so a -o value
        # that starts with '>' or '|' or carries surrounding blanks cannot
        # change how the file is opened
        open(REPORT, '>', $reportf) || die "Can't open '$reportf':$!";
        $report_open = 1;
    }

    print REPORT @_;
    print @_ unless defined $opt_S and $_[0] =~ /^\t/;
}












