#!/usr/bin/perl

###########################################################
#
# Licensed Material - Property of IBM
#
# BogoSec - Source Code Security Quality Metric Calculator
#
# (C) Copyright IBM Corp. 2004-2008
#
# FILE   : bogosec
#
# PURPOSE: File to run different scanners and analyze their output
#	   to calculate a level of security for the input code target.
#
# HISTORY:
#	07/2004 originated by:
#		Tony Petz (petz.agoston@gmail.com)
#		Dustin Kirkland (dustin.kirkland@us.ibm.com)
#	01/2005 Loulwa Salem (loulwa@us.ibm.com) - add compiler based scanner functionality
#	01/2005 Loulwa Salem (loulwa@us.ibm.com) - Changes to improve code readability.
#	02/2005 Loulwa Salem (loulwa@us.ibm.com) - replaced shell commands with Perl code.
#	03/2005 Loulwa Salem (loulwa@us.ibm.com) - Added timeout option
#	07/2005 Loulwa Salem (loulwa@us.ibm.com) - New option to exclude vulnerabilities
#		Code to keep track of all vulnerability types found
###########################################################

use strict;

# load modules

# needed for proper command line option handling
use Getopt::Long;

# liberal use of temporary files/directories
use File::Temp;

# To use files copy and move operations
use File::Copy;

# execute each scanner in a separate thread
use Thread qw(async);

# module to obtain the absolute path of the target
use Cwd 'abs_path';

# globals and default values
#
# %OPTS holds every tunable setting.  The defaults below may be overridden by
# the configuration files (parse_config) and then by the command line
# (parse_cmd_line).
my %OPTS = (
		global_config_file	=>	"/etc/bogosec.conf",
		config_file		=> 	"$ENV{HOME}/.bogosecrc",
		log_dir			=> 	"$ENV{PWD}/",
		logging			=>	undef,
		min_sev			=>  	0,
		no_header_files 	=>	undef,
		plugin_dir 		=> 	"/usr/lib/bogosec/plugins/",
		sev_range_max		=>	10,
		scan_timeout		=>	undef,
		temp_log_dir		=>	"/tmp/",
		exclude_vuln		=>	undef,
		verbosity		=> 	0
	   );
# absolute path that is scanned (a temp directory when an archive was unpacked)
my $TARGET = "";
# absolute path of the target as named by the user, before any unpacking
my $TARGET_FILE = "";
# set to 1 when the target was unpacked into a temp directory that must be removed
my $DELETE_TARGET = 0;
# names of the plugin modules selected for this run
my @PLUGINS = ();
# NOTE: the hashes below are initialised with an empty list "()".  Assigning
#	"{}" would store a stringified hash reference as a bogus key.
# plugin name => path of that plugin's temporary log file
my %LOGFILES = ();
# plugin name => extra arguments given with the -p option
my %PLUGIN_ARGS = ();
# plugin name => value returned by the plugin's analyze() function
my %RESULTS = ();
# plugin name => thread running that plugin's scanner
my %THREADS = ();
# names of the files with at least one reported hit, in order of first appearance
my @ALL_FILES = ();
# file name => array indexed by line number holding space separated scores
my %ALL_FILES = ();
# final score (total severity points / total lines)
my $SCORE = "";
# running total of severity points
my $SEV_PTS = "";
# plugins whose scanner was found and started
my @PLUGINS_USED = ();
# running total of scanned lines
my $TOTAL_LINES = 0;
# source files found under the target
my @LIST_OF_FILES = ();
# plugin name => value returned by the plugin's thread
my %STATUS = ();
# temporary rpm top directory created by unpack_srcrpm
my $NEW_DIR = "";
# vulnerability type => number of times it was reported
my %Vuln = ();
#my $V_list = "";

sub usage() {
# Print the command line synopsis and the list of supported options.
# The text is kept in a single here-document; "\t" escapes are expanded
# because the here-document is double quoted.
	print <<"END_USAGE";
Usage: bogosec [options] TARGET
OPTIONS:
\t-l\t\t\t\tenable logging
\t--logdir <dir>\t\t\tchange log directory
\t--min-sev <0-10>\t\tchange min sev cutoff
\t--nhf\t\t\t\tdon't scan header files
\t-p <plugin> [args]\t\trun with specified plugin/args
\t--plugin-dir <dir>\t\tuse plugin-dir instead of default
\t--sev-range-max <num>\t\tchange maximum severity range
\t--timeout <num>\t\t\tset cpu timeout in seconds
\t--temp-log-dir <dir>\t\tchange temporary logging directory
\t-v <0|1>\t\t\tenable/disable verbose output
\t--xp <plugin>\t\t\texclude plugin
\t--xv <vuln1:vuln2:...etc>\texclude vulnerabilities

please see manpage for more documentation

END_USAGE
}

sub round($) {
# simple round-to-nearest-integer function
#
# Argument: the number to round.
# Returns:  the nearest integer; exact halves are rounded up.
	my $value = $_[0];
	my $floor = int($value);
# int() truncates towards zero, so for a negative fraction it yields the
# integer ABOVE the value; step down once to get the true floor.
	if ($value < $floor) {
		$floor--;
	}
	if ( ($value - $floor) < 0.5 ) {
		return $floor;
	} else {
		return $floor + 1;
	}
}

sub check_directories() {
# Append a trailing slash to each directory option in %OPTS that lacks one,
# so that a file name can be concatenated directly onto the option value.
	foreach my $dir_option ('log_dir', 'plugin_dir', 'temp_log_dir') {
		$OPTS{$dir_option} .= "/" unless ($OPTS{$dir_option} =~ m/^.*\/$/);
	}
}

sub parse_config($) {
# override any defaults with options specified in configuration file
#
# Argument: path of the configuration file.  A path that is not a regular
#	file is silently ignored.
# File format: one "keyword value" pair per line; everything after a '#' is
#	a comment.  Hyphens in a keyword are treated as underscores, so
#	"log-dir" and "log_dir" are equivalent.
# Unknown keywords are reported on stdout and skipped.
# Dies when the file exists but cannot be opened.
	my $config_path = $_[0];
	return if (! -f $config_path);

	open(my $config_fh, '<', $config_path) || die("Cannot open config file: $config_path\n");
	while (my $line = <$config_fh>) {
		$line =~ s/#.*//;
# lines without a keyword (blank or comment only) are skipped; the keyword
# pattern accepts '-' so the hyphenated spelling can be normalised below, and
# surrounding whitespace is kept out of the value
		next if ($line !~ m/^\s*([\w-]+)\s*(.*?)\s*$/);
		my ($keyword, $value) = ($1, $2);
		$keyword =~ s/-/_/g;
		if (exists($OPTS{$keyword})) {
			$OPTS{$keyword} = $value;
		} else {
			print "Error in config file $config_path: $keyword is not a valid configuration paramter!\n";
		}
	}
	close($config_fh);
}

sub parse_cmd_line() {
# read command line options
#
# Side effects:
#	%OPTS		updated with every option given on the command line
#	$TARGET_FILE	absolute path of the target as named by the user
#	$TARGET		absolute path to scan; replaced by a temporary
#			directory when the target is a .tar.gz/.tgz, .src.rpm
#			or .dsc file that had to be unpacked
#	@PLUGINS	plugin module names, minus those excluded with --xp
#	%PLUGIN_ARGS	extra arguments given with each -p option
#	@INC		the plugin directory is prepended
#
# Prints the usage text or an error message and exits with status 1 when the
# command line, the target or the plugin directory is not valid.
	my @plugin_list = ();
	my @exclude_list = ();
	my $result_cmd_read = GetOptions(\%OPTS,
			'exclude_plugin|exclude-plugin|xp=s' => \@exclude_list,
			'log_dir|log-dir=s',
			'logging|l',
			'min_sev|min-sev|minimum-severity=i',
			'no_header_files|no-header-files|nhf',
			'plugin|p=s' => \@plugin_list,
			'plugin_dir|plugin-dir=s',
			'sev_range_max|sev-range-max=i',
			'scan_timeout|timeout=i',
			'temp_log_dir|temp-log-dir=s',
			'exclude_vuln|exclude-vuln|xv=s',
			'verbosity|v=i',
			);
	if ($result_cmd_read != 1 || $#ARGV != 0) {
		usage();
		exit 1;
	}
# the unpack_* routines called below append a file name directly to
# temp_log_dir, so the trailing slashes must be in place before they run
	check_directories();

# get absolute target path (works if user enters . or ../DIR type targets)
# abs_path() may return undef for a path that cannot be resolved; fall back
# to the name as typed so the error message below stays meaningful
	my $resolved_target = abs_path($ARGV[0]);
	$TARGET = defined($resolved_target) ? $resolved_target : $ARGV[0];

# verify target
	if (! -f $TARGET && ! -d $TARGET) {
		print "Error: target \"$TARGET\" is not a valid file or directory.\n";
		exit 1;
	}
# check if target is a tar.gz or .tgz ball
	$TARGET_FILE = $TARGET;
	if ( ($TARGET =~ /\.tar\.gz$/) || ($TARGET =~ /\.tgz$/) ) {
		$TARGET = unpack_tarball($TARGET);
# check if target is a src.rpm file
	} elsif ( ($TARGET =~ /\.src\.rpm$/) ) {
		$TARGET = unpack_srcrpm($TARGET);
# check if target is a Debian source control file
	} elsif ( ($TARGET =~ /\.dsc$/) ) {
		$TARGET = unpack_dsc($TARGET);
	}
# verify plugin dir
	if (! -d $OPTS{plugin_dir}) {
		print "Error: \"$OPTS{plugin_dir}\" is not a valid directory.\n";
		exit 1;
	}
	unshift(@INC, $OPTS{plugin_dir});

# process specified plugins
	if ($#plugin_list >= 0) {
		foreach my $plugin_spec (@plugin_list) {
# each -p value is "<plugin> [args]"; a value without a plugin name is ignored
			(my $plugin) = ($plugin_spec =~ m/^\s*(\w+)/);
			next if (!defined($plugin));
			(my $args) = ($plugin_spec =~ m/^\s*\w+\s*(.*)$/);
			push(@PLUGINS, $plugin);
			$PLUGIN_ARGS{$plugin} = $args;
		}
	} else {
# user did not define a list of plugins to use (default):
# every *.pm file found in the plugin directory is taken as a plugin
		my $plugin_dh;
		if (!opendir($plugin_dh, $OPTS{plugin_dir})) {
			print "Error: cannot read plugin directory \"$OPTS{plugin_dir}\".\n";
			exit 1;
		}
		foreach my $mod (readdir($plugin_dh)) {
			if ( $mod =~ /\.pm$/ ) {
				$mod =~ s/\.pm$//g;
				push(@PLUGINS, $mod);
			}
		}
		closedir($plugin_dh);
	}


# process vulnerabilities exclude list
	#$V_list = $OPTS{exclude_vuln};

# process plugin exclude list (useful if one scanner/plugin is problematic)
# Each --xp value is matched as an anchored pattern against the plugin names.
# Filtering with grep removes every match; deleting with splice while walking
# the array by index would skip the element that follows each removal.
	foreach my $excluded (@exclude_list) {
		@PLUGINS = grep { $_ !~ m/^$excluded$/ } @PLUGINS;
	}
}

sub unpack_tarball($) {
# unpack archive file to a temp directory
#
# Argument: path of the gzip compressed tar archive.
# Returns:  path of the temporary directory holding the extracted files.
# Side effect: sets $DELETE_TARGET so the directory is removed at the end.
# Prints an error and exits with status 1 when tar fails.
	my $target = shift @_;
	my $new_target = mkdtemp($OPTS{temp_log_dir} . "bogosec.temp_target.XXXXXX");
# The list form of system() does not involve a shell, so an archive path that
# contains spaces or shell metacharacters reaches tar unchanged.  The verbose
# flag is not used: the file listing it produces was never displayed.
	system("tar", "-zxf", $target, "-C", $new_target);
	if ( $? != 0 ) {
		print("Error: unable to extract $target\n");
# do not leave the (possibly half filled) temp directory behind
		system("rm", "-rf", $new_target);
		exit 1;
	}
	$DELETE_TARGET = 1;
	return $new_target;
}

sub unpack_srcrpm($) {
# unpack src.rpm file to temp directory, rebuild it and apply the patches
#
# Argument: path of the .src.rpm file.
# Returns:  path of the BUILD directory holding the prepared sources.
# Side effects: sets $NEW_DIR to the temporary rpm top directory and
#	$DELETE_TARGET to 1.  ~/.rpmmacros is replaced while rpm runs and put
#	back (or removed, when the user had none) before returning or exiting.
# Prints an error and exits with status 1 when the sources cannot be prepared.
	my $target = shift @_;
	my $spec_file = "";
	my $macros_file = "$ENV{HOME}/.rpmmacros";
	my $macros_backup = "$ENV{HOME}/.rpmmacros.save";

# wrap a string in single quotes so that /bin/sh treats it as one literal word
	my $sh_quote = sub {
		my $word = shift @_;
		$word =~ s/'/'\\''/g;
		return "'" . $word . "'";
	};

	$NEW_DIR = mkdtemp($OPTS{temp_log_dir} . "bogosec.temp_rpm.XXXXXX");
# only BUILD directory needs to be created, SOURCES and SPECS are created automatically
	mkdir($NEW_DIR . "/BUILD", 0700);

# Backup ~/.rpmmacros file if it exists, then create a new file to make the rpm mechanism
#	put its output in the specified directory rather than the default location
	my $had_macros = (-e $macros_file) ? 1 : 0;
	if ($had_macros) {
# without a backup the user's file would be overwritten below, so stop here
		move($macros_file, $macros_backup) || die "Cannot back up ~/.rpmmacros \n";
	}
# undo the change above: restore the user's file, or delete the temporary one
	my $restore_macros = sub {
		if ($had_macros) {
			move($macros_backup, $macros_file);
		} else {
			unlink($macros_file);
		}
	};

	my $macros_fh;
	if (!open($macros_fh, '>', $macros_file)) {
		$restore_macros->();
		die "Cannot open ~/.rpmmacros \n";
	}
	print $macros_fh "%_topdir $NEW_DIR\n";
	close($macros_fh);

# Install the target using rpm, then use rpmbuild to only execute the %prep section
# 	of spec file (this unpacks the source to BUILD and applies the patches).
	my $quoted_target = $sh_quote->($target);
	`rpm -ivh $quoted_target`;

# Pick the spec file name from the SPECS directory, ignoring the . and ..
#	entries.  When several entries exist the last one read is used.
	if (opendir(my $specs_dh, "$NEW_DIR/SPECS")) {
		while (defined(my $entry = readdir($specs_dh))) {
			next if ($entry =~ /^[\.]{1,2}$/);
			$spec_file = $entry;
		}
		closedir($specs_dh);
	}

# The spec file name comes from the package itself, so it is quoted before it
#	is handed to the shell.  No spec file at all counts as a failed build.
	my $build_failed = 1;
	if ($spec_file ne "") {
		my $quoted_spec = $sh_quote->("$NEW_DIR/SPECS/$spec_file");
		`rpmbuild -bp $quoted_spec 2>/dev/null`;
		$build_failed = ($? != 0) ? 1 : 0;
	}

# Restore original ~/.rpmmacros
	$restore_macros->();

	if ($build_failed) {
		print "Error: rpmbuild did not execute correctly\n";
		system("rm", "-rf", $NEW_DIR);
		exit 1;
	}
	$DELETE_TARGET = 1;
	return $NEW_DIR . "/BUILD";
}

sub unpack_dsc($) {
# unpack Debian source to a temp directory
#
# Argument: path of the .dsc file.
# Returns:  path of the temporary directory; the sources are extracted into
#	its "src" subdirectory.
# Side effect: sets $DELETE_TARGET so the directory is removed at the end.
# Prints an error and exits with status 1 when dpkg-source fails.
	my $target = shift @_;
	my $new_target = mkdtemp($OPTS{temp_log_dir} . "bogosec.temp_target.XXXXXX");

# wrap a string in single quotes so that /bin/sh treats it as one literal word
	my $sh_quote = sub {
		my $word = shift @_;
		$word =~ s/'/'\\''/g;
		return "'" . $word . "'";
	};
	my $quoted_target = $sh_quote->($target);
	my $quoted_dest = $sh_quote->("$new_target/src");

	`dpkg-source -x $quoted_target $quoted_dest 2>/dev/null`;
	if ( $? != 0 ) {
		print("Error: unable to extract $target\n");
# do not leave the (possibly half filled) temp directory behind
		system("rm", "-rf", $new_target);
		exit 1;
	}
	$DELETE_TARGET = 1;
	return $new_target;
}

sub severity_stats() {
# extended analysis routine
# somewhat limited, as this requires severities 0-10 (default)
#
# Prints a bar chart on stdout with one row per severity level 1-10.  Every
# source line with at least one hit counts once, at the rounded average of
# the scores reported for that line.  Each bar is scaled so that 50 "x"
# characters stand for all counted lines (severity 0 included in the total).
	print "\nScanner hits break down by severity level\n";

#			  0 1 2 3 4 5 6 7 8 9 10
	my @severities = (0,0,0,0,0,0,0,0,0,0,0);
	my $total_sev = 0;
	foreach my $file (@ALL_FILES) {
# $#{...} is the index of the LAST element, so the bound has to be inclusive;
# otherwise the highest reported line of every file is left out
		for (my $i=0; $i<=$#{$ALL_FILES{$file}}; $i++) {
			next if (!defined($ALL_FILES{$file}[$i]));
			my @nums = split(" ",$ALL_FILES{$file}[$i]);
# nothing to average (would divide by zero)
			next if ($#nums < 0);
			my $avg = 0;
			foreach (@nums) {
				$avg += $_;
			}
			$avg = $avg/($#nums + 1);
			$avg = round($avg);
			$severities[$avg]++;
		}
	}
	foreach (@severities) {
		$total_sev += $_;
	}
	for (my $i=1; $i<=10; $i++) {
# with no hits at all every bar is empty (and the division must be avoided)
		my $bar = 0;
		if ($total_sev != 0) {
			$bar = round(($severities[$i]/$total_sev) * 50);
		}
		print "[$i]\t|";
		print "x" x $bar;
		print "($severities[$i])\n";
	}
	print "\n";
}


# useful test/debug subroutines
# -----------------------------
sub dump_opt() {
# Debug aid: print every key/value pair of %OPTS, then the plugin list.
	print "\n\nOPTS\n\n";
	foreach my $opt_name (keys %OPTS) {
		print "$opt_name : $OPTS{$opt_name}\n";
	}
# "\p" is not an escape sequence Perl knows, so this prints a plain "p"
	print "\plugins\n----\n";
	print "@PLUGINS\n....................\n";
}

sub verify_data_struct() {
# Debug aid: for every plugin in @PLUGINS print the entries stored for it
# in %RESULTS, one per line.
	print "verify data structure...\n--------------------\n";
	for my $scanner_mod (@PLUGINS) {
		print " * Analysis by $scanner_mod *\n";
		print "$_\n" for (@{$RESULTS{$scanner_mod}});
	}
}

sub verify_analysis() {
# Debug aid: print the names held in @ALL_FILES, then for each of them the
# per-line score strings stored in %ALL_FILES.
	print "verifying analysis data structures...\n-------------------\n";
	print "\@ALL_FILES contains:\n";
	foreach (@ALL_FILES) {
		print;
		print " ";
	}
	print "\n";
	foreach my $file (@ALL_FILES) {
		print "\$ALL_FILES{$file} = $ALL_FILES{$file}\n";
# $#{...} is the index of the LAST element, so the bound has to be inclusive;
# otherwise the highest reported line of every file is never shown
		for (my $i = 0; $i <= $#{$ALL_FILES{$file}}; $i++) {
			if (defined($ALL_FILES{$file}[$i])) {
				print "[$i] : $ALL_FILES{$file}[$i]\n";
			}
		}
	}
}
# -----------------------------
# end useful test/debug subroutines


###################
#    MAIN BODY    #
###################

# Load defaults, configuration file, command line options
# (each step overrides the one before it: built-in defaults, global
# configuration file, per-user configuration file, command line)
parse_config($OPTS{global_config_file});
parse_config($OPTS{config_file});
parse_cmd_line();
check_directories();

# check scanner plugin list
if ($#PLUGINS < 0) {
	print "Error: no plugins defined (or no plugins found in directory \"$OPTS{plugin_dir}\")\n";
	exit 1;
}
# load scanner plugins
# The module name is interpolated into a string eval, so only plain
# identifiers are accepted.  A plugin that cannot be loaded is reported and
# dropped from @PLUGINS instead of being skipped without a word later on.
my @loadable_plugins = ();
foreach my $mod (@PLUGINS) {
	if (!defined($mod) || $mod !~ m/^\w+$/) {
		print "Warning: ignoring invalid plugin name\n";
		next;
	}
	eval "use $mod;";
	if ($@) {
		my $load_error = $@;
		chomp($load_error);
		print "Warning: unable to load plugin $mod: $load_error\n";
		next;
	}
	push(@loadable_plugins, $mod);
}
@PLUGINS = @loadable_plugins;

# Run each scanner multithreaded
#
# For every plugin whose exists() call returns true a temporary log file is
# created and a thread is started.  Compiler based plugins run their own
# run() function; all others are executed once per source file with their
# output appended to the log file.  The thread for each plugin is stored in
# %THREADS and the plugin name is appended to @PLUGINS_USED.

# wrap a string in single quotes so that /bin/sh treats it as one literal word;
# file names come from the scanned tree and must never be interpreted by the shell
my $quote_for_shell = sub {
	my $word = shift @_;
	$word =~ s/'/'\\''/g;
	return "'" . $word . "'";
};

# The list of source files is the same for every plugin, so it is built once.
# It is also needed after this loop (line counting), which is why it must not
# depend on whether the last plugin happened to be available.
my $source_file_re = $OPTS{no_header_files}
	? qr/\.(?:c|cpp|c\+\+)$/i
	: qr/\.(?:c|cpp|c\+\+|h)$/i;
my $quoted_target = $quote_for_shell->($TARGET);
@LIST_OF_FILES = grep { $_ =~ $source_file_re } `find $quoted_target`;
chomp(@LIST_OF_FILES);

foreach my $plugin (@PLUGINS) {
	my $scanner = eval("$plugin\:\:name();");
	if (eval("$plugin\:\:exists();")) {
		my $path = $scanner;
		my $default_args = eval("$plugin\:\:get_args();");
		(my $FH, $LOGFILES{$plugin}) = mkstemp($OPTS{temp_log_dir} . "bogosec.$scanner.log.XXXXXX");
		$THREADS{$plugin} = async {
			my $compiler_based = eval("$plugin\:\:does_compile();");
			print("Running $scanner...\n");
# if scanner is compiler based, then use the run() function in the plugin
			if ($compiler_based) {
				eval("$plugin\:\:run(\$LOGFILES{\$plugin},\$TARGET_FILE,\$TARGET);");
			}
			else {
# else execute the static analysis scanners on each file
				my $quoted_log = $quote_for_shell->($LOGFILES{$plugin});
# following three lines will print the command running on files at the top of each scanner log file.
				open(my $log_fh, '>', $LOGFILES{$plugin}) || die "Cannot open $LOGFILES{$plugin}\n";
				print $log_fh "COMMAND : $path $default_args <file_name>\n";
				close($log_fh);
				foreach my $file (@LIST_OF_FILES) {
					my $quoted_file = $quote_for_shell->($file);
# if user defined a timeout option, set cpu time limit then run scanners. Else run scanners without limit
# ($path and $default_args come from the plugin and are deliberately left
# unquoted: the arguments are a shell word list)
					if (defined($OPTS{scan_timeout})) {
						`ulimit -t $OPTS{scan_timeout}; $path $default_args $quoted_file >> $quoted_log 2>&1`;
					} else {
						`$path $default_args $quoted_file >> $quoted_log 2>&1`;
					}
					if ( $? != 0 ) {
						print("Warning: $scanner failed with error code ( $? ) on file $file\n");
					}
				}
			}
# ";" on following line is required by async().
		};
		push(@PLUGINS_USED, $plugin);
	}
}

# Wait for every scanner thread, let the plugin analyze its log file, then
# dispose of the temporary log (after saving a copy when logging is enabled).
foreach my $plugin (@PLUGINS_USED) {
	$STATUS{$plugin} = $THREADS{$plugin}->join;
	$RESULTS{$plugin} = eval("$plugin\:\:analyze(\$LOGFILES{\$plugin}, \$TARGET, \$OPTS{sev_range_max}, \$OPTS{exclude_vuln});");
# if logging option specified, then copy log files to the log directory first.
	if (defined($OPTS{logging})) {
		my $logfile = $OPTS{log_dir} . eval("$plugin\:\:name();") . ".log";
# list form of system(): no shell is involved, so paths containing spaces or
# metacharacters are safe; a failed copy is reported instead of being ignored
		if (system("cp", $LOGFILES{$plugin}, $logfile) != 0) {
			print("Warning: unable to save log file $logfile\n");
		}
	}
	unlink($LOGFILES{$plugin});
}


# parse analysis
#
# For each plugin the first element of its result list is taken as the number
# of lines scanned; the remaining elements are "file:line:score:vulnerability"
# records.  Scores at or above min_sev are added to the totals and recorded
# per file and line in %ALL_FILES; every record is counted in %Vuln.
foreach my $plugin (@PLUGINS_USED) {
	my $this_score = 0;
	my $this_lines = shift @{$RESULTS{$plugin}};
	if (!defined($this_lines)) {
# the plugin did not report a line count: count the lines of the source files
		$this_lines = 0;
		foreach my $file (@LIST_OF_FILES) {
			chomp($file);
			# BUG: we should use sloccount here!
# Newline characters are counted (the same figure "wc -l" reports) by reading
# the file directly, so file names never pass through a shell.
			next if (! -f $file);
			if (open(my $src_fh, '<', $file)) {
				local $/ = \65536;
				while (my $chunk = <$src_fh>) {
					$this_lines += ($chunk =~ tr/\n//);
				}
				close($src_fh);
			}
		}
	}
	if ($STATUS{$plugin} eq "Error") {
		print "Error: $plugin encountered a problem and will be excluded from final calculation\n";
		$this_lines = 0;
	}
	$TOTAL_LINES += $this_lines;
	foreach my $result (@{$RESULTS{$plugin}}) {
		my ($file, $line, $score, $vulnerability) = split(/:/, $result);
		if ($score >= $OPTS{min_sev}) {
			if (!exists($ALL_FILES{$file})) {
				push(@ALL_FILES, $file);
			}
			$ALL_FILES{$file}[$line] .= "$score ";
# a plugin that reported an error contributes no points to the totals
			if ($STATUS{$plugin} eq "Error") {
				$score = 0;
			}
			$SEV_PTS += $score;
			$this_score += $score;
		}
		$Vuln{$vulnerability}++;
	}
	print(eval("$plugin\:\:name();") . "\n  " . round($this_score) . " points\n  $this_lines lines\n");
}

# List the vulnerability types the user asked to exclude (colon separated
# in the exclude_vuln option), if any.
my @vuln = split(/:/, $OPTS{exclude_vuln});
if (@vuln) {
	print "\nVulnerabilities excluded are: \n";
	print "$_ " foreach (@vuln);
	print "\n";
}

# Print how many times each vulnerability type was reported, walking the
# hash keys from last to first.
print "\nDetailed Vulnerability Count:\n";
foreach my $vuln_type (reverse(keys %Vuln)) {
	print "\t", $vuln_type, '=', $Vuln{$vuln_type}, "\n";
}

# If user scanned tar.gz or src.rpm file, delete temp target
# $NEW_DIR is only set for src.rpm targets, so empty names are skipped.  The
# list form of system() keeps the directory names away from the shell.
if ($DELETE_TARGET) {
	foreach my $temp_dir ($TARGET, $NEW_DIR) {
		next if (!defined($temp_dir) || $temp_dir eq "");
		system("rm", "-rf", $temp_dir);
	}
}

# calculate final score: severity points per scanned line
# (zero when nothing was scanned, which also avoids a division by zero)
$SCORE = ($TOTAL_LINES != 0) ? $SEV_PTS/$TOTAL_LINES : 0;

# more verbose output
# the severity chart is only available for the default 0-10 severity range
if ($OPTS{verbosity} > 0) {
	if ($OPTS{sev_range_max} == 10) {
		severity_stats();
	} else {
		print "\nNOTE: Extra verbosity cannot be used when sev_range_max is modified\n\n";
	}
}

# debug calls...
# verify_analysis();
# verify_data_struct();
# dump_opt();

# summary: scanners used, total points, total lines, final score
print(">>> Using scanners: (");
print(eval("$_\:\:name();") . " ") foreach (@PLUGINS_USED);
print ")\n";
print ">>> " . round($SEV_PTS) . " total severity points\n";
print ">>> $TOTAL_LINES total lines of code scanned\n";
print ">>> final score = $SCORE\n";
exit 0;
