#! /usr/bin/perl -w
#
# This script can anonymize IP addresses in our text files and pcap captures.
# The IP mapping table can be saved out, and be reused & augmented in future
# runs. 

use Net::IP;
use Net::Pcap;
use FileHandle;
use Getopt::Long;
use strict;

# File names taken from the command line. They are filled in by the
# GetOptions() call and stay undefined when the option is absent.
my ($opt_read_pcap, $opt_read_text, $opt_write_pcap,
    $opt_write_text, $opt_read_map, $opt_write_map);

# Receives the error text from Net::Pcap::open_offline().
my $err = ""; # Error string a la pcap
# Key: original dotted-quad string; value: its replacement dotted-quad string.
my %rules; # Individual src -> dst rules
# Keys 'dst' (Net::IP destination range) and 'idx' (Net::IP cursor that
# subst_ip() advances by $inc before handing out the new value).
my %default; # The default range

# The range mappings specified by the user. Key is string representing
# the source range, value is reference to hash, see add_mapping() for contents.
my %mappings;

# Temporary storage for the command-line mappings. merge_mappings() copies
# these over %mappings and %default once load_map() has read the map file.
my %cli_mappings; 
my %cli_default;

# A step of one address; added with binadd() to move an 'idx' cursor forward.
my $inc = new Net::IP("0.0.0.1");

# Print the usage text to standard output and exit with status 0.
#
# This is the Getopt::Long handler for --help, so it runs while the command
# line is still being parsed. Exiting here keeps the script from carrying on
# after the usage text and aborting with the unrelated "Need at least a
# default mapping region" error.
sub help
{
    print <<'END_USAGE';
USAGE: anon --read-pcap=<file> --write-pcap=<file>
            --read-text=<file> --write-text=<file>
            --read-map=<file> --write-map=<file>
            --map=<prefix[,prefix]> ...

anon takes a pcap dump file or a text file (or both)
as inputs and renumbers the IP addresses therein, subject
to the specifications given at the command line. The resulting
mapping ruleset can be written to a file, for later reuse or to
consult the mapping.

Renumbering is currently sequential, i.e., any previously unencountered
IP address is mapped to the next unused destination address in the
address's destination range.

  --read-pcap=<file>        Pcap input file.
  --write-pcap=<file>       Pcap output file.
  --read-text=<file>        Text input file.
  --write-text=<file>       Text output file.
  --read-map=<file>         Mapping ruleset to use. When specifying both
                            a mapping ruleset file and mappings on the
                            command line, the command line ones augment
                            and potentially replace the file ones.
  --write-map=<file>        Save resulting mapping ruleset to file.
  --map=<prefix[,prefix]>   A mapping rule. IP addresses matching the
                            first prefix are translated into the second
                            prefix. If only a single prefix is given,
                            it becomes the default destination range.
                            Can be specified repeatedly.

Examples:

$ anon --read-pcap=in.trace --write-pcap=out.trace --write-map=map.txt
        --map=192.168/16,10/8 --map=127.0.0.0/8

Reads pcap file in.trace and writes anonymized version to out.trace.
The resulting mapping ruleset is written to map.txt. The 192.168.0.0/16
range is mapped into 10.0.0.0/8, everything else into 127.0.0.0/8.
END_USAGE

    exit(0);
}

# Getopt::Long handler for --map: record one mapping from the command line.
#
#   $name   Option name handed over by Getopt::Long (not used).
#   $input  Either "src-prefix,dst-prefix" or a single "dst-prefix".
#
# A pair is stored in %cli_mappings under the printed form of the source
# range. The stored hash has these keys:
#   src  Net::IP object for the source range
#   dst  Net::IP object for the destination range
#   idx  Net::IP object used as allocation cursor; starts at dst advanced
#        by $inc and is advanced again before every allocation
# A single prefix sets the command-line default in %cli_default (keys dst
# and idx, same meaning).
#
# Entries go into the cli_* staging hashes rather than straight into
# %mappings: load_map() runs after option parsing and would overwrite a
# command-line rule with a file rule for the same source range, whereas
# merge_mappings() applies the staged entries last.
#
# Dies when Net::IP cannot parse a prefix.
sub add_mapping
{
    my ($name, $input) = @_;

    if ($input =~ /(.+),(.+)/) {
        # Copy the captures right away so later regex use cannot disturb them.
        my ($src_spec, $dst_spec) = ($1, $2);

        my $src_net = Net::IP->new($src_spec)
            || die("$src_spec is not a valid IP address range.");
        my $dst_net = Net::IP->new($dst_spec)
            || die("$dst_spec is not a valid IP address range.");

        $cli_mappings{$src_net->print()} = {
            'src' => $src_net,
            'dst' => $dst_net,
            'idx' => $dst_net->binadd($inc),
        };
    } else {
        my $dst_net = Net::IP->new($input)
            || die("$input is not a valid IP address range.");

        $cli_default{'dst'} = $dst_net;
        $cli_default{'idx'} = $dst_net->binadd($inc);
    }
}

# Read the mapping ruleset named by --read-map into %mappings, %default and
# %rules. Does nothing when --read-map was not given.
#
# Three line formats are recognised, tested in this order:
#   map <src-range> -> <dst-range> @ <cursor>    range mapping
#   default <dst-range> @ <cursor>               default range
#   <ip> <ip>                                    individual rule
# Lines matching none of them are ignored.
#
# Dies when the file cannot be opened or when Net::IP rejects a range or
# cursor value.
sub load_map
{
    return unless ($opt_read_map);

    my $map = FileHandle->new($opt_read_map, "r")
        || die("Could not read map $opt_read_map: $!");

    while (my $line = <$map>) {
        if ($line =~ /map (\S+) -> (\S+) @ (\S+)/) {
            my ($src_spec, $dst_spec, $idx_spec) = ($1, $2, $3);
            my $mapping = { };

            $mapping->{'src'} = Net::IP->new($src_spec)
                || die("$src_spec is not a valid IP address range.");
            $mapping->{'dst'} = Net::IP->new($dst_spec)
                || die("$dst_spec is not a valid IP address range.");
            # Resume from the saved cursor, not from the start of the
            # destination range, so reloaded maps do not hand out
            # addresses that are already present in the rules.
            $mapping->{'idx'} = Net::IP->new($idx_spec)
                || die("$idx_spec is not a valid IP.");

            $mappings{$mapping->{'src'}->print()} = $mapping;
        } elsif ($line =~ /default (\S+) @ (\S+)/) {
            my ($dst_spec, $idx_spec) = ($1, $2);

            $default{'dst'} = Net::IP->new($dst_spec)
                || die("$dst_spec is not a valid IP address range.");
            $default{'idx'} = Net::IP->new($idx_spec)
                || die("$idx_spec is not a valid IP.");
        } elsif ($line =~ /(\d+\.\d+\.\d+\.\d+)\s+(\d+\.\d+\.\d+\.\d+)/) {
            $rules{$1} = $2;
        }
    }

    $map->close();
}

# Write the current ruleset to the file named by --write-map, in the format
# load_map() reads back: range mappings first, then the default range (if
# one is set), then the individual IP -> IP rules. Does nothing when
# --write-map was not given.
#
# Mappings and rules are written in sorted key order so that two runs with
# the same state produce the same file.
#
# Dies when the file cannot be opened or when closing it reports an error;
# buffered write failures only show up at close time.
sub save_map
{
    return unless ($opt_write_map);

    my $map = FileHandle->new($opt_write_map, "w")
        || die("Could not save map $opt_write_map: $!");

    # Range mappings, each with its current allocation cursor.
    foreach my $key (sort keys %mappings) {
        my $mapping = $mappings{$key};
        $map->print("map " . $mapping->{'src'}->print() .
                    " -> " . $mapping->{'dst'}->print() .
                    " @ "  . $mapping->{'idx'}->ip() . "\n");
    }

    # Default range, if one is set.
    if (exists($default{'idx'})) {
        $map->print("default " . $default{'dst'}->print() .
                    " @ "  . $default{'idx'}->ip() . "\n");
    }

    # Individual IP -> IP rules.
    foreach my $src (sort keys %rules) {
        $map->printf("%-15s   %s\n", $src, $rules{$src});
    }

    $map->close() || die("Could not save map $opt_write_map: $!");
}

# Lay the command-line settings over whatever is already in %mappings and
# %default: every %cli_mappings entry replaces or adds the entry with the
# same key, and a command-line default (recognised by its 'idx' key)
# replaces the 'dst' and 'idx' of %default.
sub merge_mappings
{
    # keys and values walk an unmodified hash in the same order, so one
    # slice assignment copies every entry across.
    @mappings{keys %cli_mappings} = values %cli_mappings;

    if (exists($cli_default{'idx'})) {
        @default{'dst', 'idx'} = @cli_default{'dst', 'idx'};
    }
}

# Return the replacement for one dotted-quad address string.
#
#   $ipstr  The address as text, e.g. "192.168.1.7".
#
# Lookup order:
#   1. 0.0.0.0 and 255.255.255.255 are returned unchanged.
#   2. An address already in %rules gets its recorded replacement.
#   3. Otherwise the first entry of %mappings whose source range contains
#      the address (or is identical to it, for a single-host source)
#      supplies the next address: its cursor is advanced by $inc and the
#      new value is recorded in %rules.
#   4. Otherwise the default range supplies the next address the same way.
#
# A string Net::IP cannot parse (the text pattern also matches things like
# "1.2.3.4567") skips step 3 and is renumbered from the default range.
#
# Dies when no default range is set and step 4 is reached, or when the
# cursor cannot be advanced.
sub subst_ip
{
    my ($ipstr) = @_;

    # Don't map a few special ones.
    return $ipstr if ($ipstr eq "0.0.0.0" || $ipstr eq "255.255.255.255");

    # If we have a rule already, use it.
    return $rules{$ipstr} if (exists($rules{$ipstr}));

    # Otherwise, see whether a configured range mapping fits.
    my $ip = Net::IP->new($ipstr);
    my $slot;

    if (defined($ip)) {
        foreach my $src (keys %mappings) {
            my $relation = $mappings{$src}->{'src'}->overlaps($ip);

            # overlaps() yields undef when the two cannot be compared.
            next unless (defined($relation));

            if ($relation == $IP_B_IN_A_OVERLAP || $relation == $IP_IDENTICAL) {
                $slot = $mappings{$src};
                last;
            }
        }
    }

    # If all fails, use the default mapping.
    if (!defined($slot)) {
        die("No mapping covers $ipstr and no default range is set (see --map)")
            unless (exists($default{'idx'}));
        $slot = \%default;
    }

    my $next = $slot->{'idx'}->binadd($inc);
    die("Could not allocate a new address for $ipstr")
        unless (defined($next));

    $slot->{'idx'} = $next;
    $rules{$ipstr} = $next->ip();

    return $rules{$ipstr};
}

# Net::Pcap::loop() callback: renumber the addresses of one packet and write
# the packet to the dump file.
#
#   $meta    Hash reference set up by process_pcap(); the keys read here are
#            'ipoff' (offset of the IP header), 'dlt' (link type) and
#            'dumper' (dump handle).
#   $header  Packet header as supplied by Net::Pcap, passed on unchanged.
#   $packet  Raw packet bytes.
#
# On Ethernet the EtherType selects the layout: ARP (0x0806) uses the sender
# and target protocol address fields at offsets 28 and 38, IPv4 (0x0800)
# uses the header offsets, and any other frame is written out unchanged
# because it carries no IPv4 addresses at those offsets. For the other link
# types every packet is assumed to be IPv4.
#
# An address field that is cut off by the capture length is left alone;
# assigning to a substr() beyond the end of the string would be fatal.
#
# Only the four address bytes are replaced; no checksum is recomputed.
sub process_packet
{
    my ($meta, $header, $packet) = @_;
    my $off_src = $meta->{'ipoff'} + 12;
    my $off_dst = $meta->{'ipoff'} + 16;
    my $rewrite = 1;

    if ($meta->{'dlt'} == DLT_EN10MB) {
        # -1 never equals a real EtherType, so a frame too short to have
        # one is passed through untouched.
        my $ethertype = (length($packet) >= 14)
            ? unpack("n", substr($packet, 12, 2)) : -1;

        if ($ethertype == 0x0806) {
            $off_src = 28;
            $off_dst = 38;
        } elsif ($ethertype != 0x0800) {
            $rewrite = 0;
        }
    }

    if ($rewrite) {
        # Source first, then destination: the order decides which address
        # gets the lower number when both are new.
        foreach my $off ($off_src, $off_dst) {
            next if (length($packet) < $off + 4);

            my $old = join(".", unpack("C4", substr($packet, $off, 4)));
            my @octets = split(/\./, subst_ip($old));
            substr($packet, $off, 4) = pack("C4", @octets);
        }
    }

    Net::Pcap::dump($meta->{'dumper'}, $header, $packet);
}

# Anonymize the capture named by --read-pcap into the file named by
# --write-pcap. Does nothing unless both options were given.
#
# Every packet is handed to process_packet() together with a hash holding
# the capture handle ('pcap'), the dump handle ('dumper'), the link type
# ('dlt') and the offset of the IP header within a packet ('ipoff').
#
# Dies when either file cannot be opened or when the link type is not one of
# DLT_EN10MB, DLT_RAW or DLT_NULL. A read error during the loop is reported
# with warn() so that the remaining steps of the script still run.
sub process_pcap
{
    return unless ($opt_read_pcap && $opt_write_pcap);

    my $pcap = Net::Pcap::open_offline($opt_read_pcap, \$err)
        || die("Could not open $opt_read_pcap: $err");
    my $dumper = Net::Pcap::dump_open($pcap, $opt_write_pcap)
        || die("Could not save $opt_write_pcap: " . Net::Pcap::geterr($pcap));

    my $dlt = Net::Pcap::datalink($pcap);
    my %meta = ('pcap' => $pcap, 'dumper' => $dumper, 'dlt' => $dlt);

    # Figure out offset to IP header. XXX add more --cpk
    if ($dlt == DLT_EN10MB) {
        $meta{'ipoff'} = 14;    # Ethernet header
    } elsif ($dlt == DLT_RAW) {
        $meta{'ipoff'} = 0;     # packet starts with the IP header
    } elsif ($dlt == DLT_NULL) {
        $meta{'ipoff'} = 4;     # 4-byte address family field comes first
    } else {
        die("Unsupported DLT " . $dlt . ", sorry.");
    }

    my $status = Net::Pcap::loop($pcap, -1, \&process_packet, \%meta);
    warn("Error while reading $opt_read_pcap: " . Net::Pcap::geterr($pcap))
        if ($status == -1);

    # Close explicitly so the dump file is flushed at a known point.
    Net::Pcap::dump_close($dumper);
    Net::Pcap::close($pcap);
}

# Copy the file named by --read-text to the file named by --write-text,
# replacing every dotted-quad number sequence with the result of subst_ip().
# Does nothing unless both options were given.
#
# Dies when either file cannot be opened or when closing the output reports
# an error; buffered write failures only show up at close time.
sub process_text
{
    return unless ($opt_read_text && $opt_write_text);

    my $in = FileHandle->new($opt_read_text, "r")
        || die("Could not read text file $opt_read_text: $!");
    my $out = FileHandle->new($opt_write_text, "w")
        || die("Could not write text file $opt_write_text: $!");

    # A named line variable keeps the text safe from anything that touches
    # $_ while subst_ip() runs.
    while (my $line = <$in>) {
        $line =~ s/(\d+\.\d+\.\d+\.\d+)/subst_ip($1)/eg;
        $out->print($line);
    }

    $in->close();
    $out->close() || die("Could not write text file $opt_write_text: $!");
}

# Parse the command line. --map and --help are dispatched to their handlers
# during parsing. GetOptions() returns false after warning about an unknown
# or malformed option; stop then instead of running on a partly understood
# command line.
GetOptions("read-pcap=s" => \$opt_read_pcap, "write-pcap=s" => \$opt_write_pcap,
           "read-text=s" => \$opt_read_text, "write-text=s" => \$opt_write_text,
           "read-map=s" => \$opt_read_map, "write-map=s" => \$opt_write_map,
           "map=s" => \&add_mapping,
           "help" => \&help)
    || die("Invalid command line (see --help)\n");

# File rules first, then the command-line rules on top of them.
load_map();
merge_mappings();

# A map file is accepted in place of a default range given with --map.
die("Need at least a default mapping region (see --map)")
    unless (exists($default{'idx'}) || $opt_read_map);

# The map is saved last so that it contains the rules created while
# processing the inputs.
process_pcap();
process_text();
save_map();
