#!/usr/bin/env python
import os
import sys
import zipfile

"""
Take a MaxMind GeoIP database as input and replace each A1 entry with the
country code and name of its predecessor, but only if the preceding entry
ends directly before the A1 entry, the subsequent entry starts directly
after it, and both neighboring entries carry the same country code.  Runs
of more than one consecutive A1 entry are left unchanged.

Usage:
  python deanonymind.py [GeoIPCountryCSV.zip] [NewGeoIPCountryWhois.csv]
"""
def main():
    """Parse the command line, rewrite the A1 entries and save the result."""
    source_path, target_path = parse_args()
    rewritten_lines = process_input_file(source_path)
    write_output_file(target_path, rewritten_lines)

def parse_args():
    """Validate the command line and return the input and output paths.

    Reads sys.argv.  Prints a message and exits with status 1 unless
    exactly two arguments are given and the first one names an existing
    path ending in '.zip'.

    Returns:
        A tuple (input_file, output_file) holding the two arguments.
    """
    if len(sys.argv) != 3:
        print('Usage: python %s [GeoIPCountryCSV.zip] '
              '[NewGeoIPCountryWhois.csv]' % (sys.argv[0], ))
        sys.exit(1)
    input_file = sys.argv[1]
    if not os.path.exists(input_file) or not input_file.endswith('.zip'):
        # A single parenthesized argument prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('Input file "%s" does not exist or is not a .zip file.' % (
                input_file, ))
        sys.exit(1)
    output_file = sys.argv[2]
    return (input_file, output_file)

def process_input_file(input_file):
    """Read the GeoIP CSV from a zip archive and rewrite mergeable A1 lines.

    The archive member 'GeoIPCountryWhois.csv' is split on newlines.  Each
    run of consecutive lines containing '"A1"' is handed to
    process_a1_lines together with the non-A1 line before it and the one
    after it (None when the run reaches the end of the file); whatever
    that call returns takes the place of the run.  All other lines are
    passed through unchanged.

    Args:
        input_file: Path of the zip archive to read.

    Returns:
        The list of output lines, without trailing newline characters.
    """
    zip_file = zipfile.ZipFile(input_file)
    try:
        csv_content = zip_file.read('GeoIPCountryWhois.csv')
    finally:
        # Release the archive handle even when the member is missing.
        zip_file.close()
    if not isinstance(csv_content, str):
        # Python 3 hands back bytes, which cannot be split on a str
        # separator.  latin-1 decodes every byte value without error.
        # NOTE(review): assumes the CSV is effectively ASCII -- confirm the
        # real encoding if non-ASCII country names must round-trip.
        csv_content = csv_content.decode('latin-1')
    result_lines = []
    prev_line = None
    a1_lines = []
    for line in csv_content.split('\n'):
        if '"A1"' in line:
            # Collect the run; it is resolved once the next non-A1 line
            # (or the end of the file) is known.
            a1_lines.append(line)
            continue
        if a1_lines:
            result_lines.extend(process_a1_lines(prev_line, a1_lines, line))
            a1_lines = []
        result_lines.append(line)
        prev_line = line
    if a1_lines:
        # The file ended inside an A1 run, so there is no following line.
        result_lines.extend(process_a1_lines(prev_line, a1_lines, None))
    return result_lines

def process_a1_lines(prev_line, a1_lines, next_line):
    """Try to replace a run of A1 lines using the neighboring entries.

    Args:
        prev_line: The line preceding the run, or None/empty if there is
            none.
        a1_lines: The consecutive A1 lines that make up the run.
        next_line: The line following the run, or None/empty if there is
            none.

    Returns:
        A one-element list with the rewritten line when the run is a single
        A1 line whose numeric range directly follows the previous entry,
        directly precedes the next entry, and both neighbors have the same
        country code.  Otherwise a1_lines is returned unchanged.
    """
    if not prev_line or not next_line:
        # Can't merge first or last line in file.
        # The parenthesized single-argument print behaves the same under
        # Python 2 and Python 3.
        print("Can't merge first or last line in file.")
        return a1_lines
    if len(a1_lines) > 1:
        # Can't merge more than 1 line at once.
        print("Can't merge more than 1 line at once.")
        return a1_lines
    a1_line = a1_lines[0].strip()
    prev_entry = parse_line(prev_line)
    a1_entry = parse_line(a1_line)
    next_entry = parse_line(next_line)
    # The ranges are adjacent when one ends exactly one number before the
    # other starts.
    touches_prev_entry = int(prev_entry['end_num']) + 1 == \
            int(a1_entry['start_num'])
    touches_next_entry = int(a1_entry['end_num']) + 1 == \
            int(next_entry['start_num'])
    same_country_code = prev_entry['country_code'] == \
            next_entry['country_code']
    if touches_prev_entry and touches_next_entry and same_country_code:
        return format_line_with_other_country(a1_entry, prev_entry)
    else:
        return a1_lines

def parse_line(line):
    """Split one CSV line into a dict of named fields.

    All double quotes are removed, surrounding whitespace is stripped and
    the line is split on commas into at most six fields: 'start_str',
    'end_str', 'start_num', 'end_num', 'country_code' and 'country_name'.
    Lines with fewer fields yield a dict with only the leading keys.

    Args:
        line: The raw CSV line.

    Returns:
        A dict mapping field names to string values, or None if line is
        empty or None.
    """
    if not line:
        return None
    keys = ['start_str', 'end_str', 'start_num', 'end_num',
            'country_code', 'country_name']
    stripped_line = line.replace('"', '').strip()
    # Stop splitting after the fifth comma so that a country name which
    # itself contains a comma (e.g. "Korea, Republic of") stays intact
    # instead of being cut off at its first comma.
    parts = stripped_line.split(',', len(keys) - 1)
    return dict(zip(keys, parts))

def format_line_with_other_country(a1_entry, other_entry):
    """Build a CSV line from one entry's range and another's country.

    The four range fields come from a1_entry; the country code and country
    name come from other_entry.  Every field is wrapped in double quotes.

    Returns:
        A one-element list holding the formatted line.
    """
    range_keys = ('start_str', 'end_str', 'start_num', 'end_num')
    fields = [a1_entry[key] for key in range_keys]
    fields.append(other_entry['country_code'])
    fields.append(other_entry['country_name'])
    quoted = ['"%s"' % (field, ) for field in fields]
    return [','.join(quoted)]

def write_output_file(output_file, lines):
    """Write the given lines to output_file, separated by newlines.

    No newline is added after the last line.

    Args:
        output_file: Path of the file to create or overwrite.
        lines: The lines to write, without trailing newline characters.
    """
    # The with block closes the file even if the write raises.
    with open(output_file, 'w') as out_file:
        out_file.write('\n'.join(lines))

# Run the tool only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()

