#!/usr/bin/perl
#
# This program tries to screen-scrape one of SourceForge's intolerable
# bug report lists and extract usable information.
#
# To use, get a bug listing (for example, from 
#     https://sourceforge.net/tracker/?group_id=248804&atid=1126676
# ) and save it to an HTML file. Then pass the HTML file through this program.
#
# 20090330 MJD
#

use Time::Local;

# Skip the page preamble: discard every input line up to and including
# the first one that starts with "Permalink".  Record parsing resumes on
# the line after it.
my $saw_marker = 0;
while (<>) {
    if (/^Permalink/) {
        $saw_marker = 1;
        last;
    }
}

# If the marker never appeared, the loop above consumed all of the input.
# A further read from <> would then start a fresh pass over @ARGV and,
# with @ARGV empty, fall back to reading STDIN -- so stop here with a
# diagnostic instead of continuing.
die "No line starting with 'Permalink' found; is this a SourceForge bug listing?\n"
    unless $saw_marker;

# Main loop: read the remaining input one bug record at a time and print
# a short report for every bug with priority 5 or higher.  Bugs with a
# lower priority are only counted.
#
# $high_priority and $low_priority are deliberately left as package
# globals; the summary code after this loop reads them.

# The "new since last week" cutoff is identical for every record, so it
# is computed once up front rather than once per bug.
my $start_of_last_week = start_of_week(time() - 7 * 86400);

# True once the last line of the current input file has been consumed.
# Checked before every read so that <> is never called past end-of-file
# (doing so would make it start over on @ARGV, i.e. read STDIN).
my $input_done = eof;

RECORD:
while (!$input_done) {
    my $raw = "";
    my ($id, $desc, $status, $opened, $assignee, $submitter, $priority);

    # A record spans an unknown number of lines.  Keep appending lines
    # and retrying the pattern until it matches the accumulated text.
    until (defined $priority) {
        my $chunk = <>;
        last RECORD unless defined $chunk;
        $input_done = eof;
        $raw .= $chunk;

        # Match against a trimmed copy so $raw keeps its newlines intact.
        my $line = $raw;
        trim($line);

        ($id, $desc, $status, $opened, $assignee, $submitter, $priority) =
            $line =~
            m/(\d{7})\s+     # ID number is terminated by spaces
              ([^\n]*)       # the description then continues to EOL
              .*?            # then a load of garbage
              (Open|Closed)\b# and fortunately the status is easy to
                             # recognize so we can resynchronize on it
              \s+ (\d\d\d\d-\d\d-\d\d)  # Then the submitted date
              \s+ (\w+)                 # assignee
              .*? \t         # Then a *huge* load of garbage
              (\w+) .*?      # Submitter, and another huge load
              \s+ (\d) \b    # Priority is a single digit,
                             #  surrounded by WS or EOL
             /sx;

        # Only give up at end-of-file AFTER the match attempt, so that a
        # record completed by the very last line is still reported.
        last RECORD if $input_done && !defined $priority;
    }

    # Submission date, pinned to 09:00 local time so it compares cleanly
    # with the 09:00 week boundary.
    my ($suby, $subm, $subd) = split /-/, $opened;
    my $date = timelocal(0, 0, 9, $subd, $subm - 1, $suby - 1900);

    if ($priority < 5) {
        $low_priority++;
        next RECORD;
    }

    print qq{#$id  "$desc"\n};
    print qq{          Assigned to $assignee\n} if $assignee ne "nobody";
    print qq{          (New since last week)\n} if $date >= $start_of_last_week;
    print qq{\n\n};
    $high_priority++;
}

# Closing summary.  Each line is printed only when its counter is
# non-zero, so an empty category produces no output at all.
print "\nTotal open bugs to fix before beta: $high_priority.\n"
    if $high_priority;

print "\nThere are also $low_priority open low-priority bugs.\n"
    if $low_priority;

# Strip leading and trailing whitespace from every argument, IN PLACE.
# The loop variable aliases the caller's own variables, so the caller
# sees the trimmed values; the return value is not meaningful.
sub trim {
    foreach my $text (@_) {
        $text =~ s/^\s+//;
        $text =~ s/\s+$//;
    }
}

# Debugging aid: return a copy of the given string with its whitespace
# made visible.  Tabs become ">>", spaces become ".", and each newline
# is preceded by a "$".  The argument itself is not modified.
sub show {
    my ($visible) = @_;
    for ($visible) {
        s/\t/>>/g;
        s/ /./g;
        s/\n/\$\n/g;
    }
    return $visible;
}

# Given an epoch time, return the epoch time of the most recent Monday
# 09:00:00 (local time) that is at or before it.
#
# A time on a Monday before 09:00 therefore maps to the PREVIOUS
# Monday, never to a moment later than the argument.
sub start_of_week {
    my $t = shift;

    my @now = localtime($t);

    # Whole days to step back to reach Monday.  localtime's wday is
    # 0 = Sunday .. 6 = Saturday, so Monday -> 0, Tuesday -> 1, ...,
    # Sunday -> 6.
    my $days_back = ($now[6] + 6) % 7;

    # On a Monday that has not yet reached 09:00, this week's boundary
    # is still in the future; use last week's instead.
    $days_back = 7 if $days_back == 0 && $now[2] < 9;

    # Step back from local noon rather than from $t itself: a one-hour
    # daylight-saving shift then cannot push the result onto the wrong
    # calendar day.
    my $noon   = timelocal(0, 0, 12, @now[3, 4, 5]);
    my @monday = localtime($noon - $days_back * 86400);

    # Combine Monday's date with time = 09:00:00.
    return timelocal(0, 0, 9, @monday[3, 4, 5]);
}
