#!/usr/bin/perl

## Turns an HTML table into a tab-delimited table
##
##  usage: html2tab [-c N] < input > output
##
## Used for manually building USPS rate tables from
## their HTML versions.

use Getopt::Std;

use strict;
use warnings;

## Run as a program only when this file is executed directly, so that
## html_to_lines() can also be loaded from another file without reading
## any input or touching @ARGV.
main() unless caller;

## main()
##
## Parses the command line, slurps the HTML input (the file named on the
## command line, or STDIN when none is given) and prints one tab-delimited
## line per output row.
sub main {
	my %opt;
	getopts('c:', \%opt)
		or die "usage: html2tab [-c N] < input > output\n";

	## -c N starts a new output line after every N cells of a row.
	## 0 (or no -c at all) leaves rows unwrapped.  A value that is not a
	## plain non-negative integer is reported and ignored.
	my $wrap = 0;
	if (defined $opt{c}) {
		if ($opt{c} =~ /\A\d+\z/) {
			$wrap = $opt{c};
		}
		else {
			warn "html2tab: ignoring invalid -c value '$opt{c}'\n";
		}
	}

	## Slurp mode is confined to this expression instead of being set
	## globally for the rest of the process.
	my $html = do { local $/; <> };
	$html = '' unless defined $html;	# empty input

	print "$_\n" for html_to_lines($html, $wrap);
	return;
}

## html_to_lines($html, $wrap)
##
## Extracts the <td> cells of every <tr>...</tr> row found in $html and
## returns a list of strings, one per output line, each holding the cells
## of that line joined by tabs (no trailing newline).
##
##   $html  - the HTML text to scan
##   $wrap  - when true, a row is broken into several lines of at most
##            $wrap cells each; when false/undef a row is a single line
##
## Markup inside a cell is removed and runs of whitespace are collapsed to
## a single space, so a tab or newline inside a cell cannot corrupt the
## tab-delimited output.  Rows without any <td> cell produce no line.
##
## NOTE(review): <th> cells are not extracted; this matches the script's
## existing behaviour -- confirm whether header cells should be included.
sub html_to_lines {
	my ($html, $wrap) = @_;
	$wrap = 0 unless $wrap;
	my @lines;

	## m//g walks the text in place; deleting each match with s/// and
	## rescanning would copy the remaining text once per row and cell.
	## \b keeps <tr>/<td> from matching longer tag names such as <track>.
	while ($html =~ m{<tr\b[^>]*>(.*?)</tr>}gis) {
		my $row = $1;
		my @cells;
		while ($row =~ m{<td\b[^>]*>(.*?)</td>}gis) {
			my $cell = $1;
			$cell =~ s{</?\w[^>]*>}{}g;	# drop nested markup
			$cell =~ s/\s+/ /g;		# tabs/newlines would break the format
			$cell =~ s/\A //;
			$cell =~ s/ \z//;
			push @cells, $cell;

			## Flush a full line when wrapping is requested.
			if ($wrap and @cells >= $wrap) {
				push @lines, join("\t", @cells);
				@cells = ();
			}
		}
		push @lines, join("\t", @cells) if @cells;
	}
	return @lines;
}
