#!/usr/bin/perl
#
# Convert Damon's HTML resume to text!  Woohoo!
# Copyright (C) 1999 Damon Harper
#
# Reads an HTML file whose content is laid out as two-column table rows
# (heading on the left, body on the right) and prints it as plain text with
# a "heading | body" layout wrapped to $MARGIN columns.
#
# Usage:
#   - As a CGI (GATEWAY_INTERFACE starts with "CGI/"): converts index.html;
#     with QUERY_STRING "source" it prints resume_damon_text.cgi instead.
#   - From the command line: converts the file named by the first argument,
#     or index.html when no argument is given.
#
# NOTE(review): this file was reconstructed from a copy in which line breaks
# were collapsed and "<...>" sequences had been stripped.  Reconstructed
# spots are marked with NOTE(review) below -- confirm against a pristine copy.

use strict;

# Package globals (kept as globals because the subs below read them).
our $MARGIN     = 76;                # overall width used for layout
our @splitheads = ('References');    # headings printed on a row of their own
our %entity;                         # HTML entity name => plain-text replacement
our $leftwidth;                      # width of the left column (longest heading word)

readentities();

my $in;
if ($ENV{GATEWAY_INTERFACE} =~ m{^CGI/}) {
    print "Content-type: text/plain\n\n";
    if ($ENV{QUERY_STRING} eq 'source') {
        # Dump this script's own source and stop.
        my $src;
        if (!open($src, '<', 'resume_damon_text.cgi')) {
            print "Couldn't open resume_damon_text.cgi!\n";
            die "Couldn't open resume_damon_text.cgi!\n";
        }
        while (<$src>) {
            print;
        }
        close($src);
        exit;
    }
    if (!open($in, '<', 'index.html')) {
        print "Couldn't open index.html!\n";
        die "Couldn't open index.html!";
    }
}
else {
    my ($file) = $ARGV[0];
    $file ne '' or $file = 'index.html';
    # Three-argument open: the name is always treated as a plain file to read.
    open($in, '<', $file) or die "Couldn't open $file!";
}

my $text = join('', <$in>);
close($in);

# NOTE(review): the pattern here was stripped from the damaged copy; removing
# HTML comments is the presumed original intent (the /s flag fits a
# multi-line construct) -- confirm.
$text =~ s/<!--.*?-->//sg;

# Pass 1: collect table rows.  $tr accumulates the markup of the row being
# read; $intr is true while we are between a <tr ...> and its </tr>.
my @rows;
my $tr   = '';
my $intr = 0;
for my $line (split(/\n/, $text)) {
    # The substitution is applied whether or not we are inside a row, exactly
    # as before; only the row handling depends on $intr.
    if ($line =~ s/(.*)\<\/tr\>//i && $intr) {
        $tr .= $1;
        # NOTE(review): the optional group was stripped down to "(?:\)?";
        # "\<td\>" is the reconstruction consistent with that residue.
        my ($left, $right) = split(/\<\/td\>(?:\<td\>)?/i, $tr);

        # The heading is the text inside the first <u> or <i> of the left cell.
        $left = '' unless $left =~ s/^.*?\<[ui]\>([^<]+).*/$1/si;

        if ($left !~ /^\s*$/ || $right !~ /^\s*$/) {
            $leftwidth = max($leftwidth, map { length($_) } split(/ /, $left));
            $right = totext($right);

            # Split headings get the heading and the body on separate rows.
            my $is_split = grep { $left eq $_ } @splitheads;
            push(@rows, [$left, ''], ['', $right]) if $is_split;

            # A trailing copyright notice is cut off and emitted as its own row.
            my $copyright = ($right =~ s/(Copyright \(C\) .*)\Z(?!\n)//s) ? $1 : '';
            push(@rows, [$left, $right]) if !$is_split;
            if ($copyright) {
                $copyright =~ s{You can generate a plaintext version .*}{This resume is also available online at http://www.jumeaux.bc.ca/damon/resume/\n\nThis plaintext version was generated from the canonical html file via a custom Perl script.};
                push(@rows, ['', $copyright]);
            }
        }
        $tr   = '';
        $intr = 0;
    }
    # NOTE(review): reconstructed from the residue "s/.*\]*\>//i".
    if ($line =~ s/.*\<tr[^>]*\>//i && !$intr) {
        $intr = 1;
    }
    $tr .= $line if $intr;
}

# Pass 2: a heading wider than the left column is stacked one word per line.
for my $row (@rows) {
    if (length($row->[0]) > $leftwidth) {
        $row->[0] = join("\n", split(/\s+/, $row->[0]));
    }
}

# Pass 3: print.  The index range is fixed up front and the look-ahead is
# bounds-checked, so peeking at the next row can never extend @rows.
for my $i (0 .. $#rows) {
    my ($left, $right) = @{ $rows[$i] };
    if ($i == $#rows) {
        # NOTE(review): in the collapsed copy it is ambiguous whether the
        # printblank() call was part of this commented-out line -- confirm.
        #    printsep();
        printblank();
    }
    printsep() if !$right;
    printsection($left, wordwrap($right, $MARGIN - $leftwidth - 3));
    printblank(2) if !$right && $i < $#rows && $rows[$i + 1][0];
    printblank() if $right;
}

# wordwrap($text, $maxwidth [, $minwidth])
#
# Returns $text with newlines inserted so that no line is longer than
# $maxwidth characters.  Existing lines that already fit are kept as they are.
# A line is broken at the last run of whitespace that fits; if there is none,
# it is cut after $maxwidth-1 characters and a '-' is appended.  $minwidth
# (default 1) is the minimum number of characters taken before a break.
sub wordwrap {
    my ($text, $maxwidth, $minwidth) = @_;
    my $res = '';
    $minwidth = 1 if !$minwidth;
    my $maxwidthminus = $maxwidth - 1;

    while (length($text) > $maxwidth) {
        # Pass through leading lines that already fit.
        while ($text =~ s/^(.{0,$maxwidth}(?:\n|\Z(?!\n)))//) {
            $res .= $1;
            last if $text eq '';
        }
        last if $text eq '';

        # Decide on the substitution's own result, not on $1: after a failed
        # match $1 still holds the previous successful capture.
        if ($text =~ s/^(.{$minwidth,$maxwidthminus}\S)\s+//) {
            $res .= $1;
        }
        elsif ($text =~ s/^(.{$minwidth,$maxwidthminus})//) {
            $res .= $1 . '-';
        }
        else {
            last;    # nothing could be consumed; stop rather than spin
        }
        $res .= "\n" if $text ne '';
    }
    $res .= $text;
    return $res;
}

# printsection($left, $right)
#
# Prints the two newline-separated texts side by side, one output line per
# input line, as "left | right".  With a non-empty $right the left text is
# right-aligned in $leftwidth columns; otherwise it is left-aligned.
sub printsection {
    my ($left, $right) = @_;
    my @left  = split(/\n/, $left);
    my @right = split(/\n/, $right);
    my $count = @left > @right ? scalar(@left) : scalar(@right);

    for my $i (0 .. $count - 1) {
        my $l = defined $left[$i]  ? $left[$i]  : '';
        my $r = defined $right[$i] ? $right[$i] : '';
        my $out;
        if ($right) {
            $out = (' ' x ($leftwidth - length($l))) . $l . ' ';
        }
        else {
            $out = $l . (' ' x ($leftwidth - length($l) + 1));
        }
        print $out, "| ", $r, "\n";
    }
    return;
}

# printblank([$n])
#
# Prints $n (default 1) empty rows that still carry the column divider.
sub printblank {
    my ($n) = @_;
    $n = 1 if !$n;
    printsection(' ') for 1 .. $n;
    return;
}

# printsep()
#
# Prints a horizontal rule with a '+' where it crosses the column divider.
sub printsep {
    print '-' x ($leftwidth + 1);
    print '+';
    print '-' x ($MARGIN - $leftwidth - 1);
    print "\n";
    return;
}

# max(LIST)
#
# Returns the largest value in LIST that is greater than zero, or undef when
# there is none (undefined entries compare as 0).
sub max {
    my $max;
    for my $n (@_) {
        $max = $n if $n > $max;
    }
    return $max;
}

# totext($html)
#
# Converts a fragment of HTML to plain text: whitespace is collapsed, a few
# block-level tags become line breaks or bullets, remaining tags are dropped,
# and entities listed in %entity are replaced.  Works on a private copy so
# the caller's $_ is left alone.
sub totext {
    my ($html) = @_;
    $html =~ s/[\r\n]//g;
    $html =~ s/\s+/ /g;
    # NOTE(review): the tag names in the next four patterns were stripped from
    # the damaged copy.  <p>, <br> and <li> are inferred from the replacement
    # texts (blank line, line break, " * " bullet) -- confirm.
    $html =~ s/\<p\>\s*/\n\n/ig;
    $html =~ s/\<br\>\s*/\n/ig;
    $html =~ s/\s*\<li\>/ * /i;        # first item: no leading newline
    $html =~ s/\s*\<li\>/\n * /ig;     # remaining items start a new line
    $html =~ s/\<[^>]+\>//g;
    $html =~ s/^\s+//;
    $html =~ s/\s+$//;
    # Unknown entities are left exactly as written.
    $html =~ s/&([A-Za-z]{2,6});/exists($entity{$1}) ? $entity{$1} : "&$1;"/sge;
    return $html;
}

# readentities()
#
# Fills %entity with plain-ASCII replacements for the HTML entities this
# script understands.
sub readentities {
    %entity = (
        'copy'   => '(C)', 'trade'  => '(TM)', 'reg'    => '(R)',
        'gt'     => '>',   'lt'     => '<',    'quot'   => '"',
        'amp'    => '&',   'nbsp'   => ' ',
        'Agrave' => 'A',   'agrave' => 'a',
        'Aacute' => 'A',   'aacute' => 'a',
        'Acirc'  => 'A',   'acirc'  => 'a',
        'Atilde' => 'A',   'atilde' => 'a',
        'Auml'   => 'A',   'auml'   => 'a',
        'Aring'  => 'A',   'aring'  => 'a',
        'AElig'  => 'AE',  'aelig'  => 'ae',
        'Ccedil' => 'C',   'ccedil' => 'c',
        'Egrave' => 'E',   'egrave' => 'e',
        'Eacute' => 'E',   'eacute' => 'e',
        'Ecirc'  => 'E',   'ecirc'  => 'e',
        'Euml'   => 'E',   'euml'   => 'e',
        'Igrave' => 'I',   'igrave' => 'i',
        'Iacute' => 'I',   'iacute' => 'i',
        'Icirc'  => 'I',   'icirc'  => 'i',
        'Iuml'   => 'I',   'iuml'   => 'i',
        'ETH'    => 'Th',  'eth'    => 'th',
        'Ntilde' => 'N',   'ntilde' => 'n',
        'Ograve' => 'O',   'ograve' => 'o',
        'Oacute' => 'O',   'oacute' => 'o',
        'Ocirc'  => 'O',   'ocirc'  => 'o',
        'Otilde' => 'O',   'otilde' => 'o',
        'Ouml'   => 'O',   'ouml'   => 'o',
        'Oslash' => 'O',   'oslash' => 'o',
        'Ugrave' => 'U',   'ugrave' => 'u',
        'Uacute' => 'U',   'uacute' => 'u',
        'Ucirc'  => 'U',   'ucirc'  => 'u',
        'Uuml'   => 'U',   'uuml'   => 'u',
        'Yacute' => 'Y',   'yacute' => 'y',
        'yuml'   => 'y',
        'THORN'  => 'Th',  'thorn'  => 'th',
        'szlig'  => 'ss',
    );
    return;
}