#!/usr/bin/perl # # Crudely borkifies an HTML file. # Usage: borkify.pl < source.html > dest.html # use strict; $/ = undef; my $data = ""; if (!scalar(@ARGV)) { $data = ; print borkifyHTML($data); } elsif (scalar(@ARGV) == 2) { my $infile = $ARGV[0]; open(INFILE, "<$infile"); $data = ; close(INFILE); my $infile = $ARGV[1]; open(OUTFILE, ">$infile"); print OUTFILE borkifyHTML($data); close(OUTFILE); } sub borkifyHTML($) { my $data = shift; my @paras = split(/(]*)?>)/i, $data); my $lastborked = 1; my $retdata = ""; my $nextispara = 0; foreach my $para (@paras) { if ($para =~ /^]*)?>$/) { $nextispara = 1; $retdata .= $para; } elsif ($nextispara) { if ($lastborked) { $lastborked = 0; $retdata .= $para; } else { $retdata .= borkifyTagContents($para); $lastborked = 1; } } else { $retdata .= $para; } } return $retdata; } sub borkifyTagContents($) { my $para = shift; my @tags = split(/(<[^>]+>|[&][^;]+;)/, $para); my $retdata = ""; foreach my $tag (@tags) { if ($tag =~ /(<[^>]+>|[&][^;]+;)/) { $retdata .= $tag; # Pass HTML tags through unaltered. } else { $retdata .= borkifyString($tag); } } return $retdata; } sub borkifyString($) { my $string = shift; my @parts = split(/(an|An|au|Au|a[A-Za-z']|A[A-Za-z']en|ew|e[^A-Za-z']|ir|ow|the|The|th[^A-Za-z']|tion|.)/, $string); # print "\n".scalar(@parts)."\n"; my $i_seen = 0; my $in_word = 0; my $retdata = ""; foreach my $part (@parts) { my ($newpart, $new_in_word, $new_i_seen) = borkpart($part, $in_word, $i_seen); $retdata .= $newpart; $in_word = $new_in_word; $i_seen = $new_i_seen; } # die("RD: \"$retdata\"\n"); return $retdata; } sub borkpart($) { my $part = shift; my $in_word = shift; my $i_seen = shift; if (!$part) { return ($part, $in_word, $i_seen); } my $retdata = ""; # print "PART: \"$part\"\n"; if ($part =~ /[.!?]/) { $retdata .= $part; $retdata .= " Bork! Bork! Bork!"; # print STDERR "IN WORD -> 0\n"; $in_word = 0; } elsif ($part =~ /[^A-Za-z']/) { # non-word per borkifier rules $i_seen = 0; $in_word = 0; # print STDERR "IN WORD -> 0\n"; $retdata .= $part; } else { # print STDERR "IN WORD -> 1\n"; SWITCH: { ($part eq "an") && do { $retdata .= "un"; next; }; ($part eq "An") && do { $in_word = 1; $retdata .= "Un"; next; }; ($part eq "au") && do { $in_word = 1; $retdata .= "oo"; next; }; ($part eq "Au") && do { $in_word = 1; $retdata .= "Oo"; next; }; ($part =~ /a([A-Za-z'])/) && do { $in_word = 1; $retdata .= "e"; my ($newpart, $new_in_word, $new_i_seen) = borkpart($1, $in_word, $i_seen); $retdata .= $newpart; $in_word = $new_in_word; $i_seen = $new_i_seen; next; }; ($part =~ /A([A-Za-z'])/) && do { $in_word = 1; $retdata .= "E"; my ($newpart, $new_in_word, $new_i_seen) = borkpart($1, $in_word, $i_seen); $retdata .= $newpart; $in_word = $new_in_word; $i_seen = $new_i_seen; next; }; ($part =~ /en([^A-Za-z'])/) && do { $in_word = 1; $retdata .= "ee"; my ($newpart, $new_in_word, $new_i_seen) = borkpart($1, $in_word, $i_seen); $retdata .= $newpart; $in_word = $new_in_word; $i_seen = $new_i_seen; next; }; ($in_word && $part eq "ew") && do { $in_word = 1; $retdata .= "oo"; next; }; ($in_word && $part =~ /e([^A-Za-z'])/) && do { $in_word = 1; $retdata .= "e-a"; my ($newpart, $new_in_word, $new_i_seen) = borkpart($1, $in_word, $i_seen); $retdata .= $newpart; $in_word = $new_in_word; $i_seen = $new_i_seen; next; }; (!$in_word && $part eq "e") && do { $in_word = 1; $retdata .= "i"; next; }; (!$in_word && $part eq "E") && do { $in_word = 1; $retdata .= "I"; next; }; ($in_word && $part eq "f") && do { $in_word = 1; $retdata .= "ff"; next; }; ($in_word && $part eq "ir") && do { $in_word = 1; $retdata .= "ur"; next; }; ($in_word && $part eq "i") && do { $in_word = 1; if ($i_seen) { $retdata .= "i"; } else { $retdata .= "ee"; $i_seen = 1; } next; }; ($in_word && $part eq "ow") && do { $in_word = 1; $retdata .= "oo"; next; }; (!$in_word && $part eq "o") && do { $in_word = 1; $retdata .= "oo"; next; }; (!$in_word && $part eq "O") && do { $in_word = 1; $retdata .= "Oo"; next; }; ($in_word && $part eq "o") && do { $in_word = 1; $retdata .= "u"; next; }; ($part eq "the") && do { $in_word = 1; $retdata .= "zee"; next; }; ($part eq "The") && do { $in_word = 1; $retdata .= "Zee"; next; }; ($part =~ /th([^A-Za-z'])/) && do { $in_word = 1; $retdata .= "t"; my ($newpart, $new_in_word, $new_i_seen) = borkpart($1, $in_word, $i_seen); $retdata .= $newpart; $in_word = $new_in_word; $i_seen = $new_i_seen; next; }; ($in_word && $part eq "tion") && do { $in_word = 1; $retdata .= "shun"; next; }; ($in_word && $part eq "u") && do { $in_word = 1; $retdata .= "oo"; next; }; ($in_word && $part eq "U") && do { $in_word = 1; $retdata .= "Oo"; next; }; ($part eq "v") && do { $in_word = 1; $retdata .= "f"; next; }; ($part eq "V") && do { $in_word = 1; $retdata .= "F"; next; }; ($part eq "w") && do { $in_word = 1; $retdata .= "v"; next; }; ($part eq "W") && do { $in_word = 1; $retdata .= "V"; next; }; { $in_word = 1; $retdata .= $part; }; } } return ($retdata, $in_word, $i_seen); }