#!/usr/bin/perl
#
# proctut.pl -- line-by-line filter that rewrites a small subset of HTML
# (headings, bold/italic, lists, <pre>, links, images) as wiki markup.
#
# Tested with
#
# $ ./proctut.pl tutorial.html > foo && diff foo tutorial.html | head -20
#
# BTW, tutorial.html is from:
#
# http://www.geda.seul.org/docs/current/tutorials/gsch2pcb/tutorial.html
#
#
# Alternatively,
#
# http://diberri.dyndns.org/wikipedia/html2wiki/
#
# might do everything I was hoping this script would do.
#
#
# I just did (as wpd):
#
# $ perl -MCPAN -e 'install Bundle::HTMLWikiConverter'
#
# to install it, but I'm not sure it worked.
#
#
# I tried again, but it still looks like it didn't work.  Eventually I
# installed it as root, was able to run it, wasn't too happy with the
# results, and wasn't confident of my ability to change the script.  So
# now I'm back here.
#
# As of 05/03/07, I've been running this as:
#
# $ ./proctut.pl < tutorial.html > tutorial.wpd
#
# And looking at, and sometimes uploading, the results.
#
# NOTE(review): the HTML tag literals inside the patterns below had been
# stripped from this file and were reconstructed from the accompanying
# comments.  The header-line, <img> and <a name=...> patterns are the
# least certain -- TODO confirm against tutorial.html.
#

use strict;
use warnings;

# Conversion state carried from one input line to the next.
my $list_indent = 0;     # current list indentation, in spaces (2 per <ul>)
my @list_char_stack;     # bullet characters of the enclosing lists
my $list_char = "";      # bullet character of the current list
my $list_item = 0;       # true between <li> and </li>
my $in_href = 0;         # true between <a href=...> and its </a>
my $in_code = 0;         # true between <pre> and </pre>

while (<>) {
    # Skip the <h1> ... header line.
    # NOTE(review): <h1> is inferred (h2..h4 map to headline levels 1..3
    # below) -- confirm this is the tag that was meant.
    next if /^<h1>/;

    s/&gt;/>/g;                          # Replaces "&gt;" with ">"
    s/&lt;/</g;                          # Replaces "&lt;" with "<"

    s{</?(?:center|blockquote)>}{}g;     # Get rid of <center></center> and
                                         # <blockquote></blockquote> pairs

    # Get rid of table related tags.  The match stops at the tag's own
    # closing ">" so that cell contents on the same line are kept.
    s{</?(?:table|tbody|tr|td)\b[^>]*>}{}g;

    s{<h2>(.*)</h2>}{====== $1 ======}g; # Change <h2> lines to headline level 1
    s{<h3>(.*)</h3>}{===== $1 =====}g;   # Change <h3> lines to headline level 2
    s{<h4>(.*)</h4>}{==== $1 ====}g;     # Change <h4> lines to headline level 3

    # Bill appears to use <h5> to mean "Bold".  At least that's the way
    # it's rendered in my browser for the one place he used it.
    s{<h5>(.*)</h5>}{**$1**}g;

    s{<b>(.*?)</b>}{**$1**}g;            # Change <b>...</b> tags
    s{<hr>}{----}g;                      # horizontal line
    s{<i>(.*)</i>}{//$1//}g;             # Change <i>...</i> to //...//

    # Deal with embedded images.
    # NOTE(review): only the replacement text survived; the pattern is a
    # best guess that captures the src attribute -- confirm.
    s/<img\b[^>]*?\bsrc="([^"]*)"[^>]*>/{{wiki:${1}}}/g;

    s{</p>}{};                           # Get rid of </p>

    next if /^\s*$/;                     # skip blank lines

    s/<br>/\\\\\n/;                      # Change <br> to forced linebreak (\\\n)

    s/^\s+//;                            # Compress whitespace at the
                                         # beginning of a line.

    # Opening a list: indent one level deeper and remember the bullet
    # character of the enclosing list.
    if (s/<ul>/\n/) {
        $list_indent += 2;
        push @list_char_stack, $list_char;
        $list_char = "*";
        # print STDERR "++", $list_indent;
    }

    # Closing a list: restore the enclosing list's indent and bullet.
    if (s{</ul>}{}) {
        if ($list_indent >= 2) {
            $list_indent -= 2;
            $list_char = pop @list_char_stack;
        }
        else {
            die "Bad value for list_indent";
        }
        # print STDERR "--", $list_indent;
    }

    # A list item starts with the indentation followed by the bullet.
    if (s/<li>//) {
        $list_item = 1;
        print " " x $list_indent . $list_char;
    }
    if (s{</li>}{\n}) {
        $list_item = 0;
    }

    # Deal with <pre> and </pre> by calling them <code> and </code>
    if (s/<pre>/<code>/g) {
        $in_code = 1;
    }
    if (s{</pre>}{</code>}g) {
        $in_code = 0;
    }

    # Inside a list item (but not inside code) the item's text is joined
    # onto one output line, so drop the newline here.
    chomp if ($list_item && !$in_code);

    # Deal with external references
    if (s/<a href="(.*?)">/[[${1}|/) {
        $in_href = 1;
    }

    # I'm not sure what all of the <a name="..."> tags are in the HTML file,
    # I could probably read something about it and learn what they do, but
    # for now, I'm just going to get rid of them.
    s/\s*<a name="[^"]*">//g;

    if ($in_href) {
        # Close the wiki link opened by the <a href=...> handled above.
        if (s{</a>}{]]}) {
            $in_href = 0;
        }
    }
    else {
        # Eliminate the other </a>'s
        s{</a>}{}g;
    }

    s/<p>/\n/;                           # Now that we've skipped blank lines,
                                         # replace <p> with a blank line

    # We chomped off the newlines at the end of the lines inside a list
    # item, so make sure we replace them with a space.
    print " " if ($list_item && !$in_code);

    print;
}