#!/usr/local/bin/perl5.005
# Quick-n-dirty, HIGHLY LIMITED, RTF -> HTML converter
# jason@jclark.org
#
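# Only supports bold, italic, underline, and paragraphs (the HTML tag
# names originally listed here for bold and italic are missing from this
# copy of the file). Corrects improperly nested tags, handles RTF's
# \plain. Ignores any RTF aside from the list above. Emits valid
# XHTML fragments (i.e., wrap the output in a container element; the
# element originally named here is also missing). Quite certainly
# contains bugs. Use as you wish, at your own risk.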
# NOTE(review): this copy of the script appears to have had every HTML tag
# literal stripped out (empty alternations in the patterns below, newline-only
# replacement strings, a dangling "'))" fragment), and the second half has been
# collapsed onto two physical lines. As it stands the file is not valid Perl;
# restore it from a pristine copy before relying on it. The comments added
# below describe only what the surviving text shows.
#
# Per-file driver: for each path given on the command line, open it and
# switch to slurp mode.
foreach my $file (@ARGV) {
# Two-argument open on a bareword handle; the result is not checked, so an
# unreadable file is not reported here.
open(IN, "< $file");
local $/; #slurp mode
# NOTE(review): the right-hand side no longer reads from IN. The read
# expression and whatever followed it (presumably closing the handle, emitting
# the result, the end of this loop, the header of the conversion sub and its
# first substitutions -- TODO confirm against the original) are missing here.
my $rtfdoc = !g;
# The substitutions below operate on $_. As written, "\u" and "\par" are each
# replaced by a bare newline; presumably the replacements were HTML tags.
s!\\u!
!g;
s!\\par!
!g; #convert para's
# Delete a backslash followed by a run of characters that are not whitespace,
# a backslash or "<", when that run is followed by a backslash, a space or "<".
s!\\[^\s\\<]+(?=[\\ <])!!g; #remove any leftover rtf tags
#fix , wrongly nested tags
# Split $_ on "<...>" spans; the capturing group keeps each such span as its
# own list element, alongside the text between them.
my @dom=split /(<.+?>)/;
# Used as a stack: entries are added with unshift and removed with shift, so
# the most recently added entry is at index 0.
my @elems;
# NOTE(review): both patterns in this loop have lost their tag alternatives
# (the first is now an empty alternation; the second is no longer terminated),
# so the intended opening-tag / closing-tag tests cannot be read from this copy.
foreach my $elem (@dom) {
if ($elem =~ m!|| !)
{ unshift(@elems, "$elem")}
elsif ($elem =~ m!|
|||
')) { $elem .= closetag(shift(@elems)); } } } #unwind remainder of stack foreach my $elem (@elems) { push @dom, closetag($elem); } $_ = join('',@dom); $_ = "
" . $_; #first opening para 1 while s!<(\w+)>\s*\1>!!g ; #eliminate empty tag pairs return $_; } sub closetag { $tag = shift; $tag =~ s{<(?!/)}{}; return $tag; }