From 1f19efe295cce5744737ce352aab249b38461f59 Mon Sep 17 00:00:00 2001 From: kts Date: Mon, 1 Dec 2014 22:25:41 -0800 Subject: [PATCH] kettext now exists as a front-end script and a module. The module separates parsing from conversion as well. --- kettext.pl | 423 +-------------------------------------------------- kettext.pm | 436 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 440 insertions(+), 419 deletions(-) create mode 100644 kettext.pm diff --git a/kettext.pl b/kettext.pl index 1fcc825..4c46ac4 100755 --- a/kettext.pl +++ b/kettext.pl @@ -1,15 +1,12 @@ #!/usr/bin/perl -use strict; -use warnings; -# -my %cmd_settings = (); +require kettext; # arg check / usage if ($#ARGV+1 == 0) { printf "Usage: kettext.pl file_name.ktx\n"; exit; } my $file_name; - +my %cmd_settings = (); for (my $arg_i = 0; $arg_i < $#ARGV+1; $arg_i++) { if ($ARGV[$arg_i] =~ m/^-(-|)(.*)/) { if ($2 =~ m/^(h(elp|))$/) { @@ -24,417 +21,5 @@ for (my $arg_i = 0; $arg_i < $#ARGV+1; $arg_i++) { $file_name = $ARGV[$arg_i]; } } -# create file handler -binmode STDOUT, ":utf8"; -use utf8; -open(my $fh, '<:encoding(UTF-8)', $file_name) - or die "ERR: could not open file '$file_name' $!"; -# create our parse "enum" -use constant { - PARSE_HEADER_BIG => 1 << 1, - PARSE_HEADER_SMALL => 1 << 2, - PARSE_HEADER_BOTH => 1 << 1 | 1 << 2, -}; -use constant { - TYPE_HEADER => 1, - TYPE_PARAGRAPH => 2, - TYPE_PRE => 3, - TYPE_RULE => 4, - TYPE_SPACER => 5, - TYPE_BREAK => 6, - TYPE_CODE => 7, - TYPE_BLOCKQUOTE => 8, - TYPE_LIST => 9 -}; -# create our general data -my %sections; -my @elements = (); # our list of ALL distinct elements -my %block = (); # some block of data, such as paragraph, pre, etc. -my %settings = (); -# main logic: line reading -chomp(my @lines = <$fh>); close($fh); -for (my $i = 0; $i <= $#lines; $i++) { - # check for imply rules on first line - if ($i == 0 || $i == $#lines) { - if ($lines[$i] =~ m/^(\..*\)(\s|))(.*)/) { - if ($1 =~ m/^.*\((.*)\)/) { - my @options = split(',', $1); - foreach my $opt (@options) { - my @value = split('=', $opt); - $settings{$value[0]} = ($value[1] ? $value[1] : 1); - } - } - next; - } - } - # 1. check for indent-related lines (pre, code, list, etc.) - if ($lines[$i] =~ m/^(\t|\s\s|\s\s\s\s)(.)(.*)/) { - # code - if ($2 eq '#') { - if (!%block) { - if ($3 =~ m/^([^\s]*)(\..*\)(\s|))(.*)/) { - %block = (type => TYPE_CODE, text => $3, opts => parseOptions($2)); - } else { - %block = (type => TYPE_CODE, text => $3); - } - } else { - if ($block{"type"} != TYPE_CODE) { - push @elements, {%block}; - if ($3 =~ m/^([^\s*])(\..*\)(\s|))(.*)/) { - %block = (type => TYPE_CODE, text => $3, opts => parseOptions($2)); - } else { - %block = (type => TYPE_CODE, text => $3); - } - } else { - $block{'text'} .= "\n".$3; - } - } - # lists - this is ugly, expensive, and by far the most shameful of this code - } elsif ($lines[$i] =~ m/^(\t{1,}|\s\s{1,})(\*\s|\~\s|\-\s|\+\s|.{1,3}\.\s)(.*)/) { - # this is hackish - I would prefer to get # of matches from above - my $depth = length($1) / (substr($1, 0, 1) eq ' ' ? 2 : 1); - if (%block) { - if ($block{"type"} != TYPE_LIST) { - push @elements, {%block}; - %block = (type => TYPE_LIST, depth => 0, children => []); - } - } else { - %block = (type => TYPE_LIST, depth => 0, children => []); - } - my %this_item = (type => TYPE_LIST, text => parseText($3), depth => $depth, parent => \%block, children => []); - my $child_count = @{$block{'children'}}; - if ($child_count == 0) { - push @{$block{'children'}}, {%this_item}; - } else { - my $root = \%block; - my $root_children = \@{${$root}{'children'}}; - my $diff = $depth-${$root}{'depth'}; - # travel down the list until we get our proper context. - while ($diff > 0) { - $root_children = \@{${$root}{'children'}}; - if (@{${$root}{'children'}} <= 0) { - push @{${$root}{'children'}}, {%this_item}; - $diff = 0; - } else { - # diff 1 = we have proper parent, 0 = we're one step too deep - $root = $root_children->[-1]; - $diff = $depth-${$root}{'depth'}; - if ($diff == 1) { - push @{${$root}{'children'}}, {%this_item}; - $diff = 0; - } elsif ($diff == 0) { - push @{${${$root}{'parent'}}{'children'}}, {%this_item}; - } - } - } - } - # preformatted (default) - } else { - if (!%block) { - if ($2 eq '.' && $3 =~ m/^(.*\)(\s|))(.*)/) { - %block = (type => TYPE_PRE, text => $3, opts => parseOptions('.'.$1)); - } else { - %block = (type => TYPE_PRE, text => $2.$3); - } - } else { - if ($block{"type"} != TYPE_PRE) { - push @elements, {%block}; %block=(); - if ($2 eq '.' && $3 =~ m/^(.*\)(\s|))(.*)/) { - %block = (type => TYPE_PRE, text => $3, opts => parseOptions('.'.$1)); - } else { - %block = (type => TYPE_PRE, text => $2.$3); - } - } else { - $block{'text'} .= "\n".$2.$3; - } - } - } - next; - } - # 2. check for big headers, e.g., - # ,,,, - # header - # ```` - # conditions: first and third lines must have at least 4 repeating chars, and the second line must not match header conditions. - if ($i+1 < $#lines) { - if ($lines[$i] =~ m/(\S\S)\1{2,}/) { - my $size = $+[0]; - # only check closing header if contents appear to not be a header - if ($lines[$i+1] !~ m/(\S\S)\1{2,}/g) { - # check closing line for header appearance - if ($lines[$i+2] =~ m/(\S\S)\1{2,}/g) { - if (%block) { push @elements, {%block}; %block=() } - if ($lines[$i+1] =~ m/^(.*?)(\..*\))/) { - push @elements, { - type => TYPE_HEADER, - size => $size, - text => parseText($1), - opts => parseOptions($2) - }; - } else { - push @elements, { - type => TYPE_HEADER, - size => $size, - text => parseText($lines[$i+1]) - }; - } - $i += 2; - next; - } - } - } - } - # 3. check for small headers - if ($lines[$i] =~ m/^(\S)\1{2,}/) { - my $size = $+[0]; - my $match = substr($lines[$i], $-[0], $+[0]-$-[0]); - my $post = substr($lines[$i], $+[0]); - # two conditions: - # match repeated $match if something else came between - # e.g., ==== header ==== - # otherwise match if there is some text following - # e.g., ==== header - if ($post =~ m/(\t*|\s*|)(.*)\Q$match\E$/g) { - if (%block) { push @elements, {%block}; %block=() } - if ($2 =~ m/^(.*?)(\..*\))/) { - push @elements, { - type => TYPE_HEADER, - size => $size, - text => parseText($1), - opts => parseOptions($2) - }; - } else { - push @elements, { - type => TYPE_HEADER, - size => $size, - text => parseText($2) - }; - } - next; - } elsif ($post =~ m/^(\t*|\s*|)(.*[^(\n|\r|\s|\t)])/g) { - if (%block) { push @elements, {%block}; %block=() } - if ($post =~ m/^(.*?)(\..*\))/) { - push @elements, { - type => TYPE_HEADER, - size => $size, - text => parseText($1), - opts => parseOptions($2) - }; - } else { - push @elements, { - type => TYPE_HEADER, - size => $size, - text => parseText($post) - }; - } - next; - } - } - # 4. check for spacers - if ($lines[$i] =~ m/^(\S)\1{2,}/) { - if (%block) { push @elements, {%block}; %block=() } - push @elements, { - type => TYPE_SPACER, - size => $+[0]-$-[0] - }; - next; - } - # discover blank lines - these usually signify end of some type of block - if ($lines[$i] =~ m/^\s*$/) { - if (%block) { push @elements, {%block}; %block=() } - push @elements, { - type => TYPE_BREAK - }; - next; - } - # check for blockquotes - if ($lines[$i] =~ m/^\>(.*)$/) { - if (!%block) { - if ($1 =~ m/^(\..*\)(\s|))(.*)/) { - %block = (type => TYPE_BLOCKQUOTE, text => $2, opts => parseOptions($1)); - } else { - %block = (type => TYPE_BLOCKQUOTE, text => $1); - } - } else { - if ($block{'type'} != TYPE_BLOCKQUOTE) { - push @elements, {%block}; - if ($1 =~ m/^(\..*\)(\s|))(.*)/) { - %block = (type => TYPE_BLOCKQUOTE, text => $2, opts => parseOptions($1)); - } else { - %block = (type => TYPE_BLOCKQUOTE, text => $1); - } - } else { - $block{'text'} .= "\n".$1; - } - } - next; - } - # finally, create a paragraph or append to it. - if (!%block) { - if ($lines[$i] =~ m/^(\..*\)(\s|))(.*)/) { - %block = (type => TYPE_PARAGRAPH, text => parseText($2), opts => parseOptions($1)); - } else { - %block = (type => TYPE_PARAGRAPH, text => parseText($lines[$i])); - } - } else { - $block{'text'} .= "\n".parseText($lines[$i]); - } -} -# push final block if it exists -if (%block) { push @elements, {%block}; %block=() } -# merge file settings with command-line settings -@settings{ keys %cmd_settings } = values %cmd_settings; -# get our various header sizes and organize an array with unique values from smallest to largest. -my @headers; -foreach (@elements) { - my $element = $_; - if ($element->{type} == TYPE_HEADER) { - if (@headers) { - my $h_i = 0; - my $last_size = $headers[0]; - for (my $h_i = 0; $h_i < scalar @headers; $h_i++) { - if ($element->{size} == $headers[$h_i]) { - $h_i = scalar @headers; - } elsif ($element->{size} < $headers[$h_i]) { - splice @headers, $h_i, 0, $element->{size}; - $h_i = scalar @headers; - } elsif ($element->{size} > $headers[$h_i]) { - if ($h_i+1 >= scalar @headers) { - splice @headers, $h_i+1, 0, $element->{size}; - $h_i = scalar @headers; - } - } - } - } else { - $headers[0] = $element->{size}; - } - } -} -# this is really dumb, but convert our array to a hash -my %header_map; -for (my $h_i = 0; $h_i < scalar @headers; $h_i++) { - if ($settings{'header.reverse'}) { - $header_map{$headers[$h_i]} = $h_i+1+($settings{'header.depth'} ? $settings{'header.depth'} : 0); - } else { - $header_map{$headers[$h_i]} = scalar ($settings{'header.depth'} ? $settings{'header.depth'} : 0) + @headers - $h_i; - } -} - -# set up our array for header numbering. The first element -my @header_numbers; -for (my $h_i = 0; $h_i < scalar @headers; $h_i++) { - $header_numbers[$h_i] = 0; -} -my $header_number = 0; # at which numbering depth are we - -# print 'em out -if ($settings{'toc'}) { - foreach (@elements) { - if ($_->{type} == TYPE_HEADER) { - - } - } -} else { - my $previous_type = 0; - my $indent = ("\t" x ($settings{"indent_level"} ? $settings{"indent_level"} : 0)); - foreach (@elements) { - if ($_->{type} == TYPE_HEADER) { - my $hsize = $header_map{$_->{size}}; - my $htext = $_->{text}; - if ($settings{"header.numbering"}) { - $header_number = $hsize; - for (my $h_i = $header_number+1; $h_i < scalar @headers; $h_i++) { - $header_numbers[$h_i] = 0; - } - $header_numbers[$header_number]++; - $htext = ($settings{"header.numbering"} ? $header_numbers[$header_number].'. ' : '') . $htext; - } - my $hid = $_->{text}; - $hid =~ s/ /_/g; - print("{opts} ? $_->{opts} : '').($settings{'header.ids'} ? " id=\"".$hid."\"" : '').">$htext\n"); - } elsif ($_->{type} == TYPE_BREAK) { - if ($previous_type == TYPE_BREAK) { - print("
\n"); - } - } elsif ($_->{type} == TYPE_PARAGRAPH) { - my $text = $_->{text}; - $text =~ s/\n/
\n/g; - print("{opts} ? $_->{opts} : '').">$text

\n"); - } elsif ($_->{type} == TYPE_PRE) { - print("{opts} ? $_->{opts} : '').">$_->{text}\n"); - } elsif ($_->{type} == TYPE_CODE) { - print("
{opts} ? $_->{opts} : '').">$_->{text}
\n"); - } elsif ($_->{type} == TYPE_BLOCKQUOTE) { - print("{opts} ? $_->{opts} : '').">".($settings{'blockquote.parse'} ? parseText($_->{text}) : $_->{text})."\n"); - } elsif ($_->{type} == TYPE_SPACER) { - print("
\n"); - } elsif ($_->{type} == TYPE_LIST) { - printf("

"); # @@ should this be optional? - printList($_); - } - $previous_type = $_->{type}; - } -} -if (!$settings{"version.none"}) { - if ($settings{"version.hide"}) { - print "\n"; - } else { - print "$file_name generated by kettext 0.1\n"; - } -} - -sub printList { - my $list = $_[0]; - my $child_count = @{$list->{'children'}}; - #my $child = $children->[$child_i]; - if ($list->{'text'}) { - print "
  • $list->{'text'}"; - } - if ($child_count > 0) { - print "
      \n"; - my $children = \@{$list->{'children'}}; - my $child_i = 0; - for (my $child = $children->[$child_i]; $child_i < $child_count; $child = $children->[++$child_i]) { - printList($child); - } - print "
    \n"; - } - if ($list->{'text'}) { - print "
  • \n"; - } -} - -sub parseText { - my $text = $_[0]; - my @text_patterns = ('\*\*','\*\*', '\/\/','\/\/', '\[\[','\]\]', '\(\(','\)\)', '``', '``'); - my @text_replace = ('','', '','', '','', '','', '',''); - my $i = 0; - my $new_text = $text; - for (my $p = 0; $p <= $#text_patterns; $p+=2) { - my $open = $text_patterns[$p]; - my $close = $text_patterns[$p+1]; - while ($new_text =~ m/($open)([^$close]*)($close)(\.\S*\(.*?\)|)/g) { - #while ($new_text =~ m/($open)([^$close]*)($close)(\.\S*\(.*\)|)/g) { - my $converted_text; - if ($4) { - my $ts = substr($text_replace[$i], 0, -1); - my $te = substr($text_replace[$i], -1); - $converted_text = $ts.parseOptions($4).$te.$2.$text_replace[$i+1]; - } else { - $converted_text = $text_replace[$i].$2.$text_replace[$i+1]; - } - $new_text =~ s/\Q$1$2$3$4\E/$converted_text/g; - } - $i +=2; - } - return $new_text; -} - -sub parseOptions { - my $text = $_[0]; - my $opts; - while ($text =~ m/(\.)([a-zA-Z]*[^\(])(\()([^\)]*)/g) { - $opts.=" $2=\"$4\""; - } - return $opts; -} +my %parsed_file = kettext::parseFile($file_name, \%cmd_settings); +kettext::convertTo::HTML(%parsed_file); diff --git a/kettext.pm b/kettext.pm new file mode 100644 index 0000000..7a24045 --- /dev/null +++ b/kettext.pm @@ -0,0 +1,436 @@ +#!/usr/bin/perl +use strict; +use warnings; +package kettext; +use constant VERSION => 0.2; +# create our parse "enum" +use constant { + TYPE_HEADER => 1, + TYPE_PARAGRAPH => 2, + TYPE_PRE => 3, + TYPE_RULE => 4, + TYPE_SPACER => 5, + TYPE_BREAK => 6, + TYPE_CODE => 7, + TYPE_BLOCKQUOTE => 8, + TYPE_LIST => 9 +}; +# create our general data +# parseFile("my_file", %cmd_settings) +sub parseFile { + my $file_name = $_[0]; + my %cmd_settings = ($_[1] ? %{$_[1]} : ()); + my @elements = (); # our list of ALL distinct elements + my %block = (); # some block of data, such as paragraph, pre, etc. + my %settings = (); + # create file handler and read into @lines + binmode STDOUT, ":utf8"; + use utf8; + open(my $fh, '<:encoding(UTF-8)', $file_name) + or die "ERR: could not open file '$file_name' $!"; + chomp(my @lines = <$fh>); close($fh); + # main logic: line reading + for (my $i = 0; $i <= $#lines; $i++) { + # check for imply rules on first line + if ($i == 0 || $i == $#lines) { + if ($lines[$i] =~ m/^(\..*\)(\s|))(.*)/) { + if ($1 =~ m/^.*\((.*)\)/) { + my @options = split(',', $1); + foreach my $opt (@options) { + my @value = split('=', $opt); + $settings{$value[0]} = ($value[1] ? $value[1] : 1); + } + } + next; + } + } + # 1. check for indent-related lines (pre, code, list, etc.) + if ($lines[$i] =~ m/^(\t|\s\s|\s\s\s\s)(.)(.*)/) { + # code + if ($2 eq '#') { + if (!%block) { + if ($3 =~ m/^([^\s]*)(\..*\)(\s|))(.*)/) { + %block = (type => TYPE_CODE, text => $3, opts => parseOptions($2)); + } else { + %block = (type => TYPE_CODE, text => $3); + } + } else { + if ($block{"type"} != TYPE_CODE) { + push @elements, {%block}; + if ($3 =~ m/^([^\s*])(\..*\)(\s|))(.*)/) { + %block = (type => TYPE_CODE, text => $3, opts => parseOptions($2)); + } else { + %block = (type => TYPE_CODE, text => $3); + } + } else { + $block{'text'} .= "\n".$3; + } + } + # lists - this is ugly, expensive, and by far the most shameful of this code + } elsif ($lines[$i] =~ m/^(\t{1,}|\s\s{1,})(\*\s|\~\s|\-\s|\+\s|.{1,3}\.\s)(.*)/) { + # this is hackish - I would prefer to get # of matches from above + my $depth = length($1) / (substr($1, 0, 1) eq ' ' ? 2 : 1); + if (%block) { + if ($block{"type"} != TYPE_LIST) { + push @elements, {%block}; + %block = (type => TYPE_LIST, depth => 0, children => []); + } + } else { + %block = (type => TYPE_LIST, depth => 0, children => []); + } + my %this_item = (type => TYPE_LIST, text => $3, depth => $depth, parent => \%block, children => []); + my $child_count = @{$block{'children'}}; + if ($child_count == 0) { + push @{$block{'children'}}, {%this_item}; + } else { + my $root = \%block; + my $root_children = \@{${$root}{'children'}}; + my $diff = $depth-${$root}{'depth'}; + # travel down the list until we get our proper context. + while ($diff > 0) { + $root_children = \@{${$root}{'children'}}; + if (@{${$root}{'children'}} <= 0) { + push @{${$root}{'children'}}, {%this_item}; + $diff = 0; + } else { + # diff 1 = we have proper parent, 0 = we're one step too deep + $root = $root_children->[-1]; + $diff = $depth-${$root}{'depth'}; + if ($diff == 1) { + push @{${$root}{'children'}}, {%this_item}; + $diff = 0; + } elsif ($diff == 0) { + push @{${${$root}{'parent'}}{'children'}}, {%this_item}; + } + } + } + } + # preformatted (default) + } else { + if (!%block) { + if ($2 eq '.' && $3 =~ m/^(.*\)(\s|))(.*)/) { + %block = (type => TYPE_PRE, text => $3, opts => parseOptions('.'.$1)); + } else { + %block = (type => TYPE_PRE, text => $2.$3); + } + } else { + if ($block{"type"} != TYPE_PRE) { + push @elements, {%block}; %block=(); + if ($2 eq '.' && $3 =~ m/^(.*\)(\s|))(.*)/) { + %block = (type => TYPE_PRE, text => $3, opts => parseOptions('.'.$1)); + } else { + %block = (type => TYPE_PRE, text => $2.$3); + } + } else { + $block{'text'} .= "\n".$2.$3; + } + } + } + next; + } + # 2. check for big headers, e.g., + # ,,,, + # header + # ```` + # conditions: first and third lines must have at least 4 repeating chars, and the second line must not match header conditions. + if ($i+1 < $#lines) { + if ($lines[$i] =~ m/(\S\S)\1{2,}/) { + my $size = $+[0]; + # only check closing header if contents appear to not be a header + if ($lines[$i+1] !~ m/(\S\S)\1{2,}/g) { + # check closing line for header appearance + if ($lines[$i+2] =~ m/(\S\S)\1{2,}/g) { + if (%block) { push @elements, {%block}; %block=() } + if ($lines[$i+1] =~ m/^(.*?)(\..*\))/) { + push @elements, { + type => TYPE_HEADER, + size => $size, + text => $1, + opts => parseOptions($2) + }; + } else { + push @elements, { + type => TYPE_HEADER, + size => $size, + text => $lines[$i+1] + }; + } + $i += 2; + next; + } + } + } + } + # 3. check for small headers + if ($lines[$i] =~ m/^(\S)\1{2,}/) { + my $size = $+[0]; + my $match = substr($lines[$i], $-[0], $+[0]-$-[0]); + my $post = substr($lines[$i], $+[0]); + # two conditions: + # match repeated $match if something else came between + # e.g., ==== header ==== + # otherwise match if there is some text following + # e.g., ==== header + if ($post =~ m/(\t*|\s*|)(.*)\Q$match\E$/g) { + if (%block) { push @elements, {%block}; %block=() } + if ($2 =~ m/^(.*?)(\..*\))/) { + push @elements, { + type => TYPE_HEADER, + size => $size, + text => $1, + opts => parseOptions($2) + }; + } else { + push @elements, { + type => TYPE_HEADER, + size => $size, + text => $2 + }; + } + next; + } elsif ($post =~ m/^(\t*|\s*|)(.*[^(\n|\r|\s|\t)])/g) { + if (%block) { push @elements, {%block}; %block=() } + if ($post =~ m/^(.*?)(\..*\))/) { + push @elements, { + type => TYPE_HEADER, + size => $size, + text => $1, + opts => parseOptions($2) + }; + } else { + push @elements, { + type => TYPE_HEADER, + size => $size, + text => $post + }; + } + next; + } + } + # 4. check for spacers + if ($lines[$i] =~ m/^(\S)\1{2,}/) { + if (%block) { push @elements, {%block}; %block=() } + push @elements, { + type => TYPE_SPACER, + size => $+[0]-$-[0] + }; + next; + } + # discover blank lines - these usually signify end of some type of block + if ($lines[$i] =~ m/^\s*$/) { + if (%block) { push @elements, {%block}; %block=() } + push @elements, { + type => TYPE_BREAK + }; + next; + } + # check for blockquotes + if ($lines[$i] =~ m/^\>(.*)$/) { + if (!%block) { + if ($1 =~ m/^(\..*\)(\s|))(.*)/) { + %block = (type => TYPE_BLOCKQUOTE, text => $2, opts => parseOptions($1)); + } else { + %block = (type => TYPE_BLOCKQUOTE, text => $1); + } + } else { + if ($block{'type'} != TYPE_BLOCKQUOTE) { + push @elements, {%block}; + if ($1 =~ m/^(\..*\)(\s|))(.*)/) { + %block = (type => TYPE_BLOCKQUOTE, text => $2, opts => parseOptions($1)); + } else { + %block = (type => TYPE_BLOCKQUOTE, text => $1); + } + } else { + $block{'text'} .= "\n".$1; + } + } + next; + } + # finally, create a paragraph or append to it. + if (!%block) { + if ($lines[$i] =~ m/^(\..*\)(\s|))(.*)/) { + %block = (type => TYPE_PARAGRAPH, text => $2, opts => parseOptions($1)); + } else { + %block = (type => TYPE_PARAGRAPH, text => $lines[$i]); + } + } else { + $block{'text'} .= "\n".$lines[$i]; + } + } + # push final block if it exists + if (%block) { push @elements, {%block}; %block=() } + # merge file settings with command-line settings + @settings{ keys %cmd_settings } = values %cmd_settings; + # get our various header sizes and organize an array with unique values from smallest to largest. + my @headers; + foreach (@elements) { + my $element = $_; + if ($element->{type} == TYPE_HEADER) { + if (@headers) { + my $h_i = 0; + my $last_size = $headers[0]; + for (my $h_i = 0; $h_i < scalar @headers; $h_i++) { + if ($element->{size} == $headers[$h_i]) { + $h_i = scalar @headers; + } elsif ($element->{size} < $headers[$h_i]) { + splice @headers, $h_i, 0, $element->{size}; + $h_i = scalar @headers; + } elsif ($element->{size} > $headers[$h_i]) { + if ($h_i+1 >= scalar @headers) { + splice @headers, $h_i+1, 0, $element->{size}; + $h_i = scalar @headers; + } + } + } + } else { + $headers[0] = $element->{size}; + } + } + } + # this is really dumb, but convert our array to a hash + my %header_map; + for (my $h_i = 0; $h_i < scalar @headers; $h_i++) { + if ($settings{'header.reverse'}) { + $header_map{$headers[$h_i]} = $h_i+1+($settings{'header.depth'} ? $settings{'header.depth'} : 0); + } else { + $header_map{$headers[$h_i]} = scalar ($settings{'header.depth'} ? $settings{'header.depth'} : 0) + @headers - $h_i; + } + } + + # set up our array for header numbering. The first element + my @header_numbers; + for (my $h_i = 0; $h_i < scalar @headers; $h_i++) { + $header_numbers[$h_i] = 0; + } + my $header_number = 0; # at which numbering depth are we + # prepend to headers + foreach (@elements) { + if ($_->{type} == TYPE_HEADER) { + my $hsize = $header_map{$_->{size}}; + $_->{hsize} = $hsize; + $header_number = $hsize; + for (my $h_i = $header_number+1; $h_i < scalar @headers; $h_i++) { + $header_numbers[$h_i] = 0; + } + $header_numbers[$header_number]++; + # prepend an ordered number to headers based on depth + if ($settings{"header.numbering"}) { + $_->{text} = $header_numbers[$header_number].'. '.$_->{text}; + } + } + } + my %ret = ( + "elements" => \@elements, + "settings" => \%settings, + "filename" => $file_name + ); + return %ret; +} +sub parseOptions { + my $text = $_[0]; + my $opts; + while ($text =~ m/(\.)([a-zA-Z]*[^\(])(\()([^\)]*)/g) { + $opts.=" $2=\"$4\""; + } + return $opts; +} + +package kettext::convertTo; +sub HTML { + my %data = @_; + my @elements = @{$data{elements}}; + my %settings = %{$data{settings}}; + my $file_name = $data{filename}; + # print 'em out + if ($settings{'toc'}) { + foreach (@elements) { + if ($_->{type} == kettext::TYPE_HEADER) { + + } + } + } else { + my $previous_type = 0; + my $indent = ("\t" x ($settings{"indent_level"} ? $settings{"indent_level"} : 0)); + foreach (@elements) { + if ($_->{type} == kettext::TYPE_HEADER) { + my $hid = $_->{text}; + $hid =~ s/ /_/g; + print("{hsize}".($_->{opts} ? $_->{opts} : '').($settings{'header.ids'} ? " id=\"".$hid."\"" : '').">".convertText($_->{text})."{hsize}>\n"); + } elsif ($_->{type} == kettext::TYPE_BREAK) { + if ($previous_type == kettext::TYPE_BREAK) { + print("
    \n"); + } + } elsif ($_->{type} == kettext::TYPE_PARAGRAPH) { + my $text = convertText($_->{text}); + $text =~ s/\n/
    \n/g; + print("{opts} ? $_->{opts} : '').">$text

    \n"); + } elsif ($_->{type} == kettext::TYPE_PRE) { + print("{opts} ? $_->{opts} : '').">$_->{text}\n"); + } elsif ($_->{type} == kettext::TYPE_CODE) { + print("
    {opts} ? $_->{opts} : '').">$_->{text}
    \n"); + } elsif ($_->{type} == kettext::TYPE_BLOCKQUOTE) { + print("{opts} ? $_->{opts} : '').">".($settings{'blockquote.parse'} ? convertText($_->{text}) : $_->{text})."\n"); + } elsif ($_->{type} == kettext::TYPE_SPACER) { + print("
    \n"); + } elsif ($_->{type} == kettext::TYPE_LIST) { + printf("

    "); # @@ should this be optional? + printList($_); + } + $previous_type = $_->{type}; + } + } + if (!$settings{"version.none"}) { + if ($settings{"version.hide"}) { + print "\n"; + } else { + print "$file_name generated by kettext ".kettext::VERSION."\n"; + } + } + + sub printList { + my $list = $_[0]; + my $child_count = @{$list->{'children'}}; + #my $child = $children->[$child_i]; + if ($list->{'text'}) { + print "
  • ".convertText($list->{'text'}); + } + if ($child_count > 0) { + print "
      \n"; + my $children = \@{$list->{'children'}}; + my $child_i = 0; + for (my $child = $children->[$child_i]; $child_i < $child_count; $child = $children->[++$child_i]) { + printList($child); + } + print "
    \n"; + } + if ($list->{'text'}) { + print "
  • \n"; + } + } + sub convertText { + my $text = $_[0]; + my @text_patterns = ('\*\*','\*\*', '\/\/','\/\/', '\[\[','\]\]', '\(\(','\)\)', '``', '``'); + my @text_replace = ('','', '','', '','', '','', '',''); + my $i = 0; + my $new_text = $text; + for (my $p = 0; $p <= $#text_patterns; $p+=2) { + my $open = $text_patterns[$p]; + my $close = $text_patterns[$p+1]; + while ($new_text =~ m/($open)([^$close]*)($close)(\.\S*\(.*?\)|)/g) { + #while ($new_text =~ m/($open)([^$close]*)($close)(\.\S*\(.*\)|)/g) { + my $converted_text; + if ($4) { + my $ts = substr($text_replace[$i], 0, -1); + my $te = substr($text_replace[$i], -1); + $converted_text = $ts.kettext::parseOptions($4).$te.$2.$text_replace[$i+1]; + } else { + $converted_text = $text_replace[$i].$2.$text_replace[$i+1]; + } + $new_text =~ s/\Q$1$2$3$4\E/$converted_text/g; + } + $i +=2; + } + return $new_text; + } +} +1;