kettext now exists as a front-end script and a module. The module separates parsing from conversion as well.

2014-12-01 22:25:41 -08:00 · 2014-12-01 22:25:41 -08:00 · 1f19efe295
parent 9f541689a0
commit 1f19efe295
2 changed files with 440 additions and 419 deletions
--- a/kettext.pl
+++ b/kettext.pl
@ -1,15 +1,12 @@
 #!/usr/bin/perl
-use strict;
-use warnings;
-#
-my %cmd_settings = ();
+require kettext;
 # arg check / usage
 if ($#ARGV+1 == 0) {
    printf "Usage: kettext.pl file_name.ktx\n";
    exit;
 }
 my $file_name;
-
+my %cmd_settings = ();
 for (my $arg_i = 0; $arg_i < $#ARGV+1; $arg_i++) {
  if ($ARGV[$arg_i] =~ m/^-(-|)(.*)/) {
    if ($2 =~ m/^(h(elp|))$/) {
@ -24,417 +21,5 @@ for (my $arg_i = 0; $arg_i < $#ARGV+1; $arg_i++) {
    $file_name = $ARGV[$arg_i];
  }
 }
-# create file handler
-binmode STDOUT, ":utf8";
-use utf8;
-open(my $fh, '<:encoding(UTF-8)', $file_name)
-  or die "ERR: could not open file '$file_name' $!";
-# create our parse "enum"
-use constant {
-  PARSE_HEADER_BIG => 1 << 1,
-  PARSE_HEADER_SMALL => 1 << 2,
-  PARSE_HEADER_BOTH => 1 << 1 | 1 << 2,
-};
-use constant {
-  TYPE_HEADER => 1,
-  TYPE_PARAGRAPH => 2,
-  TYPE_PRE => 3,
-  TYPE_RULE => 4,
-  TYPE_SPACER => 5,
-  TYPE_BREAK => 6,
-  TYPE_CODE => 7,
-  TYPE_BLOCKQUOTE => 8,
-  TYPE_LIST => 9
-};
-# create our general data
-my %sections;
-my @elements = ();    # our list of ALL distinct elements
-my %block = ();       # some block of data, such as paragraph, pre, etc.
-my %settings = ();
-# main logic: line reading
-chomp(my @lines = <$fh>); close($fh);
-for (my $i = 0; $i <= $#lines; $i++) {
-  # check for imply rules on first line
-  if ($i == 0 || $i == $#lines) {
-    if ($lines[$i] =~ m/^(\..*\)(\s|))(.*)/) {
-      if ($1 =~ m/^.*\((.*)\)/) {
-        my @options = split(',', $1);
-        foreach my $opt (@options) {
-          my @value = split('=', $opt);
-          $settings{$value[0]} = ($value[1] ? $value[1] : 1);
-        }
-      }
-      next;
-    }
-  }
-  # 1. check for indent-related lines (pre, code, list, etc.)
-  if ($lines[$i] =~ m/^(\t|\s\s|\s\s\s\s)(.)(.*)/) {
-    # code
-    if ($2 eq '#') {
-      if (!%block) {
-        if ($3 =~ m/^([^\s]*)(\..*\)(\s|))(.*)/) {
-          %block = (type => TYPE_CODE, text => $3, opts => parseOptions($2));
-        } else {
-          %block = (type => TYPE_CODE, text => $3);
-        }
-      } else {
-        if ($block{"type"} != TYPE_CODE) {
-          push @elements, {%block};
-          if ($3 =~ m/^([^\s*])(\..*\)(\s|))(.*)/) {
-            %block = (type => TYPE_CODE, text => $3, opts => parseOptions($2));
-          } else {
-            %block = (type => TYPE_CODE, text => $3);
-          }
-        } else {
-          $block{'text'} .= "\n".$3;
-        }
-      }
-    # lists - this is ugly, expensive, and by far the most shameful of this code
-    } elsif ($lines[$i] =~ m/^(\t{1,}|\s\s{1,})(\*\s|\~\s|\-\s|\+\s|.{1,3}\.\s)(.*)/) {
-      # this is hackish - I would prefer to get # of matches from above
-      my $depth = length($1) / (substr($1, 0, 1) eq ' ' ? 2 : 1);
-      if (%block) {
-        if ($block{"type"} != TYPE_LIST) {
-          push @elements, {%block};
-          %block = (type => TYPE_LIST, depth => 0, children => []);
-        }
-      } else {
-        %block = (type => TYPE_LIST, depth => 0, children => []);
-      }
-      my %this_item = (type => TYPE_LIST, text => parseText($3), depth => $depth, parent => \%block, children => []);
-      my $child_count = @{$block{'children'}};
-      if ($child_count == 0) {
-        push @{$block{'children'}}, {%this_item};
-      } else {
-        my $root = \%block;
-        my $root_children = \@{${$root}{'children'}};
-        my $diff = $depth-${$root}{'depth'};
-        # travel down the list until we get our proper context.
-        while ($diff > 0) {
-          $root_children = \@{${$root}{'children'}};
-          if (@{${$root}{'children'}} <= 0) {
-            push @{${$root}{'children'}}, {%this_item};
-            $diff = 0;
-          } else {
-            # diff 1 = we have proper parent, 0 = we're one step too deep
-            $root = $root_children->[-1];
-            $diff = $depth-${$root}{'depth'};
-            if ($diff == 1) {
-              push @{${$root}{'children'}}, {%this_item};
-              $diff = 0;
-            } elsif ($diff == 0) {
-              push @{${${$root}{'parent'}}{'children'}}, {%this_item};
-            }
-          }
-        }
-      }
-    # preformatted (default)
-    } else {
-      if (!%block) {
-        if ($2 eq '.' && $3 =~ m/^(.*\)(\s|))(.*)/) {
-          %block = (type => TYPE_PRE, text => $3, opts => parseOptions('.'.$1));
-        } else {
-          %block = (type => TYPE_PRE, text => $2.$3);
-        }
-      } else {
-        if ($block{"type"} != TYPE_PRE) {
-          push @elements, {%block}; %block=();
-          if ($2 eq '.' && $3 =~ m/^(.*\)(\s|))(.*)/) {
-            %block = (type => TYPE_PRE, text => $3, opts => parseOptions('.'.$1));
-          } else {
-            %block = (type => TYPE_PRE, text => $2.$3);
-          }
-        } else {
-          $block{'text'} .= "\n".$2.$3;
-        }
-      }
-    }
-    next;
-  }
-  # 2. check for big headers, e.g.,
-  # ,,,,
-  # header
-  # ````
-  #   conditions: first and third lines must have at least 4 repeating chars, and the second line must not match header conditions.
-  if ($i+1 < $#lines) {
-    if ($lines[$i] =~ m/(\S\S)\1{2,}/) {
-      my $size = $+[0];
-      # only check closing header if contents appear to not be a header
-      if ($lines[$i+1] !~ m/(\S\S)\1{2,}/g) {
-        # check closing line for header appearance
-        if ($lines[$i+2] =~ m/(\S\S)\1{2,}/g) {
-          if (%block) { push @elements, {%block}; %block=() }
-          if ($lines[$i+1] =~ m/^(.*?)(\..*\))/) {
-            push @elements, {
-              type => TYPE_HEADER,
-              size => $size,
-              text => parseText($1),
-              opts => parseOptions($2)
-            };
-          } else {
-            push @elements, {
-              type => TYPE_HEADER,
-              size => $size,
-              text => parseText($lines[$i+1])
-            };
-          }
-          $i += 2;
-          next;
-        }
-      }
-    }
-  }
-  # 3. check for small headers
-  if ($lines[$i] =~ m/^(\S)\1{2,}/) {
-    my $size = $+[0];
-    my $match = substr($lines[$i], $-[0], $+[0]-$-[0]);
-    my $post = substr($lines[$i], $+[0]);
-    # two conditions:
-    #   match repeated $match if something else came between
-    #     e.g., ==== header ====
-    #   otherwise match if there is some text following
-    #     e.g., ==== header
-    if ($post =~ m/(\t*|\s*|)(.*)\Q$match\E$/g) {
-      if (%block) { push @elements, {%block}; %block=() }
-      if ($2 =~ m/^(.*?)(\..*\))/) {
-        push @elements, {
-          type => TYPE_HEADER,
-          size => $size,
-          text => parseText($1),
-          opts => parseOptions($2)
-        };
-      } else {
-        push @elements, {
-          type => TYPE_HEADER,
-          size => $size,
-          text => parseText($2)
-        };
-      }
-      next;
-    } elsif ($post =~ m/^(\t*|\s*|)(.*[^(\n|\r|\s|\t)])/g) {
-      if (%block) { push @elements, {%block}; %block=() }
-      if ($post =~ m/^(.*?)(\..*\))/) {
-        push @elements, {
-          type => TYPE_HEADER,
-          size => $size,
-          text => parseText($1),
-          opts => parseOptions($2)
-        };
-      } else {
-        push @elements, {
-          type => TYPE_HEADER,
-          size => $size,
-          text => parseText($post)
-        };
-      }
-      next;
-    }
-  }
-  # 4. check for spacers
-  if ($lines[$i] =~ m/^(\S)\1{2,}/) {
-    if (%block) { push @elements, {%block}; %block=() }
-    push @elements, {
-      type => TYPE_SPACER,
-      size => $+[0]-$-[0]
-    };
-    next;
-  }
-  # discover blank lines - these usually signify end of some type of block
-  if ($lines[$i] =~ m/^\s*$/) {
-    if (%block) { push @elements, {%block}; %block=() }
-    push @elements, {
-      type => TYPE_BREAK
-    };
-    next;
-  }
-  # check for blockquotes
-  if ($lines[$i] =~ m/^\>(.*)$/) {
-    if (!%block) {
-      if ($1 =~ m/^(\..*\)(\s|))(.*)/) {
-        %block = (type => TYPE_BLOCKQUOTE, text => $2, opts => parseOptions($1));
-      } else {
-        %block = (type => TYPE_BLOCKQUOTE, text => $1);
-      }
-    } else {
-      if ($block{'type'} != TYPE_BLOCKQUOTE) {
-        push @elements, {%block};
-        if ($1 =~ m/^(\..*\)(\s|))(.*)/) {
-          %block = (type => TYPE_BLOCKQUOTE, text => $2, opts => parseOptions($1));
-        } else {
-          %block = (type => TYPE_BLOCKQUOTE, text => $1);
-        }
-      } else {
-        $block{'text'} .= "\n".$1;
-      }
-    }
-    next;
-  }
-  # finally, create a paragraph or append to it.
-  if (!%block) {
-    if ($lines[$i] =~ m/^(\..*\)(\s|))(.*)/) {
-      %block = (type => TYPE_PARAGRAPH, text => parseText($2), opts => parseOptions($1));
-    } else {
-      %block = (type => TYPE_PARAGRAPH, text => parseText($lines[$i]));
-    }
-  } else {
-    $block{'text'} .= "\n".parseText($lines[$i]);
-  }
-}
-# push final block if it exists
-if (%block) { push @elements, {%block}; %block=() }
-# merge file settings with command-line settings
-@settings{ keys %cmd_settings } = values %cmd_settings;
-# get our various header sizes and organize an array with unique values from smallest to largest.
-my @headers;
-foreach (@elements) {
-  my $element = $_;
-  if ($element->{type} == TYPE_HEADER) {
-    if (@headers) {
-      my $h_i = 0;
-      my $last_size = $headers[0];
-      for (my $h_i = 0; $h_i < scalar @headers; $h_i++) {
-        if ($element->{size} == $headers[$h_i]) {
-          $h_i = scalar @headers;
-        } elsif ($element->{size} < $headers[$h_i]) {
-          splice @headers, $h_i, 0, $element->{size};
-          $h_i = scalar @headers;
-        } elsif ($element->{size} > $headers[$h_i]) {
-          if ($h_i+1 >= scalar @headers) {
-            splice @headers, $h_i+1, 0, $element->{size};
-            $h_i = scalar @headers;
-          }
-        }
-      }
-    } else {
-      $headers[0] = $element->{size};
-    }
-  }
-}
-# this is really dumb, but convert our array to a hash
-my %header_map;
-for (my $h_i = 0; $h_i < scalar @headers; $h_i++) {
-  if ($settings{'header.reverse'}) {
-    $header_map{$headers[$h_i]} = $h_i+1+($settings{'header.depth'} ? $settings{'header.depth'} : 0);
-  } else {
-    $header_map{$headers[$h_i]} = scalar ($settings{'header.depth'} ? $settings{'header.depth'} : 0) + @headers - $h_i;
-  }
-}
-
-# set up our array for header numbering. The first element 
-my @header_numbers;
-for (my $h_i = 0; $h_i < scalar @headers; $h_i++) {
-  $header_numbers[$h_i] = 0;
-}
-my $header_number = 0; # at which numbering depth are we
-
-# print 'em out
-if ($settings{'toc'}) {
-  foreach (@elements) {
-    if ($_->{type} == TYPE_HEADER) {
-
-    }
-  }
-} else {
-  my $previous_type = 0;
-  my $indent = ("\t" x ($settings{"indent_level"} ? $settings{"indent_level"} : 0));
-  foreach (@elements) {
-    if ($_->{type} == TYPE_HEADER) {
-      my $hsize = $header_map{$_->{size}};
-      my $htext = $_->{text};
-      if ($settings{"header.numbering"}) {
-        $header_number = $hsize;
-        for (my $h_i = $header_number+1; $h_i < scalar @headers; $h_i++) {
-          $header_numbers[$h_i] = 0;
-        }
-        $header_numbers[$header_number]++;
-        $htext = ($settings{"header.numbering"} ? $header_numbers[$header_number].'. ' : '') . $htext;
-      }
-      my $hid = $_->{text};
-      $hid =~ s/ /_/g;
-      print("<h$hsize".($_->{opts} ? $_->{opts} : '').($settings{'header.ids'} ? " id=\"".$hid."\"" : '').">$htext</h$hsize>\n");
-    } elsif ($_->{type} == TYPE_BREAK) {
-      if ($previous_type == TYPE_BREAK) {
-        print("<br />\n");
-      }
-    } elsif ($_->{type} == TYPE_PARAGRAPH) {
-      my $text = $_->{text};
-      $text =~ s/\n/<br>\n/g;
-      print("<p".($_->{opts} ? $_->{opts} : '').">$text</p>\n");
-    } elsif ($_->{type} == TYPE_PRE) {
-      print("<pre".($_->{opts} ? $_->{opts} : '').">$_->{text}</pre>\n");
-    } elsif ($_->{type} == TYPE_CODE) {
-      print("<pre><code".($_->{opts} ? $_->{opts} : '').">$_->{text}</code></pre>\n");
-    } elsif ($_->{type} == TYPE_BLOCKQUOTE) {
-      print("<blockquote".($_->{opts} ? $_->{opts} : '').">".($settings{'blockquote.parse'} ? parseText($_->{text}) : $_->{text})."</blockquote>\n");
-    } elsif ($_->{type} == TYPE_SPACER) {
-      print("<hr />\n");
-    } elsif ($_->{type} == TYPE_LIST) {
-      printf("<p></p>"); # @@ should this be optional?
-      printList($_);
-    }
-    $previous_type = $_->{type};
-  }
-}
-if (!$settings{"version.none"}) {
-  if ($settings{"version.hide"}) {
-    print "<!-- $file_name generated by kettext 0.1 -->\n";
-  } else {
-    print "<small>$file_name generated by kettext 0.1</small>\n";
-  }
-}
-
-sub printList {
-  my $list = $_[0];
-  my $child_count = @{$list->{'children'}};
-  #my $child = $children->[$child_i];
-  if ($list->{'text'}) {
-    print "<li>$list->{'text'}";
-  }
-  if ($child_count > 0) {
-    print "<ul>\n";
-    my $children = \@{$list->{'children'}};
-    my $child_i = 0;
-    for (my $child = $children->[$child_i]; $child_i < $child_count; $child = $children->[++$child_i]) {
-      printList($child);
-    }
-    print "</ul>\n";
-  }
-  if ($list->{'text'}) {
-    print "</li>\n";
-  }
-}
-
-sub parseText {
-  my $text = $_[0];
-  my @text_patterns = ('\*\*','\*\*', '\/\/','\/\/', '\[\[','\]\]', '\(\(','\)\)', '``', '``');
-  my @text_replace = ('<b>','</b>', '<i>','</i>', '<a>','</a>', '<small>','</small>', '<code>','</code>');
-  my $i = 0;
-  my $new_text = $text;
-  for (my $p = 0; $p <= $#text_patterns; $p+=2) {
-    my $open = $text_patterns[$p];
-    my $close = $text_patterns[$p+1];
-    while ($new_text =~ m/($open)([^$close]*)($close)(\.\S*\(.*?\)|)/g) {
-    #while ($new_text =~ m/($open)([^$close]*)($close)(\.\S*\(.*\)|)/g) {
-      my $converted_text;
-      if ($4) {
-        my $ts = substr($text_replace[$i], 0, -1);
-        my $te = substr($text_replace[$i], -1);
-        $converted_text = $ts.parseOptions($4).$te.$2.$text_replace[$i+1];
-      } else {
-        $converted_text = $text_replace[$i].$2.$text_replace[$i+1];
-      }
-      $new_text =~ s/\Q$1$2$3$4\E/$converted_text/g;
-    }
-    $i +=2;
-  }
-  return $new_text;
-}
-
-sub parseOptions {
-  my $text = $_[0];
-  my $opts;
-  while ($text =~ m/(\.)([a-zA-Z]*[^\(])(\()([^\)]*)/g) {
-    $opts.=" $2=\"$4\"";
-  }
-  return $opts;
-}
+my %parsed_file = kettext::parseFile($file_name, \%cmd_settings);
+kettext::convertTo::HTML(%parsed_file);
--- a/kettext.pm
+++ b/kettext.pm
@ -0,0 +1,436 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+package kettext;
+use constant VERSION => 0.2;
+# create our parse "enum"
+use constant {
+  TYPE_HEADER => 1,
+  TYPE_PARAGRAPH => 2,
+  TYPE_PRE => 3,
+  TYPE_RULE => 4,
+  TYPE_SPACER => 5,
+  TYPE_BREAK => 6,
+  TYPE_CODE => 7,
+  TYPE_BLOCKQUOTE => 8,
+  TYPE_LIST => 9
+};
+# create our general data
+# parseFile("my_file", %cmd_settings)
+sub parseFile {
+  my $file_name = $_[0];
+  my %cmd_settings = ($_[1] ? %{$_[1]} : ());
+  my @elements = ();    # our list of ALL distinct elements
+  my %block = ();       # some block of data, such as paragraph, pre, etc.
+  my %settings = ();
+  # create file handler and read into @lines
+  binmode STDOUT, ":utf8";
+  use utf8;
+  open(my $fh, '<:encoding(UTF-8)', $file_name)
+    or die "ERR: could not open file '$file_name' $!";
+  chomp(my @lines = <$fh>); close($fh);
+  # main logic: line reading
+  for (my $i = 0; $i <= $#lines; $i++) {
+    # check for imply rules on first line
+    if ($i == 0 || $i == $#lines) {
+      if ($lines[$i] =~ m/^(\..*\)(\s|))(.*)/) {
+        if ($1 =~ m/^.*\((.*)\)/) {
+          my @options = split(',', $1);
+          foreach my $opt (@options) {
+            my @value = split('=', $opt);
+            $settings{$value[0]} = ($value[1] ? $value[1] : 1);
+          }
+        }
+        next;
+      }
+    }
+    # 1. check for indent-related lines (pre, code, list, etc.)
+    if ($lines[$i] =~ m/^(\t|\s\s|\s\s\s\s)(.)(.*)/) {
+      # code
+      if ($2 eq '#') {
+        if (!%block) {
+          if ($3 =~ m/^([^\s]*)(\..*\)(\s|))(.*)/) {
+            %block = (type => TYPE_CODE, text => $3, opts => parseOptions($2));
+          } else {
+            %block = (type => TYPE_CODE, text => $3);
+          }
+        } else {
+          if ($block{"type"} != TYPE_CODE) {
+            push @elements, {%block};
+            if ($3 =~ m/^([^\s*])(\..*\)(\s|))(.*)/) {
+              %block = (type => TYPE_CODE, text => $3, opts => parseOptions($2));
+            } else {
+              %block = (type => TYPE_CODE, text => $3);
+            }
+          } else {
+            $block{'text'} .= "\n".$3;
+          }
+        }
+      # lists - this is ugly, expensive, and by far the most shameful of this code
+      } elsif ($lines[$i] =~ m/^(\t{1,}|\s\s{1,})(\*\s|\~\s|\-\s|\+\s|.{1,3}\.\s)(.*)/) {
+        # this is hackish - I would prefer to get # of matches from above
+        my $depth = length($1) / (substr($1, 0, 1) eq ' ' ? 2 : 1);
+        if (%block) {
+          if ($block{"type"} != TYPE_LIST) {
+            push @elements, {%block};
+            %block = (type => TYPE_LIST, depth => 0, children => []);
+          }
+        } else {
+          %block = (type => TYPE_LIST, depth => 0, children => []);
+        }
+        my %this_item = (type => TYPE_LIST, text => $3, depth => $depth, parent => \%block, children => []);
+        my $child_count = @{$block{'children'}};
+        if ($child_count == 0) {
+          push @{$block{'children'}}, {%this_item};
+        } else {
+          my $root = \%block;
+          my $root_children = \@{${$root}{'children'}};
+          my $diff = $depth-${$root}{'depth'};
+          # travel down the list until we get our proper context.
+          while ($diff > 0) {
+            $root_children = \@{${$root}{'children'}};
+            if (@{${$root}{'children'}} <= 0) {
+              push @{${$root}{'children'}}, {%this_item};
+              $diff = 0;
+            } else {
+              # diff 1 = we have proper parent, 0 = we're one step too deep
+              $root = $root_children->[-1];
+              $diff = $depth-${$root}{'depth'};
+              if ($diff == 1) {
+                push @{${$root}{'children'}}, {%this_item};
+                $diff = 0;
+              } elsif ($diff == 0) {
+                push @{${${$root}{'parent'}}{'children'}}, {%this_item};
+              }
+            }
+          }
+        }
+      # preformatted (default)
+      } else {
+        if (!%block) {
+          if ($2 eq '.' && $3 =~ m/^(.*\)(\s|))(.*)/) {
+            %block = (type => TYPE_PRE, text => $3, opts => parseOptions('.'.$1));
+          } else {
+            %block = (type => TYPE_PRE, text => $2.$3);
+          }
+        } else {
+          if ($block{"type"} != TYPE_PRE) {
+            push @elements, {%block}; %block=();
+            if ($2 eq '.' && $3 =~ m/^(.*\)(\s|))(.*)/) {
+              %block = (type => TYPE_PRE, text => $3, opts => parseOptions('.'.$1));
+            } else {
+              %block = (type => TYPE_PRE, text => $2.$3);
+            }
+          } else {
+            $block{'text'} .= "\n".$2.$3;
+          }
+        }
+      }
+      next;
+    }
+    # 2. check for big headers, e.g.,
+    # ,,,,
+    # header
+    # ````
+    #   conditions: first and third lines must have at least 4 repeating chars, and the second line must not match header conditions.
+    if ($i+1 < $#lines) {
+      if ($lines[$i] =~ m/(\S\S)\1{2,}/) {
+        my $size = $+[0];
+        # only check closing header if contents appear to not be a header
+        if ($lines[$i+1] !~ m/(\S\S)\1{2,}/g) {
+          # check closing line for header appearance
+          if ($lines[$i+2] =~ m/(\S\S)\1{2,}/g) {
+            if (%block) { push @elements, {%block}; %block=() }
+            if ($lines[$i+1] =~ m/^(.*?)(\..*\))/) {
+              push @elements, {
+                type => TYPE_HEADER,
+                size => $size,
+                text => $1,
+                opts => parseOptions($2)
+              };
+            } else {
+              push @elements, {
+                type => TYPE_HEADER,
+                size => $size,
+                text => $lines[$i+1]
+              };
+            }
+            $i += 2;
+            next;
+          }
+        }
+      }
+    }
+    # 3. check for small headers
+    if ($lines[$i] =~ m/^(\S)\1{2,}/) {
+      my $size = $+[0];
+      my $match = substr($lines[$i], $-[0], $+[0]-$-[0]);
+      my $post = substr($lines[$i], $+[0]);
+      # two conditions:
+      #   match repeated $match if something else came between
+      #     e.g., ==== header ====
+      #   otherwise match if there is some text following
+      #     e.g., ==== header
+      if ($post =~ m/(\t*|\s*|)(.*)\Q$match\E$/g) {
+        if (%block) { push @elements, {%block}; %block=() }
+        if ($2 =~ m/^(.*?)(\..*\))/) {
+          push @elements, {
+            type => TYPE_HEADER,
+            size => $size,
+            text => $1,
+            opts => parseOptions($2)
+          };
+        } else {
+          push @elements, {
+            type => TYPE_HEADER,
+            size => $size,
+            text => $2
+          };
+        }
+        next;
+      } elsif ($post =~ m/^(\t*|\s*|)(.*[^(\n|\r|\s|\t)])/g) {
+        if (%block) { push @elements, {%block}; %block=() }
+        if ($post =~ m/^(.*?)(\..*\))/) {
+          push @elements, {
+            type => TYPE_HEADER,
+            size => $size,
+            text => $1,
+            opts => parseOptions($2)
+          };
+        } else {
+          push @elements, {
+            type => TYPE_HEADER,
+            size => $size,
+            text => $post
+          };
+        }
+        next;
+      }
+    }
+    # 4. check for spacers
+    if ($lines[$i] =~ m/^(\S)\1{2,}/) {
+      if (%block) { push @elements, {%block}; %block=() }
+      push @elements, {
+        type => TYPE_SPACER,
+        size => $+[0]-$-[0]
+      };
+      next;
+    }
+    # discover blank lines - these usually signify end of some type of block
+    if ($lines[$i] =~ m/^\s*$/) {
+      if (%block) { push @elements, {%block}; %block=() }
+      push @elements, {
+        type => TYPE_BREAK
+      };
+      next;
+    }
+    # check for blockquotes
+    if ($lines[$i] =~ m/^\>(.*)$/) {
+      if (!%block) {
+        if ($1 =~ m/^(\..*\)(\s|))(.*)/) {
+          %block = (type => TYPE_BLOCKQUOTE, text => $2, opts => parseOptions($1));
+        } else {
+          %block = (type => TYPE_BLOCKQUOTE, text => $1);
+        }
+      } else {
+        if ($block{'type'} != TYPE_BLOCKQUOTE) {
+          push @elements, {%block};
+          if ($1 =~ m/^(\..*\)(\s|))(.*)/) {
+            %block = (type => TYPE_BLOCKQUOTE, text => $2, opts => parseOptions($1));
+          } else {
+            %block = (type => TYPE_BLOCKQUOTE, text => $1);
+          }
+        } else {
+          $block{'text'} .= "\n".$1;
+        }
+      }
+      next;
+    }
+    # finally, create a paragraph or append to it.
+    if (!%block) {
+      if ($lines[$i] =~ m/^(\..*\)(\s|))(.*)/) {
+        %block = (type => TYPE_PARAGRAPH, text => $2, opts => parseOptions($1));
+      } else {
+        %block = (type => TYPE_PARAGRAPH, text => $lines[$i]);
+      }
+    } else {
+      $block{'text'} .= "\n".$lines[$i];
+    }
+  }
+  # push final block if it exists
+  if (%block) { push @elements, {%block}; %block=() }
+  # merge file settings with command-line settings
+  @settings{ keys %cmd_settings } = values %cmd_settings;
+  # get our various header sizes and organize an array with unique values from smallest to largest.
+  my @headers;
+  foreach (@elements) {
+    my $element = $_;
+    if ($element->{type} == TYPE_HEADER) {
+      if (@headers) {
+        my $h_i = 0;
+        my $last_size = $headers[0];
+        for (my $h_i = 0; $h_i < scalar @headers; $h_i++) {
+          if ($element->{size} == $headers[$h_i]) {
+            $h_i = scalar @headers;
+          } elsif ($element->{size} < $headers[$h_i]) {
+            splice @headers, $h_i, 0, $element->{size};
+            $h_i = scalar @headers;
+          } elsif ($element->{size} > $headers[$h_i]) {
+            if ($h_i+1 >= scalar @headers) {
+              splice @headers, $h_i+1, 0, $element->{size};
+              $h_i = scalar @headers;
+            }
+          }
+        }
+      } else {
+        $headers[0] = $element->{size};
+      }
+    }
+  }
+  # this is really dumb, but convert our array to a hash
+  my %header_map;
+  for (my $h_i = 0; $h_i < scalar @headers; $h_i++) {
+    if ($settings{'header.reverse'}) {
+      $header_map{$headers[$h_i]} = $h_i+1+($settings{'header.depth'} ? $settings{'header.depth'} : 0);
+    } else {
+      $header_map{$headers[$h_i]} = scalar ($settings{'header.depth'} ? $settings{'header.depth'} : 0) + @headers - $h_i;
+    }
+  }
+ 
+  # set up our array for header numbering. The first element 
+  my @header_numbers;
+  for (my $h_i = 0; $h_i < scalar @headers; $h_i++) {
+    $header_numbers[$h_i] = 0;
+  }
+  my $header_number = 0; # at which numbering depth are we
+  # prepend to headers
+  foreach (@elements) {
+    if ($_->{type} == TYPE_HEADER) {
+      my $hsize = $header_map{$_->{size}};
+      $_->{hsize} = $hsize;
+      $header_number = $hsize;
+      for (my $h_i = $header_number+1; $h_i < scalar @headers; $h_i++) {
+        $header_numbers[$h_i] = 0;
+      }
+      $header_numbers[$header_number]++;
+      # prepend an ordered number to headers based on depth
+      if ($settings{"header.numbering"}) {
+        $_->{text} = $header_numbers[$header_number].'. '.$_->{text};
+      }
+    }
+  }
+  my %ret = (
+    "elements" => \@elements,
+    "settings" => \%settings,
+    "filename" => $file_name
+  );
+  return %ret;
+}
+sub parseOptions {
+  my $text = $_[0];
+  my $opts;
+  while ($text =~ m/(\.)([a-zA-Z]*[^\(])(\()([^\)]*)/g) {
+    $opts.=" $2=\"$4\"";
+  }
+  return $opts;
+}
+
+package kettext::convertTo;
+sub HTML {
+  my %data = @_;
+  my @elements = @{$data{elements}};
+  my %settings = %{$data{settings}};
+  my $file_name = $data{filename};
+  # print 'em out
+  if ($settings{'toc'}) {
+    foreach (@elements) {
+      if ($_->{type} == kettext::TYPE_HEADER) {
+  
+      }
+    }
+  } else {
+    my $previous_type = 0;
+    my $indent = ("\t" x ($settings{"indent_level"} ? $settings{"indent_level"} : 0));
+    foreach (@elements) {
+      if ($_->{type} == kettext::TYPE_HEADER) {
+        my $hid = $_->{text};
+        $hid =~ s/ /_/g;
+        print("<h$_->{hsize}".($_->{opts} ? $_->{opts} : '').($settings{'header.ids'} ? " id=\"".$hid."\"" : '').">".convertText($_->{text})."</h$_->{hsize}>\n");
+      } elsif ($_->{type} == kettext::TYPE_BREAK) {
+        if ($previous_type == kettext::TYPE_BREAK) {
+          print("<br />\n");
+        }
+      } elsif ($_->{type} == kettext::TYPE_PARAGRAPH) {
+        my $text = convertText($_->{text});
+        $text =~ s/\n/<br>\n/g;
+        print("<p".($_->{opts} ? $_->{opts} : '').">$text</p>\n");
+      } elsif ($_->{type} == kettext::TYPE_PRE) {
+        print("<pre".($_->{opts} ? $_->{opts} : '').">$_->{text}</pre>\n");
+      } elsif ($_->{type} == kettext::TYPE_CODE) {
+        print("<pre><code".($_->{opts} ? $_->{opts} : '').">$_->{text}</code></pre>\n");
+      } elsif ($_->{type} == kettext::TYPE_BLOCKQUOTE) {
+        print("<blockquote".($_->{opts} ? $_->{opts} : '').">".($settings{'blockquote.parse'} ? convertText($_->{text}) : $_->{text})."</blockquote>\n");
+      } elsif ($_->{type} == kettext::TYPE_SPACER) {
+        print("<hr />\n");
+      } elsif ($_->{type} == kettext::TYPE_LIST) {
+        printf("<p></p>"); # @@ should this be optional?
+        printList($_);
+      }
+      $previous_type = $_->{type};
+    }
+  }
+  if (!$settings{"version.none"}) {
+    if ($settings{"version.hide"}) {
+      print "<!-- $file_name generated by kettext ".kettext::VERSION." -->\n";
+    } else {
+      print "<small>$file_name generated by kettext ".kettext::VERSION."</small>\n";
+    }
+  }
+  
+  sub printList {
+    my $list = $_[0];
+    my $child_count = @{$list->{'children'}};
+    #my $child = $children->[$child_i];
+    if ($list->{'text'}) {
+      print "<li>".convertText($list->{'text'});
+    }
+    if ($child_count > 0) {
+      print "<ul>\n";
+      my $children = \@{$list->{'children'}};
+      my $child_i = 0;
+      for (my $child = $children->[$child_i]; $child_i < $child_count; $child = $children->[++$child_i]) {
+        printList($child);
+      }
+      print "</ul>\n";
+    }
+    if ($list->{'text'}) {
+      print "</li>\n";
+    }
+  }
+  sub convertText {
+    my $text = $_[0];
+    my @text_patterns = ('\*\*','\*\*', '\/\/','\/\/', '\[\[','\]\]', '\(\(','\)\)', '``', '``');
+    my @text_replace = ('<b>','</b>', '<i>','</i>', '<a>','</a>', '<small>','</small>', '<code>','</code>');
+    my $i = 0;
+    my $new_text = $text;
+    for (my $p = 0; $p <= $#text_patterns; $p+=2) {
+      my $open = $text_patterns[$p];
+      my $close = $text_patterns[$p+1];
+      while ($new_text =~ m/($open)([^$close]*)($close)(\.\S*\(.*?\)|)/g) {
+      #while ($new_text =~ m/($open)([^$close]*)($close)(\.\S*\(.*\)|)/g) {
+        my $converted_text;
+        if ($4) {
+          my $ts = substr($text_replace[$i], 0, -1);
+          my $te = substr($text_replace[$i], -1);
+          $converted_text = $ts.kettext::parseOptions($4).$te.$2.$text_replace[$i+1];
+        } else {
+          $converted_text = $text_replace[$i].$2.$text_replace[$i+1];
+        }
+        $new_text =~ s/\Q$1$2$3$4\E/$converted_text/g;
+      }
+      $i +=2;
+    }
+    return $new_text;
+  }
+}
+1;