kettext now exists as a front-end script and a module. The module separates parsing from conversion as well.
parent
9f541689a0
commit
1f19efe295
423
kettext.pl
423
kettext.pl
|
@ -1,15 +1,12 @@
|
|||
#!/usr/bin/perl
|
||||
use strict;
|
||||
use warnings;
|
||||
#
|
||||
my %cmd_settings = ();
|
||||
require kettext;
|
||||
# arg check / usage
|
||||
if ($#ARGV+1 == 0) {
|
||||
printf "Usage: kettext.pl file_name.ktx\n";
|
||||
exit;
|
||||
}
|
||||
my $file_name;
|
||||
|
||||
my %cmd_settings = ();
|
||||
for (my $arg_i = 0; $arg_i < $#ARGV+1; $arg_i++) {
|
||||
if ($ARGV[$arg_i] =~ m/^-(-|)(.*)/) {
|
||||
if ($2 =~ m/^(h(elp|))$/) {
|
||||
|
@ -24,417 +21,5 @@ for (my $arg_i = 0; $arg_i < $#ARGV+1; $arg_i++) {
|
|||
$file_name = $ARGV[$arg_i];
|
||||
}
|
||||
}
|
||||
# create file handler
|
||||
binmode STDOUT, ":utf8";
|
||||
use utf8;
|
||||
open(my $fh, '<:encoding(UTF-8)', $file_name)
|
||||
or die "ERR: could not open file '$file_name' $!";
|
||||
# create our parse "enum"
|
||||
use constant {
|
||||
PARSE_HEADER_BIG => 1 << 1,
|
||||
PARSE_HEADER_SMALL => 1 << 2,
|
||||
PARSE_HEADER_BOTH => 1 << 1 | 1 << 2,
|
||||
};
|
||||
use constant {
|
||||
TYPE_HEADER => 1,
|
||||
TYPE_PARAGRAPH => 2,
|
||||
TYPE_PRE => 3,
|
||||
TYPE_RULE => 4,
|
||||
TYPE_SPACER => 5,
|
||||
TYPE_BREAK => 6,
|
||||
TYPE_CODE => 7,
|
||||
TYPE_BLOCKQUOTE => 8,
|
||||
TYPE_LIST => 9
|
||||
};
|
||||
# create our general data
|
||||
my %sections;
|
||||
my @elements = (); # our list of ALL distinct elements
|
||||
my %block = (); # some block of data, such as paragraph, pre, etc.
|
||||
my %settings = ();
|
||||
# main logic: line reading
|
||||
chomp(my @lines = <$fh>); close($fh);
|
||||
for (my $i = 0; $i <= $#lines; $i++) {
|
||||
# check for imply rules on first line
|
||||
if ($i == 0 || $i == $#lines) {
|
||||
if ($lines[$i] =~ m/^(\..*\)(\s|))(.*)/) {
|
||||
if ($1 =~ m/^.*\((.*)\)/) {
|
||||
my @options = split(',', $1);
|
||||
foreach my $opt (@options) {
|
||||
my @value = split('=', $opt);
|
||||
$settings{$value[0]} = ($value[1] ? $value[1] : 1);
|
||||
}
|
||||
}
|
||||
next;
|
||||
}
|
||||
}
|
||||
# 1. check for indent-related lines (pre, code, list, etc.)
|
||||
if ($lines[$i] =~ m/^(\t|\s\s|\s\s\s\s)(.)(.*)/) {
|
||||
# code
|
||||
if ($2 eq '#') {
|
||||
if (!%block) {
|
||||
if ($3 =~ m/^([^\s]*)(\..*\)(\s|))(.*)/) {
|
||||
%block = (type => TYPE_CODE, text => $3, opts => parseOptions($2));
|
||||
} else {
|
||||
%block = (type => TYPE_CODE, text => $3);
|
||||
}
|
||||
} else {
|
||||
if ($block{"type"} != TYPE_CODE) {
|
||||
push @elements, {%block};
|
||||
if ($3 =~ m/^([^\s*])(\..*\)(\s|))(.*)/) {
|
||||
%block = (type => TYPE_CODE, text => $3, opts => parseOptions($2));
|
||||
} else {
|
||||
%block = (type => TYPE_CODE, text => $3);
|
||||
}
|
||||
} else {
|
||||
$block{'text'} .= "\n".$3;
|
||||
}
|
||||
}
|
||||
# lists - this is ugly, expensive, and by far the most shameful of this code
|
||||
} elsif ($lines[$i] =~ m/^(\t{1,}|\s\s{1,})(\*\s|\~\s|\-\s|\+\s|.{1,3}\.\s)(.*)/) {
|
||||
# this is hackish - I would prefer to get # of matches from above
|
||||
my $depth = length($1) / (substr($1, 0, 1) eq ' ' ? 2 : 1);
|
||||
if (%block) {
|
||||
if ($block{"type"} != TYPE_LIST) {
|
||||
push @elements, {%block};
|
||||
%block = (type => TYPE_LIST, depth => 0, children => []);
|
||||
}
|
||||
} else {
|
||||
%block = (type => TYPE_LIST, depth => 0, children => []);
|
||||
}
|
||||
my %this_item = (type => TYPE_LIST, text => parseText($3), depth => $depth, parent => \%block, children => []);
|
||||
my $child_count = @{$block{'children'}};
|
||||
if ($child_count == 0) {
|
||||
push @{$block{'children'}}, {%this_item};
|
||||
} else {
|
||||
my $root = \%block;
|
||||
my $root_children = \@{${$root}{'children'}};
|
||||
my $diff = $depth-${$root}{'depth'};
|
||||
# travel down the list until we get our proper context.
|
||||
while ($diff > 0) {
|
||||
$root_children = \@{${$root}{'children'}};
|
||||
if (@{${$root}{'children'}} <= 0) {
|
||||
push @{${$root}{'children'}}, {%this_item};
|
||||
$diff = 0;
|
||||
} else {
|
||||
# diff 1 = we have proper parent, 0 = we're one step too deep
|
||||
$root = $root_children->[-1];
|
||||
$diff = $depth-${$root}{'depth'};
|
||||
if ($diff == 1) {
|
||||
push @{${$root}{'children'}}, {%this_item};
|
||||
$diff = 0;
|
||||
} elsif ($diff == 0) {
|
||||
push @{${${$root}{'parent'}}{'children'}}, {%this_item};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
# preformatted (default)
|
||||
} else {
|
||||
if (!%block) {
|
||||
if ($2 eq '.' && $3 =~ m/^(.*\)(\s|))(.*)/) {
|
||||
%block = (type => TYPE_PRE, text => $3, opts => parseOptions('.'.$1));
|
||||
} else {
|
||||
%block = (type => TYPE_PRE, text => $2.$3);
|
||||
}
|
||||
} else {
|
||||
if ($block{"type"} != TYPE_PRE) {
|
||||
push @elements, {%block}; %block=();
|
||||
if ($2 eq '.' && $3 =~ m/^(.*\)(\s|))(.*)/) {
|
||||
%block = (type => TYPE_PRE, text => $3, opts => parseOptions('.'.$1));
|
||||
} else {
|
||||
%block = (type => TYPE_PRE, text => $2.$3);
|
||||
}
|
||||
} else {
|
||||
$block{'text'} .= "\n".$2.$3;
|
||||
}
|
||||
}
|
||||
}
|
||||
next;
|
||||
}
|
||||
# 2. check for big headers, e.g.,
|
||||
# ,,,,
|
||||
# header
|
||||
# ````
|
||||
# conditions: first and third lines must have at least 4 repeating chars, and the second line must not match header conditions.
|
||||
if ($i+1 < $#lines) {
|
||||
if ($lines[$i] =~ m/(\S\S)\1{2,}/) {
|
||||
my $size = $+[0];
|
||||
# only check closing header if contents appear to not be a header
|
||||
if ($lines[$i+1] !~ m/(\S\S)\1{2,}/g) {
|
||||
# check closing line for header appearance
|
||||
if ($lines[$i+2] =~ m/(\S\S)\1{2,}/g) {
|
||||
if (%block) { push @elements, {%block}; %block=() }
|
||||
if ($lines[$i+1] =~ m/^(.*?)(\..*\))/) {
|
||||
push @elements, {
|
||||
type => TYPE_HEADER,
|
||||
size => $size,
|
||||
text => parseText($1),
|
||||
opts => parseOptions($2)
|
||||
};
|
||||
} else {
|
||||
push @elements, {
|
||||
type => TYPE_HEADER,
|
||||
size => $size,
|
||||
text => parseText($lines[$i+1])
|
||||
};
|
||||
}
|
||||
$i += 2;
|
||||
next;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
# 3. check for small headers
|
||||
if ($lines[$i] =~ m/^(\S)\1{2,}/) {
|
||||
my $size = $+[0];
|
||||
my $match = substr($lines[$i], $-[0], $+[0]-$-[0]);
|
||||
my $post = substr($lines[$i], $+[0]);
|
||||
# two conditions:
|
||||
# match repeated $match if something else came between
|
||||
# e.g., ==== header ====
|
||||
# otherwise match if there is some text following
|
||||
# e.g., ==== header
|
||||
if ($post =~ m/(\t*|\s*|)(.*)\Q$match\E$/g) {
|
||||
if (%block) { push @elements, {%block}; %block=() }
|
||||
if ($2 =~ m/^(.*?)(\..*\))/) {
|
||||
push @elements, {
|
||||
type => TYPE_HEADER,
|
||||
size => $size,
|
||||
text => parseText($1),
|
||||
opts => parseOptions($2)
|
||||
};
|
||||
} else {
|
||||
push @elements, {
|
||||
type => TYPE_HEADER,
|
||||
size => $size,
|
||||
text => parseText($2)
|
||||
};
|
||||
}
|
||||
next;
|
||||
} elsif ($post =~ m/^(\t*|\s*|)(.*[^(\n|\r|\s|\t)])/g) {
|
||||
if (%block) { push @elements, {%block}; %block=() }
|
||||
if ($post =~ m/^(.*?)(\..*\))/) {
|
||||
push @elements, {
|
||||
type => TYPE_HEADER,
|
||||
size => $size,
|
||||
text => parseText($1),
|
||||
opts => parseOptions($2)
|
||||
};
|
||||
} else {
|
||||
push @elements, {
|
||||
type => TYPE_HEADER,
|
||||
size => $size,
|
||||
text => parseText($post)
|
||||
};
|
||||
}
|
||||
next;
|
||||
}
|
||||
}
|
||||
# 4. check for spacers
|
||||
if ($lines[$i] =~ m/^(\S)\1{2,}/) {
|
||||
if (%block) { push @elements, {%block}; %block=() }
|
||||
push @elements, {
|
||||
type => TYPE_SPACER,
|
||||
size => $+[0]-$-[0]
|
||||
};
|
||||
next;
|
||||
}
|
||||
# discover blank lines - these usually signify end of some type of block
|
||||
if ($lines[$i] =~ m/^\s*$/) {
|
||||
if (%block) { push @elements, {%block}; %block=() }
|
||||
push @elements, {
|
||||
type => TYPE_BREAK
|
||||
};
|
||||
next;
|
||||
}
|
||||
# check for blockquotes
|
||||
if ($lines[$i] =~ m/^\>(.*)$/) {
|
||||
if (!%block) {
|
||||
if ($1 =~ m/^(\..*\)(\s|))(.*)/) {
|
||||
%block = (type => TYPE_BLOCKQUOTE, text => $2, opts => parseOptions($1));
|
||||
} else {
|
||||
%block = (type => TYPE_BLOCKQUOTE, text => $1);
|
||||
}
|
||||
} else {
|
||||
if ($block{'type'} != TYPE_BLOCKQUOTE) {
|
||||
push @elements, {%block};
|
||||
if ($1 =~ m/^(\..*\)(\s|))(.*)/) {
|
||||
%block = (type => TYPE_BLOCKQUOTE, text => $2, opts => parseOptions($1));
|
||||
} else {
|
||||
%block = (type => TYPE_BLOCKQUOTE, text => $1);
|
||||
}
|
||||
} else {
|
||||
$block{'text'} .= "\n".$1;
|
||||
}
|
||||
}
|
||||
next;
|
||||
}
|
||||
# finally, create a paragraph or append to it.
|
||||
if (!%block) {
|
||||
if ($lines[$i] =~ m/^(\..*\)(\s|))(.*)/) {
|
||||
%block = (type => TYPE_PARAGRAPH, text => parseText($2), opts => parseOptions($1));
|
||||
} else {
|
||||
%block = (type => TYPE_PARAGRAPH, text => parseText($lines[$i]));
|
||||
}
|
||||
} else {
|
||||
$block{'text'} .= "\n".parseText($lines[$i]);
|
||||
}
|
||||
}
|
||||
# push final block if it exists
|
||||
if (%block) { push @elements, {%block}; %block=() }
|
||||
# merge file settings with command-line settings
|
||||
@settings{ keys %cmd_settings } = values %cmd_settings;
|
||||
# get our various header sizes and organize an array with unique values from smallest to largest.
|
||||
my @headers;
|
||||
foreach (@elements) {
|
||||
my $element = $_;
|
||||
if ($element->{type} == TYPE_HEADER) {
|
||||
if (@headers) {
|
||||
my $h_i = 0;
|
||||
my $last_size = $headers[0];
|
||||
for (my $h_i = 0; $h_i < scalar @headers; $h_i++) {
|
||||
if ($element->{size} == $headers[$h_i]) {
|
||||
$h_i = scalar @headers;
|
||||
} elsif ($element->{size} < $headers[$h_i]) {
|
||||
splice @headers, $h_i, 0, $element->{size};
|
||||
$h_i = scalar @headers;
|
||||
} elsif ($element->{size} > $headers[$h_i]) {
|
||||
if ($h_i+1 >= scalar @headers) {
|
||||
splice @headers, $h_i+1, 0, $element->{size};
|
||||
$h_i = scalar @headers;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
$headers[0] = $element->{size};
|
||||
}
|
||||
}
|
||||
}
|
||||
# this is really dumb, but convert our array to a hash
|
||||
my %header_map;
|
||||
for (my $h_i = 0; $h_i < scalar @headers; $h_i++) {
|
||||
if ($settings{'header.reverse'}) {
|
||||
$header_map{$headers[$h_i]} = $h_i+1+($settings{'header.depth'} ? $settings{'header.depth'} : 0);
|
||||
} else {
|
||||
$header_map{$headers[$h_i]} = scalar ($settings{'header.depth'} ? $settings{'header.depth'} : 0) + @headers - $h_i;
|
||||
}
|
||||
}
|
||||
|
||||
# set up our array for header numbering. The first element
|
||||
my @header_numbers;
|
||||
for (my $h_i = 0; $h_i < scalar @headers; $h_i++) {
|
||||
$header_numbers[$h_i] = 0;
|
||||
}
|
||||
my $header_number = 0; # at which numbering depth are we
|
||||
|
||||
# print 'em out
|
||||
if ($settings{'toc'}) {
|
||||
foreach (@elements) {
|
||||
if ($_->{type} == TYPE_HEADER) {
|
||||
|
||||
}
|
||||
}
|
||||
} else {
|
||||
my $previous_type = 0;
|
||||
my $indent = ("\t" x ($settings{"indent_level"} ? $settings{"indent_level"} : 0));
|
||||
foreach (@elements) {
|
||||
if ($_->{type} == TYPE_HEADER) {
|
||||
my $hsize = $header_map{$_->{size}};
|
||||
my $htext = $_->{text};
|
||||
if ($settings{"header.numbering"}) {
|
||||
$header_number = $hsize;
|
||||
for (my $h_i = $header_number+1; $h_i < scalar @headers; $h_i++) {
|
||||
$header_numbers[$h_i] = 0;
|
||||
}
|
||||
$header_numbers[$header_number]++;
|
||||
$htext = ($settings{"header.numbering"} ? $header_numbers[$header_number].'. ' : '') . $htext;
|
||||
}
|
||||
my $hid = $_->{text};
|
||||
$hid =~ s/ /_/g;
|
||||
print("<h$hsize".($_->{opts} ? $_->{opts} : '').($settings{'header.ids'} ? " id=\"".$hid."\"" : '').">$htext</h$hsize>\n");
|
||||
} elsif ($_->{type} == TYPE_BREAK) {
|
||||
if ($previous_type == TYPE_BREAK) {
|
||||
print("<br />\n");
|
||||
}
|
||||
} elsif ($_->{type} == TYPE_PARAGRAPH) {
|
||||
my $text = $_->{text};
|
||||
$text =~ s/\n/<br>\n/g;
|
||||
print("<p".($_->{opts} ? $_->{opts} : '').">$text</p>\n");
|
||||
} elsif ($_->{type} == TYPE_PRE) {
|
||||
print("<pre".($_->{opts} ? $_->{opts} : '').">$_->{text}</pre>\n");
|
||||
} elsif ($_->{type} == TYPE_CODE) {
|
||||
print("<pre><code".($_->{opts} ? $_->{opts} : '').">$_->{text}</code></pre>\n");
|
||||
} elsif ($_->{type} == TYPE_BLOCKQUOTE) {
|
||||
print("<blockquote".($_->{opts} ? $_->{opts} : '').">".($settings{'blockquote.parse'} ? parseText($_->{text}) : $_->{text})."</blockquote>\n");
|
||||
} elsif ($_->{type} == TYPE_SPACER) {
|
||||
print("<hr />\n");
|
||||
} elsif ($_->{type} == TYPE_LIST) {
|
||||
printf("<p></p>"); # @@ should this be optional?
|
||||
printList($_);
|
||||
}
|
||||
$previous_type = $_->{type};
|
||||
}
|
||||
}
|
||||
if (!$settings{"version.none"}) {
|
||||
if ($settings{"version.hide"}) {
|
||||
print "<!-- $file_name generated by kettext 0.1 -->\n";
|
||||
} else {
|
||||
print "<small>$file_name generated by kettext 0.1</small>\n";
|
||||
}
|
||||
}
|
||||
|
||||
sub printList {
|
||||
my $list = $_[0];
|
||||
my $child_count = @{$list->{'children'}};
|
||||
#my $child = $children->[$child_i];
|
||||
if ($list->{'text'}) {
|
||||
print "<li>$list->{'text'}";
|
||||
}
|
||||
if ($child_count > 0) {
|
||||
print "<ul>\n";
|
||||
my $children = \@{$list->{'children'}};
|
||||
my $child_i = 0;
|
||||
for (my $child = $children->[$child_i]; $child_i < $child_count; $child = $children->[++$child_i]) {
|
||||
printList($child);
|
||||
}
|
||||
print "</ul>\n";
|
||||
}
|
||||
if ($list->{'text'}) {
|
||||
print "</li>\n";
|
||||
}
|
||||
}
|
||||
|
||||
sub parseText {
|
||||
my $text = $_[0];
|
||||
my @text_patterns = ('\*\*','\*\*', '\/\/','\/\/', '\[\[','\]\]', '\(\(','\)\)', '``', '``');
|
||||
my @text_replace = ('<b>','</b>', '<i>','</i>', '<a>','</a>', '<small>','</small>', '<code>','</code>');
|
||||
my $i = 0;
|
||||
my $new_text = $text;
|
||||
for (my $p = 0; $p <= $#text_patterns; $p+=2) {
|
||||
my $open = $text_patterns[$p];
|
||||
my $close = $text_patterns[$p+1];
|
||||
while ($new_text =~ m/($open)([^$close]*)($close)(\.\S*\(.*?\)|)/g) {
|
||||
#while ($new_text =~ m/($open)([^$close]*)($close)(\.\S*\(.*\)|)/g) {
|
||||
my $converted_text;
|
||||
if ($4) {
|
||||
my $ts = substr($text_replace[$i], 0, -1);
|
||||
my $te = substr($text_replace[$i], -1);
|
||||
$converted_text = $ts.parseOptions($4).$te.$2.$text_replace[$i+1];
|
||||
} else {
|
||||
$converted_text = $text_replace[$i].$2.$text_replace[$i+1];
|
||||
}
|
||||
$new_text =~ s/\Q$1$2$3$4\E/$converted_text/g;
|
||||
}
|
||||
$i +=2;
|
||||
}
|
||||
return $new_text;
|
||||
}
|
||||
|
||||
sub parseOptions {
|
||||
my $text = $_[0];
|
||||
my $opts;
|
||||
while ($text =~ m/(\.)([a-zA-Z]*[^\(])(\()([^\)]*)/g) {
|
||||
$opts.=" $2=\"$4\"";
|
||||
}
|
||||
return $opts;
|
||||
}
|
||||
my %parsed_file = kettext::parseFile($file_name, \%cmd_settings);
|
||||
kettext::convertTo::HTML(%parsed_file);
|
||||
|
|
|
@ -0,0 +1,436 @@
|
|||
#!/usr/bin/perl
|
||||
use strict;
|
||||
use warnings;
|
||||
package kettext;
|
||||
use constant VERSION => 0.2;
|
||||
# create our parse "enum"
|
||||
use constant {
|
||||
TYPE_HEADER => 1,
|
||||
TYPE_PARAGRAPH => 2,
|
||||
TYPE_PRE => 3,
|
||||
TYPE_RULE => 4,
|
||||
TYPE_SPACER => 5,
|
||||
TYPE_BREAK => 6,
|
||||
TYPE_CODE => 7,
|
||||
TYPE_BLOCKQUOTE => 8,
|
||||
TYPE_LIST => 9
|
||||
};
|
||||
# create our general data
|
||||
# parseFile("my_file", %cmd_settings)
|
||||
sub parseFile {
|
||||
my $file_name = $_[0];
|
||||
my %cmd_settings = ($_[1] ? %{$_[1]} : ());
|
||||
my @elements = (); # our list of ALL distinct elements
|
||||
my %block = (); # some block of data, such as paragraph, pre, etc.
|
||||
my %settings = ();
|
||||
# create file handler and read into @lines
|
||||
binmode STDOUT, ":utf8";
|
||||
use utf8;
|
||||
open(my $fh, '<:encoding(UTF-8)', $file_name)
|
||||
or die "ERR: could not open file '$file_name' $!";
|
||||
chomp(my @lines = <$fh>); close($fh);
|
||||
# main logic: line reading
|
||||
for (my $i = 0; $i <= $#lines; $i++) {
|
||||
# check for imply rules on first line
|
||||
if ($i == 0 || $i == $#lines) {
|
||||
if ($lines[$i] =~ m/^(\..*\)(\s|))(.*)/) {
|
||||
if ($1 =~ m/^.*\((.*)\)/) {
|
||||
my @options = split(',', $1);
|
||||
foreach my $opt (@options) {
|
||||
my @value = split('=', $opt);
|
||||
$settings{$value[0]} = ($value[1] ? $value[1] : 1);
|
||||
}
|
||||
}
|
||||
next;
|
||||
}
|
||||
}
|
||||
# 1. check for indent-related lines (pre, code, list, etc.)
|
||||
if ($lines[$i] =~ m/^(\t|\s\s|\s\s\s\s)(.)(.*)/) {
|
||||
# code
|
||||
if ($2 eq '#') {
|
||||
if (!%block) {
|
||||
if ($3 =~ m/^([^\s]*)(\..*\)(\s|))(.*)/) {
|
||||
%block = (type => TYPE_CODE, text => $3, opts => parseOptions($2));
|
||||
} else {
|
||||
%block = (type => TYPE_CODE, text => $3);
|
||||
}
|
||||
} else {
|
||||
if ($block{"type"} != TYPE_CODE) {
|
||||
push @elements, {%block};
|
||||
if ($3 =~ m/^([^\s*])(\..*\)(\s|))(.*)/) {
|
||||
%block = (type => TYPE_CODE, text => $3, opts => parseOptions($2));
|
||||
} else {
|
||||
%block = (type => TYPE_CODE, text => $3);
|
||||
}
|
||||
} else {
|
||||
$block{'text'} .= "\n".$3;
|
||||
}
|
||||
}
|
||||
# lists - this is ugly, expensive, and by far the most shameful of this code
|
||||
} elsif ($lines[$i] =~ m/^(\t{1,}|\s\s{1,})(\*\s|\~\s|\-\s|\+\s|.{1,3}\.\s)(.*)/) {
|
||||
# this is hackish - I would prefer to get # of matches from above
|
||||
my $depth = length($1) / (substr($1, 0, 1) eq ' ' ? 2 : 1);
|
||||
if (%block) {
|
||||
if ($block{"type"} != TYPE_LIST) {
|
||||
push @elements, {%block};
|
||||
%block = (type => TYPE_LIST, depth => 0, children => []);
|
||||
}
|
||||
} else {
|
||||
%block = (type => TYPE_LIST, depth => 0, children => []);
|
||||
}
|
||||
my %this_item = (type => TYPE_LIST, text => $3, depth => $depth, parent => \%block, children => []);
|
||||
my $child_count = @{$block{'children'}};
|
||||
if ($child_count == 0) {
|
||||
push @{$block{'children'}}, {%this_item};
|
||||
} else {
|
||||
my $root = \%block;
|
||||
my $root_children = \@{${$root}{'children'}};
|
||||
my $diff = $depth-${$root}{'depth'};
|
||||
# travel down the list until we get our proper context.
|
||||
while ($diff > 0) {
|
||||
$root_children = \@{${$root}{'children'}};
|
||||
if (@{${$root}{'children'}} <= 0) {
|
||||
push @{${$root}{'children'}}, {%this_item};
|
||||
$diff = 0;
|
||||
} else {
|
||||
# diff 1 = we have proper parent, 0 = we're one step too deep
|
||||
$root = $root_children->[-1];
|
||||
$diff = $depth-${$root}{'depth'};
|
||||
if ($diff == 1) {
|
||||
push @{${$root}{'children'}}, {%this_item};
|
||||
$diff = 0;
|
||||
} elsif ($diff == 0) {
|
||||
push @{${${$root}{'parent'}}{'children'}}, {%this_item};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
# preformatted (default)
|
||||
} else {
|
||||
if (!%block) {
|
||||
if ($2 eq '.' && $3 =~ m/^(.*\)(\s|))(.*)/) {
|
||||
%block = (type => TYPE_PRE, text => $3, opts => parseOptions('.'.$1));
|
||||
} else {
|
||||
%block = (type => TYPE_PRE, text => $2.$3);
|
||||
}
|
||||
} else {
|
||||
if ($block{"type"} != TYPE_PRE) {
|
||||
push @elements, {%block}; %block=();
|
||||
if ($2 eq '.' && $3 =~ m/^(.*\)(\s|))(.*)/) {
|
||||
%block = (type => TYPE_PRE, text => $3, opts => parseOptions('.'.$1));
|
||||
} else {
|
||||
%block = (type => TYPE_PRE, text => $2.$3);
|
||||
}
|
||||
} else {
|
||||
$block{'text'} .= "\n".$2.$3;
|
||||
}
|
||||
}
|
||||
}
|
||||
next;
|
||||
}
|
||||
# 2. check for big headers, e.g.,
|
||||
# ,,,,
|
||||
# header
|
||||
# ````
|
||||
# conditions: first and third lines must have at least 4 repeating chars, and the second line must not match header conditions.
|
||||
if ($i+1 < $#lines) {
|
||||
if ($lines[$i] =~ m/(\S\S)\1{2,}/) {
|
||||
my $size = $+[0];
|
||||
# only check closing header if contents appear to not be a header
|
||||
if ($lines[$i+1] !~ m/(\S\S)\1{2,}/g) {
|
||||
# check closing line for header appearance
|
||||
if ($lines[$i+2] =~ m/(\S\S)\1{2,}/g) {
|
||||
if (%block) { push @elements, {%block}; %block=() }
|
||||
if ($lines[$i+1] =~ m/^(.*?)(\..*\))/) {
|
||||
push @elements, {
|
||||
type => TYPE_HEADER,
|
||||
size => $size,
|
||||
text => $1,
|
||||
opts => parseOptions($2)
|
||||
};
|
||||
} else {
|
||||
push @elements, {
|
||||
type => TYPE_HEADER,
|
||||
size => $size,
|
||||
text => $lines[$i+1]
|
||||
};
|
||||
}
|
||||
$i += 2;
|
||||
next;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
# 3. check for small headers
|
||||
if ($lines[$i] =~ m/^(\S)\1{2,}/) {
|
||||
my $size = $+[0];
|
||||
my $match = substr($lines[$i], $-[0], $+[0]-$-[0]);
|
||||
my $post = substr($lines[$i], $+[0]);
|
||||
# two conditions:
|
||||
# match repeated $match if something else came between
|
||||
# e.g., ==== header ====
|
||||
# otherwise match if there is some text following
|
||||
# e.g., ==== header
|
||||
if ($post =~ m/(\t*|\s*|)(.*)\Q$match\E$/g) {
|
||||
if (%block) { push @elements, {%block}; %block=() }
|
||||
if ($2 =~ m/^(.*?)(\..*\))/) {
|
||||
push @elements, {
|
||||
type => TYPE_HEADER,
|
||||
size => $size,
|
||||
text => $1,
|
||||
opts => parseOptions($2)
|
||||
};
|
||||
} else {
|
||||
push @elements, {
|
||||
type => TYPE_HEADER,
|
||||
size => $size,
|
||||
text => $2
|
||||
};
|
||||
}
|
||||
next;
|
||||
} elsif ($post =~ m/^(\t*|\s*|)(.*[^(\n|\r|\s|\t)])/g) {
|
||||
if (%block) { push @elements, {%block}; %block=() }
|
||||
if ($post =~ m/^(.*?)(\..*\))/) {
|
||||
push @elements, {
|
||||
type => TYPE_HEADER,
|
||||
size => $size,
|
||||
text => $1,
|
||||
opts => parseOptions($2)
|
||||
};
|
||||
} else {
|
||||
push @elements, {
|
||||
type => TYPE_HEADER,
|
||||
size => $size,
|
||||
text => $post
|
||||
};
|
||||
}
|
||||
next;
|
||||
}
|
||||
}
|
||||
# 4. check for spacers
|
||||
if ($lines[$i] =~ m/^(\S)\1{2,}/) {
|
||||
if (%block) { push @elements, {%block}; %block=() }
|
||||
push @elements, {
|
||||
type => TYPE_SPACER,
|
||||
size => $+[0]-$-[0]
|
||||
};
|
||||
next;
|
||||
}
|
||||
# discover blank lines - these usually signify end of some type of block
|
||||
if ($lines[$i] =~ m/^\s*$/) {
|
||||
if (%block) { push @elements, {%block}; %block=() }
|
||||
push @elements, {
|
||||
type => TYPE_BREAK
|
||||
};
|
||||
next;
|
||||
}
|
||||
# check for blockquotes
|
||||
if ($lines[$i] =~ m/^\>(.*)$/) {
|
||||
if (!%block) {
|
||||
if ($1 =~ m/^(\..*\)(\s|))(.*)/) {
|
||||
%block = (type => TYPE_BLOCKQUOTE, text => $2, opts => parseOptions($1));
|
||||
} else {
|
||||
%block = (type => TYPE_BLOCKQUOTE, text => $1);
|
||||
}
|
||||
} else {
|
||||
if ($block{'type'} != TYPE_BLOCKQUOTE) {
|
||||
push @elements, {%block};
|
||||
if ($1 =~ m/^(\..*\)(\s|))(.*)/) {
|
||||
%block = (type => TYPE_BLOCKQUOTE, text => $2, opts => parseOptions($1));
|
||||
} else {
|
||||
%block = (type => TYPE_BLOCKQUOTE, text => $1);
|
||||
}
|
||||
} else {
|
||||
$block{'text'} .= "\n".$1;
|
||||
}
|
||||
}
|
||||
next;
|
||||
}
|
||||
# finally, create a paragraph or append to it.
|
||||
if (!%block) {
|
||||
if ($lines[$i] =~ m/^(\..*\)(\s|))(.*)/) {
|
||||
%block = (type => TYPE_PARAGRAPH, text => $2, opts => parseOptions($1));
|
||||
} else {
|
||||
%block = (type => TYPE_PARAGRAPH, text => $lines[$i]);
|
||||
}
|
||||
} else {
|
||||
$block{'text'} .= "\n".$lines[$i];
|
||||
}
|
||||
}
|
||||
# push final block if it exists
|
||||
if (%block) { push @elements, {%block}; %block=() }
|
||||
# merge file settings with command-line settings
|
||||
@settings{ keys %cmd_settings } = values %cmd_settings;
|
||||
# get our various header sizes and organize an array with unique values from smallest to largest.
|
||||
my @headers;
|
||||
foreach (@elements) {
|
||||
my $element = $_;
|
||||
if ($element->{type} == TYPE_HEADER) {
|
||||
if (@headers) {
|
||||
my $h_i = 0;
|
||||
my $last_size = $headers[0];
|
||||
for (my $h_i = 0; $h_i < scalar @headers; $h_i++) {
|
||||
if ($element->{size} == $headers[$h_i]) {
|
||||
$h_i = scalar @headers;
|
||||
} elsif ($element->{size} < $headers[$h_i]) {
|
||||
splice @headers, $h_i, 0, $element->{size};
|
||||
$h_i = scalar @headers;
|
||||
} elsif ($element->{size} > $headers[$h_i]) {
|
||||
if ($h_i+1 >= scalar @headers) {
|
||||
splice @headers, $h_i+1, 0, $element->{size};
|
||||
$h_i = scalar @headers;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
$headers[0] = $element->{size};
|
||||
}
|
||||
}
|
||||
}
|
||||
# this is really dumb, but convert our array to a hash
|
||||
my %header_map;
|
||||
for (my $h_i = 0; $h_i < scalar @headers; $h_i++) {
|
||||
if ($settings{'header.reverse'}) {
|
||||
$header_map{$headers[$h_i]} = $h_i+1+($settings{'header.depth'} ? $settings{'header.depth'} : 0);
|
||||
} else {
|
||||
$header_map{$headers[$h_i]} = scalar ($settings{'header.depth'} ? $settings{'header.depth'} : 0) + @headers - $h_i;
|
||||
}
|
||||
}
|
||||
|
||||
# set up our array for header numbering. The first element
|
||||
my @header_numbers;
|
||||
for (my $h_i = 0; $h_i < scalar @headers; $h_i++) {
|
||||
$header_numbers[$h_i] = 0;
|
||||
}
|
||||
my $header_number = 0; # at which numbering depth are we
|
||||
# prepend to headers
|
||||
foreach (@elements) {
|
||||
if ($_->{type} == TYPE_HEADER) {
|
||||
my $hsize = $header_map{$_->{size}};
|
||||
$_->{hsize} = $hsize;
|
||||
$header_number = $hsize;
|
||||
for (my $h_i = $header_number+1; $h_i < scalar @headers; $h_i++) {
|
||||
$header_numbers[$h_i] = 0;
|
||||
}
|
||||
$header_numbers[$header_number]++;
|
||||
# prepend an ordered number to headers based on depth
|
||||
if ($settings{"header.numbering"}) {
|
||||
$_->{text} = $header_numbers[$header_number].'. '.$_->{text};
|
||||
}
|
||||
}
|
||||
}
|
||||
my %ret = (
|
||||
"elements" => \@elements,
|
||||
"settings" => \%settings,
|
||||
"filename" => $file_name
|
||||
);
|
||||
return %ret;
|
||||
}
|
||||
sub parseOptions {
|
||||
my $text = $_[0];
|
||||
my $opts;
|
||||
while ($text =~ m/(\.)([a-zA-Z]*[^\(])(\()([^\)]*)/g) {
|
||||
$opts.=" $2=\"$4\"";
|
||||
}
|
||||
return $opts;
|
||||
}
|
||||
|
||||
package kettext::convertTo;
|
||||
sub HTML {
|
||||
my %data = @_;
|
||||
my @elements = @{$data{elements}};
|
||||
my %settings = %{$data{settings}};
|
||||
my $file_name = $data{filename};
|
||||
# print 'em out
|
||||
if ($settings{'toc'}) {
|
||||
foreach (@elements) {
|
||||
if ($_->{type} == kettext::TYPE_HEADER) {
|
||||
|
||||
}
|
||||
}
|
||||
} else {
|
||||
my $previous_type = 0;
|
||||
my $indent = ("\t" x ($settings{"indent_level"} ? $settings{"indent_level"} : 0));
|
||||
foreach (@elements) {
|
||||
if ($_->{type} == kettext::TYPE_HEADER) {
|
||||
my $hid = $_->{text};
|
||||
$hid =~ s/ /_/g;
|
||||
print("<h$_->{hsize}".($_->{opts} ? $_->{opts} : '').($settings{'header.ids'} ? " id=\"".$hid."\"" : '').">".convertText($_->{text})."</h$_->{hsize}>\n");
|
||||
} elsif ($_->{type} == kettext::TYPE_BREAK) {
|
||||
if ($previous_type == kettext::TYPE_BREAK) {
|
||||
print("<br />\n");
|
||||
}
|
||||
} elsif ($_->{type} == kettext::TYPE_PARAGRAPH) {
|
||||
my $text = convertText($_->{text});
|
||||
$text =~ s/\n/<br>\n/g;
|
||||
print("<p".($_->{opts} ? $_->{opts} : '').">$text</p>\n");
|
||||
} elsif ($_->{type} == kettext::TYPE_PRE) {
|
||||
print("<pre".($_->{opts} ? $_->{opts} : '').">$_->{text}</pre>\n");
|
||||
} elsif ($_->{type} == kettext::TYPE_CODE) {
|
||||
print("<pre><code".($_->{opts} ? $_->{opts} : '').">$_->{text}</code></pre>\n");
|
||||
} elsif ($_->{type} == kettext::TYPE_BLOCKQUOTE) {
|
||||
print("<blockquote".($_->{opts} ? $_->{opts} : '').">".($settings{'blockquote.parse'} ? convertText($_->{text}) : $_->{text})."</blockquote>\n");
|
||||
} elsif ($_->{type} == kettext::TYPE_SPACER) {
|
||||
print("<hr />\n");
|
||||
} elsif ($_->{type} == kettext::TYPE_LIST) {
|
||||
printf("<p></p>"); # @@ should this be optional?
|
||||
printList($_);
|
||||
}
|
||||
$previous_type = $_->{type};
|
||||
}
|
||||
}
|
||||
if (!$settings{"version.none"}) {
|
||||
if ($settings{"version.hide"}) {
|
||||
print "<!-- $file_name generated by kettext ".kettext::VERSION." -->\n";
|
||||
} else {
|
||||
print "<small>$file_name generated by kettext ".kettext::VERSION."</small>\n";
|
||||
}
|
||||
}
|
||||
|
||||
sub printList {
|
||||
my $list = $_[0];
|
||||
my $child_count = @{$list->{'children'}};
|
||||
#my $child = $children->[$child_i];
|
||||
if ($list->{'text'}) {
|
||||
print "<li>".convertText($list->{'text'});
|
||||
}
|
||||
if ($child_count > 0) {
|
||||
print "<ul>\n";
|
||||
my $children = \@{$list->{'children'}};
|
||||
my $child_i = 0;
|
||||
for (my $child = $children->[$child_i]; $child_i < $child_count; $child = $children->[++$child_i]) {
|
||||
printList($child);
|
||||
}
|
||||
print "</ul>\n";
|
||||
}
|
||||
if ($list->{'text'}) {
|
||||
print "</li>\n";
|
||||
}
|
||||
}
|
||||
sub convertText {
|
||||
my $text = $_[0];
|
||||
my @text_patterns = ('\*\*','\*\*', '\/\/','\/\/', '\[\[','\]\]', '\(\(','\)\)', '``', '``');
|
||||
my @text_replace = ('<b>','</b>', '<i>','</i>', '<a>','</a>', '<small>','</small>', '<code>','</code>');
|
||||
my $i = 0;
|
||||
my $new_text = $text;
|
||||
for (my $p = 0; $p <= $#text_patterns; $p+=2) {
|
||||
my $open = $text_patterns[$p];
|
||||
my $close = $text_patterns[$p+1];
|
||||
while ($new_text =~ m/($open)([^$close]*)($close)(\.\S*\(.*?\)|)/g) {
|
||||
#while ($new_text =~ m/($open)([^$close]*)($close)(\.\S*\(.*\)|)/g) {
|
||||
my $converted_text;
|
||||
if ($4) {
|
||||
my $ts = substr($text_replace[$i], 0, -1);
|
||||
my $te = substr($text_replace[$i], -1);
|
||||
$converted_text = $ts.kettext::parseOptions($4).$te.$2.$text_replace[$i+1];
|
||||
} else {
|
||||
$converted_text = $text_replace[$i].$2.$text_replace[$i+1];
|
||||
}
|
||||
$new_text =~ s/\Q$1$2$3$4\E/$converted_text/g;
|
||||
}
|
||||
$i +=2;
|
||||
}
|
||||
return $new_text;
|
||||
}
|
||||
}
|
||||
1;
|
Loading…
Reference in New Issue