Index: MANIFEST =================================================================== --- MANIFEST (revision 35086) +++ MANIFEST (working copy) @@ -2796,14 +2796,18 @@ lib/Pod/Simple/DumpAsText.pm [devel] lib/Pod/Simple/DumpAsXML.pm [devel] lib/Pod/Simple/HTML.pm [devel] +lib/Pod/Simple/HTMLBatch.pm [devel] +lib/Pod/Simple/HTMLLegacy.pm [devel] lib/Pod/Simple/LinkSection.pm [devel] lib/Pod/Simple/Methody.pm [devel] +lib/Pod/Simple/Progress.pm [devel] lib/Pod/Simple/PullParser.pm [devel] lib/Pod/Simple/PullParserEndToken.pm [devel] lib/Pod/Simple/PullParserStartToken.pm [devel] lib/Pod/Simple/PullParserTextToken.pm [devel] lib/Pod/Simple/PullParserToken.pm [devel] lib/Pod/Simple/RTF.pm [devel] +lib/Pod/Simple/Search.pm [devel] lib/Pod/Simple/SimpleTree.pm [devel] lib/Pod/Simple/Text.pm [devel] lib/Pod/Simple/TextContent.pm [devel] @@ -2811,6 +2815,7 @@ lib/Pod/Simple/Transcode.pm [devel] lib/Pod/Simple/TranscodeDumb.pm [devel] lib/Pod/Simple/TranscodeSmart.pm [devel] +lib/Pod/Simple/XHTML.pm [devel] lib/Pod/Simple/XMLOutStream.pm [devel] parrot.spec [] parrotbug [] Index: lib/Pod/Simple.pm =================================================================== --- lib/Pod/Simple.pm (revision 35086) +++ lib/Pod/Simple.pm (working copy) @@ -18,7 +18,7 @@ ); @ISA = ('Pod::Simple::BlackBox'); -$VERSION = '2.05'; +$VERSION = '3.07'; @Known_formatting_codes = qw(I B C L E F S X Z); %Known_formatting_codes = map(($_=>1), @Known_formatting_codes); @@ -80,13 +80,19 @@ 'bare_output', # For some subclasses: whether to prepend # header-code and postpend footer-code + 'fullstop_space_harden', # Whether to turn ". " into ".[nbsp] "; + 'nix_X_codes', # whether to ignore X<...> codes 'merge_text', # whether to avoid breaking a single piece of # text up into several events + 'preserve_whitespace', # whether to try to keep whitespace as-is + 'content_seen', # whether we've seen any real Pod content 'errors_seen', # TODO: document. whether we've seen any errors (fatal or not) + 'codes_in_verbatim', # for PseudoPod extensions + 'code_handler', # coderef to call when a code (non-pod) line is seen 'cut_handler', # coderef to call when a =cut line is seen #Called like: @@ -139,9 +145,17 @@ $$x = '' unless defined $$x; DEBUG > 4 and print "# Output string set to $x ($$x)\n"; $this->{'output_fh'} = Pod::Simple::TiedOutFH->handle_on($_[0]); - return $this->{'output_string'} = ${ $this->{'output_fh'} }; + return + $this->{'output_string'} = $_[0]; + #${ ${ $this->{'output_fh'} } }; } +sub abandon_output_string { $_[0]->abandon_output_fh; delete $_[0]{'output_string'} } +sub abandon_output_fh { $_[0]->output_fh(undef) } +# These don't delete the string or close the FH -- they just delete our +# references to it/them. +# TODO: document these + #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ sub new { @@ -969,6 +983,7 @@ # L or L # L or L or L # L + # Ltext|scheme:...> my($self,@stack) = @_; @@ -988,11 +1003,12 @@ # By here, $treelet->[$i] is definitely an L node - DEBUG > 1 and print "Ogling L node $treelet->[$i]\n"; + my $ell = $treelet->[$i]; + DEBUG > 1 and print "Ogling L node $ell\n"; # bitch if it's empty - if( @{$treelet->[$i]} == 2 - or (@{$treelet->[$i]} == 3 and $treelet->[$i][2] eq '') + if( @{$ell} == 2 + or (@{$ell} == 3 and $ell->[2] eq '') ) { $self->whine( $start_line, "An empty L<>" ); $treelet->[$i] = 'L<>'; # just make it a text node @@ -1000,53 +1016,70 @@ } # Catch URLs: - # URLs can, alas, contain E<...> sequences, so we can't /assume/ - # that this is one text node. But it has to START with one text - # node... - if(! ref $treelet->[$i][2] and - $treelet->[$i][2] =~ m/^\w+:[^:\s]\S*$/s + + # there are a number of possible cases: + # 1) text node containing url: http://foo.com + # -> [ 'http://foo.com' ] + # 2) text node containing url and text: foo|http://foo.com + # -> [ 'foo|http://foo.com' ] + # 3) text node containing url start: mailto:xEfoo.com + # -> [ 'mailto:x', [ E ... ], 'foo.com' ] + # 4) text node containing url start and text: foo|mailto:xEfoo.com + # -> [ 'foo|mailto:x', [ E ... ], 'foo.com' ] + # 5) other nodes containing text and url start: OE<39>Malley|http://foo.com + # -> [ 'O', [ E ... ], 'Malley', '|http://foo.com' ] + # ... etc. + + # anything before the url is part of the text. + # anything after it is part of the url. + # the url text node itself may contain parts of both. + + if (my ($url_index, $text_part, $url_part) = + # grep is no good here; we want to bail out immediately so that we can + # use $1, $2, etc. without having to do the match twice. + sub { + for (2..$#$ell) { + next if ref $ell->[$_]; + next unless $ell->[$_] =~ m/^(?:([^|]*)\|)?(\w+:[^:\s]\S*)$/s; + return ($_, $1, $2); + } + return; + }->() ) { - $treelet->[$i][1]{'type'} = 'url'; - $treelet->[$i][1]{'content-implicit'} = 'yes'; - - if( 3 == @{ $treelet->[$i] } ) { - # But if it IS just one text node (most common case) - DEBUG > 1 and printf qq{Catching "%s as " as ho-hum L link.\n}, - $treelet->[$i][2] - ; - $treelet->[$i][1]{'to'} = Pod::Simple::LinkSection->new( - $treelet->[$i][2] - ); # its own treelet - } else { - # It's a URL but complex (like "Lbar>"). Feh. - #$treelet->[$i][1]{'to'} = [ @{$treelet->[$i]} ]; - #splice @{ $treelet->[$i][1]{'to'} }, 0,2; - #DEBUG > 1 and printf qq{Catching "%s as " as complex L link.\n}, - # join '~', @{$treelet->[$i][1]{'to' }}; - - $treelet->[$i][1]{'to'} = Pod::Simple::LinkSection->new( - $treelet->[$i] # yes, clone the whole content as a treelet - ); - $treelet->[$i][1]{'to'}[0] = ''; # set the copy's tagname to nil - die "SANITY FAILURE" if $treelet->[0] eq ''; # should never happen! - DEBUG > 1 and print - qq{Catching "$treelet->[$i][1]{'to'}" as a complex L link.\n}; + $ell->[1]{'type'} = 'url'; + + my @text = @{$ell}[2..$url_index-1]; + push @text, $text_part if defined $text_part; + + my @url = @{$ell}[$url_index+1..$#$ell]; + unshift @url, $url_part; + + unless (@text) { + $ell->[1]{'content-implicit'} = 'yes'; + @text = @url; } - next; # and move on + $ell->[1]{to} = Pod::Simple::LinkSection->new( + @url == 1 + ? $url[0] + : [ '', {}, @url ], + ); + + splice @$ell, 2, $#$ell, @text; + + next; } - # Catch some very simple and/or common cases - if(@{$treelet->[$i]} == 3 and ! ref $treelet->[$i][2]) { - my $it = $treelet->[$i][2]; + if(@{$ell} == 3 and ! ref $ell->[2]) { + my $it = $ell->[2]; if($it =~ m/^[-a-zA-Z0-9]+\([-a-zA-Z0-9]+\)$/s) { # man sections # Hopefully neither too broad nor too restrictive a RE DEBUG > 1 and print "Catching \"$it\" as manpage link.\n"; - $treelet->[$i][1]{'type'} = 'man'; + $ell->[1]{'type'} = 'man'; # This's the only place where man links can get made. - $treelet->[$i][1]{'content-implicit'} = 'yes'; - $treelet->[$i][1]{'to' } = + $ell->[1]{'content-implicit'} = 'yes'; + $ell->[1]{'to' } = Pod::Simple::LinkSection->new( $it ); # treelet! next; @@ -1055,9 +1088,9 @@ # Extremely forgiving idea of what constitutes a bare # modulename link like L or even L DEBUG > 1 and print "Catching \"$it\" as ho-hum L link.\n"; - $treelet->[$i][1]{'type'} = 'pod'; - $treelet->[$i][1]{'content-implicit'} = 'yes'; - $treelet->[$i][1]{'to' } = + $ell->[1]{'type'} = 'pod'; + $ell->[1]{'content-implicit'} = 'yes'; + $ell->[1]{'to' } = Pod::Simple::LinkSection->new( $it ); # treelet! next; } @@ -1073,7 +1106,6 @@ my $link_text; # set to an arrayref if found - my $ell = $treelet->[$i]; my @ell_content = @$ell; splice @ell_content,0,2; # Knock off the 'L' and {} bits @@ -1357,7 +1389,7 @@ $i += @$to_pull_up - 1; # Make $i skip the pulled-up stuff } } else { - $treelet->[$i] =~ tr/ /\xA0/ if ASCII and $in_s; + $treelet->[$i] =~ s/\s/\xA0/g if ASCII and $in_s; # (If not in ASCIIland, we can't assume that \xA0 == nbsp.) # Note that if you apply nbsp_for_S to text, and so turn @@ -1427,7 +1459,7 @@ "\nAbout to parse source: {{\n$_[0]\n}}\n\n"; - my $parser = $class->new; + my $parser = ref $class && $class->isa(__PACKAGE__) ? $class : $class->new; $parser->hide_line_numbers(1); my $out = ''; Index: lib/Pod/Simple/XMLOutStream.pm =================================================================== --- lib/Pod/Simple/XMLOutStream.pm (revision 35086) +++ lib/Pod/Simple/XMLOutStream.pm (working copy) @@ -113,14 +113,34 @@ L is rather like this class; see its documentation for a discussion of the differences. -L, L +L, L, L -The older libraries L, L, L +L +The older (and possibly obsolete) libraries L, L + +=head1 ABOUT EXTENDING POD + +TODO: An example or two of =extend, then point to Pod::Simple::Subclassing + + +=head1 ASK ME! + +If you actually want to use Pod as a format that you want to render to +XML (particularly if to an XML instance with more elements than normal +Pod has), please email me (C) and I'll probably have +some recommendations. + +For reasons of concision and energetic laziness, some methods and +options in this module (and the dozen modules it depends on) are +undocumented; but one of those undocumented bits might be just what +you're looking for. + + =head1 COPYRIGHT AND DISCLAIMERS -Copyright (c) 2002 Sean M. Burke. All rights reserved. +Copyright (c) 2002-4 Sean M. Burke. All rights reserved. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. Index: lib/Pod/Simple/HTML.pm =================================================================== --- lib/Pod/Simple/HTML.pm (revision 35086) +++ lib/Pod/Simple/HTML.pm (working copy) @@ -3,19 +3,79 @@ package Pod::Simple::HTML; use strict; use Pod::Simple::PullParser (); -use vars qw(@ISA %Tagmap $Computerese $Lame $Linearization_Limit $VERSION); +use vars qw( + @ISA %Tagmap $Computerese $LamePad $Linearization_Limit $VERSION + $Perldoc_URL_Prefix $Perldoc_URL_Postfix + $Title_Prefix $Title_Postfix $HTML_EXTENSION %ToIndex + $Doctype_decl $Content_decl +); @ISA = ('Pod::Simple::PullParser'); -$VERSION = '2.02'; +$VERSION = '3.03'; use UNIVERSAL (); -sub DEBUG () {0} +BEGIN { + if(defined &DEBUG) { } # no-op + elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG } + else { *DEBUG = sub () {0}; } +} -$Computerese = " lang='und' xml:lang='und'" unless defined $Computerese; -$Lame = ' class="pad"' unless defined $Lame; +$Doctype_decl ||= ''; # No. Just No. Don't even ask me for it. + # qq{\n}; -$Linearization_Limit = 90 unless defined $Linearization_Limit; +$Content_decl ||= + q{}; + +$HTML_EXTENSION = '.html' unless defined $HTML_EXTENSION; +$Computerese = "" unless defined $Computerese; +$LamePad = '' unless defined $LamePad; + +$Linearization_Limit = 120 unless defined $Linearization_Limit; # headings/items longer than that won't get an +$Perldoc_URL_Prefix = 'http://search.cpan.org/perldoc?' + unless defined $Perldoc_URL_Prefix; +$Perldoc_URL_Postfix = '' + unless defined $Perldoc_URL_Postfix; +$Title_Prefix = '' unless defined $Title_Prefix; +$Title_Postfix = '' unless defined $Title_Postfix; +%ToIndex = map {; $_ => 1 } qw(head1 head2 head3 head4 ); # item-text + # 'item-text' stuff in the index doesn't quite work, and may + # not be a good idea anyhow. + + +__PACKAGE__->_accessorize( + 'perldoc_url_prefix', + # In turning L into http://whatever/Foo%3a%3aBar, what + # to put before the "Foo%3a%3aBar". + # (for singleton mode only?) + 'perldoc_url_postfix', + # what to put after "Foo%3a%3aBar" in the URL. Normally "". + + 'batch_mode', # whether we're in batch mode + 'batch_mode_current_level', + # When in batch mode, how deep the current module is: 1 for "LWP", + # 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc + + 'title_prefix', 'title_postfix', + # What to put before and after the title in the head. + # Should already be &-escaped + + 'html_header_before_title', + 'html_header_after_title', + 'html_footer', + + 'index', # whether to add an index at the top of each page + # (actually it's a table-of-contents, but we'll call it an index, + # out of apparently longstanding habit) + + 'html_css', # URL of CSS file to point to + 'html_javascript', # URL of CSS file to point to + + 'force_title', # should already be &-escaped + 'default_title', # should already be &-escaped +); + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ my @_to_accept; @@ -77,12 +137,13 @@ ] # no point in providing a way to get ..., I think ), - '/item-bullet' => "

\n", - '/item-number' => "

\n", - '/item-text' => "

\n", - 'Para_item' => "\n
", - '/Para_item' => "

\n", + '/item-bullet' => "$LamePad\n", + '/item-number' => "$LamePad\n", + '/item-text' => "$LamePad\n", + 'item-body' => "\n
", + '/item-body' => "
\n", + 'B' => "", '/B' => "", 'I' => "", '/I' => "", 'F' => "", '/F' => "", @@ -103,6 +164,10 @@ } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +sub go { Pod::Simple::HTML->parse_from_file(@ARGV); exit 0 } + # Just so we can run from the command line. No options. + # For that, use perldoc! +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub new { my $new = shift->SUPER::new(@_); @@ -112,11 +177,37 @@ $new->accept_codes('VerbatimFormatted'); $new->accept_codes(@_to_accept); DEBUG > 2 and print "To accept: ", join(' ',@_to_accept), "\n"; - + + $new->perldoc_url_prefix( $Perldoc_URL_Prefix ); + $new->perldoc_url_postfix( $Perldoc_URL_Postfix ); + $new->title_prefix( $Title_Prefix ); + $new->title_postfix( $Title_Postfix ); + + $new->html_header_before_title( + qq[$Doctype_decl] + ); + $new->html_header_after_title( join "\n" => + "", + $Content_decl, + "\n", + $new->version_tag_comment, + "\n", + ); + $new->html_footer( qq[\n\n\n\n] ); + $new->{'Tagmap'} = {%Tagmap}; return $new; } +sub batch_mode_page_object_init { + my($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_; + DEBUG and print "Initting $self\n for $module\n", + " in $infile\n out $outfile\n depth $depth\n"; + $self->batch_mode(1); + $self->batch_mode_current_level($depth); + return $self; +} + sub run { my $self = $_[0]; return $self->do_middle if $self->bare_output; @@ -126,121 +217,266 @@ #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -sub do_pod_link { - my($self, $link) = @_; - my $to = $link->attr('to'); - my $section = $link->attr('section'); - return undef unless( # should never happen - (defined $to and length $to) or - (defined $section and length $section) - ); +sub do_beginning { + my $self = $_[0]; - if(defined $to and length $to) { - $to = $self->resolve_pod_page_link($to, $section); - return undef unless defined $to and length $to; - # resolve_pod_page_link returning undef is how it - # can signal that it gives up on making a link - # (I pass it the section value, but I don't see a - # particular reason it'd use it.) + my $title; + + if(defined $self->force_title) { + $title = $self->force_title; + DEBUG and print "Forcing title to be $title\n"; + } else { + # Actually try looking for the title in the document: + $title = $self->get_short_title(); + unless($self->content_seen) { + DEBUG and print "No content seen in search for title.\n"; + return; + } + $self->{'Title'} = $title; + + if(defined $title and $title =~ m/\S/) { + $title = $self->title_prefix . esc($title) . $self->title_postfix; + } else { + $title = $self->default_title; + $title = '' unless defined $title; + DEBUG and print "Title defaults to $title\n"; + } } + - if(defined $section and length($section .= '')) { - $section =~ tr/ /_/; - $section =~ tr/\x00-\x1F\x80-\x9F//d if 'A' eq chr(65); - $section = $self->unicode_escape_url($section); - # Turn char 1234 into "(1234)" - $section = '_' unless length $section; + my $after = $self->html_header_after_title || ''; + if($self->html_css) { + my $link = + $self->html_css =~ m/html_css # It's a big blob of markup, let's drop it in + : sprintf( # It's just a URL, so let's wrap it up + qq[\n], + $self->html_css, + ); + $after =~ s{()}{$link\n$1}i; # otherwise nevermind } - - + $self->_add_top_anchor(\$after); - foreach my $it ($to, $section) { - if( defined $it ) { - $it =~ s/([^\x00-\xFF])/join '', map sprintf('%%%02X',$_), unpack 'C*', $1/eg; - $it =~ s/([^\._abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',ord($1))/eg; - # Yes, stipulate the list without a range, so that this can work right on - # all charsets that this module happens to run under. - # Altho, hmm, what about that ord? Presumably that won't work right - # under non-ASCII charsets. Something should be done about that. - } + if($self->html_javascript) { + my $link = + $self->html_javascript =~ m/html_javascript # It's a big blob of markup, let's drop it in + : sprintf( # It's just a URL, so let's wrap it up + qq[\n], + $self->html_javascript, + ); + $after =~ s{()}{$link\n$1}i; # otherwise nevermind } - - my $out = $to if defined $to and length $to; - $out .= "#" . $section if defined $section and length $section; - return undef unless length $out; - return $out; + + print {$self->{'output_fh'}} + $self->html_header_before_title || '', + $title, # already escaped + $after, + ; + + DEBUG and print "Returning from do_beginning...\n"; + return 1; } +sub _add_top_anchor { + my($self, $text_r) = @_; + unless($$text_r and $$text_r =~ m/name=['"]___top['"]/) { # a hack + $$text_r .= "\n"; + } + return; +} -sub resolve_pod_page_link { - my($self, $to) = @_; - - return 'TODO'; +sub version_tag_comment { + my $self = shift; + return sprintf + "\n", + esc( + ref($self), $self->VERSION(), $ISA[0], $ISA[0]->VERSION(), + $], scalar(gmtime), + ), $self->_modnote(), + ; } -sub do_url_link { return $_[1]->attr('to') } +sub _modnote { + my $class = ref($_[0]) || $_[0]; + return join "\n " => grep m/\S/, split "\n", -sub do_man_link { return undef } - # But subclasses are welcome to override this if they have man - # pages somewhere URL-accessible. +qq{ +If you want to change this HTML document, you probably shouldn't do that +by changing it directly. Instead, see about changing the calling options +to $class, and/or subclassing $class, +then reconverting this document from the Pod source. +When in doubt, email the author of $class for advice. +See 'perldoc $class' for more info. +}; -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +} -sub do_link { - my($self, $token) = @_; - my $type = $token->attr('type'); - if(!defined $type) { - $self->whine("Typeless L!?", $token->attr('start_line')); - } elsif( $type eq 'pod') { return $self->do_pod_link($token); - } elsif( $type eq 'url') { return $self->do_url_link($token); - } elsif( $type eq 'man') { return $self->do_man_link($token); - } else { - $self->whine("L of unknown type $type!?", $token->attr('start_line')); +sub do_end { + my $self = $_[0]; + print {$self->{'output_fh'}} $self->html_footer || ''; + return 1; +} + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +# Normally this would just be a call to _do_middle_main_loop -- but we +# have to do some elaborate things to emit all the content and then +# summarize it and output it /before/ the content that it's a summary of. + +sub do_middle { + my $self = $_[0]; + return $self->_do_middle_main_loop unless $self->index; + + if( $self->output_string ) { + # An efficiency hack + my $out = $self->output_string; #it's a reference to it + my $sneakytag = "\f\f\e\e\b\bIndex Here\e\e\b\b\f\f\n"; + $$out .= $sneakytag; + $self->_do_middle_main_loop; + $sneakytag = quotemeta($sneakytag); + my $index = $self->index_as_html(); + if( $$out =~ s/$sneakytag/$index/s ) { + # Expected case + DEBUG and print "Inserted ", length($index), " bytes of index HTML into $out.\n"; + } else { + DEBUG and print "Odd, couldn't find where to insert the index in the output!\n"; + # I don't think this should ever happen. + } + return 1; } - return 'FNORG'; + + unless( $self->output_fh ) { + require Carp; + Carp::confess("Parser object \$p doesn't seem to have any output object! I don't know how to deal with that."); + } + + # If we get here, we're outputting to a FH. So we need to do some magic. + # Namely, divert all content to a string, which we output after the index. + my $fh = $self->output_fh; + my $content = ''; + { + # Our horrible bait and switch: + $self->output_string( \$content ); + $self->_do_middle_main_loop; + $self->abandon_output_string(); + $self->output_fh($fh); + } + print $fh $self->index_as_html(); + print $fh $content; + + return 1; } +########################################################################### -sub do_middle { # the main work +sub index_as_html { my $self = $_[0]; + # This is meant to be called AFTER the input document has been parsed! + + my $points = $self->{'PSHTML_index_points'} || []; + + @$points > 1 or return qq[
\n]; + # There's no point in having a 0-item or 1-item index, I dare say. + + my(@out) = qq{\n
}; + my $level = 0; + + my( $target_level, $previous_tagname, $tagname, $text, $anchorname, $indent); + foreach my $p (@$points, ['head0', '(end)']) { + ($tagname, $text) = @$p; + $anchorname = $self->section_escape($text); + if( $tagname =~ m{^head(\d+)$} ) { + $target_level = 0 + $1; + } else { # must be some kinda list item + if($previous_tagname =~ m{^head\d+$} ) { + $target_level = $level + 1; + } else { + $target_level = $level; # no change needed + } + } + + # Get to target_level by opening or closing ULs + while($level > $target_level) + { --$level; push @out, (" " x $level) . ""; } + while($level < $target_level) + { ++$level; push @out, (" " x ($level-1)) + . "
    "; } + + $previous_tagname = $tagname; + next unless $level; + + $indent = ' ' x $level; + push @out, sprintf + "%s
  • %s", + $indent, $level, $anchorname, esc($text) + ; + } + push @out, "
\n"; + return join "\n", @out; +} + +########################################################################### + +sub _do_middle_main_loop { + my $self = $_[0]; my $fh = $self->{'output_fh'}; + my $tagmap = $self->{'Tagmap'}; - my($token, $type, $tagname); + my($token, $type, $tagname, $linkto, $linktype); my @stack; my $dont_wrap = 0; + while($token = $self->get_token) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if( ($type = $token->type) eq 'start' ) { if(($tagname = $token->tagname) eq 'L') { - esc($type = $self->do_link($token)); # reuse it, why not - if(defined $type and length $type) { - print $fh ""; + $linktype = $token->attr('type') || 'insane'; + + $linkto = $self->do_link($token); + + if(defined $linkto and length $linkto) { + esc($linkto); + # (Yes, SGML-escaping applies on top of %-escaping! + # But it's rarely noticeable in practice.) + print $fh qq{}; } else { print $fh ""; # Yes, an 'a' element with no attributes! } } elsif ($tagname eq 'item-text' or $tagname =~ m/^head\d$/s) { - print $fh $self->{'Tagmap'}{$tagname} || next; + print $fh $tagmap->{$tagname} || next; my @to_unget; while(1) { push @to_unget, $self->get_token; last if $to_unget[-1]->is_end and $to_unget[-1]->tagname eq $tagname; + + # TODO: support for X<...>'s found in here? (maybe hack into linearize_tokens) } + my $name = $self->linearize_tokens(@to_unget); - if(defined $name) { # ludicrously long, so nevermind - $name =~ tr/ /_/; - print $fh ""; + print $fh "section_name_tidy( $name ) ); + print $fh qq[name="$esc"]; DEBUG and print "Linearized ", scalar(@to_unget), " tokens as \"$name\".\n"; - } else { - print $fh ""; # Yes, an 'a' element with no attributes! + push @{ $self->{'PSHTML_index_points'} }, [$tagname, $name] + if $ToIndex{ $tagname }; + # Obviously, this discards all formatting codes (saving + # just their content), but ahwell. + + } else { # ludicrously long, so nevermind DEBUG and print "Linearized ", scalar(@to_unget), " tokens, but it was too long, so nevermind.\n"; } + print $fh "\n>"; $self->unget_token(@to_unget); } elsif ($tagname eq 'Data') { @@ -255,12 +491,13 @@ next; } else { - if( $tagname =~ m/^over-(.+)$/s ) { - push @stack, $1; - } elsif( $tagname eq 'Para') { - $tagname = 'Para_item' if @stack and $stack[-1] eq 'text'; + if( $tagname =~ m/^over-/s ) { + push @stack, ''; + } elsif( $tagname =~ m/^item-/s and @stack and $stack[-1] ) { + print $fh $stack[-1]; + $stack[-1] = ''; } - print $fh $self->{'Tagmap'}{$tagname} || next; + print $fh $tagmap->{$tagname} || next; ++$dont_wrap if $tagname eq 'Verbatim' or $tagname eq "VerbatimFormatted" or $tagname eq 'X'; } @@ -268,11 +505,21 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - } elsif( $type eq 'end' ) { if( ($tagname = $token->tagname) =~ m/^over-/s ) { - pop @stack; - } elsif( $tagname eq 'Para' ) { - $tagname = 'Para_item' if @stack and $stack[-1] eq 'text'; + if( my $end = pop @stack ) { + print $fh $end; + } + } elsif( $tagname =~ m/^item-/s and @stack) { + $stack[-1] = $tagmap->{"/$tagname"}; + if( $tagname eq 'item-text' and defined(my $next = $self->get_token) ) { + $self->unget_token($next); + if( $next->type eq 'start' and $next->tagname !~ m/^item-/s ) { + print $fh $tagmap->{"/item-text"},$tagmap->{"item-body"}; + $stack[-1] = $tagmap->{"/item-body"}; + } + } + next; } - print $fh $self->{'Tagmap'}{"/$tagname"} || next; + print $fh $tagmap->{"/$tagname"} || next; --$dont_wrap if $tagname eq 'Verbatim' or $tagname eq 'X'; # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -286,75 +533,214 @@ return 1; } +########################################################################### +# + +sub do_link { + my($self, $token) = @_; + my $type = $token->attr('type'); + if(!defined $type) { + $self->whine("Typeless L!?", $token->attr('start_line')); + } elsif( $type eq 'pod') { return $self->do_pod_link($token); + } elsif( $type eq 'url') { return $self->do_url_link($token); + } elsif( $type eq 'man') { return $self->do_man_link($token); + } else { + $self->whine("L of unknown type $type!?", $token->attr('start_line')); + } + return 'FNORG'; # should never get called +} + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -sub do_beginning { - my $self = $_[0]; +sub do_url_link { return $_[1]->attr('to') } - my $title = $self->get_short_title(); - unless($self->content_seen) { - DEBUG and print "No content seen in search for title.\n"; - return; +sub do_man_link { return undef } + # But subclasses are welcome to override this if they have man + # pages somewhere URL-accessible. + + +sub do_pod_link { + # And now things get really messy... + my($self, $link) = @_; + my $to = $link->attr('to'); + my $section = $link->attr('section'); + return undef unless( # should never happen + (defined $to and length $to) or + (defined $section and length $section) + ); + + $section = $self->section_escape($section) + if defined $section and length($section .= ''); # (stringify) + + DEBUG and printf "Resolving \"%s\" \"%s\"...\n", + $to || "(nil)", $section || "(nil)"; + + { + # An early hack: + my $complete_url = $self->resolve_pod_link_by_table($to, $section); + if( $complete_url ) { + DEBUG > 1 and print "resolve_pod_link_by_table(T,S) gives ", + $complete_url, "\n (Returning that.)\n"; + return $complete_url; + } else { + DEBUG > 4 and print " resolve_pod_link_by_table(T,S)", + " didn't return anything interesting.\n"; + } } - $self->{'Title'} = $title; - esc($title); - print {$self->{'output_fh'}} - "\n$title\n\n\n", - $self->version_tag_comment, - "\n", - ; - # TODO: more configurability there + if(defined $to and length $to) { + # Give this routine first hack again + my $there = $self->resolve_pod_link_by_table($to); + if(defined $there and length $there) { + DEBUG > 1 + and print "resolve_pod_link_by_table(T) gives $there\n"; + } else { + $there = + $self->resolve_pod_page_link($to, $section); + # (I pass it the section value, but I don't see a + # particular reason it'd use it.) + DEBUG > 1 and print "resolve_pod_page_link gives ", $to || "(nil)", "\n"; + unless( defined $there and length $there ) { + DEBUG and print "Can't resolve $to\n"; + return undef; + } + # resolve_pod_page_link returning undef is how it + # can signal that it gives up on making a link + } + $to = $there; + } - DEBUG and print "Returning from do_beginning...\n"; - return 1; + #DEBUG and print "So far [", $to||'nil', "] [", $section||'nil', "]\n"; + + my $out = (defined $to and length $to) ? $to : ''; + $out .= "#" . $section if defined $section and length $section; + + unless(length $out) { # sanity check + DEBUG and printf "Oddly, couldn't resolve \"%s\" \"%s\"...\n", + $to || "(nil)", $section || "(nil)"; + return undef; + } + + DEBUG and print "Resolved to $out\n"; + return $out; } -sub version_tag_comment { - my $self = shift; - return sprintf - "\n", - # None of the following things should need escaping, I dare say! - ref($self), $self->VERSION(), $ISA[0], $ISA[0]->VERSION(), - $], scalar(gmtime), - ; + +# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . + +sub section_escape { + my($self, $section) = @_; + return $self->section_url_escape( + $self->section_name_tidy($section) + ); } +sub section_name_tidy { + my($self, $section) = @_; + $section =~ tr/ /_/; + $section =~ tr/\x00-\x1F\x80-\x9F//d if 'A' eq chr(65); # drop crazy characters + $section = $self->unicode_escape_url($section); + $section = '_' unless length $section; + return $section; +} -sub do_end { - my $self = $_[0]; - print {$self->{'output_fh'}} "\n\n\n"; - # TODO: allow for a footer - return 1; +sub section_url_escape { shift->general_url_escape(@_) } +sub pagepath_url_escape { shift->general_url_escape(@_) } + +sub general_url_escape { + my($self, $string) = @_; + + $string =~ s/([^\x00-\xFF])/join '', map sprintf('%%%02X',$_), unpack 'C*', $1/eg; + # express Unicode things as urlencode(utf(orig)). + + # A pretty conservative escaping, behoovey even for query components + # of a URL (see RFC 2396) + + $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',ord($1))/eg; + # Yes, stipulate the list without a range, so that this can work right on + # all charsets that this module happens to run under. + # Altho, hmm, what about that ord? Presumably that won't work right + # under non-ASCII charsets. Something should be done + # about that, I guess? + + return $string; } -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -sub esc { - if(defined wantarray) { - if(wantarray) { - @_ = splice @_; # break aliasing - } else { - my $x = shift; - $x =~ s/([^\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg; - return $x; - } +#-------------------------------------------------------------------------- +# +# Oh look, a yawning portal to Hell! Let's play touch football right by it! +# + +sub resolve_pod_page_link { + # resolve_pod_page_link must return a properly escaped URL + my $self = shift; + return $self->batch_mode() + ? $self->resolve_pod_page_link_batch_mode(@_) + : $self->resolve_pod_page_link_singleton_mode(@_) + ; +} + +sub resolve_pod_page_link_singleton_mode { + my($self, $it) = @_; + return undef unless defined $it and length $it; + my $url = $self->pagepath_url_escape($it); + + $url =~ s{::$}{}s; # probably never comes up anyway + $url =~ s{::}{/}g unless $self->perldoc_url_prefix =~ m/\?/s; # sane DWIM? + + return undef unless length $url; + return $self->perldoc_url_prefix . $url . $self->perldoc_url_postfix; +} + +sub resolve_pod_page_link_batch_mode { + my($self, $to) = @_; + DEBUG > 1 and print " During batch mode, resolving $to ...\n"; + my @path = grep length($_), split m/::/s, $to, -1; + unless( @path ) { # sanity + DEBUG and print "Very odd! Splitting $to gives (nil)!\n"; + return undef; } - foreach my $x (@_) { - # Escape things very cautiously: - $x =~ s/([^\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg; - # Leave out "- so that "--" won't make it thru in X-generated comments - # with text in them. + $self->batch_mode_rectify_path(\@path); + my $out = join('/', map $self->pagepath_url_escape($_), @path) + . $HTML_EXTENSION; + DEBUG > 1 and print " => $out\n"; + return $out; +} - # Yes, stipulate the list without a range, so that this can work right on - # all charsets that this module happens to run under. - # Altho, hmm, what about that ord? Presumably that won't work right - # under non-ASCII charsets. Something should be done about that. +sub batch_mode_rectify_path { + my($self, $pathbits) = @_; + my $level = $self->batch_mode_current_level; + $level--; # how many levels up to go to get to the root + if($level < 1) { + unshift @$pathbits, '.'; # just to be pretty + } else { + unshift @$pathbits, ('..') x $level; } - return @_; + return; } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +sub resolve_pod_link_by_table { + # A crazy hack to allow specifying custom L => URL mappings + + return unless $_[0]->{'podhtml_LOT'}; # An optimizy shortcut + + my($self, $to, $section) = @_; + + # TODO: add a method that actually populates podhtml_LOT from a file? + + if(defined $section) { + $to = '' unless defined $to and length $to; + return $self->{'podhtml_LOT'}{"$to#$section"}; # quite possibly undef! + } else { + return $self->{'podhtml_LOT'}{$to}; # quite possibly undef! + } + return; +} + +########################################################################### + sub linearize_tokens { # self, tokens my $self = shift; my $out = ''; @@ -362,11 +748,11 @@ my $t; while($t = shift @_) { if(!ref $t or !UNIVERSAL::can($t, 'is_text')) { - $out .= $t; + $out .= $t; # a string, or some insane thing } elsif($t->is_text) { $out .= $t->text; } elsif($t->is_start and $t->tag eq 'X') { - # ignore until the end of this X<...> sequence + # Ignore until the end of this X<...> sequence: my $x_open = 1; while($x_open) { next if( ($t = shift @_)->is_text ); @@ -375,13 +761,7 @@ } } } - - $out =~ tr/\x00-\x1F\x80-\x9F//d if 'A' eq chr(65); return undef if length $out > $Linearization_Limit; - - $out = $self->unicode_escape_url($out); - $out = '_' unless length $out; - return $out; } @@ -395,38 +775,104 @@ } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +sub esc { # a function. + if(defined wantarray) { + if(wantarray) { + @_ = splice @_; # break aliasing + } else { + my $x = shift; + $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg; + return $x; + } + } + foreach my $x (@_) { + # Escape things very cautiously: + $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg + if defined $x; + # Leave out "- so that "--" won't make it thru in X-generated comments + # with text in them. + # Yes, stipulate the list without a range, so that this can work right on + # all charsets that this module happens to run under. + # Altho, hmm, what about that ord? Presumably that won't work right + # under non-ASCII charsets. Something should be done about that. + } + return @_; +} + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + 1; __END__ =head1 NAME -TODO - TODO +Pod::Simple::HTML - convert Pod to HTML =head1 SYNOPSIS - TODO + perl -MPod::Simple::HTML -e Pod::Simple::HTML::go thingy.pod - perl -MPod::Simple::HTML -e \ - "exit Pod::Simple::HTML->filter(shift)->errors_seen" \ - thingy.pod - =head1 DESCRIPTION -This class is for TODO. +This class is for making an HTML rendering of a Pod document. + This is a subclass of L and inherits all its -methods. +methods (and options). +Note that if you want to do a batch conversion of a lot of Pod +documents to HTML, you should see the module L. + + + +=head1 CALLING FROM THE COMMAND LINE + TODO + perl -MPod::Simple::HTML -e Pod::Simple::HTML::go Thing.pod Thing.html + + + +=head1 CALLING FROM PERL + +TODO make a new object, set any options, and use parse_from_file + + +=head1 METHODS + +TODO +all (most?) accessorized methods + + +=head1 SUBCLASSING + +TODO + + can just set any of: html_css html_javascript title_prefix + 'html_header_before_title', + 'html_header_after_title', + 'html_footer', + +maybe override do_pod_link + +maybe override do_beginning do_end + + + =head1 SEE ALSO -L +L, L + +TODO: a corpus of sample Pod input and HTML output? Or common +idioms? + + + =head1 COPYRIGHT AND DISCLAIMERS -Copyright (c) 2002 Sean M. Burke. All rights reserved. +Copyright (c) 2002-2004 Sean M. Burke. All rights reserved. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. Index: lib/Pod/Simple/BlackBox.pm =================================================================== --- lib/Pod/Simple/BlackBox.pm (revision 35086) +++ lib/Pod/Simple/BlackBox.pm (working copy) @@ -525,237 +525,17 @@ DEBUG > 1 and print "Pondering a $para_type paragraph, given the stack: (", $self->_dump_curr_open(), ")\n"; - if($para_type eq '=for') { #////////////////////////////////////////////// - # Fake it out as a begin/end - my $target; + if($para_type eq '=for') { + next if $self->_ponder_for($para,$curr_open,$paras); - if(grep $_->[1]{'~ignore'}, @$curr_open) { - DEBUG > 1 and print "Ignoring ignorable =for\n"; - next; - } + } elsif($para_type eq '=begin') { + next if $self->_ponder_begin($para,$curr_open,$paras); - for(my $i = 2; $i < @$para; ++$i) { - if($para->[$i] =~ s/^\s*(\S+)\s*//s) { - $target = $1; - last; - } - } - unless(defined $target) { - $self->whine( - $para->[1]{'start_line'}, - "=for without a target?" - ); - next; - } - DEBUG > 1 and - print "Faking out a =for $target as a =begin $target / =end $target\n"; - - $para->[0] = 'Data'; - - unshift @$paras, - ['=begin', - {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, - $target, - ], - $para, - ['=end', - {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, - $target, - ], - ; - - next; - - } elsif($para_type eq '=begin') { #/////////////////////////////////////// + } elsif($para_type eq '=end') { + next if $self->_ponder_end($para,$curr_open,$paras); - my $content = join ' ', splice @$para, 2; - $content =~ s/^\s+//s; - $content =~ s/\s+$//s; - unless(length($content)) { - $self->whine( - $para->[1]{'start_line'}, - "=begin without a target?" - ); - DEBUG and print "Ignoring targetless =begin\n"; - next; - } - - unless($content =~ m/^\S+$/s) { # i.e., unless it's one word - $self->whine( - $para->[1]{'start_line'}, - "'=begin' only takes one parameter, not several as in '=begin $content'" - ); - DEBUG and print "Ignoring unintelligible =begin $content\n"; - next; - } - - - $para->[1]{'target'} = $content; # without any ':' - - $content =~ s/^:!/!:/s; - my $neg; # whether this is a negation-match - $neg = 1 if $content =~ s/^!//s; - my $to_resolve; # whether to process formatting codes - $to_resolve = 1 if $content =~ s/^://s; - - my $dont_ignore; # whether this target matches us - - foreach my $target_name ( - split(',', $content, -1), - $neg ? () : '*' - ) { - DEBUG > 2 and - print " Considering whether =begin $content matches $target_name\n"; - next unless $self->{'accept_targets'}{$target_name}; - - DEBUG > 2 and - print " It DOES match the acceptable target $target_name!\n"; - $to_resolve = 1 - if $self->{'accept_targets'}{$target_name} eq 'force_resolve'; - $dont_ignore = 1; - $para->[1]{'target_matching'} = $target_name; - last; # stop looking at other target names - } - - if($neg) { - if( $dont_ignore ) { - $dont_ignore = ''; - delete $para->[1]{'target_matching'}; - DEBUG > 2 and print " But the leading ! means that this is a NON-match!\n"; - } else { - $dont_ignore = 1; - $para->[1]{'target_matching'} = '!'; - DEBUG > 2 and print " But the leading ! means that this IS a match!\n"; - } - } - - $para->[0] = '=for'; # Just what we happen to call these, internally - $para->[1]{'~really'} ||= '=begin'; - $para->[1]{'~ignore'} = (! $dont_ignore) || 0; - $para->[1]{'~resolve'} = $to_resolve || 0; - - DEBUG > 1 and print " Making note to ", $dont_ignore ? 'not ' : '', - "ignore contents of this region\n"; - DEBUG > 1 and $dont_ignore and print " Making note to treat contents as ", - ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n"; - DEBUG > 1 and print " (Stack now: ", $self->_dump_curr_open(), ")\n"; - - push @$curr_open, $para; - if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) { - DEBUG > 1 and print "Ignoring ignorable =begin\n"; - } else { - $self->{'content_seen'} ||= 1; - $self->_handle_element_start(($scratch='for'), $para->[1]); - } - - next; - - } elsif($para_type eq '=end') { #///////////////////////////////////////// - - my $content = join ' ', splice @$para, 2; - $content =~ s/^\s+//s; - $content =~ s/\s+$//s; - DEBUG and print "Ogling '=end $content' directive\n"; - - unless(length($content)) { - $self->whine( - $para->[1]{'start_line'}, - "'=end' without a target?" . ( - ( @$curr_open and $curr_open->[-1][0] eq '=for' ) - ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' ) - : '' - ) - ); - DEBUG and print "Ignoring targetless =end\n"; - next; - } - - unless($content =~ m/^\S+$/) { # i.e., unless it's one word - $self->whine( - $para->[1]{'start_line'}, - "'=end $content' is invalid. (Stack: " - . $self->_dump_curr_open() . ')' - ); - DEBUG and print "Ignoring mistargetted =end $content\n"; - next; - } - - unless(@$curr_open and $curr_open->[-1][0] eq '=for') { - $self->whine( - $para->[1]{'start_line'}, - "=end $content without matching =begin. (Stack: " - . $self->_dump_curr_open() . ')' - ); - DEBUG and print "Ignoring mistargetted =end $content\n"; - next; - } - - unless($content eq $curr_open->[-1][1]{'target'}) { - $self->whine( - $para->[1]{'start_line'}, - "=end $content doesn't match =begin " - . $curr_open->[-1][1]{'target'} - . ". (Stack: " - . $self->_dump_curr_open() . ')' - ); - DEBUG and print "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n"; - next; - } - - # Else it's okay to close... - if(grep $_->[1]{'~ignore'}, @$curr_open) { - DEBUG > 1 and print "Not firing any event for this =end $content because in an ignored region\n"; - # And that may be because of this to-be-closed =for region, or some - # other one, but it doesn't matter. - } else { - $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'}; - # what's that for? - - $self->{'content_seen'} ||= 1; - $self->_handle_element_end( $scratch = 'for' ); - } - DEBUG > 1 and print "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n"; - pop @$curr_open; - - next; - - } elsif($para_type eq '~end') { #///////////////////////////////////////// - # The virtual end-document signal - - if(@$curr_open) { # Deal with things left open - DEBUG and print "Stack is nonempty at end-document: (", - $self->_dump_curr_open(), ")\n"; - - DEBUG > 9 and print "Stack: ", pretty($curr_open), "\n"; - unshift @$paras, $self->_closers_for_all_curr_open; - # Make sure there is exactly one ~end in the parastack, at the end: - @$paras = grep $_->[0] ne '~end', @$paras; - push @$paras, $para, $para; - # We need two -- once for the next cycle where we - # generate errata, and then another to be at the end - # when that loop back around to process the errata. - next; - - } else { - DEBUG and print "Okay, stack is empty now.\n"; - } - - # Try generating errata section, if applicable - unless($self->{'~tried_gen_errata'}) { - $self->{'~tried_gen_errata'} = 1; - my @extras = $self->_gen_errata(); - if(@extras) { - unshift @$paras, @extras; - DEBUG and print "Generated errata... relooping...\n"; - next; # I.e., loop around again to process these fake-o paragraphs - } - } - - splice @$paras; # Well, that's that for this paragraph buffer. - DEBUG and print "Throwing end-document event.\n"; - - $self->_handle_element_end( $scratch = 'Document' ); - next; # Hasta la byebye + } elsif($para_type eq '~end') { # The virtual end-document signal + next if $self->_ponder_doc_end($para,$curr_open,$paras); } @@ -769,97 +549,17 @@ #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ - if($para_type eq '=pod') { #////////////////////////////////////////////// - $self->whine( - $para->[1]{'start_line'}, - "=pod directives shouldn't be over one line long! Ignoring all " - . (@$para - 2) . " lines of content" - ) if @$para > 3; - # Content is always ignored. - + if($para_type eq '=pod') { + $self->_ponder_pod($para,$curr_open,$paras); - } elsif($para_type eq '=over') { #//////////////////////////////////////// - next unless @$paras; - my $list_type; + } elsif($para_type eq '=over') { + next if $self->_ponder_over($para,$curr_open,$paras); - if($paras->[0][0] eq '=item') { # most common case - $list_type = $self->_get_initial_item_type($paras->[0]); + } elsif($para_type eq '=back') { + next if $self->_ponder_back($para,$curr_open,$paras); - } elsif($paras->[0][0] eq '=back') { - # Ignore empty lists. TODO: make this an option? - shift @$paras; - next; - - } elsif($paras->[0][0] eq '~end') { - $self->whine( - $para->[1]{'start_line'}, - "=over is the last thing in the document?!" - ); - next; # But feh, ignore it. - } else { - $list_type = 'block'; - } - $para->[1]{'~type'} = $list_type; - push @$curr_open, $para; - # yes, we reuse the paragraph as a stack item - - my $content = join ' ', splice @$para, 2; - my $overness; - if($content =~ m/^\s*$/s) { - $para->[1]{'indent'} = 4; - } elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) { - no integer; - $para->[1]{'indent'} = $1; - if($1 == 0) { - $self->whine( - $para->[1]{'start_line'}, - "Can't have a 0 in =over $content" - ); - $para->[1]{'indent'} = 4; - } - } else { - $self->whine( - $para->[1]{'start_line'}, - "=over should be: '=over' or '=over positive_number'" - ); - $para->[1]{'indent'} = 4; - } - DEBUG > 1 and print "=over found of type $list_type\n"; - - $self->{'content_seen'} ||= 1; - $self->_handle_element_start(($scratch = 'over-' . $list_type), $para->[1]); - - } elsif($para_type eq '=back') { #//////////////////////////////////////// + } else { - # TODO: fire off or or ?? - - my $content = join ' ', splice @$para, 2; - if($content =~ m/\S/) { - $self->whine( - $para->[1]{'start_line'}, - "=back doesn't take any parameters, but you said =back $content" - ); - } - - if(@$curr_open and $curr_open->[-1][0] eq '=over') { - DEBUG > 1 and print "=back happily closes matching =over\n"; - # Expected case: we're closing the most recently opened thing - #my $over = pop @$curr_open; - $self->{'content_seen'} ||= 1; - $self->_handle_element_end( $scratch = - 'over-' . ( (pop @$curr_open)->[1]{'~type'} ) - ); - } else { - DEBUG > 1 and print "=back found without a matching =over. Stack: (", - join(', ', map $_->[0], @$curr_open), ").\n"; - $self->whine( - $para->[1]{'start_line'}, - '=back without =over' - ); - next; # and ignore it - } - - } else { #//////////////////////////////////////////////////////////////// # All non-magical codes!!! # Here we start using $para_type for our own twisted purposes, to @@ -1123,54 +823,11 @@ #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if($para_type eq 'Plain') { - DEBUG and print " giving plain treatment...\n"; - unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' ) - or $para->[1]{'~cooked'} - ) { - push @$para, - @{$self->_make_treelet( - join("\n", splice(@$para, 2)), - $para->[1]{'start_line'} - )}; - } - # Empty paragraphs don't need a treelet for any reason I can see. - # And precooked paragraphs already have a treelet. - + $self->_ponder_Plain($para); } elsif($para_type eq 'Verbatim') { - DEBUG and print " giving verbatim treatment...\n"; - - $para->[1]{'xml:space'} = 'preserve'; - for($i = 2; $i < @$para; $i++) { - foreach my $line ($para->[$i]) { # just for aliasing - while( $line =~ - # Sort of adapted from Text::Tabs -- yes, it's hardwired in that - # tabs are at every EIGHTH column. For portability, it has to be - # one setting everywhere, and 8th wins. - s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e - ) {} - - # TODO: whinge about (or otherwise treat) unindented or overlong lines - - } - } - - # Now the VerbatimFormatted hoodoo... - if( $self->{'accept_codes'} and - $self->{'accept_codes'}{'VerbatimFormatted'} - ) { - while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para } - # Kill any number of terminal newlines - $self->_verbatim_format($para); - } else { - push @$para, join "\n", splice(@$para, 2) if @$para > 3; - $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines - } - + $self->_ponder_Verbatim($para); } elsif($para_type eq 'Data') { - DEBUG and print " giving data treatment...\n"; - $para->[1]{'xml:space'} = 'preserve'; - push @$para, join "\n", splice(@$para, 2) if @$para > 3; - + $self->_ponder_Data($para); } else { die "\$para type is $para_type -- how did that happen?"; # Shouldn't happen. @@ -1190,6 +847,576 @@ return; } +########################################################################### +# The sub-ponderers... + + + +sub _ponder_for { + my ($self,$para,$curr_open,$paras) = @_; + + # Fake it out as a begin/end + my $target; + + if(grep $_->[1]{'~ignore'}, @$curr_open) { + DEBUG > 1 and print "Ignoring ignorable =for\n"; + return 1; + } + + for(my $i = 2; $i < @$para; ++$i) { + if($para->[$i] =~ s/^\s*(\S+)\s*//s) { + $target = $1; + last; + } + } + unless(defined $target) { + $self->whine( + $para->[1]{'start_line'}, + "=for without a target?" + ); + return 1; + } + DEBUG > 1 and + print "Faking out a =for $target as a =begin $target / =end $target\n"; + + $para->[0] = 'Data'; + + unshift @$paras, + ['=begin', + {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, + $target, + ], + $para, + ['=end', + {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, + $target, + ], + ; + + return 1; +} + +sub _ponder_begin { + my ($self,$para,$curr_open,$paras) = @_; + my $content = join ' ', splice @$para, 2; + $content =~ s/^\s+//s; + $content =~ s/\s+$//s; + unless(length($content)) { + $self->whine( + $para->[1]{'start_line'}, + "=begin without a target?" + ); + DEBUG and print "Ignoring targetless =begin\n"; + return 1; + } + + my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/; + $para->[1]{'title'} = $title if ($title); + $para->[1]{'target'} = $target; # without any ':' + $content = $target; # strip off the title + + $content =~ s/^:!/!:/s; + my $neg; # whether this is a negation-match + $neg = 1 if $content =~ s/^!//s; + my $to_resolve; # whether to process formatting codes + $to_resolve = 1 if $content =~ s/^://s; + + my $dont_ignore; # whether this target matches us + + foreach my $target_name ( + split(',', $content, -1), + $neg ? () : '*' + ) { + DEBUG > 2 and + print " Considering whether =begin $content matches $target_name\n"; + next unless $self->{'accept_targets'}{$target_name}; + + DEBUG > 2 and + print " It DOES match the acceptable target $target_name!\n"; + $to_resolve = 1 + if $self->{'accept_targets'}{$target_name} eq 'force_resolve'; + $dont_ignore = 1; + $para->[1]{'target_matching'} = $target_name; + last; # stop looking at other target names + } + + if($neg) { + if( $dont_ignore ) { + $dont_ignore = ''; + delete $para->[1]{'target_matching'}; + DEBUG > 2 and print " But the leading ! means that this is a NON-match!\n"; + } else { + $dont_ignore = 1; + $para->[1]{'target_matching'} = '!'; + DEBUG > 2 and print " But the leading ! means that this IS a match!\n"; + } + } + + $para->[0] = '=for'; # Just what we happen to call these, internally + $para->[1]{'~really'} ||= '=begin'; + $para->[1]{'~ignore'} = (! $dont_ignore) || 0; + $para->[1]{'~resolve'} = $to_resolve || 0; + + DEBUG > 1 and print " Making note to ", $dont_ignore ? 'not ' : '', + "ignore contents of this region\n"; + DEBUG > 1 and $dont_ignore and print " Making note to treat contents as ", + ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n"; + DEBUG > 1 and print " (Stack now: ", $self->_dump_curr_open(), ")\n"; + + push @$curr_open, $para; + if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) { + DEBUG > 1 and print "Ignoring ignorable =begin\n"; + } else { + $self->{'content_seen'} ||= 1; + $self->_handle_element_start((my $scratch='for'), $para->[1]); + } + + return 1; +} + +sub _ponder_end { + my ($self,$para,$curr_open,$paras) = @_; + my $content = join ' ', splice @$para, 2; + $content =~ s/^\s+//s; + $content =~ s/\s+$//s; + DEBUG and print "Ogling '=end $content' directive\n"; + + unless(length($content)) { + $self->whine( + $para->[1]{'start_line'}, + "'=end' without a target?" . ( + ( @$curr_open and $curr_open->[-1][0] eq '=for' ) + ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' ) + : '' + ) + ); + DEBUG and print "Ignoring targetless =end\n"; + return 1; + } + + unless($content =~ m/^\S+$/) { # i.e., unless it's one word + $self->whine( + $para->[1]{'start_line'}, + "'=end $content' is invalid. (Stack: " + . $self->_dump_curr_open() . ')' + ); + DEBUG and print "Ignoring mistargetted =end $content\n"; + return 1; + } + + unless(@$curr_open and $curr_open->[-1][0] eq '=for') { + $self->whine( + $para->[1]{'start_line'}, + "=end $content without matching =begin. (Stack: " + . $self->_dump_curr_open() . ')' + ); + DEBUG and print "Ignoring mistargetted =end $content\n"; + return 1; + } + + unless($content eq $curr_open->[-1][1]{'target'}) { + $self->whine( + $para->[1]{'start_line'}, + "=end $content doesn't match =begin " + . $curr_open->[-1][1]{'target'} + . ". (Stack: " + . $self->_dump_curr_open() . ')' + ); + DEBUG and print "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n"; + return 1; + } + + # Else it's okay to close... + if(grep $_->[1]{'~ignore'}, @$curr_open) { + DEBUG > 1 and print "Not firing any event for this =end $content because in an ignored region\n"; + # And that may be because of this to-be-closed =for region, or some + # other one, but it doesn't matter. + } else { + $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'}; + # what's that for? + + $self->{'content_seen'} ||= 1; + $self->_handle_element_end( my $scratch = 'for' ); + } + DEBUG > 1 and print "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n"; + pop @$curr_open; + + return 1; +} + +sub _ponder_doc_end { + my ($self,$para,$curr_open,$paras) = @_; + if(@$curr_open) { # Deal with things left open + DEBUG and print "Stack is nonempty at end-document: (", + $self->_dump_curr_open(), ")\n"; + + DEBUG > 9 and print "Stack: ", pretty($curr_open), "\n"; + unshift @$paras, $self->_closers_for_all_curr_open; + # Make sure there is exactly one ~end in the parastack, at the end: + @$paras = grep $_->[0] ne '~end', @$paras; + push @$paras, $para, $para; + # We need two -- once for the next cycle where we + # generate errata, and then another to be at the end + # when that loop back around to process the errata. + return 1; + + } else { + DEBUG and print "Okay, stack is empty now.\n"; + } + + # Try generating errata section, if applicable + unless($self->{'~tried_gen_errata'}) { + $self->{'~tried_gen_errata'} = 1; + my @extras = $self->_gen_errata(); + if(@extras) { + unshift @$paras, @extras; + DEBUG and print "Generated errata... relooping...\n"; + return 1; # I.e., loop around again to process these fake-o paragraphs + } + } + + splice @$paras; # Well, that's that for this paragraph buffer. + DEBUG and print "Throwing end-document event.\n"; + + $self->_handle_element_end( my $scratch = 'Document' ); + return 1; # Hasta la byebye +} + +sub _ponder_pod { + my ($self,$para,$curr_open,$paras) = @_; + $self->whine( + $para->[1]{'start_line'}, + "=pod directives shouldn't be over one line long! Ignoring all " + . (@$para - 2) . " lines of content" + ) if @$para > 3; + # Content is always ignored. + return; +} + +sub _ponder_over { + my ($self,$para,$curr_open,$paras) = @_; + return 1 unless @$paras; + my $list_type; + + if($paras->[0][0] eq '=item') { # most common case + $list_type = $self->_get_initial_item_type($paras->[0]); + + } elsif($paras->[0][0] eq '=back') { + # Ignore empty lists. TODO: make this an option? + shift @$paras; + return 1; + + } elsif($paras->[0][0] eq '~end') { + $self->whine( + $para->[1]{'start_line'}, + "=over is the last thing in the document?!" + ); + return 1; # But feh, ignore it. + } else { + $list_type = 'block'; + } + $para->[1]{'~type'} = $list_type; + push @$curr_open, $para; + # yes, we reuse the paragraph as a stack item + + my $content = join ' ', splice @$para, 2; + my $overness; + if($content =~ m/^\s*$/s) { + $para->[1]{'indent'} = 4; + } elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) { + no integer; + $para->[1]{'indent'} = $1; + if($1 == 0) { + $self->whine( + $para->[1]{'start_line'}, + "Can't have a 0 in =over $content" + ); + $para->[1]{'indent'} = 4; + } + } else { + $self->whine( + $para->[1]{'start_line'}, + "=over should be: '=over' or '=over positive_number'" + ); + $para->[1]{'indent'} = 4; + } + DEBUG > 1 and print "=over found of type $list_type\n"; + + $self->{'content_seen'} ||= 1; + $self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]); + + return; +} + +sub _ponder_back { + my ($self,$para,$curr_open,$paras) = @_; + # TODO: fire off or or ?? + + my $content = join ' ', splice @$para, 2; + if($content =~ m/\S/) { + $self->whine( + $para->[1]{'start_line'}, + "=back doesn't take any parameters, but you said =back $content" + ); + } + + if(@$curr_open and $curr_open->[-1][0] eq '=over') { + DEBUG > 1 and print "=back happily closes matching =over\n"; + # Expected case: we're closing the most recently opened thing + #my $over = pop @$curr_open; + $self->{'content_seen'} ||= 1; + $self->_handle_element_end( my $scratch = + 'over-' . ( (pop @$curr_open)->[1]{'~type'} ) + ); + } else { + DEBUG > 1 and print "=back found without a matching =over. Stack: (", + join(', ', map $_->[0], @$curr_open), ").\n"; + $self->whine( + $para->[1]{'start_line'}, + '=back without =over' + ); + return 1; # and ignore it + } +} + +sub _ponder_item { + my ($self,$para,$curr_open,$paras) = @_; + my $over; + unless(@$curr_open and ($over = $curr_open->[-1])->[0] eq '=over') { + $self->whine( + $para->[1]{'start_line'}, + "'=item' outside of any '=over'" + ); + unshift @$paras, + ['=over', {'start_line' => $para->[1]{'start_line'}}, ''], + $para + ; + return 1; + } + + + my $over_type = $over->[1]{'~type'}; + + if(!$over_type) { + # Shouldn't happen1 + die "Typeless over in stack, starting at line " + . $over->[1]{'start_line'}; + + } elsif($over_type eq 'block') { + unless($curr_open->[-1][1]{'~bitched_about'}) { + $curr_open->[-1][1]{'~bitched_about'} = 1; + $self->whine( + $curr_open->[-1][1]{'start_line'}, + "You can't have =items (as at line " + . $para->[1]{'start_line'} + . ") unless the first thing after the =over is an =item" + ); + } + # Just turn it into a paragraph and reconsider it + $para->[0] = '~Para'; + unshift @$paras, $para; + return 1; + + } elsif($over_type eq 'text') { + my $item_type = $self->_get_item_type($para); + # That kills the content of the item if it's a number or bullet. + DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; + + if($item_type eq 'text') { + # Nothing special needs doing for 'text' + } elsif($item_type eq 'number' or $item_type eq 'bullet') { + die "Unknown item type $item_type" + unless $item_type eq 'number' or $item_type eq 'bullet'; + # Undo our clobbering: + push @$para, $para->[1]{'~orig_content'}; + delete $para->[1]{'number'}; + # Only a PROPER item-number element is allowed + # to have a number attribute. + } else { + die "Unhandled item type $item_type"; # should never happen + } + + # =item-text thingies don't need any assimilation, it seems. + + } elsif($over_type eq 'number') { + my $item_type = $self->_get_item_type($para); + # That kills the content of the item if it's a number or bullet. + DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; + + my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; + + if($item_type eq 'bullet') { + # Hm, it's not numeric. Correct for this. + $para->[1]{'number'} = $expected_value; + $self->whine( + $para->[1]{'start_line'}, + "Expected '=item $expected_value'" + ); + push @$para, $para->[1]{'~orig_content'}; + # restore the bullet, blocking the assimilation of next para + + } elsif($item_type eq 'text') { + # Hm, it's not numeric. Correct for this. + $para->[1]{'number'} = $expected_value; + $self->whine( + $para->[1]{'start_line'}, + "Expected '=item $expected_value'" + ); + # Text content will still be there and will block next ~Para + + } elsif($item_type ne 'number') { + die "Unknown item type $item_type"; # should never happen + + } elsif($expected_value == $para->[1]{'number'}) { + DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n"; + + } else { + DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'}, + " instead of the expected value of $expected_value\n"; + $self->whine( + $para->[1]{'start_line'}, + "You have '=item " . $para->[1]{'number'} . + "' instead of the expected '=item $expected_value'" + ); + $para->[1]{'number'} = $expected_value; # correcting!! + } + + if(@$para == 2) { + # For the cases where we /didn't/ push to @$para + if($paras->[0][0] eq '~Para') { + DEBUG and print "Assimilating following ~Para content into $over_type item\n"; + push @$para, splice @{shift @$paras},2; + } else { + DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; + push @$para, ''; # Just so it's not contentless + } + } + + + } elsif($over_type eq 'bullet') { + my $item_type = $self->_get_item_type($para); + # That kills the content of the item if it's a number or bullet. + DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; + + if($item_type eq 'bullet') { + # as expected! + + if( $para->[1]{'~_freaky_para_hack'} ) { + DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n"; + push @$para, delete $para->[1]{'~_freaky_para_hack'}; + } + + } elsif($item_type eq 'number') { + $self->whine( + $para->[1]{'start_line'}, + "Expected '=item *'" + ); + push @$para, $para->[1]{'~orig_content'}; + # and block assimilation of the next paragraph + delete $para->[1]{'number'}; + # Only a PROPER item-number element is allowed + # to have a number attribute. + } elsif($item_type eq 'text') { + $self->whine( + $para->[1]{'start_line'}, + "Expected '=item *'" + ); + # But doesn't need processing. But it'll block assimilation + # of the next para. + } else { + die "Unhandled item type $item_type"; # should never happen + } + + if(@$para == 2) { + # For the cases where we /didn't/ push to @$para + if($paras->[0][0] eq '~Para') { + DEBUG and print "Assimilating following ~Para content into $over_type item\n"; + push @$para, splice @{shift @$paras},2; + } else { + DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; + push @$para, ''; # Just so it's not contentless + } + } + + } else { + die "Unhandled =over type \"$over_type\"?"; + # Shouldn't happen! + } + $para->[0] .= '-' . $over_type; + + return; +} + +sub _ponder_Plain { + my ($self,$para) = @_; + DEBUG and print " giving plain treatment...\n"; + unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' ) + or $para->[1]{'~cooked'} + ) { + push @$para, + @{$self->_make_treelet( + join("\n", splice(@$para, 2)), + $para->[1]{'start_line'} + )}; + } + # Empty paragraphs don't need a treelet for any reason I can see. + # And precooked paragraphs already have a treelet. + return; +} + +sub _ponder_Verbatim { + my ($self,$para) = @_; + DEBUG and print " giving verbatim treatment...\n"; + + $para->[1]{'xml:space'} = 'preserve'; + for(my $i = 2; $i < @$para; $i++) { + foreach my $line ($para->[$i]) { # just for aliasing + while( $line =~ + # Sort of adapted from Text::Tabs -- yes, it's hardwired in that + # tabs are at every EIGHTH column. For portability, it has to be + # one setting everywhere, and 8th wins. + s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e + ) {} + + # TODO: whinge about (or otherwise treat) unindented or overlong lines + + } + } + + # Now the VerbatimFormatted hoodoo... + if( $self->{'accept_codes'} and + $self->{'accept_codes'}{'VerbatimFormatted'} + ) { + while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para } + # Kill any number of terminal newlines + $self->_verbatim_format($para); + } elsif ($self->{'codes_in_verbatim'}) { + push @$para, + @{$self->_make_treelet( + join("\n", splice(@$para, 2)), + $para->[1]{'start_line'}, $para->[1]{'xml:space'} + )}; + $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines + } else { + push @$para, join "\n", splice(@$para, 2) if @$para > 3; + $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines + } + return; +} + +sub _ponder_Data { + my ($self,$para) = @_; + DEBUG and print " giving data treatment...\n"; + $para->[1]{'xml:space'} = 'preserve'; + push @$para, join "\n", splice(@$para, 2) if @$para > 3; + return; +} + + + + +########################################################################### + sub _traverse_treelet_bit { # for use only by the routine above my($self, $name) = splice @_,0,2; @@ -1382,13 +1609,18 @@ # "!" # ] - my($self, $para, $start_line) = @_; + my($self, $para, $start_line, $preserve_space) = @_; + my $treelet = ['~Top', {'start_line' => $start_line},]; - $para =~ s/\s+/ /g; # collapse and trim all whitespace first. - $para =~ s/ $//g; - $para =~ s/^ //g; + unless ($preserve_space || $self->{'preserve_whitespace'}) { + $para =~ s/\. /\.\xA0 /g if $self->{'fullstop_space_harden'}; + $para =~ s/\s+/ /g; # collapse and trim all whitespace first. + $para =~ s/ $//; + $para =~ s/^ //; + } + # Only apparent problem the above code is that N<< >> turns into # N<< >>. But then, word wrapping does that too! So don't do that! @@ -1396,26 +1628,57 @@ my @lineage = ($treelet); DEBUG > 4 and print "Paragraph:\n$para\n\n"; - - while($para =~ # Here begins our frightening tokenizer RE. + + # Here begins our frightening tokenizer RE. The following regex matches + # text in four main parts: + # + # * Start-codes. The first alternative matches C< or C<<, the latter + # followed by some whitespace. $1 will hold the entire start code + # (including any space following a multiple-angle-bracket delimiter), + # and $2 will hold only the additional brackets past the first in a + # multiple-bracket delimiter. length($2) + 1 will be the number of + # closing brackets we have to find. + # + # * Closing brackets. Match some amount of whitespace followed by + # multiple close brackets. The logic to see if this closes anything + # is down below. Note that in order to parse C<< >> correctly, we + # have to use look-behind (?<=\s\s), since the match of the starting + # code will have consumed the whitespace. + # + # * A single closing bracket, to close a simple code like C<>. + # + # * Something that isn't a start or end code. We have to be careful + # about accepting whitespace, since perlpodspec says that any whitespace + # before a multiple-bracket closing delimiter should be ignored. + # + while($para =~ m/\G (?: - ([A-Z]<(<+\ )?) # that's $1 and $2 for both kinds of start-codes + # Match starting codes, including the whitespace following a + # multiple-delimiter start code. $1 gets the whole start code and + # $2 gets all but one of the {2,}) # $3: end-codes of the type " >>", " >>>", etc. + # Match multiple-bracket end codes. $3 gets the whitespace that + # should be discarded before an end bracket but kept in other cases + # and $4 gets the end brackets themselves. + (\s+|(?<=\s\s))(>{2,}) | - (\ ?>) # $4: simple end-codes + (\s?>) # $5: simple end-codes | - ( # $5: stuff containing no start-codes or end-codes + ( # $6: stuff containing no start-codes or end-codes (?: - [^A-Z\ >]+ + [^A-Z\s>] | (?: [A-Z](?!<) ) | + # whitespace is ok, but we don't want to eat the whitespace before + # a multiple-bracket end code. + # NOTE: we may still have problems with e.g. S<< >> (?: - \ (?!>) + \s(?!\s*>{2,}) ) )+ ) @@ -1426,7 +1689,7 @@ if(defined $1) { if(defined $2) { DEBUG > 3 and print "Found complex start-text code \"$1\"\n"; - push @stack, length($1) - 1; + push @stack, length($2) + 1; # length of the necessary complex end-code string } else { DEBUG > 3 and print "Found simple start-text code \"$1\"\n"; @@ -1435,48 +1698,48 @@ push @lineage, [ substr($1,0,1), {}, ]; # new node object push @{ $lineage[-2] }, $lineage[-1]; - } elsif(defined $3) { - DEBUG > 3 and print "Found apparent complex end-text code \"$3\"\n"; + } elsif(defined $4) { + DEBUG > 3 and print "Found apparent complex end-text code \"$3$4\"\n"; # This is where it gets messy... if(! @stack) { # We saw " >>>>" but needed nothing. This is ALL just stuff then. DEBUG > 4 and print " But it's really just stuff.\n"; - push @{ $lineage[-1] }, $3; + push @{ $lineage[-1] }, $3, $4; next; } elsif(!$stack[-1]) { # We saw " >>>>" but needed only ">". Back pos up. DEBUG > 4 and print " And that's more than we needed to close simple.\n"; - push @{ $lineage[-1] }, ' '; # That was a for-real space, too. - pos($para) = pos($para) - length($3) + 2; - } elsif($stack[-1] == length($3)) { + push @{ $lineage[-1] }, $3; # That was a for-real space, too. + pos($para) = pos($para) - length($4) + 1; + } elsif($stack[-1] == length($4)) { # We found " >>>>", and it was exactly what we needed. Commonest case. DEBUG > 4 and print " And that's exactly what we needed to close complex.\n"; - } elsif($stack[-1] < length($3)) { + } elsif($stack[-1] < length($4)) { # We saw " >>>>" but needed only " >>". Back pos up. DEBUG > 4 and print " And that's more than we needed to close complex.\n"; - pos($para) = pos($para) - length($3) + $stack[-1]; + pos($para) = pos($para) - length($4) + $stack[-1]; } else { # We saw " >>>>" but needed " >>>>>>". So this is all just stuff! DEBUG > 4 and print " But it's really just stuff, because we needed more.\n"; - push @{ $lineage[-1] }, $3; + push @{ $lineage[-1] }, $3, $4; next; } #print "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n"; - + push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; # Keep the element from being childless pop @stack; pop @lineage; - } elsif(defined $4) { + } elsif(defined $5) { DEBUG > 3 and print "Found apparent simple end-text code \"$4\"\n"; if(@stack and ! $stack[-1]) { # We're indeed expecting a simple end-code DEBUG > 4 and print " It's indeed an end-code.\n"; - if(length($4) == 2) { # There was a space there: " >" + if(length($5) == 2) { # There was a space there: " >" push @{ $lineage[-1] }, ' '; } elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element push @{ $lineage[-1] }, ''; # keep it from being really childless @@ -1486,12 +1749,12 @@ pop @lineage; } else { DEBUG > 4 and print " It's just stuff.\n"; - push @{ $lineage[-1] }, $4; + push @{ $lineage[-1] }, $5; } - } elsif(defined $5) { - DEBUG > 3 and print "Found stuff \"$5\"\n"; - push @{ $lineage[-1] }, $5; + } elsif(defined $6) { + DEBUG > 3 and print "Found stuff \"$6\"\n"; + push @{ $lineage[-1] }, $6; } else { # should never ever ever ever happen @@ -1634,5 +1897,23 @@ } #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + +# A rather unsubtle method of blowing away all the state information +# from a parser object so it can be reused. Provided as a utility for +# backward compatibilty in Pod::Man, etc. but not recommended for +# general use. + +sub reinit { + my $self = shift; + foreach (qw(source_dead source_filename doc_has_started +start_of_pod_block content_seen last_was_blank paras curr_open +line_count pod_para_count in_pod ~tried_gen_errata errata errors_seen +Title)) { + + delete $self->{$_}; + } +} + +#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1; Index: lib/Pod/Escapes.pm =================================================================== --- lib/Pod/Escapes.pm (revision 35086) +++ lib/Pod/Escapes.pm (working copy) @@ -1,11 +1,11 @@ require 5; # The documentation is at the end. -# Time-stamp: "2002-08-27 19:58:02 MDT" +# Time-stamp: "2004-05-07 15:31:25 ADT" package Pod::Escapes; require Exporter; @ISA = ('Exporter'); -$VERSION = '1.03'; +$VERSION = '1.04'; @EXPORT_OK = qw( %Code2USASCII %Name2character @@ -44,7 +44,7 @@ # Convert to decimal: if($in =~ m/^(0[0-7]*)$/s ) { $in = oct $in; - } elsif($in =~ m/^0x([0-9a-fA-F]+)$/s ) { + } elsif($in =~ m/^0?x([0-9a-fA-F]+)$/s ) { $in = hex $1; } # else it's decimal, or named @@ -86,7 +86,7 @@ # Convert to decimal: if($in =~ m/^(0[0-7]*)$/s ) { $in = oct $in; - } elsif($in =~ m/^0x([0-9a-fA-F]+)$/s ) { + } elsif($in =~ m/^0?x([0-9a-fA-F]+)$/s ) { $in = hex $1; } # else it's decimal, or named @@ -649,7 +649,7 @@ =head1 COPYRIGHT AND DISCLAIMERS -Copyright (c) 2001 Sean M. Burke. All rights reserved. +Copyright (c) 2001-2004 Sean M. Burke. All rights reserved. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. @@ -685,7 +685,7 @@ xhtml-lat1.ent xhtml-special.ent )) { - open(IN, "<$dir$file") or die "can't read-open $dir$file: $!"; + open(IN, "<", "$dir$file") or die "can't read-open $dir$file: $!"; print "Reading $file...\n"; while() { if(m//) { --- /dev/null 2009-01-07 14:55:01.000000000 +1100 +++ lib/Pod/Simple/Progress.pm 2009-01-07 13:36:39.000000000 +1100 @@ -0,0 +1,93 @@ + +require 5; +package Pod::Simple::Progress; +$VERSION = "1.01"; +use strict; + +# Objects of this class are used for noting progress of an +# operation every so often. Messages delivered more often than that +# are suppressed. +# +# There's actually nothing in here that's specific to Pod processing; +# but it's ad-hoc enough that I'm not willing to give it a name that +# implies that it's generally useful, like "IO::Progress" or something. +# +# -- sburke +# +#-------------------------------------------------------------------------- + +sub new { + my($class,$delay) = @_; + my $self = bless {'quiet_until' => 1}, ref($class) || $class; + $self->to(*STDOUT{IO}); + $self->delay(defined($delay) ? $delay : 5); + return $self; +} + +sub copy { + my $orig = shift; + bless {%$orig, 'quiet_until' => 1}, ref($orig); +} +#-------------------------------------------------------------------------- + +sub reach { + my($self, $point, $note) = @_; + if( (my $now = time) >= $self->{'quiet_until'}) { + my $goal; + my $to = $self->{'to'}; + print $to join('', + ($self->{'quiet_until'} == 1) ? () : '... ', + (defined $point) ? ( + '#', + ($goal = $self->{'goal'}) ? ( + ' ' x (length($goal) - length($point)), + $point, '/', $goal, + ) : $point, + $note ? ': ' : (), + ) : (), + $note || '', + "\n" + ); + $self->{'quiet_until'} = $now + $self->{'delay'}; + } + return $self; +} + +#-------------------------------------------------------------------------- + +sub done { + my($self, $note) = @_; + $self->{'quiet_until'} = 1; + return $self->reach( undef, $note ); +} + +#-------------------------------------------------------------------------- +# Simple accessors: + +sub delay { + return $_[0]{'delay'} if @_ == 1; $_[0]{'delay'} = $_[1]; return $_[0] } +sub goal { + return $_[0]{'goal' } if @_ == 1; $_[0]{'goal' } = $_[1]; return $_[0] } +sub to { + return $_[0]{'to' } if @_ == 1; $_[0]{'to' } = $_[1]; return $_[0] } + +#-------------------------------------------------------------------------- + +unless(caller) { # Simple self-test: + my $p = __PACKAGE__->new->goal(5); + $p->reach(1, "Primus!"); + sleep 1; + $p->reach(2, "Secundus!"); + sleep 3; + $p->reach(3, "Tertius!"); + sleep 5; + $p->reach(4); + $p->reach(5, "Quintus!"); + sleep 1; + $p->done("All done"); +} + +#-------------------------------------------------------------------------- +1; +__END__ + --- /dev/null 2009-01-07 14:55:01.000000000 +1100 +++ lib/Pod/Simple/Search.pm 2009-01-07 13:41:31.000000000 +1100 @@ -0,0 +1,1016 @@ + +require 5.005; +package Pod::Simple::Search; +use strict; + +use vars qw($VERSION $MAX_VERSION_WITHIN $SLEEPY); +$VERSION = 3.04; ## Current version of this package + +BEGIN { *DEBUG = sub () {0} unless defined &DEBUG; } # set DEBUG level +use Carp (); + +$SLEEPY = 1 if !defined $SLEEPY and $^O =~ /mswin|mac/i; + # flag to occasionally sleep for $SLEEPY - 1 seconds. + +$MAX_VERSION_WITHIN ||= 60; + +############################################################################# + +#use diagnostics; +use File::Spec (); +use File::Basename qw( basename ); +use Config (); +use Cwd qw( cwd ); + +#========================================================================== +__PACKAGE__->_accessorize( # Make my dumb accessor methods + 'callback', 'progress', 'dir_prefix', 'inc', 'laborious', 'limit_glob', + 'limit_re', 'shadows', 'verbose', 'name2path', 'path2name', +); +#========================================================================== + +sub new { + my $class = shift; + my $self = bless {}, ref($class) || $class; + $self->init; + return $self; +} + +sub init { + my $self = shift; + $self->inc(1); + $self->verbose(DEBUG); + return $self; +} + +#-------------------------------------------------------------------------- + +sub survey { + my($self, @search_dirs) = @_; + $self = $self->new unless ref $self; # tolerate being a class method + + $self->_expand_inc( \@search_dirs ); + + + $self->{'_scan_count'} = 0; + $self->{'_dirs_visited'} = {}; + $self->path2name( {} ); + $self->name2path( {} ); + $self->limit_re( $self->_limit_glob_to_limit_re ) if $self->{'limit_glob'}; + my $cwd = cwd(); + my $verbose = $self->verbose; + local $_; # don't clobber the caller's $_ ! + + foreach my $try (@search_dirs) { + unless( File::Spec->file_name_is_absolute($try) ) { + # make path absolute + $try = File::Spec->catfile( $cwd ,$try); + } + # simplify path + $try = File::Spec->canonpath($try); + + my $start_in; + my $modname_prefix; + if($self->{'dir_prefix'}) { + $start_in = File::Spec->catdir( + $try, + grep length($_), split '[\\/:]+', $self->{'dir_prefix'} + ); + $modname_prefix = [grep length($_), split m{[:/\\]}, $self->{'dir_prefix'}]; + $verbose and print "Appending \"$self->{'dir_prefix'}\" to $try, ", + "giving $start_in (= @$modname_prefix)\n"; + } else { + $start_in = $try; + } + + if( $self->{'_dirs_visited'}{$start_in} ) { + $verbose and print "Directory '$start_in' already seen, skipping.\n"; + next; + } else { + $self->{'_dirs_visited'}{$start_in} = 1; + } + + unless(-e $start_in) { + $verbose and print "Skipping non-existent $start_in\n"; + next; + } + + my $closure = $self->_make_search_callback; + + if(-d $start_in) { + # Normal case: + $verbose and print "Beginning excursion under $start_in\n"; + $self->_recurse_dir( $start_in, $closure, $modname_prefix ); + $verbose and print "Back from excursion under $start_in\n\n"; + + } elsif(-f _) { + # A excursion consisting of just one file! + $_ = basename($start_in); + $verbose and print "Pondering $start_in ($_)\n"; + $closure->($start_in, $_, 0, []); + + } else { + $verbose and print "Skipping mysterious $start_in\n"; + } + } + $self->progress and $self->progress->done( + "Noted $$self{'_scan_count'} Pod files total"); + + return unless defined wantarray; # void + return $self->name2path unless wantarray; # scalar + return $self->name2path, $self->path2name; # list +} + + +#========================================================================== +sub _make_search_callback { + my $self = $_[0]; + + # Put the options in variables, for easy access + my( $laborious, $verbose, $shadows, $limit_re, $callback, $progress,$path2name,$name2path) = + map scalar($self->$_()), + qw(laborious verbose shadows limit_re callback progress path2name name2path); + + my($file, $shortname, $isdir, $modname_bits); + return sub { + ($file, $shortname, $isdir, $modname_bits) = @_; + + if($isdir) { # this never gets called on the startdir itself, just subdirs + + if( $self->{'_dirs_visited'}{$file} ) { + $verbose and print "Directory '$file' already seen, skipping.\n"; + return 'PRUNE'; + } + + print "Looking in dir $file\n" if $verbose; + + unless ($laborious) { # $laborious overrides pruning + if( m/^(\d+\.[\d_]{3,})\z/s + and do { my $x = $1; $x =~ tr/_//d; $x != $] } + ) { + $verbose and print "Perl $] version mismatch on $_, skipping.\n"; + return 'PRUNE'; + } + + if( m/^([A-Za-z][a-zA-Z0-9_]*)\z/s ) { + $verbose and print "$_ is a well-named module subdir. Looking....\n"; + } else { + $verbose and print "$_ is a fishy directory name. Skipping.\n"; + return 'PRUNE'; + } + } # end unless $laborious + + $self->{'_dirs_visited'}{$file} = 1; + return; # (not pruning); + } + + + # Make sure it's a file even worth even considering + if($laborious) { + unless( + m/\.(pod|pm|plx?)\z/i || -x _ and -T _ + # Note that the cheapest operation (the RE) is run first. + ) { + $verbose > 1 and print " Brushing off uninteresting $file\n"; + return; + } + } else { + unless( m/^[-_a-zA-Z0-9]+\.(?:pod|pm|plx?)\z/is ) { + $verbose > 1 and print " Brushing off oddly-named $file\n"; + return; + } + } + + $verbose and print "Considering item $file\n"; + my $name = $self->_path2modname( $file, $shortname, $modname_bits ); + $verbose > 0.01 and print " Nominating $file as $name\n"; + + if($limit_re and $name !~ m/$limit_re/i) { + $verbose and print "Shunning $name as not matching $limit_re\n"; + return; + } + + if( !$shadows and $name2path->{$name} ) { + $verbose and print "Not worth considering $file ", + "-- already saw $name as ", + join(' ', grep($path2name->{$_} eq $name, keys %$path2name)), "\n"; + return; + } + + # Put off until as late as possible the expense of + # actually reading the file: + if( m/\.pod\z/is ) { + # just assume it has pod, okay? + } else { + $progress and $progress->reach($self->{'_scan_count'}, "Scanning $file"); + return unless $self->contains_pod( $file ); + } + ++ $self->{'_scan_count'}; + + # Or finally take note of it: + if( $name2path->{$name} ) { + $verbose and print + "Duplicate POD found (shadowing?): $name ($file)\n", + " Already seen in ", + join(' ', grep($path2name->{$_} eq $name, keys %$path2name)), "\n"; + } else { + $name2path->{$name} = $file; # Noting just the first occurrence + } + $verbose and print " Noting $name = $file\n"; + if( $callback ) { + local $_ = $_; # insulate from changes, just in case + $callback->($file, $name); + } + $path2name->{$file} = $name; + return; + } +} + +#========================================================================== + +sub _path2modname { + my($self, $file, $shortname, $modname_bits) = @_; + + # this code simplifies the POD name for Perl modules: + # * remove "site_perl" + # * remove e.g. "i586-linux" (from 'archname') + # * remove e.g. 5.00503 + # * remove pod/ if followed by perl*.pod (e.g. in pod/perlfunc.pod) + # * dig into the file for case-preserved name if not already mixed case + + my @m = @$modname_bits; + my $x; + my $verbose = $self->verbose; + + # Shaving off leading naughty-bits + while(@m + and defined($x = lc( $m[0] )) + and( $x eq 'site_perl' + or($x eq 'pod' and @m == 1 and $shortname =~ m{^perl.*\.pod$}s ) + or $x =~ m{\\d+\\.z\\d+([_.]?\\d+)?} # if looks like a vernum + or $x eq lc( $Config::Config{'archname'} ) + )) { shift @m } + + my $name = join '::', @m, $shortname; + $self->_simplify_base($name); + + # On VMS, case-preserved document names can't be constructed from + # filenames, so try to extract them from the "=head1 NAME" tag in the + # file instead. + if ($^O eq 'VMS' && ($name eq lc($name) || $name eq uc($name))) { + open PODFILE, "<" ,"$file" or die "_path2modname: Can't open $file: $!"; + my $in_pod = 0; + my $in_name = 0; + my $line; + while ($line = ) { + chomp $line; + $in_pod = 1 if ($line =~ m/^=\w/); + $in_pod = 0 if ($line =~ m/^=cut/); + next unless $in_pod; # skip non-pod text + next if ($line =~ m/^\s*\z/); # and blank lines + next if ($in_pod && ($line =~ m/^X{'fs_recursion_maxdepth'} || 10; + my $verbose = $self->verbose; + + my $here_string = File::Spec->curdir; + my $up_string = File::Spec->updir; + $modname_bits ||= []; + + my $recursor; + $recursor = sub { + my($dir_long, $dir_bare) = @_; + if( @$modname_bits >= 10 ) { + $verbose and print "Too deep! [@$modname_bits]\n"; + return; + } + + unless(-d $dir_long) { + $verbose > 2 and print "But it's not a dir! $dir_long\n"; + return; + } + unless( opendir(INDIR, $dir_long) ) { + $verbose > 2 and print "Can't opendir $dir_long : $!\n"; + closedir(INDIR); + return + } + my @items = sort readdir(INDIR); + closedir(INDIR); + + push @$modname_bits, $dir_bare unless $dir_bare eq ''; + + my $i_full; + foreach my $i (@items) { + next if $i eq $here_string or $i eq $up_string or $i eq ''; + $i_full = File::Spec->catfile( $dir_long, $i ); + + if(!-r $i_full) { + $verbose and print "Skipping unreadable $i_full\n"; + + } elsif(-f $i_full) { + $_ = $i; + $callback->( $i_full, $i, 0, $modname_bits ); + + } elsif(-d _) { + $i =~ s/\.DIR\z//i if $^O eq 'VMS'; + $_ = $i; + my $rv = $callback->( $i_full, $i, 1, $modname_bits ) || ''; + + if($rv eq 'PRUNE') { + $verbose > 1 and print "OK, pruning"; + } else { + # Otherwise, recurse into it + $recursor->( File::Spec->catdir($dir_long, $i) , $i); + } + } else { + $verbose > 1 and print "Skipping oddity $i_full\n"; + } + } + pop @$modname_bits; + return; + };; + + local $_; + $recursor->($startdir, ''); + + undef $recursor; # allow it to be GC'd + + return; +} + + +#========================================================================== + +sub run { + # A function, useful in one-liners + + my $self = __PACKAGE__->new; + $self->limit_glob($ARGV[0]) if @ARGV; + $self->callback( sub { + my($file, $name) = @_; + my $version = ''; + + # Yes, I know we won't catch the version in like a File/Thing.pm + # if we see File/Thing.pod first. That's just the way the + # cookie crumbles. -- SMB + + if($file =~ m/\.pod$/i) { + # Don't bother looking for $VERSION in .pod files + DEBUG and print "Not looking for \$VERSION in .pod $file\n"; + } elsif( !open(INPOD, "<", $file) ) { + DEBUG and print "Couldn't open $file: $!\n"; + close(INPOD); + } else { + # Sane case: file is readable + my $lines = 0; + while() { + last if $lines++ > $MAX_VERSION_WITHIN; # some degree of sanity + if( s/^\s*\$VERSION\s*=\s*//s and m/\d/ ) { + DEBUG and print "Found version line (#$lines): $_"; + s/\s*\#.*//s; + s/\;\s*$//s; + s/\s+$//s; + s/\t+/ /s; # nix tabs + # Optimize the most common cases: + $_ = "v$1" + if m{^v?["']?([0-9_]+(\.[0-9_]+)*)["']?$}s + # like in $VERSION = "3.14159"; + or m{\$Revision:\s*([0-9_]+(?:\.[0-9_]+)*)\s*\$}s + # like in sprintf("%d.%02d", q$Revision: 4.13 $ =~ /(\d+)\.(\d+)/); + ; + + # Like in sprintf("%d.%s", map {s/_//g; $_} q$Name: release-0_55-public $ =~ /-(\d+)_([\d_]+)/) + $_ = sprintf("v%d.%s", + map {s/_//g; $_} + $1 =~ m/-(\d+)_([\d_]+)/) # snare just the numeric part + if m{\$Name:\s*([^\$]+)\$}s + ; + $version = $_; + DEBUG and print "Noting $version as version\n"; + last; + } + } + close(INPOD); + } + print "$name\t$version\t$file\n"; + return; + # End of callback! + }); + + $self->survey; +} + +#========================================================================== + +sub simplify_name { + my($self, $str) = @_; + + # Remove all path components + # XXX Why not just use basename()? -- SMB + + if ($^O eq 'MacOS') { $str =~ s{^.*:+}{}s } + else { $str =~ s{^.*/+}{}s } + + $self->_simplify_base($str); + return $str; +} + +#========================================================================== + +sub _simplify_base { # Internal method only + + # strip Perl's own extensions + $_[1] =~ s/\.(pod|pm|plx?)\z//i; + + # strip meaningless extensions on Win32 and OS/2 + $_[1] =~ s/\.(bat|exe|cmd)\z//i if $^O =~ /mswin|os2/i; + + # strip meaningless extensions on VMS + $_[1] =~ s/\.(com)\z//i if $^O eq 'VMS'; + + return; +} + +#========================================================================== + +sub _expand_inc { + my($self, $search_dirs) = @_; + + return unless $self->{'inc'}; + + if ($^O eq 'MacOS') { + push @$search_dirs, + grep $_ ne File::Spec->curdir, $self->_mac_whammy(@INC); + # Any other OSs need custom handling here? + } else { + push @$search_dirs, grep $_ ne File::Spec->curdir, @INC; + } + + $self->{'laborious'} = 0; # Since inc said to use INC + return; +} + +#========================================================================== + +sub _mac_whammy { # Tolerate '.', './some_dir' and '(../)+some_dir' on Mac OS + my @them; + (undef,@them) = @_; + for $_ (@them) { + if ( $_ eq '.' ) { + $_ = ':'; + } elsif ( $_ =~ s|^((?:\.\./)+)|':' x (length($1)/3)|e ) { + $_ = ':'. $_; + } else { + $_ =~ s|^\./|:|; + } + } + return @them; +} + +#========================================================================== + +sub _limit_glob_to_limit_re { + my $self = $_[0]; + my $limit_glob = $self->{'limit_glob'} || return; + + my $limit_re = '^' . quotemeta($limit_glob) . '$'; + $limit_re =~ s/\\\?/./g; # glob "?" => "." + $limit_re =~ s/\\\*/.*?/g; # glob "*" => ".*?" + $limit_re =~ s/\.\*\?\$$//s; # final glob "*" => ".*?$" => "" + + $self->{'verbose'} and print "Turning limit_glob $limit_glob into re $limit_re\n"; + + # A common optimization: + if(!exists($self->{'dir_prefix'}) + and $limit_glob =~ m/^(?:\w+\:\:)+/s # like "File::*" or "File::Thing*" + # Optimize for sane and common cases (but not things like "*::File") + ) { + $self->{'dir_prefix'} = join "::", $limit_glob =~ m/^(?:\w+::)+/sg; + $self->{'verbose'} and print " and setting dir_prefix to $self->{'dir_prefix'}\n"; + } + + return $limit_re; +} + +#========================================================================== + +# contribution mostly from Tim Jenness + +sub find { + my($self, $pod, @search_dirs) = @_; + $self = $self->new unless ref $self; # tolerate being a class method + + # Check usage + Carp::carp 'Usage: \$self->find($podname, ...)' + unless defined $pod and length $pod; + + my $verbose = $self->verbose; + + # Split on :: and then join the name together using File::Spec + my @parts = split /::/, $pod; + $verbose and print "Chomping {$pod} => {@parts}\n"; + + #@search_dirs = File::Spec->curdir unless @search_dirs; + + if( $self->inc ) { + if( $^O eq 'MacOS' ) { + push @search_dirs, $self->_mac_whammy(@INC); + } else { + push @search_dirs, @INC; + } + + # Add location of pod documentation for perl man pages (eg perlfunc) + # This is a pod directory in the private install tree + #my $perlpoddir = File::Spec->catdir($Config::Config{'installprivlib'}, + # 'pod'); + #push (@search_dirs, $perlpoddir) + # if -d $perlpoddir; + + # Add location of binaries such as pod2text: + push @search_dirs, $Config::Config{'scriptdir'}; + # and if that's undef or q{} or nonexistent, we just ignore it later + } + + my %seen_dir; + Dir: + foreach my $dir ( @search_dirs ) { + next unless defined $dir and length $dir; + next if $seen_dir{$dir}; + $seen_dir{$dir} = 1; + unless(-d $dir) { + print "Directory $dir does not exist\n" if $verbose; + next Dir; + } + + print "Looking in directory $dir\n" if $verbose; + my $fullname = File::Spec->catfile( $dir, @parts ); + print "Filename is now $fullname\n" if $verbose; + + foreach my $ext ('', '.pod', '.pm', '.pl') { # possible extensions + my $fullext = $fullname . $ext; + if( -f $fullext and $self->contains_pod( $fullext ) ){ + print "FOUND: $fullext\n" if $verbose; + return $fullext; + } + } + my $subdir = File::Spec->catdir($dir,'pod'); + if(-d $subdir) { # slip in the ./pod dir too + $verbose and print "Noticing $subdir and stopping there...\n"; + $dir = $subdir; + redo Dir; + } + } + + return undef; +} + +#========================================================================== + +sub contains_pod { + my($self, $file) = @_; + my $verbose = $self->{'verbose'}; + + # check for one line of POD + $verbose > 1 and print " Scanning $file for pod...\n"; + unless( open(MAYBEPOD,"<", "$file") ) { + print "Error: $file is unreadable: $!\n"; + return undef; + } + + sleep($SLEEPY - 1) if $SLEEPY; + # avoid totally hogging the processor on OSs with poor process control + + local $_; + while( ) { + if(m/^=(head\d|pod|over|item)\b/s) { + close(MAYBEPOD) || die "Bizarre error closing $file: $!\nAborting"; + chomp; + $verbose > 1 and print " Found some pod ($_) in $file\n"; + return 1; + } + } + close(MAYBEPOD) || die "Bizarre error closing $file: $!\nAborting"; + $verbose > 1 and print " No POD in $file, skipping.\n"; + return 0; +} + +#========================================================================== + +sub _accessorize { # A simple-minded method-maker + shift; + no strict 'refs'; + foreach my $attrname (@_) { + *{caller() . '::' . $attrname} = sub { + use strict; + $Carp::CarpLevel = 1, Carp::croak( + "Accessor usage: \$obj->$attrname() or \$obj->$attrname(\$new_value)" + ) unless (@_ == 1 or @_ == 2) and ref $_[0]; + + # Read access: + return $_[0]->{$attrname} if @_ == 1; + + # Write access: + $_[0]->{$attrname} = $_[1]; + return $_[0]; # RETURNS MYSELF! + }; + } + # Ya know, they say accessories make the ensemble! + return; +} + +#========================================================================== +sub _state_as_string { + my $self = $_[0]; + return '' unless ref $self; + my @out = "{\n # State of $self ...\n"; + foreach my $k (sort keys %$self) { + push @out, " ", _esc($k), " => ", _esc($self->{$k}), ",\n"; + } + push @out, "}\n"; + my $x = join '', @out; + $x =~ s/^/#/mg; + return $x; +} + +sub _esc { + my $in = $_[0]; + return 'undef' unless defined $in; + $in =~ + s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])> + <'\\x'.(unpack("H2",$1))>eg; + return qq{"$in"}; +} + +#========================================================================== + +run() unless caller; # run if "perl whatever/Search.pm" + +1; + +#========================================================================== + +__END__ + + +=head1 NAME + +Pod::Simple::Search - find POD documents in directory trees + +=head1 SYNOPSIS + + use Pod::Simple::Search; + my $name2path = Pod::Simple::Search->new->limit_glob('LWP::*')->survey; + print "Looky see what I found: ", + join(' ', sort keys %$name2path), "\n"; + + print "LWPUA docs = ", + Pod::Simple::Search->new->find('LWP::UserAgent') || "?", + "\n"; + +=head1 DESCRIPTION + +B is a class that you use for running searches +for Pod files. An object of this class has several attributes +(mostly options for controlling search options), and some methods +for searching based on those attributes. + +The way to use this class is to make a new object of this class, +set any options, and then call one of the search options +(probably C or C). The sections below discuss the +syntaxes for doing all that. + + +=head1 CONSTRUCTOR + +This class provides the one constructor, called C. +It takes no parameters: + + use Pod::Simple::Search; + my $search = Pod::Simple::Search->new; + +=head1 ACCESSORS + +This class defines several methods for setting (and, occasionally, +reading) the contents of an object. With two exceptions (discussed at +the end of this section), these attributes are just for controlling the +way searches are carried out. + +Note that each of these return C<$self> when you call them as +C<< $self->I >>. That's so that you can chain +together set-attribute calls like this: + + my $name2path = + Pod::Simple::Search->new + -> inc(0) -> verbose(1) -> callback(\&blab) + ->survey(@there); + +...which works exactly as if you'd done this: + + my $search = Pod::Simple::Search->new; + $search->inc(0); + $search->verbose(1); + $search->callback(\&blab); + my $name2path = $search->survey(@there); + +=over + +=item $search->inc( I ); + +This attribute, if set to a true value, means that searches should +implicitly add perl's I<@INC> paths. This +automatically considers paths specified in the C environment +as this is prepended to I<@INC> by the Perl interpreter itself. +This attribute's default value is B. If you want to search +only specific directories, set $self->inc(0) before calling +$inc->survey or $inc->find. + + +=item $search->verbose( I ); + +This attribute, if set to a nonzero positive value, will make searches output +(via C) notes about what they're doing as they do it. +This option may be useful for debugging a pod-related module. +This attribute's default value is zero, meaning that no C messages +are produced. (Setting verbose to 1 turns on some messages, and setting +it to 2 turns on even more messages, i.e., makes the following search(es) +even more verbose than 1 would make them.) + + +=item $search->limit_glob( I ); + +This option means that you want to limit the results just to items whose +podnames match the given glob/wildcard expression. For example, you +might limit your search to just "LWP::*", to search only for modules +starting with "LWP::*" (but not including the module "LWP" itself); or +you might limit your search to "LW*" to see only modules whose (full) +names begin with "LW"; or you might search for "*Find*" to search for +all modules with "Find" somewhere in their full name. (You can also use +"?" in a glob expression; so "DB?" will match "DBI" and "DBD".) + + +=item $search->callback( I<\&some_routine> ); + +This attribute means that every time this search sees a matching +Pod file, it should call this callback routine. The routine is called +with two parameters: the current file's filespec, and its pod name. +(For example: C<("/etc/perljunk/File/Crunk.pm", "File::Crunk")> would +be in C<@_>.) + +The callback routine's return value is not used for anything. + +This attribute's default value is false, meaning that no callback +is called. + +=item $search->laborious( I ); + +Unless you set this attribute to a true value, Pod::Search will +apply Perl-specific heuristics to find the correct module PODs quickly. +This attribute's default value is false. You won't normally need +to set this to true. + +Specifically: Turning on this option will disable the heuristics for +seeing only files with Perl-like extensions, omitting subdirectories +that are numeric but do I match the current Perl interpreter's +version ID, suppressing F as a module hierarchy name, etc. + + +=item $search->shadows( I ); + +Unless you set this attribute to a true value, Pod::Simple::Search will +consider only the first file of a given modulename as it looks thru the +specified directories; that is, with this option off, if +Pod::Simple::Search has seen a C already in this +search, then it won't bother looking at a C +later on in that search, because that file is merely a "shadow". But if +you turn on C<< $self->shadows(1) >>, then these "shadow" files are +inspected too, and are noted in the pathname2podname return hash. + +This attribute's default value is false; and normally you won't +need to turn it on. + + +=item $search->limit_re( I ); + +Setting this attribute (to a value that's a regexp) means that you want +to limit the results just to items whose podnames match the given +regexp. Normally this option is not needed, and the more efficient +C attribute is used instead. + + +=item $search->dir_prefix( I ); + +Setting this attribute to a string value means that the searches should +begin in the specified subdirectory name (like "Pod" or "File::Find", +also expressable as "File/Find"). For example, the search option +C<< $search->limit_glob("File::Find::R*") >> +is the same as the combination of the search options +C<< $search->limit_re("^File::Find::R") -> dir_prefix("File::Find") >>. + +Normally you don't need to know about the C option, but I +include it in case it might prove useful for someone somewhere. + +(Implementationally, searching with limit_glob ends up setting limit_re +and usually dir_prefix.) + + +=item $search->progress( I ); + +If you set a value for this attribute, the value is expected +to be an object (probably of a class that you define) that has a +C method and a C method. This is meant for reporting +progress during the search, if you don't want to use a simple +callback. + +Normally you don't need to know about the C option, but I +include it in case it might prove useful for someone somewhere. + +While a search is in progress, the progress object's C and +C methods are called like this: + + # Every time a file is being scanned for pod: + $progress->reach($count, "Scanning $file"); ++$count; + + # And then at the end of the search: + $progress->done("Noted $count Pod files total"); + +Internally, we often set this to an object of class +Pod::Simple::Progress. That class is probably undocumented, +but you may wish to look at its source. + + +=item $name2path = $self->name2path; + +This attribute is not a search parameter, but is used to report the +result of C method, as discussed in the next section. + +=item $path2name = $self->path2name; + +This attribute is not a search parameter, but is used to report the +result of C method, as discussed in the next section. + +=back + +=head1 MAIN SEARCH METHODS + +Once you've actually set any options you want (if any), you can go +ahead and use the following methods to search for Pod files +in particular ways. + + +=head2 C<< $search->survey( @directories ) >> + +The method C searches for POD documents in a given set of +files and/or directories. This runs the search according to the various +options set by the accessors above. (For example, if the C attribute +is on, as it is by default, then the perl @INC directories are implicitly +added to the list of directories (if any) that you specify.) + +The return value of C is two hashes: + +=over + +=item C + +A hash that maps from each pod-name to the filespec (like +"Stuff::Thing" => "/whatever/plib/Stuff/Thing.pm") + +=item C + +A hash that maps from each Pod filespec to its pod-name (like +"/whatever/plib/Stuff/Thing.pm" => "Stuff::Thing") + +=back + +Besides saving these hashes as the hashref attributes +C and C, calling this function also returns +these hashrefs. In list context, the return value of +C<< $search->survey >> is the list C<(\%name2path, \%path2name)>. +In scalar context, the return value is C<\%name2path>. +Or you can just call this in void context. + +Regardless of calling context, calling C saves +its results in its C and C attributes. + +E.g., when searching in F<$HOME/perl5lib>, the file +F<$HOME/perl5lib/MyModule.pm> would get the POD name I, +whereas F<$HOME/perl5lib/Myclass/Subclass.pm> would be +I. The name information can be used for POD +translators. + +Only text files containing at least one valid POD command are found. + +In verbose mode, a warning is printed if shadows are found (i.e., more +than one POD file with the same POD name is found, e.g. F in +different directories). This usually indicates duplicate occurrences of +modules in the I<@INC> search path, which is occasionally inadvertent +(but is often simply a case of a user's path dir having a more recent +version than the system's general path dirs in general.) + +The options to this argument is a list of either directories that are +searched recursively, or files. (Usually you wouldn't specify files, +but just dirs.) Or you can just specify an empty-list, as in +$name2path; with the +C option on, as it is by default, teh + +The POD names of files are the plain basenames with any Perl-like +extension (.pm, .pl, .pod) stripped, and path separators replaced by +C<::>'s. + +Calling Pod::Simple::Search->search(...) is short for +Pod::Simple::Search->new->search(...). That is, a throwaway object +with default attribute values is used. + + +=head2 C<< $search->simplify_name( $str ) >> + +The method B is equivalent to B, but also +strips Perl-like extensions (.pm, .pl, .pod) and extensions like +F<.bat>, F<.cmd> on Win32 and OS/2, or F<.com> on VMS, respectively. + + +=head2 C<< $search->find( $pod ) >> + +=head2 C<< $search->find( $pod, @search_dirs ) >> + +Returns the location of a Pod file, given a Pod/module/script name +(like "Foo::Bar" or "perlvar" or "perldoc"), and an idea of +what files/directories to look in. +It searches according to the various options set by the accessors above. +(For example, if the C attribute is on, as it is by default, then +the perl @INC directories are implicitly added to the list of +directories (if any) that you specify.) + +This returns the full path of the first occurrence to the file. +Package names (eg 'A::B') are automatically converted to directory +names in the selected directory. Additionally, '.pm', '.pl' and '.pod' +are automatically appended to the search as required. +(So, for example, under Unix, "A::B" is converted to "somedir/A/B.pm", +"somedir/A/B.pod", or "somedir/A/B.pl", as appropriate.) + +If no such Pod file is found, this method returns undef. + +If any of the given search directories contains a F subdirectory, +then it is searched. (That's how we manage to find F, +for example, which is usually in F in most Perl dists.) + +The C and C attributes influence the behavior of this +search; notably, C, if true, adds @INC I to the list of directories to search. + +It is common to simply say C<< $filename = Pod::Simple::Search-> new +->find("perlvar") >> so that just the @INC (well, and scriptdir) +directories are searched. (This happens because the C +attribute is true by default.) + +Calling Pod::Simple::Search->find(...) is short for +Pod::Simple::Search->new->find(...). That is, a throwaway object +with default attribute values is used. + + +=head2 C<< $self->contains_pod( $file ) >> + +Returns true if the supplied filename (not POD module) contains some Pod +documentation. + + +=head1 AUTHOR + +Sean M. Burke Esburke@cpan.orgE +borrowed code from +Marek Rouchal's Pod::Find, which in turn +heavily borrowed code from Nick Ing-Simmons' PodToHtml. + +Tim Jenness Et.jenness@jach.hawaii.eduE provided +C and C to Pod::Find. + +=head1 SEE ALSO + +L, L + +=cut + --- /dev/null 2009-01-07 14:55:01.000000000 +1100 +++ lib/Pod/Simple/HTMLBatch.pm 2009-01-07 13:40:15.000000000 +1100 @@ -0,0 +1,1342 @@ + +require 5; +package Pod::Simple::HTMLBatch; +use strict; +use vars qw( $VERSION $HTML_RENDER_CLASS $HTML_EXTENSION + $CSS $JAVASCRIPT $SLEEPY $SEARCH_CLASS @ISA +); +$VERSION = '3.02'; +@ISA = (); # Yup, we're NOT a subclass of Pod::Simple::HTML! + +# TODO: nocontents stylesheets. Strike some of the color variations? + +use Pod::Simple::HTML (); +BEGIN {*esc = \&Pod::Simple::HTML::esc } +use File::Spec (); +use UNIVERSAL (); + # "Isn't the Universe an amazing place? I wouldn't live anywhere else!" + +use Pod::Simple::Search; +$SEARCH_CLASS ||= 'Pod::Simple::Search'; + +BEGIN { + if(defined &DEBUG) { } # no-op + elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG } + else { *DEBUG = sub () {0}; } +} + +$SLEEPY = 1 if !defined $SLEEPY and $^O =~ /mswin|mac/i; +# flag to occasionally sleep for $SLEEPY - 1 seconds. + +$HTML_RENDER_CLASS ||= "Pod::Simple::HTML"; + +# +# Methods beginning with "_" are particularly internal and possibly ugly. +# + +Pod::Simple::_accessorize( __PACKAGE__, + 'verbose', # how verbose to be during batch conversion + 'html_render_class', # what class to use to render + 'contents_file', # If set, should be the name of a file (in current directory) + # to write the list of all modules to + 'index', # will set $htmlpage->index(...) to this (true or false) + 'progress', # progress object + 'contents_page_start', 'contents_page_end', + + 'css_flurry', '_css_wad', 'javascript_flurry', '_javascript_wad', + 'no_contents_links', # set to true to suppress automatic adding of << links. + '_contents', +); + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +# Just so we can run from the command line more easily +sub go { + @ARGV == 2 or die sprintf( + "Usage: perl -M%s -e %s:go indirs outdir\n (or use \"\@INC\" for indirs)\n", + __PACKAGE__, __PACKAGE__, + ); + + if(defined($ARGV[1]) and length($ARGV[1])) { + my $d = $ARGV[1]; + -e $d or die "I see no output directory named \"$d\"\nAborting"; + -d $d or die "But \"$d\" isn't a directory!\nAborting"; + -w $d or die "Directory \"$d\" isn't writeable!\nAborting"; + } + + __PACKAGE__->batch_convert(@ARGV); +} +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + +sub new { + my $new = bless {}, ref($_[0]) || $_[0]; + $new->html_render_class($HTML_RENDER_CLASS); + $new->verbose(1 + DEBUG); + $new->_contents([]); + + $new->index(1); + + $new-> _css_wad([]); $new->css_flurry(1); + $new->_javascript_wad([]); $new->javascript_flurry(1); + + $new->contents_file( + 'index' . ($HTML_EXTENSION || $Pod::Simple::HTML::HTML_EXTENSION) + ); + + $new->contents_page_start( join "\n", grep $_, + $Pod::Simple::HTML::Doctype_decl, + "", + "Perl Documentation", + $Pod::Simple::HTML::Content_decl, + "", + "\n\n

Perl Documentation

\n" + ); # override if you need a different title + + + $new->contents_page_end( sprintf( + "\n\n

Generated by %s v%s under Perl v%s\n
At %s GMT, which is %s local time.

\n\n\n", + esc( + ref($new), + eval {$new->VERSION} || $VERSION, + $], scalar(gmtime), scalar(localtime), + ))); + + return $new; +} + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +sub muse { + my $self = shift; + if($self->verbose) { + print 'T+', int(time() - $self->{'_batch_start_time'}), "s: ", @_, "\n"; + } + return 1; +} + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +sub batch_convert { + my($self, $dirs, $outdir) = @_; + $self ||= __PACKAGE__; # tolerate being called as an optionless function + $self = $self->new unless ref $self; # tolerate being used as a class method + + if(!defined($dirs) or $dirs eq '' or $dirs eq '@INC' ) { + $dirs = ''; + } elsif(ref $dirs) { + # OK, it's an explicit set of dirs to scan, specified as an arrayref. + } else { + # OK, it's an explicit set of dirs to scan, specified as a + # string like "/thing:/also:/whatever/perl" (":"-delim, as usual) + # or, under MSWin, like "c:/thing;d:/also;c:/whatever/perl" (";"-delim!) + require Config; + my $ps = quotemeta( $Config::Config{'path_sep'} || ":" ); + $dirs = [ grep length($_), split qr/$ps/, $dirs ]; + } + + $outdir = $self->filespecsys->curdir + unless defined $outdir and length $outdir; + + $self->_batch_convert_main($dirs, $outdir); +} + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +sub _batch_convert_main { + my($self, $dirs, $outdir) = @_; + # $dirs is either false, or an arrayref. + # $outdir is a pathspec. + + $self->{'_batch_start_time'} ||= time(); + + $self->muse( "= ", scalar(localtime) ); + $self->muse( "Starting batch conversion to \"$outdir\"" ); + + my $progress = $self->progress; + if(!$progress and $self->verbose > 0 and $self->verbose() <= 5) { + require Pod::Simple::Progress; + $progress = Pod::Simple::Progress->new( + ($self->verbose < 2) ? () # Default omission-delay + : ($self->verbose == 2) ? 1 # Reduce the omission-delay + : 0 # Eliminate the omission-delay + ); + $self->progress($progress); + } + + if($dirs) { + $self->muse(scalar(@$dirs), " dirs to scan: @$dirs"); + } else { + $self->muse("Scanning \@INC. This could take a minute or two."); + } + my $mod2path = $self->find_all_pods($dirs ? $dirs : ()); + $self->muse("Done scanning."); + + my $total = keys %$mod2path; + unless($total) { + $self->muse("No pod found. Aborting batch conversion.\n"); + return $self; + } + + $progress and $progress->goal($total); + $self->muse("Now converting pod files to HTML.", + ($total > 25) ? " This will take a while more." : () + ); + + $self->_spray_css( $outdir ); + $self->_spray_javascript( $outdir ); + + $self->_do_all_batch_conversions($mod2path, $outdir); + + $progress and $progress->done(sprintf ( + "Done converting %d files.", $self->{"__batch_conv_page_count"} + )); + return $self->_batch_convert_finish($outdir); + return $self; +} + + +sub _do_all_batch_conversions { + my($self, $mod2path, $outdir) = @_; + $self->{"__batch_conv_page_count"} = 0; + + foreach my $module (sort {lc($a) cmp lc($b)} keys %$mod2path) { + $self->_do_one_batch_conversion($module, $mod2path, $outdir); + sleep($SLEEPY - 1) if $SLEEPY; + } + + return; +} + +sub _batch_convert_finish { + my($self, $outdir) = @_; + $self->write_contents_file($outdir); + $self->muse("Done with batch conversion. $$self{'__batch_conv_page_count'} files done."); + $self->muse( "= ", scalar(localtime) ); + $self->progress and $self->progress->done("All done!"); + return; +} + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +sub _do_one_batch_conversion { + my($self, $module, $mod2path, $outdir, $outfile) = @_; + + my $retval; + my $total = scalar keys %$mod2path; + my $infile = $mod2path->{$module}; + my @namelets = grep m/\S/, split "::", $module; + # this can stick around in the contents LoL + my $depth = scalar @namelets; + die "Contentless thingie?! $module $infile" unless @namelets; #sanity + + $outfile ||= do { + my @n = @namelets; + $n[-1] .= $HTML_EXTENSION || $Pod::Simple::HTML::HTML_EXTENSION; + $self->filespecsys->catfile( $outdir, @n ); + }; + + my $progress = $self->progress; + + my $page = $self->html_render_class->new; + if(DEBUG > 5) { + $self->muse($self->{"__batch_conv_page_count"} + 1, "/$total: ", + ref($page), " render ($depth) $module => $outfile"); + } elsif(DEBUG > 2) { + $self->muse($self->{"__batch_conv_page_count"} + 1, "/$total: $module => $outfile") + } + + # Give each class a chance to init the converter: + + $page->batch_mode_page_object_init($self, $module, $infile, $outfile, $depth) + if $page->can('batch_mode_page_object_init'); + $self->batch_mode_page_object_init($page, $module, $infile, $outfile, $depth) + if $self->can('batch_mode_page_object_init'); + + # Now get busy... + $self->makepath($outdir => \@namelets); + + $progress and $progress->reach($self->{"__batch_conv_page_count"}, "Rendering $module"); + + if( $retval = $page->parse_from_file($infile, $outfile) ) { + ++ $self->{"__batch_conv_page_count"} ; + $self->note_for_contents_file( \@namelets, $infile, $outfile ); + } else { + $self->muse("Odd, parse_from_file(\"$infile\", \"$outfile\") returned false."); + } + + $page->batch_mode_page_object_kill($self, $module, $infile, $outfile, $depth) + if $page->can('batch_mode_page_object_kill'); + # The following isn't a typo. Note that it switches $self and $page. + $self->batch_mode_page_object_kill($page, $module, $infile, $outfile, $depth) + if $self->can('batch_mode_page_object_kill'); + + DEBUG > 4 and printf "%s %sb < $infile %s %sb\n", + $outfile, -s $outfile, $infile, -s $infile + ; + + undef($page); + return $retval; +} + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +sub filespecsys { $_[0]{'_filespecsys'} || 'File::Spec' } + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +sub note_for_contents_file { + my($self, $namelets, $infile, $outfile) = @_; + + # I think the infile and outfile parts are never used. -- SMB + # But it's handy to have them around for debugging. + + if( $self->contents_file ) { + my $c = $self->_contents(); + push @$c, + [ join("::", @$namelets), $infile, $outfile, $namelets ] + # 0 1 2 3 + ; + DEBUG > 3 and print "Noting @$c[-1]\n"; + } + return; +} + +#_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_- + +sub write_contents_file { + my($self, $outdir) = @_; + my $outfile = $self->_contents_filespec($outdir) || return; + + $self->muse("Preparing list of modules for ToC"); + + my($toplevel, # maps toplevelbit => [all submodules] + $toplevel_form_freq, # ends up being 'foo' => 'Foo' + ) = $self->_prep_contents_breakdown; + + my $Contents = eval { $self->_wopen($outfile) }; + if( $Contents ) { + $self->muse( "Writing contents file $outfile" ); + } else { + warn "Couldn't write-open contents file $outfile: $!\nAbort writing to $outfile at all"; + return; + } + + $self->_write_contents_start( $Contents, $outfile, ); + $self->_write_contents_middle( $Contents, $outfile, $toplevel, $toplevel_form_freq ); + $self->_write_contents_end( $Contents, $outfile, ); + return $outfile; +} + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +sub _write_contents_start { + my($self, $Contents, $outfile) = @_; + my $starter = $self->contents_page_start || ''; + + { + my $css_wad = $self->_css_wad_to_markup(1); + if( $css_wad ) { + $starter =~ s{()}{\n$css_wad\n$1}i; # otherwise nevermind + } + + my $javascript_wad = $self->_javascript_wad_to_markup(1); + if( $javascript_wad ) { + $starter =~ s{()}{\n$javascript_wad\n$1}i; # otherwise nevermind + } + } + + unless(print $Contents $starter, "
\n", + $self->contents_page_end || '', + ) { + warn "Couldn't write to $outfile: $!"; + } + close($Contents) or warn "Couldn't close $outfile: $!"; + return 1; +} + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +sub _prep_contents_breakdown { + my($self) = @_; + my $contents = $self->_contents; + my %toplevel; # maps lctoplevelbit => [all submodules] + my %toplevel_form_freq; # ends up being 'foo' => 'Foo' + # (mapping anycase forms to most freq form) + + foreach my $entry (@$contents) { + my $toplevel = + $entry->[0] =~ m/^perl\w*$/ ? 'perl_core_docs' + # group all the perlwhatever docs together + : $entry->[3][0] # normal case + ; + ++$toplevel_form_freq{ lc $toplevel }{ $toplevel }; + push @{ $toplevel{ lc $toplevel } }, $entry; + push @$entry, lc($entry->[0]); # add a sort-order key to the end + } + + foreach my $toplevel (sort keys %toplevel) { + my $fgroup = $toplevel_form_freq{$toplevel}; + $toplevel_form_freq{$toplevel} = + ( + sort { $fgroup->{$b} <=> $fgroup->{$a} or $a cmp $b } + keys %$fgroup + # This hash is extremely unlikely to have more than 4 members, so this + # sort isn't so very wasteful + )[0]; + } + + return(\%toplevel, \%toplevel_form_freq) if wantarray; + return \%toplevel; +} + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +sub _contents_filespec { + my($self, $outdir) = @_; + my $outfile = $self->contents_file; + return unless $outfile; + return $self->filespecsys->catfile( $outdir, $outfile ); +} + +#_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_- + +sub makepath { + my($self, $outdir, $namelets) = @_; + return unless @$namelets > 1; + for my $i (0 .. ($#$namelets - 1)) { + my $dir = $self->filespecsys->catdir( $outdir, @$namelets[0 .. $i] ); + if(-e $dir) { + die "$dir exists but not as a directory!?" unless -d $dir; + next; + } + DEBUG > 3 and print " Making $dir\n"; + mkdir $dir, 0777 + or die "Can't mkdir $dir: $!\nAborting" + ; + } + return; +} + +#_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_- + +sub batch_mode_page_object_init { + my $self = shift; + my($page, $module, $infile, $outfile, $depth) = @_; + + # TODO: any further options to percolate onto this new object here? + + $page->default_title($module); + $page->index( $self->index ); + + $page->html_css( $self-> _css_wad_to_markup($depth) ); + $page->html_javascript( $self->_javascript_wad_to_markup($depth) ); + + $self->add_header_backlink($page, $module, $infile, $outfile, $depth); + $self->add_footer_backlink($page, $module, $infile, $outfile, $depth); + + + return $self; +} + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +sub add_header_backlink { + my $self = shift; + return if $self->no_contents_links; + my($page, $module, $infile, $outfile, $depth) = @_; + $page->html_header_after_title( join '', + $page->html_header_after_title || '', + + qq[

<<

\n], + ) + if $self->contents_file + ; + return; +} + +sub add_footer_backlink { + my $self = shift; + return if $self->no_contents_links; + my($page, $module, $infile, $outfile, $depth) = @_; + $page->html_footer( join '', + qq[

<<

\n], + + $page->html_footer || '', + ) + if $self->contents_file + ; + return; +} + +sub url_up_to_contents { + my($self, $depth) = @_; + --$depth; + return join '/', ('..') x $depth, esc($self->contents_file); +} + +#_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_- + +sub find_all_pods { + my($self, $dirs) = @_; + # You can override find_all_pods in a subclass if you want to + # do extra filtering or whatnot. But for the moment, we just + # pass to modnames2paths: + return $self->modnames2paths($dirs); +} + +#_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_- + +sub modnames2paths { # return a hashref mapping modulenames => paths + my($self, $dirs) = @_; + + my $m2p; + { + my $search = $SEARCH_CLASS->new; + DEBUG and print "Searching via $search\n"; + $search->verbose(1) if DEBUG > 10; + $search->progress( $self->progress->copy->goal(0) ) if $self->progress; + $search->shadows(0); # don't bother noting shadowed files + $search->inc( $dirs ? 0 : 1 ); + $search->survey( $dirs ? @$dirs : () ); + $m2p = $search->name2path; + die "What, no name2path?!" unless $m2p; + } + + $self->muse("That's odd... no modules found!") unless keys %$m2p; + if( DEBUG > 4 ) { + print "Modules found (name => path):\n"; + foreach my $m (sort {lc($a) cmp lc($b)} keys %$m2p) { + print " $m $$m2p{$m}\n"; + } + print "(total ", scalar(keys %$m2p), ")\n\n"; + } elsif( DEBUG ) { + print "Found ", scalar(keys %$m2p), " modules.\n"; + } + $self->muse( "Found ", scalar(keys %$m2p), " modules." ); + + # return the Foo::Bar => /whatever/Foo/Bar.pod|pm hashref + return $m2p; +} + +#=========================================================================== + +sub _wopen { + # this is abstracted out so that the daemon class can override it + my($self, $outpath) = @_; + require Symbol; + my $out_fh = Symbol::gensym(); + DEBUG > 5 and print "Write-opening to $outpath\n"; + return $out_fh if open($out_fh, ">", "$outpath"); + require Carp; + Carp::croak("Can't write-open $outpath: $!"); +} + +#========================================================================== + +sub add_css { + my($self, $url, $is_default, $name, $content_type, $media, $_code) = @_; + return unless $url; + unless($name) { + # cook up a reasonable name based on the URL + $name = $url; + if( $name !~ m/\?/ and $name =~ m{([^/]+)$}s ) { + $name = $1; + $name =~ s/\.css//i; + } + } + $media ||= 'all'; + $content_type ||= 'text/css'; + + my $bunch = [$url, $name, $content_type, $media, $_code]; + if($is_default) { unshift @{ $self->_css_wad }, $bunch } + else { push @{ $self->_css_wad }, $bunch } + return; +} + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +sub _spray_css { + my($self, $outdir) = @_; + + return unless $self->css_flurry(); + $self->_gen_css_wad(); + + my $lol = $self->_css_wad; + foreach my $chunk (@$lol) { + my $url = $chunk->[0]; + my $outfile; + if( ref($chunk->[-1]) and $url =~ m{^(_[-a-z0-9_]+\.css$)} ) { + $outfile = $self->filespecsys->catfile( $outdir, "$1" ); + DEBUG > 5 and print "Noting $$chunk[0] as a file I'll create.\n"; + } else { + DEBUG > 5 and print "OK, noting $$chunk[0] as an external CSS.\n"; + # Requires no further attention. + next; + } + + #$self->muse( "Writing autogenerated CSS file $outfile" ); + my $Cssout = $self->_wopen($outfile); + print $Cssout ${$chunk->[-1]} + or warn "Couldn't print to $outfile: $!\nAbort writing to $outfile at all"; + close($Cssout); + DEBUG > 5 and print "Wrote $outfile\n"; + } + + return; +} + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +sub _css_wad_to_markup { + my($self, $depth) = @_; + + my @css = @{ $self->_css_wad || return '' }; + return '' unless @css; + + my $rel = 'stylesheet'; + my $out = ''; + + --$depth; + my $uplink = $depth ? ('../' x $depth) : ''; + + foreach my $chunk (@css) { + next unless $chunk and @$chunk; + + my( $url1, $url2, $title, $type, $media) = ( + $self->_maybe_uplink( $chunk->[0], $uplink ), + esc(grep !ref($_), @$chunk) + ); + + $out .= qq{\n}; + + $rel = 'alternate stylesheet'; # alternates = all non-first iterations + } + return $out; +} + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +sub _maybe_uplink { + # if the given URL looks relative, return the given uplink string -- + # otherwise return emptystring + my($self, $url, $uplink) = @_; + ($url =~ m{^\./} or $url !~ m{[/\:]} ) + ? $uplink + : '' + # qualify it, if/as needed +} + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +sub _gen_css_wad { + my $self = $_[0]; + my $css_template = $self->_css_template; + foreach my $variation ( + + # Commented out for sake of concision: + # + # 011n=black_with_red_on_white + # 001n=black_with_yellow_on_white + # 101n=black_with_green_on_white + # 110=white_with_yellow_on_black + # 010=white_with_green_on_black + # 011=white_with_blue_on_black + # 100=white_with_red_on_black + + qw[ + 110n=black_with_blue_on_white + 010n=black_with_magenta_on_white + 100n=black_with_cyan_on_white + + 101=white_with_purple_on_black + 001=white_with_navy_blue_on_black + + 010a=grey_with_green_on_black + 010b=white_with_green_on_grey + 101an=black_with_green_on_grey + 101bn=grey_with_green_on_white + ]) { + + my $outname = $variation; + my($flipmode, @swap) = ( ($4 || ''), $1,$2,$3) + if $outname =~ s/^([012])([012])([[012])([a-z]*)=?//s; + @swap = () if '010' eq join '', @swap; # 010 is a swop-no-op! + + my $this_css = + "/* This file is autogenerated. Do not edit. $variation */\n\n" + . $css_template; + + # Only look at three-digitty colors, for now at least. + if( $flipmode =~ m/n/ ) { + $this_css =~ s/(#[0-9a-fA-F]{3})\b/_color_negate($1)/eg; + $this_css =~ s/\bthin\b/medium/g; + } + $this_css =~ s<#([0-9a-fA-F])([0-9a-fA-F])([0-9a-fA-F])\b> + < join '', '#', ($1,$2,$3)[@swap] >eg if @swap; + + if( $flipmode =~ m/a/) + { $this_css =~ s/#fff\b/#999/gi } # black -> dark grey + elsif($flipmode =~ m/b/) + { $this_css =~ s/#000\b/#666/gi } # white -> light grey + + my $name = $outname; + $name =~ tr/-_/ /; + $self->add_css( "_$outname.css", 0, $name, 0, 0, \$this_css); + } + + # Now a few indexless variations: + foreach my $variation (qw[ + black_with_blue_on_white white_with_purple_on_black + white_with_green_on_grey grey_with_green_on_white + ]) { + my $outname = "indexless_$variation"; + my $this_css = join "\n", + "/* This file is autogenerated. Do not edit. $outname */\n", + "\@import url(\"./_$variation.css\");", + ".indexgroup { display: none; }", + "\n", + ; + my $name = $outname; + $name =~ tr/-_/ /; + $self->add_css( "_$outname.css", 0, $name, 0, 0, \$this_css); + } + + return; +} + +sub _color_negate { + my $x = lc $_[0]; + $x =~ tr[0123456789abcdef] + [fedcba9876543210]; + return $x; +} + +#=========================================================================== + +sub add_javascript { + my($self, $url, $content_type, $_code) = @_; + return unless $url; + push @{ $self->_javascript_wad }, [ + $url, $content_type || 'text/javascript', $_code + ]; + return; +} + +sub _spray_javascript { + my($self, $outdir) = @_; + return unless $self->javascript_flurry(); + $self->_gen_javascript_wad(); + + my $lol = $self->_javascript_wad; + foreach my $script (@$lol) { + my $url = $script->[0]; + my $outfile; + + if( ref($script->[-1]) and $url =~ m{^(_[-a-z0-9_]+\.js$)} ) { + $outfile = $self->filespecsys->catfile( $outdir, "$1" ); + DEBUG > 5 and print "Noting $$script[0] as a file I'll create.\n"; + } else { + DEBUG > 5 and print "OK, noting $$script[0] as an external JavaScript.\n"; + next; + } + + #$self->muse( "Writing JavaScript file $outfile" ); + my $Jsout = $self->_wopen($outfile); + + print $Jsout ${$script->[-1]} + or warn "Couldn't print to $outfile: $!\nAbort writing to $outfile at all"; + close($Jsout); + DEBUG > 5 and print "Wrote $outfile\n"; + } + + return; +} + +sub _gen_javascript_wad { + my $self = $_[0]; + my $js_code = $self->_javascript || return; + $self->add_javascript( "_podly.js", 0, \$js_code); + return; +} + +sub _javascript_wad_to_markup { + my($self, $depth) = @_; + + my @scripts = @{ $self->_javascript_wad || return '' }; + return '' unless @scripts; + + my $out = ''; + + --$depth; + my $uplink = $depth ? ('../' x $depth) : ''; + + foreach my $s (@scripts) { + next unless $s and @$s; + + my( $url1, $url2, $type, $media) = ( + $self->_maybe_uplink( $s->[0], $uplink ), + esc(grep !ref($_), @$s) + ); + + $out .= qq{\n}; + } + return $out; +} + +#=========================================================================== + +sub _css_template { return $CSS } +sub _javascript { return $JAVASCRIPT } + +$CSS = <<'EOCSS'; +/* For accessibility reasons, never specify text sizes in px/pt/pc/in/cm/mm */ + +@media all { .hide { display: none; } } + +@media print { + .noprint, div.indexgroup, .backlinktop, .backlinkbottom { display: none } + + * { + border-color: black !important; + color: black !important; + background-color: transparent !important; + background-image: none !important; + } + + dl.superindex > dd { + word-spacing: .6em; + } +} + +@media aural, braille, embossed { + div.indexgroup { display: none; } /* Too noisy, don't you think? */ + dl.superindex > dt:before { content: "Group "; } + dl.superindex > dt:after { content: " contains:"; } + .backlinktop a:before { content: "Back to contents"; } + .backlinkbottom a:before { content: "Back to contents"; } +} + +@media aural { + dl.superindex > dt { pause-before: 600ms; } +} + +@media screen, tty, tv, projection { + .noscreen { display: none; } + + a:link { color: #7070ff; text-decoration: underline; } + a:visited { color: #e030ff; text-decoration: underline; } + a:active { color: #800000; text-decoration: underline; } + body.contentspage a { text-decoration: none; } + a.u { color: #fff !important; text-decoration: none; } + + body.pod { + margin: 0 5px; + color: #fff; + background-color: #000; + } + + body.pod h1, body.pod h2, body.pod h3, body.pod h4 { + font-family: Tahoma, Verdana, Helvetica, Arial, sans-serif; + font-weight: normal; + margin-top: 1.2em; + margin-bottom: .1em; + border-top: thin solid transparent; + /* margin-left: -5px; border-left: 2px #7070ff solid; padding-left: 3px; */ + } + + body.pod h1 { border-top-color: #0a0; } + body.pod h2 { border-top-color: #080; } + body.pod h3 { border-top-color: #040; } + body.pod h4 { border-top-color: #010; } + + p.backlinktop + h1 { border-top: none; margin-top: 0em; } + p.backlinktop + h2 { border-top: none; margin-top: 0em; } + p.backlinktop + h3 { border-top: none; margin-top: 0em; } + p.backlinktop + h4 { border-top: none; margin-top: 0em; } + + body.pod dt { + font-size: 105%; /* just a wee bit more than normal */ + } + + .indexgroup { font-size: 80%; } + + .backlinktop, .backlinkbottom { + margin-left: -5px; + margin-right: -5px; + background-color: #040; + border-top: thin solid #050; + border-bottom: thin solid #050; + } + + .backlinktop a, .backlinkbottom a { + text-decoration: none; + color: #080; + background-color: #000; + border: thin solid #0d0; + } + .backlinkbottom { margin-bottom: 0; padding-bottom: 0; } + .backlinktop { margin-top: 0; padding-top: 0; } + + body.contentspage { + color: #fff; + background-color: #000; + } + + body.contentspage h1 { + color: #0d0; + margin-left: 1em; + margin-right: 1em; + text-indent: -.9em; + font-family: Tahoma, Verdana, Helvetica, Arial, sans-serif; + font-weight: normal; + border-top: thin solid #fff; + border-bottom: thin solid #fff; + text-align: center; + } + + dl.superindex > dt { + font-family: Tahoma, Verdana, Helvetica, Arial, sans-serif; + font-weight: normal; + font-size: 90%; + margin-top: .45em; + /* margin-bottom: -.15em; */ + } + dl.superindex > dd { + word-spacing: .6em; /* most important rule here! */ + } + dl.superindex > a:link { + text-decoration: none; + color: #fff; + } + + .contentsfooty { + border-top: thin solid #999; + font-size: 90%; + } + +} + +/* The End */ + +EOCSS + +#========================================================================== + +$JAVASCRIPT = <<'EOJAVASCRIPT'; + +// From http://www.alistapart.com/articles/alternate/ + +function setActiveStyleSheet(title) { + var i, a, main; + for(i=0 ; (a = document.getElementsByTagName("link")[i]) ; i++) { + if(a.getAttribute("rel").indexOf("style") != -1 && a.getAttribute("title")) { + a.disabled = true; + if(a.getAttribute("title") == title) a.disabled = false; + } + } +} + +function getActiveStyleSheet() { + var i, a; + for(i=0 ; (a = document.getElementsByTagName("link")[i]) ; i++) { + if( a.getAttribute("rel").indexOf("style") != -1 + && a.getAttribute("title") + && !a.disabled + ) return a.getAttribute("title"); + } + return null; +} + +function getPreferredStyleSheet() { + var i, a; + for(i=0 ; (a = document.getElementsByTagName("link")[i]) ; i++) { + if( a.getAttribute("rel").indexOf("style") != -1 + && a.getAttribute("rel").indexOf("alt") == -1 + && a.getAttribute("title") + ) return a.getAttribute("title"); + } + return null; +} + +function createCookie(name,value,days) { + if (days) { + var date = new Date(); + date.setTime(date.getTime()+(days*24*60*60*1000)); + var expires = "; expires="+date.toGMTString(); + } + else expires = ""; + document.cookie = name+"="+value+expires+"; path=/"; +} + +function readCookie(name) { + var nameEQ = name + "="; + var ca = document.cookie.split(';'); + for(var i=0 ; i < ca.length ; i++) { + var c = ca[i]; + while (c.charAt(0)==' ') c = c.substring(1,c.length); + if (c.indexOf(nameEQ) == 0) return c.substring(nameEQ.length,c.length); + } + return null; +} + +window.onload = function(e) { + var cookie = readCookie("style"); + var title = cookie ? cookie : getPreferredStyleSheet(); + setActiveStyleSheet(title); +} + +window.onunload = function(e) { + var title = getActiveStyleSheet(); + createCookie("style", title, 365); +} + +var cookie = readCookie("style"); +var title = cookie ? cookie : getPreferredStyleSheet(); +setActiveStyleSheet(title); + +// The End + +EOJAVASCRIPT + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +1; +__END__ + + +=head1 NAME + +Pod::Simple::HTMLBatch - convert several Pod files to several HTML files + +=head1 SYNOPSIS + + perl -MPod::Simple::HTMLBatch -e 'Pod::Simple::HTMLBatch::go' in out + + +=head1 DESCRIPTION + +This module is used for running batch-conversions of a lot of HTML +documents + +This class is NOT a subclass of Pod::Simple::HTML +(nor of bad old Pod::Html) -- although it uses +Pod::Simple::HTML for doing the conversion of each document. + +The normal use of this class is like so: + + use Pod::Simple::HTMLBatch; + my $batchconv = Pod::Simple::HTMLBatch->new; + $batchconv->some_option( some_value ); + $batchconv->some_other_option( some_other_value ); + $batchconv->batch_convert( \@search_dirs, $output_dir ); + +=head2 FROM THE COMMAND LINE + +Note that this class also provides +(but does not export) the function Pod::Simple::HTMLBatch::go. +This is basically just a shortcut for C<< +Pod::Simple::HTMLBatch->batch_convert(@ARGV) >>. +It's meant to be handy for calling from the command line. + +However, the shortcut requires that you specify exactly two command-line +arguments, C and C. + +Example: + + % mkdir out_html + % perl -MPod::Simple::HTMLBatch -e Pod::Simple::HTMLBatch::go @INC out_html + (to convert the pod from Perl's @INC + files under the directory ../htmlversion) + +(Note that the command line there contains a literal atsign-I-N-C. This +is handled as a special case by batch_convert, in order to save you having +to enter the odd-looking "" as the first command-line parameter when you +mean "just use whatever's in @INC".) + +Example: + + % mkdir ../seekrut + % chmod og-rx ../seekrut + % perl -MPod::Simple::HTMLBatch -e Pod::Simple::HTMLBatch::go . ../htmlversion + (to convert the pod under the current dir into HTML + files under the directory ../htmlversion) + +Example: + + % perl -MPod::Simple::HTMLBatch -e Pod::Simple::HTMLBatch::go happydocs . + (to convert all pod from happydocs into the current directory) + + + +=head1 MAIN METHODS + +=over + +=item $batchconv = Pod::Simple::HTMLBatch->new; + +This TODO + + +=item $batchconv->batch_convert( I, I ); + +this TODO + +=item $batchconv->batch_convert( undef , ...); + +=item $batchconv->batch_convert( q{@INC}, ...); + +These two values for I specify that the normal Perl @INC + +=item $batchconv->batch_convert( \@dirs , ...); + +This specifies that the input directories are the items in +the arrayref C<\@dirs>. + +=item $batchconv->batch_convert( "somedir" , ...); + +This specifies that the director "somedir" is the input. +(This can be an absolute or relative path, it doesn't matter.) + +A common value you might want would be just "." for the current +directory: + + $batchconv->batch_convert( "." , ...); + + +=item $batchconv->batch_convert( 'somedir:someother:also' , ...); + +This specifies that you want the dirs "somedir", "somother", and "also" +scanned, just as if you'd passed the arrayref +C<[qw( somedir someother also)]>. Note that a ":"-separator is normal +under Unix, but Under MSWin, you'll need C<'somedir;someother;also'> +instead, since the pathsep on MSWin is ";" instead of ":". (And +I is because ":" often comes up in paths, like +C<"c:/perl/lib">.) + +(Exactly what separator character should be used, is gotten from +C<$Config::Config{'path_sep'}>, via the L module.) + +=item $batchconv->batch_convert( ... , undef ); + +This specifies that you want the HTML output to go into the current +directory. + +(Note that a missing or undefined value means a different thing in +the first slot than in the second. That's so that C +with no arguments (or undef arguments) means "go from @INC, into +the current directory.) + +=item $batchconv->batch_convert( ... , 'somedir' ); + +This specifies that you want the HTML output to go into the +directory 'somedir'. +(This can be an absolute or relative path, it doesn't matter.) + +=back + + +Note that you can also call C as a class method, +like so: + + Pod::Simple::HTMLBatch->batch_convert( ... ); + +That is just short for this: + + Pod::Simple::HTMLBatch-> new-> batch_convert(...); + +That is, it runs a conversion with default options, for +whatever inputdirs and output dir you specify. + + +=head2 ACCESSOR METHODS + +The following are all accessor methods -- that is, they don't do anything +on their own, but just alter the contents of the conversion object, +which comprises the options for this particular batch conversion. + +We show the "put" form of the accessors below (i.e., the syntax you use +for setting the accessor to a specific value). But you can also +call each method with no parameters to get its current value. For +example, C<< $self->contents_file() >> returns the current value of +the contents_file attribute. + +=over + + +=item $batchconv->verbose( I ); + +This controls how verbose to be during batch conversion, as far as +notes to STDOUT (or whatever is C
\n" ) { + warn "Couldn't print to $outfile: $!\nAbort writing to $outfile at all"; + close($Contents); + return 0; + } + return 1; +} + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +sub _write_contents_middle { + my($self, $Contents, $outfile, $toplevel2submodules, $toplevel_form_freq) = @_; + + foreach my $t (sort keys %$toplevel2submodules) { + my @downlines = sort {$a->[-1] cmp $b->[-1]} + @{ $toplevel2submodules->{$t} }; + + printf $Contents qq[
%s
\n
\n], + esc( $t, $toplevel_form_freq->{$t} ) + ; + + my($path, $name); + foreach my $e (@downlines) { + $name = $e->[0]; + $path = join( "/", '.', esc( @{$e->[3]} ) ) + . ($HTML_EXTENSION || $Pod::Simple::HTML::HTML_EXTENSION); + print $Contents qq{ }, esc($name), "  \n"; + } + print $Contents "
\n\n"; + } + return 1; +} + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +sub _write_contents_end { + my($self, $Contents, $outfile) = @_; + unless( + print $Contents "