Index: MANIFEST
===================================================================
--- MANIFEST (revision 35086)
+++ MANIFEST (working copy)
@@ -2796,14 +2796,18 @@
lib/Pod/Simple/DumpAsText.pm [devel]
lib/Pod/Simple/DumpAsXML.pm [devel]
lib/Pod/Simple/HTML.pm [devel]
+lib/Pod/Simple/HTMLBatch.pm [devel]
+lib/Pod/Simple/HTMLLegacy.pm [devel]
lib/Pod/Simple/LinkSection.pm [devel]
lib/Pod/Simple/Methody.pm [devel]
+lib/Pod/Simple/Progress.pm [devel]
lib/Pod/Simple/PullParser.pm [devel]
lib/Pod/Simple/PullParserEndToken.pm [devel]
lib/Pod/Simple/PullParserStartToken.pm [devel]
lib/Pod/Simple/PullParserTextToken.pm [devel]
lib/Pod/Simple/PullParserToken.pm [devel]
lib/Pod/Simple/RTF.pm [devel]
+lib/Pod/Simple/Search.pm [devel]
lib/Pod/Simple/SimpleTree.pm [devel]
lib/Pod/Simple/Text.pm [devel]
lib/Pod/Simple/TextContent.pm [devel]
@@ -2811,6 +2815,7 @@
lib/Pod/Simple/Transcode.pm [devel]
lib/Pod/Simple/TranscodeDumb.pm [devel]
lib/Pod/Simple/TranscodeSmart.pm [devel]
+lib/Pod/Simple/XHTML.pm [devel]
lib/Pod/Simple/XMLOutStream.pm [devel]
parrot.spec []
parrotbug []
Index: lib/Pod/Simple.pm
===================================================================
--- lib/Pod/Simple.pm (revision 35086)
+++ lib/Pod/Simple.pm (working copy)
@@ -18,7 +18,7 @@
);
@ISA = ('Pod::Simple::BlackBox');
-$VERSION = '2.05';
+$VERSION = '3.07';
@Known_formatting_codes = qw(I B C L E F S X Z);
%Known_formatting_codes = map(($_=>1), @Known_formatting_codes);
@@ -80,13 +80,19 @@
'bare_output', # For some subclasses: whether to prepend
# header-code and postpend footer-code
+ 'fullstop_space_harden', # Whether to turn ". " into ".[nbsp] ";
+
'nix_X_codes', # whether to ignore X<...> codes
'merge_text', # whether to avoid breaking a single piece of
# text up into several events
+ 'preserve_whitespace', # whether to try to keep whitespace as-is
+
'content_seen', # whether we've seen any real Pod content
'errors_seen', # TODO: document. whether we've seen any errors (fatal or not)
+ 'codes_in_verbatim', # for PseudoPod extensions
+
'code_handler', # coderef to call when a code (non-pod) line is seen
'cut_handler', # coderef to call when a =cut line is seen
#Called like:
@@ -139,9 +145,17 @@
$$x = '' unless defined $$x;
DEBUG > 4 and print "# Output string set to $x ($$x)\n";
$this->{'output_fh'} = Pod::Simple::TiedOutFH->handle_on($_[0]);
- return $this->{'output_string'} = ${ $this->{'output_fh'} };
+ return
+ $this->{'output_string'} = $_[0];
+ #${ ${ $this->{'output_fh'} } };
}
+sub abandon_output_string { $_[0]->abandon_output_fh; delete $_[0]{'output_string'} }
+sub abandon_output_fh { $_[0]->output_fh(undef) }
+# These don't delete the string or close the FH -- they just delete our
+# references to it/them.
+# TODO: document these
+
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
sub new {
@@ -969,6 +983,7 @@
# L or L
# L or L or L
# L
+ # Ltext|scheme:...>
my($self,@stack) = @_;
@@ -988,11 +1003,12 @@
# By here, $treelet->[$i] is definitely an L node
- DEBUG > 1 and print "Ogling L node $treelet->[$i]\n";
+ my $ell = $treelet->[$i];
+ DEBUG > 1 and print "Ogling L node $ell\n";
# bitch if it's empty
- if( @{$treelet->[$i]} == 2
- or (@{$treelet->[$i]} == 3 and $treelet->[$i][2] eq '')
+ if( @{$ell} == 2
+ or (@{$ell} == 3 and $ell->[2] eq '')
) {
$self->whine( $start_line, "An empty L<>" );
$treelet->[$i] = 'L<>'; # just make it a text node
@@ -1000,53 +1016,70 @@
}
# Catch URLs:
- # URLs can, alas, contain E<...> sequences, so we can't /assume/
- # that this is one text node. But it has to START with one text
- # node...
- if(! ref $treelet->[$i][2] and
- $treelet->[$i][2] =~ m/^\w+:[^:\s]\S*$/s
+
+ # there are a number of possible cases:
+ # 1) text node containing url: http://foo.com
+ # -> [ 'http://foo.com' ]
+ # 2) text node containing url and text: foo|http://foo.com
+ # -> [ 'foo|http://foo.com' ]
+ # 3) text node containing url start: mailto:xEfoo.com
+ # -> [ 'mailto:x', [ E ... ], 'foo.com' ]
+ # 4) text node containing url start and text: foo|mailto:xEfoo.com
+ # -> [ 'foo|mailto:x', [ E ... ], 'foo.com' ]
+ # 5) other nodes containing text and url start: OE<39>Malley|http://foo.com
+ # -> [ 'O', [ E ... ], 'Malley', '|http://foo.com' ]
+ # ... etc.
+
+ # anything before the url is part of the text.
+ # anything after it is part of the url.
+ # the url text node itself may contain parts of both.
+
+ if (my ($url_index, $text_part, $url_part) =
+ # grep is no good here; we want to bail out immediately so that we can
+ # use $1, $2, etc. without having to do the match twice.
+ sub {
+ for (2..$#$ell) {
+ next if ref $ell->[$_];
+ next unless $ell->[$_] =~ m/^(?:([^|]*)\|)?(\w+:[^:\s]\S*)$/s;
+ return ($_, $1, $2);
+ }
+ return;
+ }->()
) {
- $treelet->[$i][1]{'type'} = 'url';
- $treelet->[$i][1]{'content-implicit'} = 'yes';
-
- if( 3 == @{ $treelet->[$i] } ) {
- # But if it IS just one text node (most common case)
- DEBUG > 1 and printf qq{Catching "%s as " as ho-hum L link.\n},
- $treelet->[$i][2]
- ;
- $treelet->[$i][1]{'to'} = Pod::Simple::LinkSection->new(
- $treelet->[$i][2]
- ); # its own treelet
- } else {
- # It's a URL but complex (like "Lbar>"). Feh.
- #$treelet->[$i][1]{'to'} = [ @{$treelet->[$i]} ];
- #splice @{ $treelet->[$i][1]{'to'} }, 0,2;
- #DEBUG > 1 and printf qq{Catching "%s as " as complex L link.\n},
- # join '~', @{$treelet->[$i][1]{'to' }};
-
- $treelet->[$i][1]{'to'} = Pod::Simple::LinkSection->new(
- $treelet->[$i] # yes, clone the whole content as a treelet
- );
- $treelet->[$i][1]{'to'}[0] = ''; # set the copy's tagname to nil
- die "SANITY FAILURE" if $treelet->[0] eq ''; # should never happen!
- DEBUG > 1 and print
- qq{Catching "$treelet->[$i][1]{'to'}" as a complex L link.\n};
+ $ell->[1]{'type'} = 'url';
+
+ my @text = @{$ell}[2..$url_index-1];
+ push @text, $text_part if defined $text_part;
+
+ my @url = @{$ell}[$url_index+1..$#$ell];
+ unshift @url, $url_part;
+
+ unless (@text) {
+ $ell->[1]{'content-implicit'} = 'yes';
+ @text = @url;
}
- next; # and move on
+ $ell->[1]{to} = Pod::Simple::LinkSection->new(
+ @url == 1
+ ? $url[0]
+ : [ '', {}, @url ],
+ );
+
+ splice @$ell, 2, $#$ell, @text;
+
+ next;
}
-
# Catch some very simple and/or common cases
- if(@{$treelet->[$i]} == 3 and ! ref $treelet->[$i][2]) {
- my $it = $treelet->[$i][2];
+ if(@{$ell} == 3 and ! ref $ell->[2]) {
+ my $it = $ell->[2];
if($it =~ m/^[-a-zA-Z0-9]+\([-a-zA-Z0-9]+\)$/s) { # man sections
# Hopefully neither too broad nor too restrictive a RE
DEBUG > 1 and print "Catching \"$it\" as manpage link.\n";
- $treelet->[$i][1]{'type'} = 'man';
+ $ell->[1]{'type'} = 'man';
# This's the only place where man links can get made.
- $treelet->[$i][1]{'content-implicit'} = 'yes';
- $treelet->[$i][1]{'to' } =
+ $ell->[1]{'content-implicit'} = 'yes';
+ $ell->[1]{'to' } =
Pod::Simple::LinkSection->new( $it ); # treelet!
next;
@@ -1055,9 +1088,9 @@
# Extremely forgiving idea of what constitutes a bare
# modulename link like L or even L
DEBUG > 1 and print "Catching \"$it\" as ho-hum L link.\n";
- $treelet->[$i][1]{'type'} = 'pod';
- $treelet->[$i][1]{'content-implicit'} = 'yes';
- $treelet->[$i][1]{'to' } =
+ $ell->[1]{'type'} = 'pod';
+ $ell->[1]{'content-implicit'} = 'yes';
+ $ell->[1]{'to' } =
Pod::Simple::LinkSection->new( $it ); # treelet!
next;
}
@@ -1073,7 +1106,6 @@
my $link_text; # set to an arrayref if found
- my $ell = $treelet->[$i];
my @ell_content = @$ell;
splice @ell_content,0,2; # Knock off the 'L' and {} bits
@@ -1357,7 +1389,7 @@
$i += @$to_pull_up - 1; # Make $i skip the pulled-up stuff
}
} else {
- $treelet->[$i] =~ tr/ /\xA0/ if ASCII and $in_s;
+ $treelet->[$i] =~ s/\s/\xA0/g if ASCII and $in_s;
# (If not in ASCIIland, we can't assume that \xA0 == nbsp.)
# Note that if you apply nbsp_for_S to text, and so turn
@@ -1427,7 +1459,7 @@
"\nAbout to parse source: {{\n$_[0]\n}}\n\n";
- my $parser = $class->new;
+ my $parser = ref $class && $class->isa(__PACKAGE__) ? $class : $class->new;
$parser->hide_line_numbers(1);
my $out = '';
Index: lib/Pod/Simple/XMLOutStream.pm
===================================================================
--- lib/Pod/Simple/XMLOutStream.pm (revision 35086)
+++ lib/Pod/Simple/XMLOutStream.pm (working copy)
@@ -113,14 +113,34 @@
L is rather like this class; see its
documentation for a discussion of the differences.
-L, L
+L, L, L
-The older libraries L, L, L
+L
+The older (and possibly obsolete) libraries L, L
+
+=head1 ABOUT EXTENDING POD
+
+TODO: An example or two of =extend, then point to Pod::Simple::Subclassing
+
+
+=head1 ASK ME!
+
+If you actually want to use Pod as a format that you want to render to
+XML (particularly if to an XML instance with more elements than normal
+Pod has), please email me (C) and I'll probably have
+some recommendations.
+
+For reasons of concision and energetic laziness, some methods and
+options in this module (and the dozen modules it depends on) are
+undocumented; but one of those undocumented bits might be just what
+you're looking for.
+
+
=head1 COPYRIGHT AND DISCLAIMERS
-Copyright (c) 2002 Sean M. Burke. All rights reserved.
+Copyright (c) 2002-4 Sean M. Burke. All rights reserved.
This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
Index: lib/Pod/Simple/HTML.pm
===================================================================
--- lib/Pod/Simple/HTML.pm (revision 35086)
+++ lib/Pod/Simple/HTML.pm (working copy)
@@ -3,19 +3,79 @@
package Pod::Simple::HTML;
use strict;
use Pod::Simple::PullParser ();
-use vars qw(@ISA %Tagmap $Computerese $Lame $Linearization_Limit $VERSION);
+use vars qw(
+ @ISA %Tagmap $Computerese $LamePad $Linearization_Limit $VERSION
+ $Perldoc_URL_Prefix $Perldoc_URL_Postfix
+ $Title_Prefix $Title_Postfix $HTML_EXTENSION %ToIndex
+ $Doctype_decl $Content_decl
+);
@ISA = ('Pod::Simple::PullParser');
-$VERSION = '2.02';
+$VERSION = '3.03';
use UNIVERSAL ();
-sub DEBUG () {0}
+BEGIN {
+ if(defined &DEBUG) { } # no-op
+ elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG }
+ else { *DEBUG = sub () {0}; }
+}
-$Computerese = " lang='und' xml:lang='und'" unless defined $Computerese;
-$Lame = ' class="pad"' unless defined $Lame;
+$Doctype_decl ||= ''; # No. Just No. Don't even ask me for it.
+ # qq{\n};
-$Linearization_Limit = 90 unless defined $Linearization_Limit;
+$Content_decl ||=
+ q{ };
+
+$HTML_EXTENSION = '.html' unless defined $HTML_EXTENSION;
+$Computerese = "" unless defined $Computerese;
+$LamePad = '' unless defined $LamePad;
+
+$Linearization_Limit = 120 unless defined $Linearization_Limit;
# headings/items longer than that won't get an
+$Perldoc_URL_Prefix = 'http://search.cpan.org/perldoc?'
+ unless defined $Perldoc_URL_Prefix;
+$Perldoc_URL_Postfix = ''
+ unless defined $Perldoc_URL_Postfix;
+$Title_Prefix = '' unless defined $Title_Prefix;
+$Title_Postfix = '' unless defined $Title_Postfix;
+%ToIndex = map {; $_ => 1 } qw(head1 head2 head3 head4 ); # item-text
+ # 'item-text' stuff in the index doesn't quite work, and may
+ # not be a good idea anyhow.
+
+
+__PACKAGE__->_accessorize(
+ 'perldoc_url_prefix',
+ # In turning L into http://whatever/Foo%3a%3aBar, what
+ # to put before the "Foo%3a%3aBar".
+ # (for singleton mode only?)
+ 'perldoc_url_postfix',
+ # what to put after "Foo%3a%3aBar" in the URL. Normally "".
+
+ 'batch_mode', # whether we're in batch mode
+ 'batch_mode_current_level',
+ # When in batch mode, how deep the current module is: 1 for "LWP",
+ # 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc
+
+ 'title_prefix', 'title_postfix',
+ # What to put before and after the title in the head.
+ # Should already be &-escaped
+
+ 'html_header_before_title',
+ 'html_header_after_title',
+ 'html_footer',
+
+ 'index', # whether to add an index at the top of each page
+ # (actually it's a table-of-contents, but we'll call it an index,
+ # out of apparently longstanding habit)
+
+ 'html_css', # URL of CSS file to point to
+ 'html_javascript', # URL of CSS file to point to
+
+ 'force_title', # should already be &-escaped
+ 'default_title', # should already be &-escaped
+);
+
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
my @_to_accept;
@@ -77,12 +137,13 @@
] # no point in providing a way to get ... , I think
),
- '/item-bullet' => "
\n",
- '/item-number' => "
\n",
- '/item-text' => "
\n",
- 'Para_item' => "\n",
- '/Para_item' => "
\n",
+ '/item-bullet' => "$LamePad\n",
+ '/item-number' => "$LamePad\n",
+ '/item-text' => "$LamePad\n",
+ 'item-body' => "\n",
+ '/item-body' => " \n",
+
'B' => "", '/B' => " ",
'I' => "", '/I' => " ",
'F' => "", '/F' => " ",
@@ -103,6 +164,10 @@
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+sub go { Pod::Simple::HTML->parse_from_file(@ARGV); exit 0 }
+ # Just so we can run from the command line. No options.
+ # For that, use perldoc!
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
sub new {
my $new = shift->SUPER::new(@_);
@@ -112,11 +177,37 @@
$new->accept_codes('VerbatimFormatted');
$new->accept_codes(@_to_accept);
DEBUG > 2 and print "To accept: ", join(' ',@_to_accept), "\n";
-
+
+ $new->perldoc_url_prefix( $Perldoc_URL_Prefix );
+ $new->perldoc_url_postfix( $Perldoc_URL_Postfix );
+ $new->title_prefix( $Title_Prefix );
+ $new->title_postfix( $Title_Postfix );
+
+ $new->html_header_before_title(
+ qq[$Doctype_decl]
+ );
+ $new->html_header_after_title( join "\n" =>
+ " ",
+ $Content_decl,
+ "\n",
+ $new->version_tag_comment,
+ "\n",
+ );
+ $new->html_footer( qq[\n\n\n\n] );
+
$new->{'Tagmap'} = {%Tagmap};
return $new;
}
+sub batch_mode_page_object_init {
+ my($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_;
+ DEBUG and print "Initting $self\n for $module\n",
+ " in $infile\n out $outfile\n depth $depth\n";
+ $self->batch_mode(1);
+ $self->batch_mode_current_level($depth);
+ return $self;
+}
+
sub run {
my $self = $_[0];
return $self->do_middle if $self->bare_output;
@@ -126,121 +217,266 @@
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-sub do_pod_link {
- my($self, $link) = @_;
- my $to = $link->attr('to');
- my $section = $link->attr('section');
- return undef unless( # should never happen
- (defined $to and length $to) or
- (defined $section and length $section)
- );
+sub do_beginning {
+ my $self = $_[0];
- if(defined $to and length $to) {
- $to = $self->resolve_pod_page_link($to, $section);
- return undef unless defined $to and length $to;
- # resolve_pod_page_link returning undef is how it
- # can signal that it gives up on making a link
- # (I pass it the section value, but I don't see a
- # particular reason it'd use it.)
+ my $title;
+
+ if(defined $self->force_title) {
+ $title = $self->force_title;
+ DEBUG and print "Forcing title to be $title\n";
+ } else {
+ # Actually try looking for the title in the document:
+ $title = $self->get_short_title();
+ unless($self->content_seen) {
+ DEBUG and print "No content seen in search for title.\n";
+ return;
+ }
+ $self->{'Title'} = $title;
+
+ if(defined $title and $title =~ m/\S/) {
+ $title = $self->title_prefix . esc($title) . $self->title_postfix;
+ } else {
+ $title = $self->default_title;
+ $title = '' unless defined $title;
+ DEBUG and print "Title defaults to $title\n";
+ }
}
+
- if(defined $section and length($section .= '')) {
- $section =~ tr/ /_/;
- $section =~ tr/\x00-\x1F\x80-\x9F//d if 'A' eq chr(65);
- $section = $self->unicode_escape_url($section);
- # Turn char 1234 into "(1234)"
- $section = '_' unless length $section;
+ my $after = $self->html_header_after_title || '';
+ if($self->html_css) {
+ my $link =
+ $self->html_css =~ m/
+ ? $self->html_css # It's a big blob of markup, let's drop it in
+ : sprintf( # It's just a URL, so let's wrap it up
+ qq[ \n],
+ $self->html_css,
+ );
+ $after =~ s{()}{$link\n$1}i; # otherwise nevermind
}
-
-
+ $self->_add_top_anchor(\$after);
- foreach my $it ($to, $section) {
- if( defined $it ) {
- $it =~ s/([^\x00-\xFF])/join '', map sprintf('%%%02X',$_), unpack 'C*', $1/eg;
- $it =~ s/([^\._abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',ord($1))/eg;
- # Yes, stipulate the list without a range, so that this can work right on
- # all charsets that this module happens to run under.
- # Altho, hmm, what about that ord? Presumably that won't work right
- # under non-ASCII charsets. Something should be done about that.
- }
+ if($self->html_javascript) {
+ my $link =
+ $self->html_javascript =~ m/
+ ? $self->html_javascript # It's a big blob of markup, let's drop it in
+ : sprintf( # It's just a URL, so let's wrap it up
+ qq[\n],
+ $self->html_javascript,
+ );
+ $after =~ s{()}{$link\n$1}i; # otherwise nevermind
}
-
- my $out = $to if defined $to and length $to;
- $out .= "#" . $section if defined $section and length $section;
- return undef unless length $out;
- return $out;
+
+ print {$self->{'output_fh'}}
+ $self->html_header_before_title || '',
+ $title, # already escaped
+ $after,
+ ;
+
+ DEBUG and print "Returning from do_beginning...\n";
+ return 1;
}
+sub _add_top_anchor {
+ my($self, $text_r) = @_;
+ unless($$text_r and $$text_r =~ m/name=['"]___top['"]/) { # a hack
+ $$text_r .= " \n";
+ }
+ return;
+}
-sub resolve_pod_page_link {
- my($self, $to) = @_;
-
- return 'TODO';
+sub version_tag_comment {
+ my $self = shift;
+ return sprintf
+ "\n",
+ esc(
+ ref($self), $self->VERSION(), $ISA[0], $ISA[0]->VERSION(),
+ $], scalar(gmtime),
+ ), $self->_modnote(),
+ ;
}
-sub do_url_link { return $_[1]->attr('to') }
+sub _modnote {
+ my $class = ref($_[0]) || $_[0];
+ return join "\n " => grep m/\S/, split "\n",
-sub do_man_link { return undef }
- # But subclasses are welcome to override this if they have man
- # pages somewhere URL-accessible.
+qq{
+If you want to change this HTML document, you probably shouldn't do that
+by changing it directly. Instead, see about changing the calling options
+to $class, and/or subclassing $class,
+then reconverting this document from the Pod source.
+When in doubt, email the author of $class for advice.
+See 'perldoc $class' for more info.
+};
-#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+}
-sub do_link {
- my($self, $token) = @_;
- my $type = $token->attr('type');
- if(!defined $type) {
- $self->whine("Typeless L!?", $token->attr('start_line'));
- } elsif( $type eq 'pod') { return $self->do_pod_link($token);
- } elsif( $type eq 'url') { return $self->do_url_link($token);
- } elsif( $type eq 'man') { return $self->do_man_link($token);
- } else {
- $self->whine("L of unknown type $type!?", $token->attr('start_line'));
+sub do_end {
+ my $self = $_[0];
+ print {$self->{'output_fh'}} $self->html_footer || '';
+ return 1;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Normally this would just be a call to _do_middle_main_loop -- but we
+# have to do some elaborate things to emit all the content and then
+# summarize it and output it /before/ the content that it's a summary of.
+
+sub do_middle {
+ my $self = $_[0];
+ return $self->_do_middle_main_loop unless $self->index;
+
+ if( $self->output_string ) {
+ # An efficiency hack
+ my $out = $self->output_string; #it's a reference to it
+ my $sneakytag = "\f\f\e\e\b\bIndex Here\e\e\b\b\f\f\n";
+ $$out .= $sneakytag;
+ $self->_do_middle_main_loop;
+ $sneakytag = quotemeta($sneakytag);
+ my $index = $self->index_as_html();
+ if( $$out =~ s/$sneakytag/$index/s ) {
+ # Expected case
+ DEBUG and print "Inserted ", length($index), " bytes of index HTML into $out.\n";
+ } else {
+ DEBUG and print "Odd, couldn't find where to insert the index in the output!\n";
+ # I don't think this should ever happen.
+ }
+ return 1;
}
- return 'FNORG';
+
+ unless( $self->output_fh ) {
+ require Carp;
+ Carp::confess("Parser object \$p doesn't seem to have any output object! I don't know how to deal with that.");
+ }
+
+ # If we get here, we're outputting to a FH. So we need to do some magic.
+ # Namely, divert all content to a string, which we output after the index.
+ my $fh = $self->output_fh;
+ my $content = '';
+ {
+ # Our horrible bait and switch:
+ $self->output_string( \$content );
+ $self->_do_middle_main_loop;
+ $self->abandon_output_string();
+ $self->output_fh($fh);
+ }
+ print $fh $self->index_as_html();
+ print $fh $content;
+
+ return 1;
}
+###########################################################################
-sub do_middle { # the main work
+sub index_as_html {
my $self = $_[0];
+ # This is meant to be called AFTER the input document has been parsed!
+
+ my $points = $self->{'PSHTML_index_points'} || [];
+
+ @$points > 1 or return qq[
\n];
+ # There's no point in having a 0-item or 1-item index, I dare say.
+
+ my(@out) = qq{\n};
+ my $level = 0;
+
+ my( $target_level, $previous_tagname, $tagname, $text, $anchorname, $indent);
+ foreach my $p (@$points, ['head0', '(end)']) {
+ ($tagname, $text) = @$p;
+ $anchorname = $self->section_escape($text);
+ if( $tagname =~ m{^head(\d+)$} ) {
+ $target_level = 0 + $1;
+ } else { # must be some kinda list item
+ if($previous_tagname =~ m{^head\d+$} ) {
+ $target_level = $level + 1;
+ } else {
+ $target_level = $level; # no change needed
+ }
+ }
+
+ # Get to target_level by opening or closing ULs
+ while($level > $target_level)
+ { --$level; push @out, (" " x $level) . ""; }
+ while($level < $target_level)
+ { ++$level; push @out, (" " x ($level-1))
+ . "
"; }
+
+ $previous_tagname = $tagname;
+ next unless $level;
+
+ $indent = ' ' x $level;
+ push @out, sprintf
+ "%s%s ",
+ $indent, $level, $anchorname, esc($text)
+ ;
+ }
+ push @out, " \n";
+ return join "\n", @out;
+}
+
+###########################################################################
+
+sub _do_middle_main_loop {
+ my $self = $_[0];
my $fh = $self->{'output_fh'};
+ my $tagmap = $self->{'Tagmap'};
- my($token, $type, $tagname);
+ my($token, $type, $tagname, $linkto, $linktype);
my @stack;
my $dont_wrap = 0;
+
while($token = $self->get_token) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if( ($type = $token->type) eq 'start' ) {
if(($tagname = $token->tagname) eq 'L') {
- esc($type = $self->do_link($token)); # reuse it, why not
- if(defined $type and length $type) {
- print $fh "";
+ $linktype = $token->attr('type') || 'insane';
+
+ $linkto = $self->do_link($token);
+
+ if(defined $linkto and length $linkto) {
+ esc($linkto);
+ # (Yes, SGML-escaping applies on top of %-escaping!
+ # But it's rarely noticeable in practice.)
+ print $fh qq{ };
} else {
print $fh " "; # Yes, an 'a' element with no attributes!
}
} elsif ($tagname eq 'item-text' or $tagname =~ m/^head\d$/s) {
- print $fh $self->{'Tagmap'}{$tagname} || next;
+ print $fh $tagmap->{$tagname} || next;
my @to_unget;
while(1) {
push @to_unget, $self->get_token;
last if $to_unget[-1]->is_end
and $to_unget[-1]->tagname eq $tagname;
+
+ # TODO: support for X<...>'s found in here? (maybe hack into linearize_tokens)
}
+
my $name = $self->linearize_tokens(@to_unget);
- if(defined $name) { # ludicrously long, so nevermind
- $name =~ tr/ /_/;
- print $fh " ";
+ print $fh " section_name_tidy( $name ) );
+ print $fh qq[name="$esc"];
DEBUG and print "Linearized ", scalar(@to_unget),
" tokens as \"$name\".\n";
- } else {
- print $fh " "; # Yes, an 'a' element with no attributes!
+ push @{ $self->{'PSHTML_index_points'} }, [$tagname, $name]
+ if $ToIndex{ $tagname };
+ # Obviously, this discards all formatting codes (saving
+ # just their content), but ahwell.
+
+ } else { # ludicrously long, so nevermind
DEBUG and print "Linearized ", scalar(@to_unget),
" tokens, but it was too long, so nevermind.\n";
}
+ print $fh "\n>";
$self->unget_token(@to_unget);
} elsif ($tagname eq 'Data') {
@@ -255,12 +491,13 @@
next;
} else {
- if( $tagname =~ m/^over-(.+)$/s ) {
- push @stack, $1;
- } elsif( $tagname eq 'Para') {
- $tagname = 'Para_item' if @stack and $stack[-1] eq 'text';
+ if( $tagname =~ m/^over-/s ) {
+ push @stack, '';
+ } elsif( $tagname =~ m/^item-/s and @stack and $stack[-1] ) {
+ print $fh $stack[-1];
+ $stack[-1] = '';
}
- print $fh $self->{'Tagmap'}{$tagname} || next;
+ print $fh $tagmap->{$tagname} || next;
++$dont_wrap if $tagname eq 'Verbatim' or $tagname eq "VerbatimFormatted"
or $tagname eq 'X';
}
@@ -268,11 +505,21 @@
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
} elsif( $type eq 'end' ) {
if( ($tagname = $token->tagname) =~ m/^over-/s ) {
- pop @stack;
- } elsif( $tagname eq 'Para' ) {
- $tagname = 'Para_item' if @stack and $stack[-1] eq 'text';
+ if( my $end = pop @stack ) {
+ print $fh $end;
+ }
+ } elsif( $tagname =~ m/^item-/s and @stack) {
+ $stack[-1] = $tagmap->{"/$tagname"};
+ if( $tagname eq 'item-text' and defined(my $next = $self->get_token) ) {
+ $self->unget_token($next);
+ if( $next->type eq 'start' and $next->tagname !~ m/^item-/s ) {
+ print $fh $tagmap->{"/item-text"},$tagmap->{"item-body"};
+ $stack[-1] = $tagmap->{"/item-body"};
+ }
+ }
+ next;
}
- print $fh $self->{'Tagmap'}{"/$tagname"} || next;
+ print $fh $tagmap->{"/$tagname"} || next;
--$dont_wrap if $tagname eq 'Verbatim' or $tagname eq 'X';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@@ -286,75 +533,214 @@
return 1;
}
+###########################################################################
+#
+
+sub do_link {
+ my($self, $token) = @_;
+ my $type = $token->attr('type');
+ if(!defined $type) {
+ $self->whine("Typeless L!?", $token->attr('start_line'));
+ } elsif( $type eq 'pod') { return $self->do_pod_link($token);
+ } elsif( $type eq 'url') { return $self->do_url_link($token);
+ } elsif( $type eq 'man') { return $self->do_man_link($token);
+ } else {
+ $self->whine("L of unknown type $type!?", $token->attr('start_line'));
+ }
+ return 'FNORG'; # should never get called
+}
+
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-sub do_beginning {
- my $self = $_[0];
+sub do_url_link { return $_[1]->attr('to') }
- my $title = $self->get_short_title();
- unless($self->content_seen) {
- DEBUG and print "No content seen in search for title.\n";
- return;
+sub do_man_link { return undef }
+ # But subclasses are welcome to override this if they have man
+ # pages somewhere URL-accessible.
+
+
+sub do_pod_link {
+ # And now things get really messy...
+ my($self, $link) = @_;
+ my $to = $link->attr('to');
+ my $section = $link->attr('section');
+ return undef unless( # should never happen
+ (defined $to and length $to) or
+ (defined $section and length $section)
+ );
+
+ $section = $self->section_escape($section)
+ if defined $section and length($section .= ''); # (stringify)
+
+ DEBUG and printf "Resolving \"%s\" \"%s\"...\n",
+ $to || "(nil)", $section || "(nil)";
+
+ {
+ # An early hack:
+ my $complete_url = $self->resolve_pod_link_by_table($to, $section);
+ if( $complete_url ) {
+ DEBUG > 1 and print "resolve_pod_link_by_table(T,S) gives ",
+ $complete_url, "\n (Returning that.)\n";
+ return $complete_url;
+ } else {
+ DEBUG > 4 and print " resolve_pod_link_by_table(T,S)",
+ " didn't return anything interesting.\n";
+ }
}
- $self->{'Title'} = $title;
- esc($title);
- print {$self->{'output_fh'}}
- "\n$title \n\n\n",
- $self->version_tag_comment,
- "\n",
- ;
- # TODO: more configurability there
+ if(defined $to and length $to) {
+ # Give this routine first hack again
+ my $there = $self->resolve_pod_link_by_table($to);
+ if(defined $there and length $there) {
+ DEBUG > 1
+ and print "resolve_pod_link_by_table(T) gives $there\n";
+ } else {
+ $there =
+ $self->resolve_pod_page_link($to, $section);
+ # (I pass it the section value, but I don't see a
+ # particular reason it'd use it.)
+ DEBUG > 1 and print "resolve_pod_page_link gives ", $to || "(nil)", "\n";
+ unless( defined $there and length $there ) {
+ DEBUG and print "Can't resolve $to\n";
+ return undef;
+ }
+ # resolve_pod_page_link returning undef is how it
+ # can signal that it gives up on making a link
+ }
+ $to = $there;
+ }
- DEBUG and print "Returning from do_beginning...\n";
- return 1;
+ #DEBUG and print "So far [", $to||'nil', "] [", $section||'nil', "]\n";
+
+ my $out = (defined $to and length $to) ? $to : '';
+ $out .= "#" . $section if defined $section and length $section;
+
+ unless(length $out) { # sanity check
+ DEBUG and printf "Oddly, couldn't resolve \"%s\" \"%s\"...\n",
+ $to || "(nil)", $section || "(nil)";
+ return undef;
+ }
+
+ DEBUG and print "Resolved to $out\n";
+ return $out;
}
-sub version_tag_comment {
- my $self = shift;
- return sprintf
- "\n",
- # None of the following things should need escaping, I dare say!
- ref($self), $self->VERSION(), $ISA[0], $ISA[0]->VERSION(),
- $], scalar(gmtime),
- ;
+
+# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+
+sub section_escape {
+ my($self, $section) = @_;
+ return $self->section_url_escape(
+ $self->section_name_tidy($section)
+ );
}
+sub section_name_tidy {
+ my($self, $section) = @_;
+ $section =~ tr/ /_/;
+ $section =~ tr/\x00-\x1F\x80-\x9F//d if 'A' eq chr(65); # drop crazy characters
+ $section = $self->unicode_escape_url($section);
+ $section = '_' unless length $section;
+ return $section;
+}
-sub do_end {
- my $self = $_[0];
- print {$self->{'output_fh'}} "\n\n\n";
- # TODO: allow for a footer
- return 1;
+sub section_url_escape { shift->general_url_escape(@_) }
+sub pagepath_url_escape { shift->general_url_escape(@_) }
+
+sub general_url_escape {
+ my($self, $string) = @_;
+
+ $string =~ s/([^\x00-\xFF])/join '', map sprintf('%%%02X',$_), unpack 'C*', $1/eg;
+ # express Unicode things as urlencode(utf(orig)).
+
+ # A pretty conservative escaping, behoovey even for query components
+ # of a URL (see RFC 2396)
+
+ $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',ord($1))/eg;
+ # Yes, stipulate the list without a range, so that this can work right on
+ # all charsets that this module happens to run under.
+ # Altho, hmm, what about that ord? Presumably that won't work right
+ # under non-ASCII charsets. Something should be done
+ # about that, I guess?
+
+ return $string;
}
-#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-sub esc {
- if(defined wantarray) {
- if(wantarray) {
- @_ = splice @_; # break aliasing
- } else {
- my $x = shift;
- $x =~ s/([^\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/''.(ord($1)).';'/eg;
- return $x;
- }
+#--------------------------------------------------------------------------
+#
+# Oh look, a yawning portal to Hell! Let's play touch football right by it!
+#
+
+sub resolve_pod_page_link {
+ # resolve_pod_page_link must return a properly escaped URL
+ my $self = shift;
+ return $self->batch_mode()
+ ? $self->resolve_pod_page_link_batch_mode(@_)
+ : $self->resolve_pod_page_link_singleton_mode(@_)
+ ;
+}
+
+sub resolve_pod_page_link_singleton_mode {
+ my($self, $it) = @_;
+ return undef unless defined $it and length $it;
+ my $url = $self->pagepath_url_escape($it);
+
+ $url =~ s{::$}{}s; # probably never comes up anyway
+ $url =~ s{::}{/}g unless $self->perldoc_url_prefix =~ m/\?/s; # sane DWIM?
+
+ return undef unless length $url;
+ return $self->perldoc_url_prefix . $url . $self->perldoc_url_postfix;
+}
+
+sub resolve_pod_page_link_batch_mode {
+ my($self, $to) = @_;
+ DEBUG > 1 and print " During batch mode, resolving $to ...\n";
+ my @path = grep length($_), split m/::/s, $to, -1;
+ unless( @path ) { # sanity
+ DEBUG and print "Very odd! Splitting $to gives (nil)!\n";
+ return undef;
}
- foreach my $x (@_) {
- # Escape things very cautiously:
- $x =~ s/([^\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/''.(ord($1)).';'/eg;
- # Leave out "- so that "--" won't make it thru in X-generated comments
- # with text in them.
+ $self->batch_mode_rectify_path(\@path);
+ my $out = join('/', map $self->pagepath_url_escape($_), @path)
+ . $HTML_EXTENSION;
+ DEBUG > 1 and print " => $out\n";
+ return $out;
+}
- # Yes, stipulate the list without a range, so that this can work right on
- # all charsets that this module happens to run under.
- # Altho, hmm, what about that ord? Presumably that won't work right
- # under non-ASCII charsets. Something should be done about that.
+sub batch_mode_rectify_path {
+ my($self, $pathbits) = @_;
+ my $level = $self->batch_mode_current_level;
+ $level--; # how many levels up to go to get to the root
+ if($level < 1) {
+ unshift @$pathbits, '.'; # just to be pretty
+ } else {
+ unshift @$pathbits, ('..') x $level;
}
- return @_;
+ return;
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+sub resolve_pod_link_by_table {
+ # A crazy hack to allow specifying custom L => URL mappings
+
+ return unless $_[0]->{'podhtml_LOT'}; # An optimizy shortcut
+
+ my($self, $to, $section) = @_;
+
+ # TODO: add a method that actually populates podhtml_LOT from a file?
+
+ if(defined $section) {
+ $to = '' unless defined $to and length $to;
+ return $self->{'podhtml_LOT'}{"$to#$section"}; # quite possibly undef!
+ } else {
+ return $self->{'podhtml_LOT'}{$to}; # quite possibly undef!
+ }
+ return;
+}
+
+###########################################################################
+
sub linearize_tokens { # self, tokens
my $self = shift;
my $out = '';
@@ -362,11 +748,11 @@
my $t;
while($t = shift @_) {
if(!ref $t or !UNIVERSAL::can($t, 'is_text')) {
- $out .= $t;
+ $out .= $t; # a string, or some insane thing
} elsif($t->is_text) {
$out .= $t->text;
} elsif($t->is_start and $t->tag eq 'X') {
- # ignore until the end of this X<...> sequence
+ # Ignore until the end of this X<...> sequence:
my $x_open = 1;
while($x_open) {
next if( ($t = shift @_)->is_text );
@@ -375,13 +761,7 @@
}
}
}
-
- $out =~ tr/\x00-\x1F\x80-\x9F//d if 'A' eq chr(65);
return undef if length $out > $Linearization_Limit;
-
- $out = $self->unicode_escape_url($out);
- $out = '_' unless length $out;
-
return $out;
}
@@ -395,38 +775,104 @@
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+sub esc { # a function.
+ if(defined wantarray) {
+ if(wantarray) {
+ @_ = splice @_; # break aliasing
+ } else {
+ my $x = shift;
+ $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/''.(ord($1)).';'/eg;
+ return $x;
+ }
+ }
+ foreach my $x (@_) {
+ # Escape things very cautiously:
+ $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/''.(ord($1)).';'/eg
+ if defined $x;
+ # Leave out "- so that "--" won't make it thru in X-generated comments
+ # with text in them.
+ # Yes, stipulate the list without a range, so that this can work right on
+ # all charsets that this module happens to run under.
+ # Altho, hmm, what about that ord? Presumably that won't work right
+ # under non-ASCII charsets. Something should be done about that.
+ }
+ return @_;
+}
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
1;
__END__
=head1 NAME
-TODO - TODO
+Pod::Simple::HTML - convert Pod to HTML
=head1 SYNOPSIS
- TODO
+ perl -MPod::Simple::HTML -e Pod::Simple::HTML::go thingy.pod
- perl -MPod::Simple::HTML -e \
- "exit Pod::Simple::HTML->filter(shift)->errors_seen" \
- thingy.pod
-
=head1 DESCRIPTION
-This class is for TODO.
+This class is for making an HTML rendering of a Pod document.
+
This is a subclass of L and inherits all its
-methods.
+methods (and options).
+Note that if you want to do a batch conversion of a lot of Pod
+documents to HTML, you should see the module L.
+
+
+
+=head1 CALLING FROM THE COMMAND LINE
+
TODO
+ perl -MPod::Simple::HTML -e Pod::Simple::HTML::go Thing.pod Thing.html
+
+
+
+=head1 CALLING FROM PERL
+
+TODO make a new object, set any options, and use parse_from_file
+
+
+=head1 METHODS
+
+TODO
+all (most?) accessorized methods
+
+
+=head1 SUBCLASSING
+
+TODO
+
+ can just set any of: html_css html_javascript title_prefix
+ 'html_header_before_title',
+ 'html_header_after_title',
+ 'html_footer',
+
+maybe override do_pod_link
+
+maybe override do_beginning do_end
+
+
+
=head1 SEE ALSO
-L
+L, L
+
+TODO: a corpus of sample Pod input and HTML output? Or common
+idioms?
+
+
+
=head1 COPYRIGHT AND DISCLAIMERS
-Copyright (c) 2002 Sean M. Burke. All rights reserved.
+Copyright (c) 2002-2004 Sean M. Burke. All rights reserved.
This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
Index: lib/Pod/Simple/BlackBox.pm
===================================================================
--- lib/Pod/Simple/BlackBox.pm (revision 35086)
+++ lib/Pod/Simple/BlackBox.pm (working copy)
@@ -525,237 +525,17 @@
DEBUG > 1 and print "Pondering a $para_type paragraph, given the stack: (",
$self->_dump_curr_open(), ")\n";
- if($para_type eq '=for') { #//////////////////////////////////////////////
- # Fake it out as a begin/end
- my $target;
+ if($para_type eq '=for') {
+ next if $self->_ponder_for($para,$curr_open,$paras);
- if(grep $_->[1]{'~ignore'}, @$curr_open) {
- DEBUG > 1 and print "Ignoring ignorable =for\n";
- next;
- }
+ } elsif($para_type eq '=begin') {
+ next if $self->_ponder_begin($para,$curr_open,$paras);
- for(my $i = 2; $i < @$para; ++$i) {
- if($para->[$i] =~ s/^\s*(\S+)\s*//s) {
- $target = $1;
- last;
- }
- }
- unless(defined $target) {
- $self->whine(
- $para->[1]{'start_line'},
- "=for without a target?"
- );
- next;
- }
- DEBUG > 1 and
- print "Faking out a =for $target as a =begin $target / =end $target\n";
-
- $para->[0] = 'Data';
-
- unshift @$paras,
- ['=begin',
- {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
- $target,
- ],
- $para,
- ['=end',
- {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
- $target,
- ],
- ;
-
- next;
-
- } elsif($para_type eq '=begin') { #///////////////////////////////////////
+ } elsif($para_type eq '=end') {
+ next if $self->_ponder_end($para,$curr_open,$paras);
- my $content = join ' ', splice @$para, 2;
- $content =~ s/^\s+//s;
- $content =~ s/\s+$//s;
- unless(length($content)) {
- $self->whine(
- $para->[1]{'start_line'},
- "=begin without a target?"
- );
- DEBUG and print "Ignoring targetless =begin\n";
- next;
- }
-
- unless($content =~ m/^\S+$/s) { # i.e., unless it's one word
- $self->whine(
- $para->[1]{'start_line'},
- "'=begin' only takes one parameter, not several as in '=begin $content'"
- );
- DEBUG and print "Ignoring unintelligible =begin $content\n";
- next;
- }
-
-
- $para->[1]{'target'} = $content; # without any ':'
-
- $content =~ s/^:!/!:/s;
- my $neg; # whether this is a negation-match
- $neg = 1 if $content =~ s/^!//s;
- my $to_resolve; # whether to process formatting codes
- $to_resolve = 1 if $content =~ s/^://s;
-
- my $dont_ignore; # whether this target matches us
-
- foreach my $target_name (
- split(',', $content, -1),
- $neg ? () : '*'
- ) {
- DEBUG > 2 and
- print " Considering whether =begin $content matches $target_name\n";
- next unless $self->{'accept_targets'}{$target_name};
-
- DEBUG > 2 and
- print " It DOES match the acceptable target $target_name!\n";
- $to_resolve = 1
- if $self->{'accept_targets'}{$target_name} eq 'force_resolve';
- $dont_ignore = 1;
- $para->[1]{'target_matching'} = $target_name;
- last; # stop looking at other target names
- }
-
- if($neg) {
- if( $dont_ignore ) {
- $dont_ignore = '';
- delete $para->[1]{'target_matching'};
- DEBUG > 2 and print " But the leading ! means that this is a NON-match!\n";
- } else {
- $dont_ignore = 1;
- $para->[1]{'target_matching'} = '!';
- DEBUG > 2 and print " But the leading ! means that this IS a match!\n";
- }
- }
-
- $para->[0] = '=for'; # Just what we happen to call these, internally
- $para->[1]{'~really'} ||= '=begin';
- $para->[1]{'~ignore'} = (! $dont_ignore) || 0;
- $para->[1]{'~resolve'} = $to_resolve || 0;
-
- DEBUG > 1 and print " Making note to ", $dont_ignore ? 'not ' : '',
- "ignore contents of this region\n";
- DEBUG > 1 and $dont_ignore and print " Making note to treat contents as ",
- ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n";
- DEBUG > 1 and print " (Stack now: ", $self->_dump_curr_open(), ")\n";
-
- push @$curr_open, $para;
- if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) {
- DEBUG > 1 and print "Ignoring ignorable =begin\n";
- } else {
- $self->{'content_seen'} ||= 1;
- $self->_handle_element_start(($scratch='for'), $para->[1]);
- }
-
- next;
-
- } elsif($para_type eq '=end') { #/////////////////////////////////////////
-
- my $content = join ' ', splice @$para, 2;
- $content =~ s/^\s+//s;
- $content =~ s/\s+$//s;
- DEBUG and print "Ogling '=end $content' directive\n";
-
- unless(length($content)) {
- $self->whine(
- $para->[1]{'start_line'},
- "'=end' without a target?" . (
- ( @$curr_open and $curr_open->[-1][0] eq '=for' )
- ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' )
- : ''
- )
- );
- DEBUG and print "Ignoring targetless =end\n";
- next;
- }
-
- unless($content =~ m/^\S+$/) { # i.e., unless it's one word
- $self->whine(
- $para->[1]{'start_line'},
- "'=end $content' is invalid. (Stack: "
- . $self->_dump_curr_open() . ')'
- );
- DEBUG and print "Ignoring mistargetted =end $content\n";
- next;
- }
-
- unless(@$curr_open and $curr_open->[-1][0] eq '=for') {
- $self->whine(
- $para->[1]{'start_line'},
- "=end $content without matching =begin. (Stack: "
- . $self->_dump_curr_open() . ')'
- );
- DEBUG and print "Ignoring mistargetted =end $content\n";
- next;
- }
-
- unless($content eq $curr_open->[-1][1]{'target'}) {
- $self->whine(
- $para->[1]{'start_line'},
- "=end $content doesn't match =begin "
- . $curr_open->[-1][1]{'target'}
- . ". (Stack: "
- . $self->_dump_curr_open() . ')'
- );
- DEBUG and print "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n";
- next;
- }
-
- # Else it's okay to close...
- if(grep $_->[1]{'~ignore'}, @$curr_open) {
- DEBUG > 1 and print "Not firing any event for this =end $content because in an ignored region\n";
- # And that may be because of this to-be-closed =for region, or some
- # other one, but it doesn't matter.
- } else {
- $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'};
- # what's that for?
-
- $self->{'content_seen'} ||= 1;
- $self->_handle_element_end( $scratch = 'for' );
- }
- DEBUG > 1 and print "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n";
- pop @$curr_open;
-
- next;
-
- } elsif($para_type eq '~end') { #/////////////////////////////////////////
- # The virtual end-document signal
-
- if(@$curr_open) { # Deal with things left open
- DEBUG and print "Stack is nonempty at end-document: (",
- $self->_dump_curr_open(), ")\n";
-
- DEBUG > 9 and print "Stack: ", pretty($curr_open), "\n";
- unshift @$paras, $self->_closers_for_all_curr_open;
- # Make sure there is exactly one ~end in the parastack, at the end:
- @$paras = grep $_->[0] ne '~end', @$paras;
- push @$paras, $para, $para;
- # We need two -- once for the next cycle where we
- # generate errata, and then another to be at the end
- # when that loop back around to process the errata.
- next;
-
- } else {
- DEBUG and print "Okay, stack is empty now.\n";
- }
-
- # Try generating errata section, if applicable
- unless($self->{'~tried_gen_errata'}) {
- $self->{'~tried_gen_errata'} = 1;
- my @extras = $self->_gen_errata();
- if(@extras) {
- unshift @$paras, @extras;
- DEBUG and print "Generated errata... relooping...\n";
- next; # I.e., loop around again to process these fake-o paragraphs
- }
- }
-
- splice @$paras; # Well, that's that for this paragraph buffer.
- DEBUG and print "Throwing end-document event.\n";
-
- $self->_handle_element_end( $scratch = 'Document' );
- next; # Hasta la byebye
+ } elsif($para_type eq '~end') { # The virtual end-document signal
+ next if $self->_ponder_doc_end($para,$curr_open,$paras);
}
@@ -769,97 +549,17 @@
#~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
- if($para_type eq '=pod') { #//////////////////////////////////////////////
- $self->whine(
- $para->[1]{'start_line'},
- "=pod directives shouldn't be over one line long! Ignoring all "
- . (@$para - 2) . " lines of content"
- ) if @$para > 3;
- # Content is always ignored.
-
+ if($para_type eq '=pod') {
+ $self->_ponder_pod($para,$curr_open,$paras);
- } elsif($para_type eq '=over') { #////////////////////////////////////////
- next unless @$paras;
- my $list_type;
+ } elsif($para_type eq '=over') {
+ next if $self->_ponder_over($para,$curr_open,$paras);
- if($paras->[0][0] eq '=item') { # most common case
- $list_type = $self->_get_initial_item_type($paras->[0]);
+ } elsif($para_type eq '=back') {
+ next if $self->_ponder_back($para,$curr_open,$paras);
- } elsif($paras->[0][0] eq '=back') {
- # Ignore empty lists. TODO: make this an option?
- shift @$paras;
- next;
-
- } elsif($paras->[0][0] eq '~end') {
- $self->whine(
- $para->[1]{'start_line'},
- "=over is the last thing in the document?!"
- );
- next; # But feh, ignore it.
- } else {
- $list_type = 'block';
- }
- $para->[1]{'~type'} = $list_type;
- push @$curr_open, $para;
- # yes, we reuse the paragraph as a stack item
-
- my $content = join ' ', splice @$para, 2;
- my $overness;
- if($content =~ m/^\s*$/s) {
- $para->[1]{'indent'} = 4;
- } elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) {
- no integer;
- $para->[1]{'indent'} = $1;
- if($1 == 0) {
- $self->whine(
- $para->[1]{'start_line'},
- "Can't have a 0 in =over $content"
- );
- $para->[1]{'indent'} = 4;
- }
- } else {
- $self->whine(
- $para->[1]{'start_line'},
- "=over should be: '=over' or '=over positive_number'"
- );
- $para->[1]{'indent'} = 4;
- }
- DEBUG > 1 and print "=over found of type $list_type\n";
-
- $self->{'content_seen'} ||= 1;
- $self->_handle_element_start(($scratch = 'over-' . $list_type), $para->[1]);
-
- } elsif($para_type eq '=back') { #////////////////////////////////////////
+ } else {
- # TODO: fire off or or ??
-
- my $content = join ' ', splice @$para, 2;
- if($content =~ m/\S/) {
- $self->whine(
- $para->[1]{'start_line'},
- "=back doesn't take any parameters, but you said =back $content"
- );
- }
-
- if(@$curr_open and $curr_open->[-1][0] eq '=over') {
- DEBUG > 1 and print "=back happily closes matching =over\n";
- # Expected case: we're closing the most recently opened thing
- #my $over = pop @$curr_open;
- $self->{'content_seen'} ||= 1;
- $self->_handle_element_end( $scratch =
- 'over-' . ( (pop @$curr_open)->[1]{'~type'} )
- );
- } else {
- DEBUG > 1 and print "=back found without a matching =over. Stack: (",
- join(', ', map $_->[0], @$curr_open), ").\n";
- $self->whine(
- $para->[1]{'start_line'},
- '=back without =over'
- );
- next; # and ignore it
- }
-
- } else { #////////////////////////////////////////////////////////////////
# All non-magical codes!!!
# Here we start using $para_type for our own twisted purposes, to
@@ -1123,54 +823,11 @@
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if($para_type eq 'Plain') {
- DEBUG and print " giving plain treatment...\n";
- unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' )
- or $para->[1]{'~cooked'}
- ) {
- push @$para,
- @{$self->_make_treelet(
- join("\n", splice(@$para, 2)),
- $para->[1]{'start_line'}
- )};
- }
- # Empty paragraphs don't need a treelet for any reason I can see.
- # And precooked paragraphs already have a treelet.
-
+ $self->_ponder_Plain($para);
} elsif($para_type eq 'Verbatim') {
- DEBUG and print " giving verbatim treatment...\n";
-
- $para->[1]{'xml:space'} = 'preserve';
- for($i = 2; $i < @$para; $i++) {
- foreach my $line ($para->[$i]) { # just for aliasing
- while( $line =~
- # Sort of adapted from Text::Tabs -- yes, it's hardwired in that
- # tabs are at every EIGHTH column. For portability, it has to be
- # one setting everywhere, and 8th wins.
- s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e
- ) {}
-
- # TODO: whinge about (or otherwise treat) unindented or overlong lines
-
- }
- }
-
- # Now the VerbatimFormatted hoodoo...
- if( $self->{'accept_codes'} and
- $self->{'accept_codes'}{'VerbatimFormatted'}
- ) {
- while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para }
- # Kill any number of terminal newlines
- $self->_verbatim_format($para);
- } else {
- push @$para, join "\n", splice(@$para, 2) if @$para > 3;
- $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
- }
-
+ $self->_ponder_Verbatim($para);
} elsif($para_type eq 'Data') {
- DEBUG and print " giving data treatment...\n";
- $para->[1]{'xml:space'} = 'preserve';
- push @$para, join "\n", splice(@$para, 2) if @$para > 3;
-
+ $self->_ponder_Data($para);
} else {
die "\$para type is $para_type -- how did that happen?";
# Shouldn't happen.
@@ -1190,6 +847,576 @@
return;
}
+###########################################################################
+# The sub-ponderers...
+
+
+
+sub _ponder_for {
+ my ($self,$para,$curr_open,$paras) = @_;
+
+ # Fake it out as a begin/end
+ my $target;
+
+ if(grep $_->[1]{'~ignore'}, @$curr_open) {
+ DEBUG > 1 and print "Ignoring ignorable =for\n";
+ return 1;
+ }
+
+ for(my $i = 2; $i < @$para; ++$i) {
+ if($para->[$i] =~ s/^\s*(\S+)\s*//s) {
+ $target = $1;
+ last;
+ }
+ }
+ unless(defined $target) {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "=for without a target?"
+ );
+ return 1;
+ }
+ DEBUG > 1 and
+ print "Faking out a =for $target as a =begin $target / =end $target\n";
+
+ $para->[0] = 'Data';
+
+ unshift @$paras,
+ ['=begin',
+ {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
+ $target,
+ ],
+ $para,
+ ['=end',
+ {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
+ $target,
+ ],
+ ;
+
+ return 1;
+}
+
+sub _ponder_begin {
+ my ($self,$para,$curr_open,$paras) = @_;
+ my $content = join ' ', splice @$para, 2;
+ $content =~ s/^\s+//s;
+ $content =~ s/\s+$//s;
+ unless(length($content)) {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "=begin without a target?"
+ );
+ DEBUG and print "Ignoring targetless =begin\n";
+ return 1;
+ }
+
+ my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/;
+ $para->[1]{'title'} = $title if ($title);
+ $para->[1]{'target'} = $target; # without any ':'
+ $content = $target; # strip off the title
+
+ $content =~ s/^:!/!:/s;
+ my $neg; # whether this is a negation-match
+ $neg = 1 if $content =~ s/^!//s;
+ my $to_resolve; # whether to process formatting codes
+ $to_resolve = 1 if $content =~ s/^://s;
+
+ my $dont_ignore; # whether this target matches us
+
+ foreach my $target_name (
+ split(',', $content, -1),
+ $neg ? () : '*'
+ ) {
+ DEBUG > 2 and
+ print " Considering whether =begin $content matches $target_name\n";
+ next unless $self->{'accept_targets'}{$target_name};
+
+ DEBUG > 2 and
+ print " It DOES match the acceptable target $target_name!\n";
+ $to_resolve = 1
+ if $self->{'accept_targets'}{$target_name} eq 'force_resolve';
+ $dont_ignore = 1;
+ $para->[1]{'target_matching'} = $target_name;
+ last; # stop looking at other target names
+ }
+
+ if($neg) {
+ if( $dont_ignore ) {
+ $dont_ignore = '';
+ delete $para->[1]{'target_matching'};
+ DEBUG > 2 and print " But the leading ! means that this is a NON-match!\n";
+ } else {
+ $dont_ignore = 1;
+ $para->[1]{'target_matching'} = '!';
+ DEBUG > 2 and print " But the leading ! means that this IS a match!\n";
+ }
+ }
+
+ $para->[0] = '=for'; # Just what we happen to call these, internally
+ $para->[1]{'~really'} ||= '=begin';
+ $para->[1]{'~ignore'} = (! $dont_ignore) || 0;
+ $para->[1]{'~resolve'} = $to_resolve || 0;
+
+ DEBUG > 1 and print " Making note to ", $dont_ignore ? 'not ' : '',
+ "ignore contents of this region\n";
+ DEBUG > 1 and $dont_ignore and print " Making note to treat contents as ",
+ ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n";
+ DEBUG > 1 and print " (Stack now: ", $self->_dump_curr_open(), ")\n";
+
+ push @$curr_open, $para;
+ if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) {
+ DEBUG > 1 and print "Ignoring ignorable =begin\n";
+ } else {
+ $self->{'content_seen'} ||= 1;
+ $self->_handle_element_start((my $scratch='for'), $para->[1]);
+ }
+
+ return 1;
+}
+
+sub _ponder_end {
+ my ($self,$para,$curr_open,$paras) = @_;
+ my $content = join ' ', splice @$para, 2;
+ $content =~ s/^\s+//s;
+ $content =~ s/\s+$//s;
+ DEBUG and print "Ogling '=end $content' directive\n";
+
+ unless(length($content)) {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "'=end' without a target?" . (
+ ( @$curr_open and $curr_open->[-1][0] eq '=for' )
+ ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' )
+ : ''
+ )
+ );
+ DEBUG and print "Ignoring targetless =end\n";
+ return 1;
+ }
+
+ unless($content =~ m/^\S+$/) { # i.e., unless it's one word
+ $self->whine(
+ $para->[1]{'start_line'},
+ "'=end $content' is invalid. (Stack: "
+ . $self->_dump_curr_open() . ')'
+ );
+ DEBUG and print "Ignoring mistargetted =end $content\n";
+ return 1;
+ }
+
+ unless(@$curr_open and $curr_open->[-1][0] eq '=for') {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "=end $content without matching =begin. (Stack: "
+ . $self->_dump_curr_open() . ')'
+ );
+ DEBUG and print "Ignoring mistargetted =end $content\n";
+ return 1;
+ }
+
+ unless($content eq $curr_open->[-1][1]{'target'}) {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "=end $content doesn't match =begin "
+ . $curr_open->[-1][1]{'target'}
+ . ". (Stack: "
+ . $self->_dump_curr_open() . ')'
+ );
+ DEBUG and print "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n";
+ return 1;
+ }
+
+ # Else it's okay to close...
+ if(grep $_->[1]{'~ignore'}, @$curr_open) {
+ DEBUG > 1 and print "Not firing any event for this =end $content because in an ignored region\n";
+ # And that may be because of this to-be-closed =for region, or some
+ # other one, but it doesn't matter.
+ } else {
+ $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'};
+ # what's that for?
+
+ $self->{'content_seen'} ||= 1;
+ $self->_handle_element_end( my $scratch = 'for' );
+ }
+ DEBUG > 1 and print "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n";
+ pop @$curr_open;
+
+ return 1;
+}
+
+sub _ponder_doc_end {
+ my ($self,$para,$curr_open,$paras) = @_;
+ if(@$curr_open) { # Deal with things left open
+ DEBUG and print "Stack is nonempty at end-document: (",
+ $self->_dump_curr_open(), ")\n";
+
+ DEBUG > 9 and print "Stack: ", pretty($curr_open), "\n";
+ unshift @$paras, $self->_closers_for_all_curr_open;
+ # Make sure there is exactly one ~end in the parastack, at the end:
+ @$paras = grep $_->[0] ne '~end', @$paras;
+ push @$paras, $para, $para;
+ # We need two -- once for the next cycle where we
+ # generate errata, and then another to be at the end
+ # when that loop back around to process the errata.
+ return 1;
+
+ } else {
+ DEBUG and print "Okay, stack is empty now.\n";
+ }
+
+ # Try generating errata section, if applicable
+ unless($self->{'~tried_gen_errata'}) {
+ $self->{'~tried_gen_errata'} = 1;
+ my @extras = $self->_gen_errata();
+ if(@extras) {
+ unshift @$paras, @extras;
+ DEBUG and print "Generated errata... relooping...\n";
+ return 1; # I.e., loop around again to process these fake-o paragraphs
+ }
+ }
+
+ splice @$paras; # Well, that's that for this paragraph buffer.
+ DEBUG and print "Throwing end-document event.\n";
+
+ $self->_handle_element_end( my $scratch = 'Document' );
+ return 1; # Hasta la byebye
+}
+
+sub _ponder_pod {
+ my ($self,$para,$curr_open,$paras) = @_;
+ $self->whine(
+ $para->[1]{'start_line'},
+ "=pod directives shouldn't be over one line long! Ignoring all "
+ . (@$para - 2) . " lines of content"
+ ) if @$para > 3;
+ # Content is always ignored.
+ return;
+}
+
+sub _ponder_over {
+ my ($self,$para,$curr_open,$paras) = @_;
+ return 1 unless @$paras;
+ my $list_type;
+
+ if($paras->[0][0] eq '=item') { # most common case
+ $list_type = $self->_get_initial_item_type($paras->[0]);
+
+ } elsif($paras->[0][0] eq '=back') {
+ # Ignore empty lists. TODO: make this an option?
+ shift @$paras;
+ return 1;
+
+ } elsif($paras->[0][0] eq '~end') {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "=over is the last thing in the document?!"
+ );
+ return 1; # But feh, ignore it.
+ } else {
+ $list_type = 'block';
+ }
+ $para->[1]{'~type'} = $list_type;
+ push @$curr_open, $para;
+ # yes, we reuse the paragraph as a stack item
+
+ my $content = join ' ', splice @$para, 2;
+ my $overness;
+ if($content =~ m/^\s*$/s) {
+ $para->[1]{'indent'} = 4;
+ } elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) {
+ no integer;
+ $para->[1]{'indent'} = $1;
+ if($1 == 0) {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "Can't have a 0 in =over $content"
+ );
+ $para->[1]{'indent'} = 4;
+ }
+ } else {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "=over should be: '=over' or '=over positive_number'"
+ );
+ $para->[1]{'indent'} = 4;
+ }
+ DEBUG > 1 and print "=over found of type $list_type\n";
+
+ $self->{'content_seen'} ||= 1;
+ $self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]);
+
+ return;
+}
+
+sub _ponder_back {
+ my ($self,$para,$curr_open,$paras) = @_;
+ # TODO: fire off or or ??
+
+ my $content = join ' ', splice @$para, 2;
+ if($content =~ m/\S/) {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "=back doesn't take any parameters, but you said =back $content"
+ );
+ }
+
+ if(@$curr_open and $curr_open->[-1][0] eq '=over') {
+ DEBUG > 1 and print "=back happily closes matching =over\n";
+ # Expected case: we're closing the most recently opened thing
+ #my $over = pop @$curr_open;
+ $self->{'content_seen'} ||= 1;
+ $self->_handle_element_end( my $scratch =
+ 'over-' . ( (pop @$curr_open)->[1]{'~type'} )
+ );
+ } else {
+ DEBUG > 1 and print "=back found without a matching =over. Stack: (",
+ join(', ', map $_->[0], @$curr_open), ").\n";
+ $self->whine(
+ $para->[1]{'start_line'},
+ '=back without =over'
+ );
+ return 1; # and ignore it
+ }
+}
+
+sub _ponder_item {
+ my ($self,$para,$curr_open,$paras) = @_;
+ my $over;
+ unless(@$curr_open and ($over = $curr_open->[-1])->[0] eq '=over') {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "'=item' outside of any '=over'"
+ );
+ unshift @$paras,
+ ['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
+ $para
+ ;
+ return 1;
+ }
+
+
+ my $over_type = $over->[1]{'~type'};
+
+ if(!$over_type) {
+ # Shouldn't happen1
+ die "Typeless over in stack, starting at line "
+ . $over->[1]{'start_line'};
+
+ } elsif($over_type eq 'block') {
+ unless($curr_open->[-1][1]{'~bitched_about'}) {
+ $curr_open->[-1][1]{'~bitched_about'} = 1;
+ $self->whine(
+ $curr_open->[-1][1]{'start_line'},
+ "You can't have =items (as at line "
+ . $para->[1]{'start_line'}
+ . ") unless the first thing after the =over is an =item"
+ );
+ }
+ # Just turn it into a paragraph and reconsider it
+ $para->[0] = '~Para';
+ unshift @$paras, $para;
+ return 1;
+
+ } elsif($over_type eq 'text') {
+ my $item_type = $self->_get_item_type($para);
+ # That kills the content of the item if it's a number or bullet.
+ DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
+
+ if($item_type eq 'text') {
+ # Nothing special needs doing for 'text'
+ } elsif($item_type eq 'number' or $item_type eq 'bullet') {
+ die "Unknown item type $item_type"
+ unless $item_type eq 'number' or $item_type eq 'bullet';
+ # Undo our clobbering:
+ push @$para, $para->[1]{'~orig_content'};
+ delete $para->[1]{'number'};
+ # Only a PROPER item-number element is allowed
+ # to have a number attribute.
+ } else {
+ die "Unhandled item type $item_type"; # should never happen
+ }
+
+ # =item-text thingies don't need any assimilation, it seems.
+
+ } elsif($over_type eq 'number') {
+ my $item_type = $self->_get_item_type($para);
+ # That kills the content of the item if it's a number or bullet.
+ DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
+
+ my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
+
+ if($item_type eq 'bullet') {
+ # Hm, it's not numeric. Correct for this.
+ $para->[1]{'number'} = $expected_value;
+ $self->whine(
+ $para->[1]{'start_line'},
+ "Expected '=item $expected_value'"
+ );
+ push @$para, $para->[1]{'~orig_content'};
+ # restore the bullet, blocking the assimilation of next para
+
+ } elsif($item_type eq 'text') {
+ # Hm, it's not numeric. Correct for this.
+ $para->[1]{'number'} = $expected_value;
+ $self->whine(
+ $para->[1]{'start_line'},
+ "Expected '=item $expected_value'"
+ );
+ # Text content will still be there and will block next ~Para
+
+ } elsif($item_type ne 'number') {
+ die "Unknown item type $item_type"; # should never happen
+
+ } elsif($expected_value == $para->[1]{'number'}) {
+ DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n";
+
+ } else {
+ DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'},
+ " instead of the expected value of $expected_value\n";
+ $self->whine(
+ $para->[1]{'start_line'},
+ "You have '=item " . $para->[1]{'number'} .
+ "' instead of the expected '=item $expected_value'"
+ );
+ $para->[1]{'number'} = $expected_value; # correcting!!
+ }
+
+ if(@$para == 2) {
+ # For the cases where we /didn't/ push to @$para
+ if($paras->[0][0] eq '~Para') {
+ DEBUG and print "Assimilating following ~Para content into $over_type item\n";
+ push @$para, splice @{shift @$paras},2;
+ } else {
+ DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
+ push @$para, ''; # Just so it's not contentless
+ }
+ }
+
+
+ } elsif($over_type eq 'bullet') {
+ my $item_type = $self->_get_item_type($para);
+ # That kills the content of the item if it's a number or bullet.
+ DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
+
+ if($item_type eq 'bullet') {
+ # as expected!
+
+ if( $para->[1]{'~_freaky_para_hack'} ) {
+ DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n";
+ push @$para, delete $para->[1]{'~_freaky_para_hack'};
+ }
+
+ } elsif($item_type eq 'number') {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "Expected '=item *'"
+ );
+ push @$para, $para->[1]{'~orig_content'};
+ # and block assimilation of the next paragraph
+ delete $para->[1]{'number'};
+ # Only a PROPER item-number element is allowed
+ # to have a number attribute.
+ } elsif($item_type eq 'text') {
+ $self->whine(
+ $para->[1]{'start_line'},
+ "Expected '=item *'"
+ );
+ # But doesn't need processing. But it'll block assimilation
+ # of the next para.
+ } else {
+ die "Unhandled item type $item_type"; # should never happen
+ }
+
+ if(@$para == 2) {
+ # For the cases where we /didn't/ push to @$para
+ if($paras->[0][0] eq '~Para') {
+ DEBUG and print "Assimilating following ~Para content into $over_type item\n";
+ push @$para, splice @{shift @$paras},2;
+ } else {
+ DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
+ push @$para, ''; # Just so it's not contentless
+ }
+ }
+
+ } else {
+ die "Unhandled =over type \"$over_type\"?";
+ # Shouldn't happen!
+ }
+ $para->[0] .= '-' . $over_type;
+
+ return;
+}
+
+sub _ponder_Plain {
+ my ($self,$para) = @_;
+ DEBUG and print " giving plain treatment...\n";
+ unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' )
+ or $para->[1]{'~cooked'}
+ ) {
+ push @$para,
+ @{$self->_make_treelet(
+ join("\n", splice(@$para, 2)),
+ $para->[1]{'start_line'}
+ )};
+ }
+ # Empty paragraphs don't need a treelet for any reason I can see.
+ # And precooked paragraphs already have a treelet.
+ return;
+}
+
+sub _ponder_Verbatim {
+ my ($self,$para) = @_;
+ DEBUG and print " giving verbatim treatment...\n";
+
+ $para->[1]{'xml:space'} = 'preserve';
+ for(my $i = 2; $i < @$para; $i++) {
+ foreach my $line ($para->[$i]) { # just for aliasing
+ while( $line =~
+ # Sort of adapted from Text::Tabs -- yes, it's hardwired in that
+ # tabs are at every EIGHTH column. For portability, it has to be
+ # one setting everywhere, and 8th wins.
+ s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e
+ ) {}
+
+ # TODO: whinge about (or otherwise treat) unindented or overlong lines
+
+ }
+ }
+
+ # Now the VerbatimFormatted hoodoo...
+ if( $self->{'accept_codes'} and
+ $self->{'accept_codes'}{'VerbatimFormatted'}
+ ) {
+ while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para }
+ # Kill any number of terminal newlines
+ $self->_verbatim_format($para);
+ } elsif ($self->{'codes_in_verbatim'}) {
+ push @$para,
+ @{$self->_make_treelet(
+ join("\n", splice(@$para, 2)),
+ $para->[1]{'start_line'}, $para->[1]{'xml:space'}
+ )};
+ $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
+ } else {
+ push @$para, join "\n", splice(@$para, 2) if @$para > 3;
+ $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
+ }
+ return;
+}
+
+sub _ponder_Data {
+ my ($self,$para) = @_;
+ DEBUG and print " giving data treatment...\n";
+ $para->[1]{'xml:space'} = 'preserve';
+ push @$para, join "\n", splice(@$para, 2) if @$para > 3;
+ return;
+}
+
+
+
+
+###########################################################################
+
sub _traverse_treelet_bit { # for use only by the routine above
my($self, $name) = splice @_,0,2;
@@ -1382,13 +1609,18 @@
# "!"
# ]
- my($self, $para, $start_line) = @_;
+ my($self, $para, $start_line, $preserve_space) = @_;
+
my $treelet = ['~Top', {'start_line' => $start_line},];
- $para =~ s/\s+/ /g; # collapse and trim all whitespace first.
- $para =~ s/ $//g;
- $para =~ s/^ //g;
+ unless ($preserve_space || $self->{'preserve_whitespace'}) {
+ $para =~ s/\. /\.\xA0 /g if $self->{'fullstop_space_harden'};
+ $para =~ s/\s+/ /g; # collapse and trim all whitespace first.
+ $para =~ s/ $//;
+ $para =~ s/^ //;
+ }
+
# Only apparent problem the above code is that N<< >> turns into
# N<< >>. But then, word wrapping does that too! So don't do that!
@@ -1396,26 +1628,57 @@
my @lineage = ($treelet);
DEBUG > 4 and print "Paragraph:\n$para\n\n";
-
- while($para =~ # Here begins our frightening tokenizer RE.
+
+ # Here begins our frightening tokenizer RE. The following regex matches
+ # text in four main parts:
+ #
+ # * Start-codes. The first alternative matches C< or C<<, the latter
+ # followed by some whitespace. $1 will hold the entire start code
+ # (including any space following a multiple-angle-bracket delimiter),
+ # and $2 will hold only the additional brackets past the first in a
+ # multiple-bracket delimiter. length($2) + 1 will be the number of
+ # closing brackets we have to find.
+ #
+ # * Closing brackets. Match some amount of whitespace followed by
+ # multiple close brackets. The logic to see if this closes anything
+ # is down below. Note that in order to parse C<< >> correctly, we
+ # have to use look-behind (?<=\s\s), since the match of the starting
+ # code will have consumed the whitespace.
+ #
+ # * A single closing bracket, to close a simple code like C<>.
+ #
+ # * Something that isn't a start or end code. We have to be careful
+ # about accepting whitespace, since perlpodspec says that any whitespace
+ # before a multiple-bracket closing delimiter should be ignored.
+ #
+ while($para =~
m/\G
(?:
- ([A-Z]<(<+\ )?) # that's $1 and $2 for both kinds of start-codes
+ # Match starting codes, including the whitespace following a
+ # multiple-delimiter start code. $1 gets the whole start code and
+ # $2 gets all but one of the {2,}) # $3: end-codes of the type " >>", " >>>", etc.
+ # Match multiple-bracket end codes. $3 gets the whitespace that
+ # should be discarded before an end bracket but kept in other cases
+ # and $4 gets the end brackets themselves.
+ (\s+|(?<=\s\s))(>{2,})
|
- (\ ?>) # $4: simple end-codes
+ (\s?>) # $5: simple end-codes
|
- ( # $5: stuff containing no start-codes or end-codes
+ ( # $6: stuff containing no start-codes or end-codes
(?:
- [^A-Z\ >]+
+ [^A-Z\s>]
|
(?:
[A-Z](?!<)
)
|
+ # whitespace is ok, but we don't want to eat the whitespace before
+ # a multiple-bracket end code.
+ # NOTE: we may still have problems with e.g. S<< >>
(?:
- \ (?!>)
+ \s(?!\s*>{2,})
)
)+
)
@@ -1426,7 +1689,7 @@
if(defined $1) {
if(defined $2) {
DEBUG > 3 and print "Found complex start-text code \"$1\"\n";
- push @stack, length($1) - 1;
+ push @stack, length($2) + 1;
# length of the necessary complex end-code string
} else {
DEBUG > 3 and print "Found simple start-text code \"$1\"\n";
@@ -1435,48 +1698,48 @@
push @lineage, [ substr($1,0,1), {}, ]; # new node object
push @{ $lineage[-2] }, $lineage[-1];
- } elsif(defined $3) {
- DEBUG > 3 and print "Found apparent complex end-text code \"$3\"\n";
+ } elsif(defined $4) {
+ DEBUG > 3 and print "Found apparent complex end-text code \"$3$4\"\n";
# This is where it gets messy...
if(! @stack) {
# We saw " >>>>" but needed nothing. This is ALL just stuff then.
DEBUG > 4 and print " But it's really just stuff.\n";
- push @{ $lineage[-1] }, $3;
+ push @{ $lineage[-1] }, $3, $4;
next;
} elsif(!$stack[-1]) {
# We saw " >>>>" but needed only ">". Back pos up.
DEBUG > 4 and print " And that's more than we needed to close simple.\n";
- push @{ $lineage[-1] }, ' '; # That was a for-real space, too.
- pos($para) = pos($para) - length($3) + 2;
- } elsif($stack[-1] == length($3)) {
+ push @{ $lineage[-1] }, $3; # That was a for-real space, too.
+ pos($para) = pos($para) - length($4) + 1;
+ } elsif($stack[-1] == length($4)) {
# We found " >>>>", and it was exactly what we needed. Commonest case.
DEBUG > 4 and print " And that's exactly what we needed to close complex.\n";
- } elsif($stack[-1] < length($3)) {
+ } elsif($stack[-1] < length($4)) {
# We saw " >>>>" but needed only " >>". Back pos up.
DEBUG > 4 and print " And that's more than we needed to close complex.\n";
- pos($para) = pos($para) - length($3) + $stack[-1];
+ pos($para) = pos($para) - length($4) + $stack[-1];
} else {
# We saw " >>>>" but needed " >>>>>>". So this is all just stuff!
DEBUG > 4 and print " But it's really just stuff, because we needed more.\n";
- push @{ $lineage[-1] }, $3;
+ push @{ $lineage[-1] }, $3, $4;
next;
}
#print "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n";
-
+
push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
# Keep the element from being childless
pop @stack;
pop @lineage;
- } elsif(defined $4) {
+ } elsif(defined $5) {
DEBUG > 3 and print "Found apparent simple end-text code \"$4\"\n";
if(@stack and ! $stack[-1]) {
# We're indeed expecting a simple end-code
DEBUG > 4 and print " It's indeed an end-code.\n";
- if(length($4) == 2) { # There was a space there: " >"
+ if(length($5) == 2) { # There was a space there: " >"
push @{ $lineage[-1] }, ' ';
} elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element
push @{ $lineage[-1] }, ''; # keep it from being really childless
@@ -1486,12 +1749,12 @@
pop @lineage;
} else {
DEBUG > 4 and print " It's just stuff.\n";
- push @{ $lineage[-1] }, $4;
+ push @{ $lineage[-1] }, $5;
}
- } elsif(defined $5) {
- DEBUG > 3 and print "Found stuff \"$5\"\n";
- push @{ $lineage[-1] }, $5;
+ } elsif(defined $6) {
+ DEBUG > 3 and print "Found stuff \"$6\"\n";
+ push @{ $lineage[-1] }, $6;
} else {
# should never ever ever ever happen
@@ -1634,5 +1897,23 @@
}
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+# A rather unsubtle method of blowing away all the state information
+# from a parser object so it can be reused. Provided as a utility for
+# backward compatibilty in Pod::Man, etc. but not recommended for
+# general use.
+
+sub reinit {
+ my $self = shift;
+ foreach (qw(source_dead source_filename doc_has_started
+start_of_pod_block content_seen last_was_blank paras curr_open
+line_count pod_para_count in_pod ~tried_gen_errata errata errors_seen
+Title)) {
+
+ delete $self->{$_};
+ }
+}
+
+#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1;
Index: lib/Pod/Escapes.pm
===================================================================
--- lib/Pod/Escapes.pm (revision 35086)
+++ lib/Pod/Escapes.pm (working copy)
@@ -1,11 +1,11 @@
require 5;
# The documentation is at the end.
-# Time-stamp: "2002-08-27 19:58:02 MDT"
+# Time-stamp: "2004-05-07 15:31:25 ADT"
package Pod::Escapes;
require Exporter;
@ISA = ('Exporter');
-$VERSION = '1.03';
+$VERSION = '1.04';
@EXPORT_OK = qw(
%Code2USASCII
%Name2character
@@ -44,7 +44,7 @@
# Convert to decimal:
if($in =~ m/^(0[0-7]*)$/s ) {
$in = oct $in;
- } elsif($in =~ m/^0x([0-9a-fA-F]+)$/s ) {
+ } elsif($in =~ m/^0?x([0-9a-fA-F]+)$/s ) {
$in = hex $1;
} # else it's decimal, or named
@@ -86,7 +86,7 @@
# Convert to decimal:
if($in =~ m/^(0[0-7]*)$/s ) {
$in = oct $in;
- } elsif($in =~ m/^0x([0-9a-fA-F]+)$/s ) {
+ } elsif($in =~ m/^0?x([0-9a-fA-F]+)$/s ) {
$in = hex $1;
} # else it's decimal, or named
@@ -649,7 +649,7 @@
=head1 COPYRIGHT AND DISCLAIMERS
-Copyright (c) 2001 Sean M. Burke. All rights reserved.
+Copyright (c) 2001-2004 Sean M. Burke. All rights reserved.
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
@@ -685,7 +685,7 @@
xhtml-lat1.ent
xhtml-special.ent
)) {
- open(IN, "<$dir$file") or die "can't read-open $dir$file: $!";
+ open(IN, "<", "$dir$file") or die "can't read-open $dir$file: $!";
print "Reading $file...\n";
while() {
if(m//) {
--- /dev/null 2009-01-07 14:55:01.000000000 +1100
+++ lib/Pod/Simple/Progress.pm 2009-01-07 13:36:39.000000000 +1100
@@ -0,0 +1,93 @@
+
+require 5;
+package Pod::Simple::Progress;
+$VERSION = "1.01";
+use strict;
+
+# Objects of this class are used for noting progress of an
+# operation every so often. Messages delivered more often than that
+# are suppressed.
+#
+# There's actually nothing in here that's specific to Pod processing;
+# but it's ad-hoc enough that I'm not willing to give it a name that
+# implies that it's generally useful, like "IO::Progress" or something.
+#
+# -- sburke
+#
+#--------------------------------------------------------------------------
+
+sub new {
+ my($class,$delay) = @_;
+ my $self = bless {'quiet_until' => 1}, ref($class) || $class;
+ $self->to(*STDOUT{IO});
+ $self->delay(defined($delay) ? $delay : 5);
+ return $self;
+}
+
+sub copy {
+ my $orig = shift;
+ bless {%$orig, 'quiet_until' => 1}, ref($orig);
+}
+#--------------------------------------------------------------------------
+
+sub reach {
+ my($self, $point, $note) = @_;
+ if( (my $now = time) >= $self->{'quiet_until'}) {
+ my $goal;
+ my $to = $self->{'to'};
+ print $to join('',
+ ($self->{'quiet_until'} == 1) ? () : '... ',
+ (defined $point) ? (
+ '#',
+ ($goal = $self->{'goal'}) ? (
+ ' ' x (length($goal) - length($point)),
+ $point, '/', $goal,
+ ) : $point,
+ $note ? ': ' : (),
+ ) : (),
+ $note || '',
+ "\n"
+ );
+ $self->{'quiet_until'} = $now + $self->{'delay'};
+ }
+ return $self;
+}
+
+#--------------------------------------------------------------------------
+
+sub done {
+ my($self, $note) = @_;
+ $self->{'quiet_until'} = 1;
+ return $self->reach( undef, $note );
+}
+
+#--------------------------------------------------------------------------
+# Simple accessors:
+
+sub delay {
+ return $_[0]{'delay'} if @_ == 1; $_[0]{'delay'} = $_[1]; return $_[0] }
+sub goal {
+ return $_[0]{'goal' } if @_ == 1; $_[0]{'goal' } = $_[1]; return $_[0] }
+sub to {
+ return $_[0]{'to' } if @_ == 1; $_[0]{'to' } = $_[1]; return $_[0] }
+
+#--------------------------------------------------------------------------
+
+unless(caller) { # Simple self-test:
+ my $p = __PACKAGE__->new->goal(5);
+ $p->reach(1, "Primus!");
+ sleep 1;
+ $p->reach(2, "Secundus!");
+ sleep 3;
+ $p->reach(3, "Tertius!");
+ sleep 5;
+ $p->reach(4);
+ $p->reach(5, "Quintus!");
+ sleep 1;
+ $p->done("All done");
+}
+
+#--------------------------------------------------------------------------
+1;
+__END__
+
--- /dev/null 2009-01-07 14:55:01.000000000 +1100
+++ lib/Pod/Simple/Search.pm 2009-01-07 13:41:31.000000000 +1100
@@ -0,0 +1,1016 @@
+
+require 5.005;
+package Pod::Simple::Search;
+use strict;
+
+use vars qw($VERSION $MAX_VERSION_WITHIN $SLEEPY);
+$VERSION = 3.04; ## Current version of this package
+
+BEGIN { *DEBUG = sub () {0} unless defined &DEBUG; } # set DEBUG level
+use Carp ();
+
+$SLEEPY = 1 if !defined $SLEEPY and $^O =~ /mswin|mac/i;
+ # flag to occasionally sleep for $SLEEPY - 1 seconds.
+
+$MAX_VERSION_WITHIN ||= 60;
+
+#############################################################################
+
+#use diagnostics;
+use File::Spec ();
+use File::Basename qw( basename );
+use Config ();
+use Cwd qw( cwd );
+
+#==========================================================================
+__PACKAGE__->_accessorize( # Make my dumb accessor methods
+ 'callback', 'progress', 'dir_prefix', 'inc', 'laborious', 'limit_glob',
+ 'limit_re', 'shadows', 'verbose', 'name2path', 'path2name',
+);
+#==========================================================================
+
+sub new {
+ my $class = shift;
+ my $self = bless {}, ref($class) || $class;
+ $self->init;
+ return $self;
+}
+
+sub init {
+ my $self = shift;
+ $self->inc(1);
+ $self->verbose(DEBUG);
+ return $self;
+}
+
+#--------------------------------------------------------------------------
+
+sub survey {
+ my($self, @search_dirs) = @_;
+ $self = $self->new unless ref $self; # tolerate being a class method
+
+ $self->_expand_inc( \@search_dirs );
+
+
+ $self->{'_scan_count'} = 0;
+ $self->{'_dirs_visited'} = {};
+ $self->path2name( {} );
+ $self->name2path( {} );
+ $self->limit_re( $self->_limit_glob_to_limit_re ) if $self->{'limit_glob'};
+ my $cwd = cwd();
+ my $verbose = $self->verbose;
+ local $_; # don't clobber the caller's $_ !
+
+ foreach my $try (@search_dirs) {
+ unless( File::Spec->file_name_is_absolute($try) ) {
+ # make path absolute
+ $try = File::Spec->catfile( $cwd ,$try);
+ }
+ # simplify path
+ $try = File::Spec->canonpath($try);
+
+ my $start_in;
+ my $modname_prefix;
+ if($self->{'dir_prefix'}) {
+ $start_in = File::Spec->catdir(
+ $try,
+ grep length($_), split '[\\/:]+', $self->{'dir_prefix'}
+ );
+ $modname_prefix = [grep length($_), split m{[:/\\]}, $self->{'dir_prefix'}];
+ $verbose and print "Appending \"$self->{'dir_prefix'}\" to $try, ",
+ "giving $start_in (= @$modname_prefix)\n";
+ } else {
+ $start_in = $try;
+ }
+
+ if( $self->{'_dirs_visited'}{$start_in} ) {
+ $verbose and print "Directory '$start_in' already seen, skipping.\n";
+ next;
+ } else {
+ $self->{'_dirs_visited'}{$start_in} = 1;
+ }
+
+ unless(-e $start_in) {
+ $verbose and print "Skipping non-existent $start_in\n";
+ next;
+ }
+
+ my $closure = $self->_make_search_callback;
+
+ if(-d $start_in) {
+ # Normal case:
+ $verbose and print "Beginning excursion under $start_in\n";
+ $self->_recurse_dir( $start_in, $closure, $modname_prefix );
+ $verbose and print "Back from excursion under $start_in\n\n";
+
+ } elsif(-f _) {
+ # A excursion consisting of just one file!
+ $_ = basename($start_in);
+ $verbose and print "Pondering $start_in ($_)\n";
+ $closure->($start_in, $_, 0, []);
+
+ } else {
+ $verbose and print "Skipping mysterious $start_in\n";
+ }
+ }
+ $self->progress and $self->progress->done(
+ "Noted $$self{'_scan_count'} Pod files total");
+
+ return unless defined wantarray; # void
+ return $self->name2path unless wantarray; # scalar
+ return $self->name2path, $self->path2name; # list
+}
+
+
+#==========================================================================
+sub _make_search_callback {
+ my $self = $_[0];
+
+ # Put the options in variables, for easy access
+ my( $laborious, $verbose, $shadows, $limit_re, $callback, $progress,$path2name,$name2path) =
+ map scalar($self->$_()),
+ qw(laborious verbose shadows limit_re callback progress path2name name2path);
+
+ my($file, $shortname, $isdir, $modname_bits);
+ return sub {
+ ($file, $shortname, $isdir, $modname_bits) = @_;
+
+ if($isdir) { # this never gets called on the startdir itself, just subdirs
+
+ if( $self->{'_dirs_visited'}{$file} ) {
+ $verbose and print "Directory '$file' already seen, skipping.\n";
+ return 'PRUNE';
+ }
+
+ print "Looking in dir $file\n" if $verbose;
+
+ unless ($laborious) { # $laborious overrides pruning
+ if( m/^(\d+\.[\d_]{3,})\z/s
+ and do { my $x = $1; $x =~ tr/_//d; $x != $] }
+ ) {
+ $verbose and print "Perl $] version mismatch on $_, skipping.\n";
+ return 'PRUNE';
+ }
+
+ if( m/^([A-Za-z][a-zA-Z0-9_]*)\z/s ) {
+ $verbose and print "$_ is a well-named module subdir. Looking....\n";
+ } else {
+ $verbose and print "$_ is a fishy directory name. Skipping.\n";
+ return 'PRUNE';
+ }
+ } # end unless $laborious
+
+ $self->{'_dirs_visited'}{$file} = 1;
+ return; # (not pruning);
+ }
+
+
+ # Make sure it's a file even worth even considering
+ if($laborious) {
+ unless(
+ m/\.(pod|pm|plx?)\z/i || -x _ and -T _
+ # Note that the cheapest operation (the RE) is run first.
+ ) {
+ $verbose > 1 and print " Brushing off uninteresting $file\n";
+ return;
+ }
+ } else {
+ unless( m/^[-_a-zA-Z0-9]+\.(?:pod|pm|plx?)\z/is ) {
+ $verbose > 1 and print " Brushing off oddly-named $file\n";
+ return;
+ }
+ }
+
+ $verbose and print "Considering item $file\n";
+ my $name = $self->_path2modname( $file, $shortname, $modname_bits );
+ $verbose > 0.01 and print " Nominating $file as $name\n";
+
+ if($limit_re and $name !~ m/$limit_re/i) {
+ $verbose and print "Shunning $name as not matching $limit_re\n";
+ return;
+ }
+
+ if( !$shadows and $name2path->{$name} ) {
+ $verbose and print "Not worth considering $file ",
+ "-- already saw $name as ",
+ join(' ', grep($path2name->{$_} eq $name, keys %$path2name)), "\n";
+ return;
+ }
+
+ # Put off until as late as possible the expense of
+ # actually reading the file:
+ if( m/\.pod\z/is ) {
+ # just assume it has pod, okay?
+ } else {
+ $progress and $progress->reach($self->{'_scan_count'}, "Scanning $file");
+ return unless $self->contains_pod( $file );
+ }
+ ++ $self->{'_scan_count'};
+
+ # Or finally take note of it:
+ if( $name2path->{$name} ) {
+ $verbose and print
+ "Duplicate POD found (shadowing?): $name ($file)\n",
+ " Already seen in ",
+ join(' ', grep($path2name->{$_} eq $name, keys %$path2name)), "\n";
+ } else {
+ $name2path->{$name} = $file; # Noting just the first occurrence
+ }
+ $verbose and print " Noting $name = $file\n";
+ if( $callback ) {
+ local $_ = $_; # insulate from changes, just in case
+ $callback->($file, $name);
+ }
+ $path2name->{$file} = $name;
+ return;
+ }
+}
+
+#==========================================================================
+
+sub _path2modname {
+ my($self, $file, $shortname, $modname_bits) = @_;
+
+ # this code simplifies the POD name for Perl modules:
+ # * remove "site_perl"
+ # * remove e.g. "i586-linux" (from 'archname')
+ # * remove e.g. 5.00503
+ # * remove pod/ if followed by perl*.pod (e.g. in pod/perlfunc.pod)
+ # * dig into the file for case-preserved name if not already mixed case
+
+ my @m = @$modname_bits;
+ my $x;
+ my $verbose = $self->verbose;
+
+ # Shaving off leading naughty-bits
+ while(@m
+ and defined($x = lc( $m[0] ))
+ and( $x eq 'site_perl'
+ or($x eq 'pod' and @m == 1 and $shortname =~ m{^perl.*\.pod$}s )
+ or $x =~ m{\\d+\\.z\\d+([_.]?\\d+)?} # if looks like a vernum
+ or $x eq lc( $Config::Config{'archname'} )
+ )) { shift @m }
+
+ my $name = join '::', @m, $shortname;
+ $self->_simplify_base($name);
+
+ # On VMS, case-preserved document names can't be constructed from
+ # filenames, so try to extract them from the "=head1 NAME" tag in the
+ # file instead.
+ if ($^O eq 'VMS' && ($name eq lc($name) || $name eq uc($name))) {
+ open PODFILE, "<" ,"$file" or die "_path2modname: Can't open $file: $!";
+ my $in_pod = 0;
+ my $in_name = 0;
+ my $line;
+ while ($line = ) {
+ chomp $line;
+ $in_pod = 1 if ($line =~ m/^=\w/);
+ $in_pod = 0 if ($line =~ m/^=cut/);
+ next unless $in_pod; # skip non-pod text
+ next if ($line =~ m/^\s*\z/); # and blank lines
+ next if ($in_pod && ($line =~ m/^X)); # and commands
+ if ($in_name) {
+ if ($line =~ m/(\w+::)?(\w+)/) {
+ # substitute case-preserved version of name
+ my $podname = $2;
+ my $prefix = $1 || '';
+ $verbose and print "Attempting case restore of '$name' from '$prefix$podname'\n";
+ unless ($name =~ s/$prefix$podname/$prefix$podname/i) {
+ $verbose and print "Attempting case restore of '$name' from '$podname'\n";
+ $name =~ s/$podname/$podname/i;
+ }
+ last;
+ }
+ }
+ $in_name = 1 if ($line =~ m/^=head1 NAME/);
+ }
+ close PODFILE;
+ }
+
+ return $name;
+}
+
+#==========================================================================
+
+sub _recurse_dir {
+ my($self, $startdir, $callback, $modname_bits) = @_;
+
+ my $maxdepth = $self->{'fs_recursion_maxdepth'} || 10;
+ my $verbose = $self->verbose;
+
+ my $here_string = File::Spec->curdir;
+ my $up_string = File::Spec->updir;
+ $modname_bits ||= [];
+
+ my $recursor;
+ $recursor = sub {
+ my($dir_long, $dir_bare) = @_;
+ if( @$modname_bits >= 10 ) {
+ $verbose and print "Too deep! [@$modname_bits]\n";
+ return;
+ }
+
+ unless(-d $dir_long) {
+ $verbose > 2 and print "But it's not a dir! $dir_long\n";
+ return;
+ }
+ unless( opendir(INDIR, $dir_long) ) {
+ $verbose > 2 and print "Can't opendir $dir_long : $!\n";
+ closedir(INDIR);
+ return
+ }
+ my @items = sort readdir(INDIR);
+ closedir(INDIR);
+
+ push @$modname_bits, $dir_bare unless $dir_bare eq '';
+
+ my $i_full;
+ foreach my $i (@items) {
+ next if $i eq $here_string or $i eq $up_string or $i eq '';
+ $i_full = File::Spec->catfile( $dir_long, $i );
+
+ if(!-r $i_full) {
+ $verbose and print "Skipping unreadable $i_full\n";
+
+ } elsif(-f $i_full) {
+ $_ = $i;
+ $callback->( $i_full, $i, 0, $modname_bits );
+
+ } elsif(-d _) {
+ $i =~ s/\.DIR\z//i if $^O eq 'VMS';
+ $_ = $i;
+ my $rv = $callback->( $i_full, $i, 1, $modname_bits ) || '';
+
+ if($rv eq 'PRUNE') {
+ $verbose > 1 and print "OK, pruning";
+ } else {
+ # Otherwise, recurse into it
+ $recursor->( File::Spec->catdir($dir_long, $i) , $i);
+ }
+ } else {
+ $verbose > 1 and print "Skipping oddity $i_full\n";
+ }
+ }
+ pop @$modname_bits;
+ return;
+ };;
+
+ local $_;
+ $recursor->($startdir, '');
+
+ undef $recursor; # allow it to be GC'd
+
+ return;
+}
+
+
+#==========================================================================
+
+sub run {
+ # A function, useful in one-liners
+
+ my $self = __PACKAGE__->new;
+ $self->limit_glob($ARGV[0]) if @ARGV;
+ $self->callback( sub {
+ my($file, $name) = @_;
+ my $version = '';
+
+ # Yes, I know we won't catch the version in like a File/Thing.pm
+ # if we see File/Thing.pod first. That's just the way the
+ # cookie crumbles. -- SMB
+
+ if($file =~ m/\.pod$/i) {
+ # Don't bother looking for $VERSION in .pod files
+ DEBUG and print "Not looking for \$VERSION in .pod $file\n";
+ } elsif( !open(INPOD, "<", $file) ) {
+ DEBUG and print "Couldn't open $file: $!\n";
+ close(INPOD);
+ } else {
+ # Sane case: file is readable
+ my $lines = 0;
+ while() {
+ last if $lines++ > $MAX_VERSION_WITHIN; # some degree of sanity
+ if( s/^\s*\$VERSION\s*=\s*//s and m/\d/ ) {
+ DEBUG and print "Found version line (#$lines): $_";
+ s/\s*\#.*//s;
+ s/\;\s*$//s;
+ s/\s+$//s;
+ s/\t+/ /s; # nix tabs
+ # Optimize the most common cases:
+ $_ = "v$1"
+ if m{^v?["']?([0-9_]+(\.[0-9_]+)*)["']?$}s
+ # like in $VERSION = "3.14159";
+ or m{\$Revision:\s*([0-9_]+(?:\.[0-9_]+)*)\s*\$}s
+ # like in sprintf("%d.%02d", q$Revision: 4.13 $ =~ /(\d+)\.(\d+)/);
+ ;
+
+ # Like in sprintf("%d.%s", map {s/_//g; $_} q$Name: release-0_55-public $ =~ /-(\d+)_([\d_]+)/)
+ $_ = sprintf("v%d.%s",
+ map {s/_//g; $_}
+ $1 =~ m/-(\d+)_([\d_]+)/) # snare just the numeric part
+ if m{\$Name:\s*([^\$]+)\$}s
+ ;
+ $version = $_;
+ DEBUG and print "Noting $version as version\n";
+ last;
+ }
+ }
+ close(INPOD);
+ }
+ print "$name\t$version\t$file\n";
+ return;
+ # End of callback!
+ });
+
+ $self->survey;
+}
+
+#==========================================================================
+
+sub simplify_name {
+ my($self, $str) = @_;
+
+ # Remove all path components
+ # XXX Why not just use basename()? -- SMB
+
+ if ($^O eq 'MacOS') { $str =~ s{^.*:+}{}s }
+ else { $str =~ s{^.*/+}{}s }
+
+ $self->_simplify_base($str);
+ return $str;
+}
+
+#==========================================================================
+
+sub _simplify_base { # Internal method only
+
+ # strip Perl's own extensions
+ $_[1] =~ s/\.(pod|pm|plx?)\z//i;
+
+ # strip meaningless extensions on Win32 and OS/2
+ $_[1] =~ s/\.(bat|exe|cmd)\z//i if $^O =~ /mswin|os2/i;
+
+ # strip meaningless extensions on VMS
+ $_[1] =~ s/\.(com)\z//i if $^O eq 'VMS';
+
+ return;
+}
+
+#==========================================================================
+
+sub _expand_inc {
+ my($self, $search_dirs) = @_;
+
+ return unless $self->{'inc'};
+
+ if ($^O eq 'MacOS') {
+ push @$search_dirs,
+ grep $_ ne File::Spec->curdir, $self->_mac_whammy(@INC);
+ # Any other OSs need custom handling here?
+ } else {
+ push @$search_dirs, grep $_ ne File::Spec->curdir, @INC;
+ }
+
+ $self->{'laborious'} = 0; # Since inc said to use INC
+ return;
+}
+
+#==========================================================================
+
+sub _mac_whammy { # Tolerate '.', './some_dir' and '(../)+some_dir' on Mac OS
+ my @them;
+ (undef,@them) = @_;
+ for $_ (@them) {
+ if ( $_ eq '.' ) {
+ $_ = ':';
+ } elsif ( $_ =~ s|^((?:\.\./)+)|':' x (length($1)/3)|e ) {
+ $_ = ':'. $_;
+ } else {
+ $_ =~ s|^\./|:|;
+ }
+ }
+ return @them;
+}
+
+#==========================================================================
+
+sub _limit_glob_to_limit_re {
+ my $self = $_[0];
+ my $limit_glob = $self->{'limit_glob'} || return;
+
+ my $limit_re = '^' . quotemeta($limit_glob) . '$';
+ $limit_re =~ s/\\\?/./g; # glob "?" => "."
+ $limit_re =~ s/\\\*/.*?/g; # glob "*" => ".*?"
+ $limit_re =~ s/\.\*\?\$$//s; # final glob "*" => ".*?$" => ""
+
+ $self->{'verbose'} and print "Turning limit_glob $limit_glob into re $limit_re\n";
+
+ # A common optimization:
+ if(!exists($self->{'dir_prefix'})
+ and $limit_glob =~ m/^(?:\w+\:\:)+/s # like "File::*" or "File::Thing*"
+ # Optimize for sane and common cases (but not things like "*::File")
+ ) {
+ $self->{'dir_prefix'} = join "::", $limit_glob =~ m/^(?:\w+::)+/sg;
+ $self->{'verbose'} and print " and setting dir_prefix to $self->{'dir_prefix'}\n";
+ }
+
+ return $limit_re;
+}
+
+#==========================================================================
+
+# contribution mostly from Tim Jenness
+
+sub find {
+ my($self, $pod, @search_dirs) = @_;
+ $self = $self->new unless ref $self; # tolerate being a class method
+
+ # Check usage
+ Carp::carp 'Usage: \$self->find($podname, ...)'
+ unless defined $pod and length $pod;
+
+ my $verbose = $self->verbose;
+
+ # Split on :: and then join the name together using File::Spec
+ my @parts = split /::/, $pod;
+ $verbose and print "Chomping {$pod} => {@parts}\n";
+
+ #@search_dirs = File::Spec->curdir unless @search_dirs;
+
+ if( $self->inc ) {
+ if( $^O eq 'MacOS' ) {
+ push @search_dirs, $self->_mac_whammy(@INC);
+ } else {
+ push @search_dirs, @INC;
+ }
+
+ # Add location of pod documentation for perl man pages (eg perlfunc)
+ # This is a pod directory in the private install tree
+ #my $perlpoddir = File::Spec->catdir($Config::Config{'installprivlib'},
+ # 'pod');
+ #push (@search_dirs, $perlpoddir)
+ # if -d $perlpoddir;
+
+ # Add location of binaries such as pod2text:
+ push @search_dirs, $Config::Config{'scriptdir'};
+ # and if that's undef or q{} or nonexistent, we just ignore it later
+ }
+
+ my %seen_dir;
+ Dir:
+ foreach my $dir ( @search_dirs ) {
+ next unless defined $dir and length $dir;
+ next if $seen_dir{$dir};
+ $seen_dir{$dir} = 1;
+ unless(-d $dir) {
+ print "Directory $dir does not exist\n" if $verbose;
+ next Dir;
+ }
+
+ print "Looking in directory $dir\n" if $verbose;
+ my $fullname = File::Spec->catfile( $dir, @parts );
+ print "Filename is now $fullname\n" if $verbose;
+
+ foreach my $ext ('', '.pod', '.pm', '.pl') { # possible extensions
+ my $fullext = $fullname . $ext;
+ if( -f $fullext and $self->contains_pod( $fullext ) ){
+ print "FOUND: $fullext\n" if $verbose;
+ return $fullext;
+ }
+ }
+ my $subdir = File::Spec->catdir($dir,'pod');
+ if(-d $subdir) { # slip in the ./pod dir too
+ $verbose and print "Noticing $subdir and stopping there...\n";
+ $dir = $subdir;
+ redo Dir;
+ }
+ }
+
+ return undef;
+}
+
+#==========================================================================
+
+sub contains_pod {
+ my($self, $file) = @_;
+ my $verbose = $self->{'verbose'};
+
+ # check for one line of POD
+ $verbose > 1 and print " Scanning $file for pod...\n";
+ unless( open(MAYBEPOD,"<", "$file") ) {
+ print "Error: $file is unreadable: $!\n";
+ return undef;
+ }
+
+ sleep($SLEEPY - 1) if $SLEEPY;
+ # avoid totally hogging the processor on OSs with poor process control
+
+ local $_;
+ while( ) {
+ if(m/^=(head\d|pod|over|item)\b/s) {
+ close(MAYBEPOD) || die "Bizarre error closing $file: $!\nAborting";
+ chomp;
+ $verbose > 1 and print " Found some pod ($_) in $file\n";
+ return 1;
+ }
+ }
+ close(MAYBEPOD) || die "Bizarre error closing $file: $!\nAborting";
+ $verbose > 1 and print " No POD in $file, skipping.\n";
+ return 0;
+}
+
+#==========================================================================
+
+sub _accessorize { # A simple-minded method-maker
+ shift;
+ no strict 'refs';
+ foreach my $attrname (@_) {
+ *{caller() . '::' . $attrname} = sub {
+ use strict;
+ $Carp::CarpLevel = 1, Carp::croak(
+ "Accessor usage: \$obj->$attrname() or \$obj->$attrname(\$new_value)"
+ ) unless (@_ == 1 or @_ == 2) and ref $_[0];
+
+ # Read access:
+ return $_[0]->{$attrname} if @_ == 1;
+
+ # Write access:
+ $_[0]->{$attrname} = $_[1];
+ return $_[0]; # RETURNS MYSELF!
+ };
+ }
+ # Ya know, they say accessories make the ensemble!
+ return;
+}
+
+#==========================================================================
+sub _state_as_string {
+ my $self = $_[0];
+ return '' unless ref $self;
+ my @out = "{\n # State of $self ...\n";
+ foreach my $k (sort keys %$self) {
+ push @out, " ", _esc($k), " => ", _esc($self->{$k}), ",\n";
+ }
+ push @out, "}\n";
+ my $x = join '', @out;
+ $x =~ s/^/#/mg;
+ return $x;
+}
+
+sub _esc {
+ my $in = $_[0];
+ return 'undef' unless defined $in;
+ $in =~
+ s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])>
+ <'\\x'.(unpack("H2",$1))>eg;
+ return qq{"$in"};
+}
+
+#==========================================================================
+
+run() unless caller; # run if "perl whatever/Search.pm"
+
+1;
+
+#==========================================================================
+
+__END__
+
+
+=head1 NAME
+
+Pod::Simple::Search - find POD documents in directory trees
+
+=head1 SYNOPSIS
+
+ use Pod::Simple::Search;
+ my $name2path = Pod::Simple::Search->new->limit_glob('LWP::*')->survey;
+ print "Looky see what I found: ",
+ join(' ', sort keys %$name2path), "\n";
+
+ print "LWPUA docs = ",
+ Pod::Simple::Search->new->find('LWP::UserAgent') || "?",
+ "\n";
+
+=head1 DESCRIPTION
+
+B is a class that you use for running searches
+for Pod files. An object of this class has several attributes
+(mostly options for controlling search options), and some methods
+for searching based on those attributes.
+
+The way to use this class is to make a new object of this class,
+set any options, and then call one of the search options
+(probably C or C). The sections below discuss the
+syntaxes for doing all that.
+
+
+=head1 CONSTRUCTOR
+
+This class provides the one constructor, called C.
+It takes no parameters:
+
+ use Pod::Simple::Search;
+ my $search = Pod::Simple::Search->new;
+
+=head1 ACCESSORS
+
+This class defines several methods for setting (and, occasionally,
+reading) the contents of an object. With two exceptions (discussed at
+the end of this section), these attributes are just for controlling the
+way searches are carried out.
+
+Note that each of these return C<$self> when you call them as
+C<< $self->I >>. That's so that you can chain
+together set-attribute calls like this:
+
+ my $name2path =
+ Pod::Simple::Search->new
+ -> inc(0) -> verbose(1) -> callback(\&blab)
+ ->survey(@there);
+
+...which works exactly as if you'd done this:
+
+ my $search = Pod::Simple::Search->new;
+ $search->inc(0);
+ $search->verbose(1);
+ $search->callback(\&blab);
+ my $name2path = $search->survey(@there);
+
+=over
+
+=item $search->inc( I );
+
+This attribute, if set to a true value, means that searches should
+implicitly add perl's I<@INC> paths. This
+automatically considers paths specified in the C environment
+as this is prepended to I<@INC> by the Perl interpreter itself.
+This attribute's default value is B. If you want to search
+only specific directories, set $self->inc(0) before calling
+$inc->survey or $inc->find.
+
+
+=item $search->verbose( I );
+
+This attribute, if set to a nonzero positive value, will make searches output
+(via C) notes about what they're doing as they do it.
+This option may be useful for debugging a pod-related module.
+This attribute's default value is zero, meaning that no C messages
+are produced. (Setting verbose to 1 turns on some messages, and setting
+it to 2 turns on even more messages, i.e., makes the following search(es)
+even more verbose than 1 would make them.)
+
+
+=item $search->limit_glob( I );
+
+This option means that you want to limit the results just to items whose
+podnames match the given glob/wildcard expression. For example, you
+might limit your search to just "LWP::*", to search only for modules
+starting with "LWP::*" (but not including the module "LWP" itself); or
+you might limit your search to "LW*" to see only modules whose (full)
+names begin with "LW"; or you might search for "*Find*" to search for
+all modules with "Find" somewhere in their full name. (You can also use
+"?" in a glob expression; so "DB?" will match "DBI" and "DBD".)
+
+
+=item $search->callback( I<\&some_routine> );
+
+This attribute means that every time this search sees a matching
+Pod file, it should call this callback routine. The routine is called
+with two parameters: the current file's filespec, and its pod name.
+(For example: C<("/etc/perljunk/File/Crunk.pm", "File::Crunk")> would
+be in C<@_>.)
+
+The callback routine's return value is not used for anything.
+
+This attribute's default value is false, meaning that no callback
+is called.
+
+=item $search->laborious( I );
+
+Unless you set this attribute to a true value, Pod::Search will
+apply Perl-specific heuristics to find the correct module PODs quickly.
+This attribute's default value is false. You won't normally need
+to set this to true.
+
+Specifically: Turning on this option will disable the heuristics for
+seeing only files with Perl-like extensions, omitting subdirectories
+that are numeric but do I match the current Perl interpreter's
+version ID, suppressing F as a module hierarchy name, etc.
+
+
+=item $search->shadows( I );
+
+Unless you set this attribute to a true value, Pod::Simple::Search will
+consider only the first file of a given modulename as it looks thru the
+specified directories; that is, with this option off, if
+Pod::Simple::Search has seen a C already in this
+search, then it won't bother looking at a C
+later on in that search, because that file is merely a "shadow". But if
+you turn on C<< $self->shadows(1) >>, then these "shadow" files are
+inspected too, and are noted in the pathname2podname return hash.
+
+This attribute's default value is false; and normally you won't
+need to turn it on.
+
+
+=item $search->limit_re( I );
+
+Setting this attribute (to a value that's a regexp) means that you want
+to limit the results just to items whose podnames match the given
+regexp. Normally this option is not needed, and the more efficient
+C attribute is used instead.
+
+
+=item $search->dir_prefix( I );
+
+Setting this attribute to a string value means that the searches should
+begin in the specified subdirectory name (like "Pod" or "File::Find",
+also expressable as "File/Find"). For example, the search option
+C<< $search->limit_glob("File::Find::R*") >>
+is the same as the combination of the search options
+C<< $search->limit_re("^File::Find::R") -> dir_prefix("File::Find") >>.
+
+Normally you don't need to know about the C option, but I
+include it in case it might prove useful for someone somewhere.
+
+(Implementationally, searching with limit_glob ends up setting limit_re
+and usually dir_prefix.)
+
+
+=item $search->progress( I );
+
+If you set a value for this attribute, the value is expected
+to be an object (probably of a class that you define) that has a
+C method and a C method. This is meant for reporting
+progress during the search, if you don't want to use a simple
+callback.
+
+Normally you don't need to know about the C option, but I
+include it in case it might prove useful for someone somewhere.
+
+While a search is in progress, the progress object's C and
+C methods are called like this:
+
+ # Every time a file is being scanned for pod:
+ $progress->reach($count, "Scanning $file"); ++$count;
+
+ # And then at the end of the search:
+ $progress->done("Noted $count Pod files total");
+
+Internally, we often set this to an object of class
+Pod::Simple::Progress. That class is probably undocumented,
+but you may wish to look at its source.
+
+
+=item $name2path = $self->name2path;
+
+This attribute is not a search parameter, but is used to report the
+result of C method, as discussed in the next section.
+
+=item $path2name = $self->path2name;
+
+This attribute is not a search parameter, but is used to report the
+result of C method, as discussed in the next section.
+
+=back
+
+=head1 MAIN SEARCH METHODS
+
+Once you've actually set any options you want (if any), you can go
+ahead and use the following methods to search for Pod files
+in particular ways.
+
+
+=head2 C<< $search->survey( @directories ) >>
+
+The method C searches for POD documents in a given set of
+files and/or directories. This runs the search according to the various
+options set by the accessors above. (For example, if the C attribute
+is on, as it is by default, then the perl @INC directories are implicitly
+added to the list of directories (if any) that you specify.)
+
+The return value of C is two hashes:
+
+=over
+
+=item C
+
+A hash that maps from each pod-name to the filespec (like
+"Stuff::Thing" => "/whatever/plib/Stuff/Thing.pm")
+
+=item C
+
+A hash that maps from each Pod filespec to its pod-name (like
+"/whatever/plib/Stuff/Thing.pm" => "Stuff::Thing")
+
+=back
+
+Besides saving these hashes as the hashref attributes
+C and C, calling this function also returns
+these hashrefs. In list context, the return value of
+C<< $search->survey >> is the list C<(\%name2path, \%path2name)>.
+In scalar context, the return value is C<\%name2path>.
+Or you can just call this in void context.
+
+Regardless of calling context, calling C saves
+its results in its C and C attributes.
+
+E.g., when searching in F<$HOME/perl5lib>, the file
+F<$HOME/perl5lib/MyModule.pm> would get the POD name I,
+whereas F<$HOME/perl5lib/Myclass/Subclass.pm> would be
+I. The name information can be used for POD
+translators.
+
+Only text files containing at least one valid POD command are found.
+
+In verbose mode, a warning is printed if shadows are found (i.e., more
+than one POD file with the same POD name is found, e.g. F in
+different directories). This usually indicates duplicate occurrences of
+modules in the I<@INC> search path, which is occasionally inadvertent
+(but is often simply a case of a user's path dir having a more recent
+version than the system's general path dirs in general.)
+
+The options to this argument is a list of either directories that are
+searched recursively, or files. (Usually you wouldn't specify files,
+but just dirs.) Or you can just specify an empty-list, as in
+$name2path; with the
+C option on, as it is by default, teh
+
+The POD names of files are the plain basenames with any Perl-like
+extension (.pm, .pl, .pod) stripped, and path separators replaced by
+C<::>'s.
+
+Calling Pod::Simple::Search->search(...) is short for
+Pod::Simple::Search->new->search(...). That is, a throwaway object
+with default attribute values is used.
+
+
+=head2 C<< $search->simplify_name( $str ) >>
+
+The method B is equivalent to B, but also
+strips Perl-like extensions (.pm, .pl, .pod) and extensions like
+F<.bat>, F<.cmd> on Win32 and OS/2, or F<.com> on VMS, respectively.
+
+
+=head2 C<< $search->find( $pod ) >>
+
+=head2 C<< $search->find( $pod, @search_dirs ) >>
+
+Returns the location of a Pod file, given a Pod/module/script name
+(like "Foo::Bar" or "perlvar" or "perldoc"), and an idea of
+what files/directories to look in.
+It searches according to the various options set by the accessors above.
+(For example, if the C attribute is on, as it is by default, then
+the perl @INC directories are implicitly added to the list of
+directories (if any) that you specify.)
+
+This returns the full path of the first occurrence to the file.
+Package names (eg 'A::B') are automatically converted to directory
+names in the selected directory. Additionally, '.pm', '.pl' and '.pod'
+are automatically appended to the search as required.
+(So, for example, under Unix, "A::B" is converted to "somedir/A/B.pm",
+"somedir/A/B.pod", or "somedir/A/B.pl", as appropriate.)
+
+If no such Pod file is found, this method returns undef.
+
+If any of the given search directories contains a F subdirectory,
+then it is searched. (That's how we manage to find F,
+for example, which is usually in F in most Perl dists.)
+
+The C and C attributes influence the behavior of this
+search; notably, C, if true, adds @INC I to the list of directories to search.
+
+It is common to simply say C<< $filename = Pod::Simple::Search-> new
+->find("perlvar") >> so that just the @INC (well, and scriptdir)
+directories are searched. (This happens because the C
+attribute is true by default.)
+
+Calling Pod::Simple::Search->find(...) is short for
+Pod::Simple::Search->new->find(...). That is, a throwaway object
+with default attribute values is used.
+
+
+=head2 C<< $self->contains_pod( $file ) >>
+
+Returns true if the supplied filename (not POD module) contains some Pod
+documentation.
+
+
+=head1 AUTHOR
+
+Sean M. Burke Esburke@cpan.orgE
+borrowed code from
+Marek Rouchal's Pod::Find, which in turn
+heavily borrowed code from Nick Ing-Simmons' PodToHtml.
+
+Tim Jenness Et.jenness@jach.hawaii.eduE provided
+C and C to Pod::Find.
+
+=head1 SEE ALSO
+
+L, L
+
+=cut
+
--- /dev/null 2009-01-07 14:55:01.000000000 +1100
+++ lib/Pod/Simple/HTMLBatch.pm 2009-01-07 13:40:15.000000000 +1100
@@ -0,0 +1,1342 @@
+
+require 5;
+package Pod::Simple::HTMLBatch;
+use strict;
+use vars qw( $VERSION $HTML_RENDER_CLASS $HTML_EXTENSION
+ $CSS $JAVASCRIPT $SLEEPY $SEARCH_CLASS @ISA
+);
+$VERSION = '3.02';
+@ISA = (); # Yup, we're NOT a subclass of Pod::Simple::HTML!
+
+# TODO: nocontents stylesheets. Strike some of the color variations?
+
+use Pod::Simple::HTML ();
+BEGIN {*esc = \&Pod::Simple::HTML::esc }
+use File::Spec ();
+use UNIVERSAL ();
+ # "Isn't the Universe an amazing place? I wouldn't live anywhere else!"
+
+use Pod::Simple::Search;
+$SEARCH_CLASS ||= 'Pod::Simple::Search';
+
+BEGIN {
+ if(defined &DEBUG) { } # no-op
+ elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG }
+ else { *DEBUG = sub () {0}; }
+}
+
+$SLEEPY = 1 if !defined $SLEEPY and $^O =~ /mswin|mac/i;
+# flag to occasionally sleep for $SLEEPY - 1 seconds.
+
+$HTML_RENDER_CLASS ||= "Pod::Simple::HTML";
+
+#
+# Methods beginning with "_" are particularly internal and possibly ugly.
+#
+
+Pod::Simple::_accessorize( __PACKAGE__,
+ 'verbose', # how verbose to be during batch conversion
+ 'html_render_class', # what class to use to render
+ 'contents_file', # If set, should be the name of a file (in current directory)
+ # to write the list of all modules to
+ 'index', # will set $htmlpage->index(...) to this (true or false)
+ 'progress', # progress object
+ 'contents_page_start', 'contents_page_end',
+
+ 'css_flurry', '_css_wad', 'javascript_flurry', '_javascript_wad',
+ 'no_contents_links', # set to true to suppress automatic adding of << links.
+ '_contents',
+);
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Just so we can run from the command line more easily
+sub go {
+ @ARGV == 2 or die sprintf(
+ "Usage: perl -M%s -e %s:go indirs outdir\n (or use \"\@INC\" for indirs)\n",
+ __PACKAGE__, __PACKAGE__,
+ );
+
+ if(defined($ARGV[1]) and length($ARGV[1])) {
+ my $d = $ARGV[1];
+ -e $d or die "I see no output directory named \"$d\"\nAborting";
+ -d $d or die "But \"$d\" isn't a directory!\nAborting";
+ -w $d or die "Directory \"$d\" isn't writeable!\nAborting";
+ }
+
+ __PACKAGE__->batch_convert(@ARGV);
+}
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+
+sub new {
+ my $new = bless {}, ref($_[0]) || $_[0];
+ $new->html_render_class($HTML_RENDER_CLASS);
+ $new->verbose(1 + DEBUG);
+ $new->_contents([]);
+
+ $new->index(1);
+
+ $new-> _css_wad([]); $new->css_flurry(1);
+ $new->_javascript_wad([]); $new->javascript_flurry(1);
+
+ $new->contents_file(
+ 'index' . ($HTML_EXTENSION || $Pod::Simple::HTML::HTML_EXTENSION)
+ );
+
+ $new->contents_page_start( join "\n", grep $_,
+ $Pod::Simple::HTML::Doctype_decl,
+ "",
+ "Perl Documentation ",
+ $Pod::Simple::HTML::Content_decl,
+ "",
+ "\n\nPerl Documentation \n"
+ ); # override if you need a different title
+
+
+ $new->contents_page_end( sprintf(
+ "\n\n\n\n\n",
+ esc(
+ ref($new),
+ eval {$new->VERSION} || $VERSION,
+ $], scalar(gmtime), scalar(localtime),
+ )));
+
+ return $new;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub muse {
+ my $self = shift;
+ if($self->verbose) {
+ print 'T+', int(time() - $self->{'_batch_start_time'}), "s: ", @_, "\n";
+ }
+ return 1;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub batch_convert {
+ my($self, $dirs, $outdir) = @_;
+ $self ||= __PACKAGE__; # tolerate being called as an optionless function
+ $self = $self->new unless ref $self; # tolerate being used as a class method
+
+ if(!defined($dirs) or $dirs eq '' or $dirs eq '@INC' ) {
+ $dirs = '';
+ } elsif(ref $dirs) {
+ # OK, it's an explicit set of dirs to scan, specified as an arrayref.
+ } else {
+ # OK, it's an explicit set of dirs to scan, specified as a
+ # string like "/thing:/also:/whatever/perl" (":"-delim, as usual)
+ # or, under MSWin, like "c:/thing;d:/also;c:/whatever/perl" (";"-delim!)
+ require Config;
+ my $ps = quotemeta( $Config::Config{'path_sep'} || ":" );
+ $dirs = [ grep length($_), split qr/$ps/, $dirs ];
+ }
+
+ $outdir = $self->filespecsys->curdir
+ unless defined $outdir and length $outdir;
+
+ $self->_batch_convert_main($dirs, $outdir);
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub _batch_convert_main {
+ my($self, $dirs, $outdir) = @_;
+ # $dirs is either false, or an arrayref.
+ # $outdir is a pathspec.
+
+ $self->{'_batch_start_time'} ||= time();
+
+ $self->muse( "= ", scalar(localtime) );
+ $self->muse( "Starting batch conversion to \"$outdir\"" );
+
+ my $progress = $self->progress;
+ if(!$progress and $self->verbose > 0 and $self->verbose() <= 5) {
+ require Pod::Simple::Progress;
+ $progress = Pod::Simple::Progress->new(
+ ($self->verbose < 2) ? () # Default omission-delay
+ : ($self->verbose == 2) ? 1 # Reduce the omission-delay
+ : 0 # Eliminate the omission-delay
+ );
+ $self->progress($progress);
+ }
+
+ if($dirs) {
+ $self->muse(scalar(@$dirs), " dirs to scan: @$dirs");
+ } else {
+ $self->muse("Scanning \@INC. This could take a minute or two.");
+ }
+ my $mod2path = $self->find_all_pods($dirs ? $dirs : ());
+ $self->muse("Done scanning.");
+
+ my $total = keys %$mod2path;
+ unless($total) {
+ $self->muse("No pod found. Aborting batch conversion.\n");
+ return $self;
+ }
+
+ $progress and $progress->goal($total);
+ $self->muse("Now converting pod files to HTML.",
+ ($total > 25) ? " This will take a while more." : ()
+ );
+
+ $self->_spray_css( $outdir );
+ $self->_spray_javascript( $outdir );
+
+ $self->_do_all_batch_conversions($mod2path, $outdir);
+
+ $progress and $progress->done(sprintf (
+ "Done converting %d files.", $self->{"__batch_conv_page_count"}
+ ));
+ return $self->_batch_convert_finish($outdir);
+ return $self;
+}
+
+
+sub _do_all_batch_conversions {
+ my($self, $mod2path, $outdir) = @_;
+ $self->{"__batch_conv_page_count"} = 0;
+
+ foreach my $module (sort {lc($a) cmp lc($b)} keys %$mod2path) {
+ $self->_do_one_batch_conversion($module, $mod2path, $outdir);
+ sleep($SLEEPY - 1) if $SLEEPY;
+ }
+
+ return;
+}
+
+sub _batch_convert_finish {
+ my($self, $outdir) = @_;
+ $self->write_contents_file($outdir);
+ $self->muse("Done with batch conversion. $$self{'__batch_conv_page_count'} files done.");
+ $self->muse( "= ", scalar(localtime) );
+ $self->progress and $self->progress->done("All done!");
+ return;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub _do_one_batch_conversion {
+ my($self, $module, $mod2path, $outdir, $outfile) = @_;
+
+ my $retval;
+ my $total = scalar keys %$mod2path;
+ my $infile = $mod2path->{$module};
+ my @namelets = grep m/\S/, split "::", $module;
+ # this can stick around in the contents LoL
+ my $depth = scalar @namelets;
+ die "Contentless thingie?! $module $infile" unless @namelets; #sanity
+
+ $outfile ||= do {
+ my @n = @namelets;
+ $n[-1] .= $HTML_EXTENSION || $Pod::Simple::HTML::HTML_EXTENSION;
+ $self->filespecsys->catfile( $outdir, @n );
+ };
+
+ my $progress = $self->progress;
+
+ my $page = $self->html_render_class->new;
+ if(DEBUG > 5) {
+ $self->muse($self->{"__batch_conv_page_count"} + 1, "/$total: ",
+ ref($page), " render ($depth) $module => $outfile");
+ } elsif(DEBUG > 2) {
+ $self->muse($self->{"__batch_conv_page_count"} + 1, "/$total: $module => $outfile")
+ }
+
+ # Give each class a chance to init the converter:
+
+ $page->batch_mode_page_object_init($self, $module, $infile, $outfile, $depth)
+ if $page->can('batch_mode_page_object_init');
+ $self->batch_mode_page_object_init($page, $module, $infile, $outfile, $depth)
+ if $self->can('batch_mode_page_object_init');
+
+ # Now get busy...
+ $self->makepath($outdir => \@namelets);
+
+ $progress and $progress->reach($self->{"__batch_conv_page_count"}, "Rendering $module");
+
+ if( $retval = $page->parse_from_file($infile, $outfile) ) {
+ ++ $self->{"__batch_conv_page_count"} ;
+ $self->note_for_contents_file( \@namelets, $infile, $outfile );
+ } else {
+ $self->muse("Odd, parse_from_file(\"$infile\", \"$outfile\") returned false.");
+ }
+
+ $page->batch_mode_page_object_kill($self, $module, $infile, $outfile, $depth)
+ if $page->can('batch_mode_page_object_kill');
+ # The following isn't a typo. Note that it switches $self and $page.
+ $self->batch_mode_page_object_kill($page, $module, $infile, $outfile, $depth)
+ if $self->can('batch_mode_page_object_kill');
+
+ DEBUG > 4 and printf "%s %sb < $infile %s %sb\n",
+ $outfile, -s $outfile, $infile, -s $infile
+ ;
+
+ undef($page);
+ return $retval;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+sub filespecsys { $_[0]{'_filespecsys'} || 'File::Spec' }
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub note_for_contents_file {
+ my($self, $namelets, $infile, $outfile) = @_;
+
+ # I think the infile and outfile parts are never used. -- SMB
+ # But it's handy to have them around for debugging.
+
+ if( $self->contents_file ) {
+ my $c = $self->_contents();
+ push @$c,
+ [ join("::", @$namelets), $infile, $outfile, $namelets ]
+ # 0 1 2 3
+ ;
+ DEBUG > 3 and print "Noting @$c[-1]\n";
+ }
+ return;
+}
+
+#_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+
+sub write_contents_file {
+ my($self, $outdir) = @_;
+ my $outfile = $self->_contents_filespec($outdir) || return;
+
+ $self->muse("Preparing list of modules for ToC");
+
+ my($toplevel, # maps toplevelbit => [all submodules]
+ $toplevel_form_freq, # ends up being 'foo' => 'Foo'
+ ) = $self->_prep_contents_breakdown;
+
+ my $Contents = eval { $self->_wopen($outfile) };
+ if( $Contents ) {
+ $self->muse( "Writing contents file $outfile" );
+ } else {
+ warn "Couldn't write-open contents file $outfile: $!\nAbort writing to $outfile at all";
+ return;
+ }
+
+ $self->_write_contents_start( $Contents, $outfile, );
+ $self->_write_contents_middle( $Contents, $outfile, $toplevel, $toplevel_form_freq );
+ $self->_write_contents_end( $Contents, $outfile, );
+ return $outfile;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub _write_contents_start {
+ my($self, $Contents, $outfile) = @_;
+ my $starter = $self->contents_page_start || '';
+
+ {
+ my $css_wad = $self->_css_wad_to_markup(1);
+ if( $css_wad ) {
+ $starter =~ s{()}{\n$css_wad\n$1}i; # otherwise nevermind
+ }
+
+ my $javascript_wad = $self->_javascript_wad_to_markup(1);
+ if( $javascript_wad ) {
+ $starter =~ s{()}{\n$javascript_wad\n$1}i; # otherwise nevermind
+ }
+ }
+
+ unless(print $Contents $starter, "\n" ) {
+ warn "Couldn't print to $outfile: $!\nAbort writing to $outfile at all";
+ close($Contents);
+ return 0;
+ }
+ return 1;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub _write_contents_middle {
+ my($self, $Contents, $outfile, $toplevel2submodules, $toplevel_form_freq) = @_;
+
+ foreach my $t (sort keys %$toplevel2submodules) {
+ my @downlines = sort {$a->[-1] cmp $b->[-1]}
+ @{ $toplevel2submodules->{$t} };
+
+ printf $Contents qq[%s \n\n],
+ esc( $t, $toplevel_form_freq->{$t} )
+ ;
+
+ my($path, $name);
+ foreach my $e (@downlines) {
+ $name = $e->[0];
+ $path = join( "/", '.', esc( @{$e->[3]} ) )
+ . ($HTML_EXTENSION || $Pod::Simple::HTML::HTML_EXTENSION);
+ print $Contents qq{ }, esc($name), " \n";
+ }
+ print $Contents " \n\n";
+ }
+ return 1;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub _write_contents_end {
+ my($self, $Contents, $outfile) = @_;
+ unless(
+ print $Contents " \n",
+ $self->contents_page_end || '',
+ ) {
+ warn "Couldn't write to $outfile: $!";
+ }
+ close($Contents) or warn "Couldn't close $outfile: $!";
+ return 1;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub _prep_contents_breakdown {
+ my($self) = @_;
+ my $contents = $self->_contents;
+ my %toplevel; # maps lctoplevelbit => [all submodules]
+ my %toplevel_form_freq; # ends up being 'foo' => 'Foo'
+ # (mapping anycase forms to most freq form)
+
+ foreach my $entry (@$contents) {
+ my $toplevel =
+ $entry->[0] =~ m/^perl\w*$/ ? 'perl_core_docs'
+ # group all the perlwhatever docs together
+ : $entry->[3][0] # normal case
+ ;
+ ++$toplevel_form_freq{ lc $toplevel }{ $toplevel };
+ push @{ $toplevel{ lc $toplevel } }, $entry;
+ push @$entry, lc($entry->[0]); # add a sort-order key to the end
+ }
+
+ foreach my $toplevel (sort keys %toplevel) {
+ my $fgroup = $toplevel_form_freq{$toplevel};
+ $toplevel_form_freq{$toplevel} =
+ (
+ sort { $fgroup->{$b} <=> $fgroup->{$a} or $a cmp $b }
+ keys %$fgroup
+ # This hash is extremely unlikely to have more than 4 members, so this
+ # sort isn't so very wasteful
+ )[0];
+ }
+
+ return(\%toplevel, \%toplevel_form_freq) if wantarray;
+ return \%toplevel;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub _contents_filespec {
+ my($self, $outdir) = @_;
+ my $outfile = $self->contents_file;
+ return unless $outfile;
+ return $self->filespecsys->catfile( $outdir, $outfile );
+}
+
+#_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+
+sub makepath {
+ my($self, $outdir, $namelets) = @_;
+ return unless @$namelets > 1;
+ for my $i (0 .. ($#$namelets - 1)) {
+ my $dir = $self->filespecsys->catdir( $outdir, @$namelets[0 .. $i] );
+ if(-e $dir) {
+ die "$dir exists but not as a directory!?" unless -d $dir;
+ next;
+ }
+ DEBUG > 3 and print " Making $dir\n";
+ mkdir $dir, 0777
+ or die "Can't mkdir $dir: $!\nAborting"
+ ;
+ }
+ return;
+}
+
+#_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+
+sub batch_mode_page_object_init {
+ my $self = shift;
+ my($page, $module, $infile, $outfile, $depth) = @_;
+
+ # TODO: any further options to percolate onto this new object here?
+
+ $page->default_title($module);
+ $page->index( $self->index );
+
+ $page->html_css( $self-> _css_wad_to_markup($depth) );
+ $page->html_javascript( $self->_javascript_wad_to_markup($depth) );
+
+ $self->add_header_backlink($page, $module, $infile, $outfile, $depth);
+ $self->add_footer_backlink($page, $module, $infile, $outfile, $depth);
+
+
+ return $self;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub add_header_backlink {
+ my $self = shift;
+ return if $self->no_contents_links;
+ my($page, $module, $infile, $outfile, $depth) = @_;
+ $page->html_header_after_title( join '',
+ $page->html_header_after_title || '',
+
+ qq[<<
\n],
+ )
+ if $self->contents_file
+ ;
+ return;
+}
+
+sub add_footer_backlink {
+ my $self = shift;
+ return if $self->no_contents_links;
+ my($page, $module, $infile, $outfile, $depth) = @_;
+ $page->html_footer( join '',
+ qq[<<
\n],
+
+ $page->html_footer || '',
+ )
+ if $self->contents_file
+ ;
+ return;
+}
+
+sub url_up_to_contents {
+ my($self, $depth) = @_;
+ --$depth;
+ return join '/', ('..') x $depth, esc($self->contents_file);
+}
+
+#_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+
+sub find_all_pods {
+ my($self, $dirs) = @_;
+ # You can override find_all_pods in a subclass if you want to
+ # do extra filtering or whatnot. But for the moment, we just
+ # pass to modnames2paths:
+ return $self->modnames2paths($dirs);
+}
+
+#_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+
+sub modnames2paths { # return a hashref mapping modulenames => paths
+ my($self, $dirs) = @_;
+
+ my $m2p;
+ {
+ my $search = $SEARCH_CLASS->new;
+ DEBUG and print "Searching via $search\n";
+ $search->verbose(1) if DEBUG > 10;
+ $search->progress( $self->progress->copy->goal(0) ) if $self->progress;
+ $search->shadows(0); # don't bother noting shadowed files
+ $search->inc( $dirs ? 0 : 1 );
+ $search->survey( $dirs ? @$dirs : () );
+ $m2p = $search->name2path;
+ die "What, no name2path?!" unless $m2p;
+ }
+
+ $self->muse("That's odd... no modules found!") unless keys %$m2p;
+ if( DEBUG > 4 ) {
+ print "Modules found (name => path):\n";
+ foreach my $m (sort {lc($a) cmp lc($b)} keys %$m2p) {
+ print " $m $$m2p{$m}\n";
+ }
+ print "(total ", scalar(keys %$m2p), ")\n\n";
+ } elsif( DEBUG ) {
+ print "Found ", scalar(keys %$m2p), " modules.\n";
+ }
+ $self->muse( "Found ", scalar(keys %$m2p), " modules." );
+
+ # return the Foo::Bar => /whatever/Foo/Bar.pod|pm hashref
+ return $m2p;
+}
+
+#===========================================================================
+
+sub _wopen {
+ # this is abstracted out so that the daemon class can override it
+ my($self, $outpath) = @_;
+ require Symbol;
+ my $out_fh = Symbol::gensym();
+ DEBUG > 5 and print "Write-opening to $outpath\n";
+ return $out_fh if open($out_fh, ">", "$outpath");
+ require Carp;
+ Carp::croak("Can't write-open $outpath: $!");
+}
+
+#==========================================================================
+
+sub add_css {
+ my($self, $url, $is_default, $name, $content_type, $media, $_code) = @_;
+ return unless $url;
+ unless($name) {
+ # cook up a reasonable name based on the URL
+ $name = $url;
+ if( $name !~ m/\?/ and $name =~ m{([^/]+)$}s ) {
+ $name = $1;
+ $name =~ s/\.css//i;
+ }
+ }
+ $media ||= 'all';
+ $content_type ||= 'text/css';
+
+ my $bunch = [$url, $name, $content_type, $media, $_code];
+ if($is_default) { unshift @{ $self->_css_wad }, $bunch }
+ else { push @{ $self->_css_wad }, $bunch }
+ return;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub _spray_css {
+ my($self, $outdir) = @_;
+
+ return unless $self->css_flurry();
+ $self->_gen_css_wad();
+
+ my $lol = $self->_css_wad;
+ foreach my $chunk (@$lol) {
+ my $url = $chunk->[0];
+ my $outfile;
+ if( ref($chunk->[-1]) and $url =~ m{^(_[-a-z0-9_]+\.css$)} ) {
+ $outfile = $self->filespecsys->catfile( $outdir, "$1" );
+ DEBUG > 5 and print "Noting $$chunk[0] as a file I'll create.\n";
+ } else {
+ DEBUG > 5 and print "OK, noting $$chunk[0] as an external CSS.\n";
+ # Requires no further attention.
+ next;
+ }
+
+ #$self->muse( "Writing autogenerated CSS file $outfile" );
+ my $Cssout = $self->_wopen($outfile);
+ print $Cssout ${$chunk->[-1]}
+ or warn "Couldn't print to $outfile: $!\nAbort writing to $outfile at all";
+ close($Cssout);
+ DEBUG > 5 and print "Wrote $outfile\n";
+ }
+
+ return;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub _css_wad_to_markup {
+ my($self, $depth) = @_;
+
+ my @css = @{ $self->_css_wad || return '' };
+ return '' unless @css;
+
+ my $rel = 'stylesheet';
+ my $out = '';
+
+ --$depth;
+ my $uplink = $depth ? ('../' x $depth) : '';
+
+ foreach my $chunk (@css) {
+ next unless $chunk and @$chunk;
+
+ my( $url1, $url2, $title, $type, $media) = (
+ $self->_maybe_uplink( $chunk->[0], $uplink ),
+ esc(grep !ref($_), @$chunk)
+ );
+
+ $out .= qq{ \n};
+
+ $rel = 'alternate stylesheet'; # alternates = all non-first iterations
+ }
+ return $out;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+sub _maybe_uplink {
+ # if the given URL looks relative, return the given uplink string --
+ # otherwise return emptystring
+ my($self, $url, $uplink) = @_;
+ ($url =~ m{^\./} or $url !~ m{[/\:]} )
+ ? $uplink
+ : ''
+ # qualify it, if/as needed
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+sub _gen_css_wad {
+ my $self = $_[0];
+ my $css_template = $self->_css_template;
+ foreach my $variation (
+
+ # Commented out for sake of concision:
+ #
+ # 011n=black_with_red_on_white
+ # 001n=black_with_yellow_on_white
+ # 101n=black_with_green_on_white
+ # 110=white_with_yellow_on_black
+ # 010=white_with_green_on_black
+ # 011=white_with_blue_on_black
+ # 100=white_with_red_on_black
+
+ qw[
+ 110n=black_with_blue_on_white
+ 010n=black_with_magenta_on_white
+ 100n=black_with_cyan_on_white
+
+ 101=white_with_purple_on_black
+ 001=white_with_navy_blue_on_black
+
+ 010a=grey_with_green_on_black
+ 010b=white_with_green_on_grey
+ 101an=black_with_green_on_grey
+ 101bn=grey_with_green_on_white
+ ]) {
+
+ my $outname = $variation;
+ my($flipmode, @swap) = ( ($4 || ''), $1,$2,$3)
+ if $outname =~ s/^([012])([012])([[012])([a-z]*)=?//s;
+ @swap = () if '010' eq join '', @swap; # 010 is a swop-no-op!
+
+ my $this_css =
+ "/* This file is autogenerated. Do not edit. $variation */\n\n"
+ . $css_template;
+
+ # Only look at three-digitty colors, for now at least.
+ if( $flipmode =~ m/n/ ) {
+ $this_css =~ s/(#[0-9a-fA-F]{3})\b/_color_negate($1)/eg;
+ $this_css =~ s/\bthin\b/medium/g;
+ }
+ $this_css =~ s<#([0-9a-fA-F])([0-9a-fA-F])([0-9a-fA-F])\b>
+ < join '', '#', ($1,$2,$3)[@swap] >eg if @swap;
+
+ if( $flipmode =~ m/a/)
+ { $this_css =~ s/#fff\b/#999/gi } # black -> dark grey
+ elsif($flipmode =~ m/b/)
+ { $this_css =~ s/#000\b/#666/gi } # white -> light grey
+
+ my $name = $outname;
+ $name =~ tr/-_/ /;
+ $self->add_css( "_$outname.css", 0, $name, 0, 0, \$this_css);
+ }
+
+ # Now a few indexless variations:
+ foreach my $variation (qw[
+ black_with_blue_on_white white_with_purple_on_black
+ white_with_green_on_grey grey_with_green_on_white
+ ]) {
+ my $outname = "indexless_$variation";
+ my $this_css = join "\n",
+ "/* This file is autogenerated. Do not edit. $outname */\n",
+ "\@import url(\"./_$variation.css\");",
+ ".indexgroup { display: none; }",
+ "\n",
+ ;
+ my $name = $outname;
+ $name =~ tr/-_/ /;
+ $self->add_css( "_$outname.css", 0, $name, 0, 0, \$this_css);
+ }
+
+ return;
+}
+
+sub _color_negate {
+ my $x = lc $_[0];
+ $x =~ tr[0123456789abcdef]
+ [fedcba9876543210];
+ return $x;
+}
+
+#===========================================================================
+
+sub add_javascript {
+ my($self, $url, $content_type, $_code) = @_;
+ return unless $url;
+ push @{ $self->_javascript_wad }, [
+ $url, $content_type || 'text/javascript', $_code
+ ];
+ return;
+}
+
+sub _spray_javascript {
+ my($self, $outdir) = @_;
+ return unless $self->javascript_flurry();
+ $self->_gen_javascript_wad();
+
+ my $lol = $self->_javascript_wad;
+ foreach my $script (@$lol) {
+ my $url = $script->[0];
+ my $outfile;
+
+ if( ref($script->[-1]) and $url =~ m{^(_[-a-z0-9_]+\.js$)} ) {
+ $outfile = $self->filespecsys->catfile( $outdir, "$1" );
+ DEBUG > 5 and print "Noting $$script[0] as a file I'll create.\n";
+ } else {
+ DEBUG > 5 and print "OK, noting $$script[0] as an external JavaScript.\n";
+ next;
+ }
+
+ #$self->muse( "Writing JavaScript file $outfile" );
+ my $Jsout = $self->_wopen($outfile);
+
+ print $Jsout ${$script->[-1]}
+ or warn "Couldn't print to $outfile: $!\nAbort writing to $outfile at all";
+ close($Jsout);
+ DEBUG > 5 and print "Wrote $outfile\n";
+ }
+
+ return;
+}
+
+sub _gen_javascript_wad {
+ my $self = $_[0];
+ my $js_code = $self->_javascript || return;
+ $self->add_javascript( "_podly.js", 0, \$js_code);
+ return;
+}
+
+sub _javascript_wad_to_markup {
+ my($self, $depth) = @_;
+
+ my @scripts = @{ $self->_javascript_wad || return '' };
+ return '' unless @scripts;
+
+ my $out = '';
+
+ --$depth;
+ my $uplink = $depth ? ('../' x $depth) : '';
+
+ foreach my $s (@scripts) {
+ next unless $s and @$s;
+
+ my( $url1, $url2, $type, $media) = (
+ $self->_maybe_uplink( $s->[0], $uplink ),
+ esc(grep !ref($_), @$s)
+ );
+
+ $out .= qq{\n};
+ }
+ return $out;
+}
+
+#===========================================================================
+
+sub _css_template { return $CSS }
+sub _javascript { return $JAVASCRIPT }
+
+$CSS = <<'EOCSS';
+/* For accessibility reasons, never specify text sizes in px/pt/pc/in/cm/mm */
+
+@media all { .hide { display: none; } }
+
+@media print {
+ .noprint, div.indexgroup, .backlinktop, .backlinkbottom { display: none }
+
+ * {
+ border-color: black !important;
+ color: black !important;
+ background-color: transparent !important;
+ background-image: none !important;
+ }
+
+ dl.superindex > dd {
+ word-spacing: .6em;
+ }
+}
+
+@media aural, braille, embossed {
+ div.indexgroup { display: none; } /* Too noisy, don't you think? */
+ dl.superindex > dt:before { content: "Group "; }
+ dl.superindex > dt:after { content: " contains:"; }
+ .backlinktop a:before { content: "Back to contents"; }
+ .backlinkbottom a:before { content: "Back to contents"; }
+}
+
+@media aural {
+ dl.superindex > dt { pause-before: 600ms; }
+}
+
+@media screen, tty, tv, projection {
+ .noscreen { display: none; }
+
+ a:link { color: #7070ff; text-decoration: underline; }
+ a:visited { color: #e030ff; text-decoration: underline; }
+ a:active { color: #800000; text-decoration: underline; }
+ body.contentspage a { text-decoration: none; }
+ a.u { color: #fff !important; text-decoration: none; }
+
+ body.pod {
+ margin: 0 5px;
+ color: #fff;
+ background-color: #000;
+ }
+
+ body.pod h1, body.pod h2, body.pod h3, body.pod h4 {
+ font-family: Tahoma, Verdana, Helvetica, Arial, sans-serif;
+ font-weight: normal;
+ margin-top: 1.2em;
+ margin-bottom: .1em;
+ border-top: thin solid transparent;
+ /* margin-left: -5px; border-left: 2px #7070ff solid; padding-left: 3px; */
+ }
+
+ body.pod h1 { border-top-color: #0a0; }
+ body.pod h2 { border-top-color: #080; }
+ body.pod h3 { border-top-color: #040; }
+ body.pod h4 { border-top-color: #010; }
+
+ p.backlinktop + h1 { border-top: none; margin-top: 0em; }
+ p.backlinktop + h2 { border-top: none; margin-top: 0em; }
+ p.backlinktop + h3 { border-top: none; margin-top: 0em; }
+ p.backlinktop + h4 { border-top: none; margin-top: 0em; }
+
+ body.pod dt {
+ font-size: 105%; /* just a wee bit more than normal */
+ }
+
+ .indexgroup { font-size: 80%; }
+
+ .backlinktop, .backlinkbottom {
+ margin-left: -5px;
+ margin-right: -5px;
+ background-color: #040;
+ border-top: thin solid #050;
+ border-bottom: thin solid #050;
+ }
+
+ .backlinktop a, .backlinkbottom a {
+ text-decoration: none;
+ color: #080;
+ background-color: #000;
+ border: thin solid #0d0;
+ }
+ .backlinkbottom { margin-bottom: 0; padding-bottom: 0; }
+ .backlinktop { margin-top: 0; padding-top: 0; }
+
+ body.contentspage {
+ color: #fff;
+ background-color: #000;
+ }
+
+ body.contentspage h1 {
+ color: #0d0;
+ margin-left: 1em;
+ margin-right: 1em;
+ text-indent: -.9em;
+ font-family: Tahoma, Verdana, Helvetica, Arial, sans-serif;
+ font-weight: normal;
+ border-top: thin solid #fff;
+ border-bottom: thin solid #fff;
+ text-align: center;
+ }
+
+ dl.superindex > dt {
+ font-family: Tahoma, Verdana, Helvetica, Arial, sans-serif;
+ font-weight: normal;
+ font-size: 90%;
+ margin-top: .45em;
+ /* margin-bottom: -.15em; */
+ }
+ dl.superindex > dd {
+ word-spacing: .6em; /* most important rule here! */
+ }
+ dl.superindex > a:link {
+ text-decoration: none;
+ color: #fff;
+ }
+
+ .contentsfooty {
+ border-top: thin solid #999;
+ font-size: 90%;
+ }
+
+}
+
+/* The End */
+
+EOCSS
+
+#==========================================================================
+
+$JAVASCRIPT = <<'EOJAVASCRIPT';
+
+// From http://www.alistapart.com/articles/alternate/
+
+function setActiveStyleSheet(title) {
+ var i, a, main;
+ for(i=0 ; (a = document.getElementsByTagName("link")[i]) ; i++) {
+ if(a.getAttribute("rel").indexOf("style") != -1 && a.getAttribute("title")) {
+ a.disabled = true;
+ if(a.getAttribute("title") == title) a.disabled = false;
+ }
+ }
+}
+
+function getActiveStyleSheet() {
+ var i, a;
+ for(i=0 ; (a = document.getElementsByTagName("link")[i]) ; i++) {
+ if( a.getAttribute("rel").indexOf("style") != -1
+ && a.getAttribute("title")
+ && !a.disabled
+ ) return a.getAttribute("title");
+ }
+ return null;
+}
+
+function getPreferredStyleSheet() {
+ var i, a;
+ for(i=0 ; (a = document.getElementsByTagName("link")[i]) ; i++) {
+ if( a.getAttribute("rel").indexOf("style") != -1
+ && a.getAttribute("rel").indexOf("alt") == -1
+ && a.getAttribute("title")
+ ) return a.getAttribute("title");
+ }
+ return null;
+}
+
+function createCookie(name,value,days) {
+ if (days) {
+ var date = new Date();
+ date.setTime(date.getTime()+(days*24*60*60*1000));
+ var expires = "; expires="+date.toGMTString();
+ }
+ else expires = "";
+ document.cookie = name+"="+value+expires+"; path=/";
+}
+
+function readCookie(name) {
+ var nameEQ = name + "=";
+ var ca = document.cookie.split(';');
+ for(var i=0 ; i < ca.length ; i++) {
+ var c = ca[i];
+ while (c.charAt(0)==' ') c = c.substring(1,c.length);
+ if (c.indexOf(nameEQ) == 0) return c.substring(nameEQ.length,c.length);
+ }
+ return null;
+}
+
+window.onload = function(e) {
+ var cookie = readCookie("style");
+ var title = cookie ? cookie : getPreferredStyleSheet();
+ setActiveStyleSheet(title);
+}
+
+window.onunload = function(e) {
+ var title = getActiveStyleSheet();
+ createCookie("style", title, 365);
+}
+
+var cookie = readCookie("style");
+var title = cookie ? cookie : getPreferredStyleSheet();
+setActiveStyleSheet(title);
+
+// The End
+
+EOJAVASCRIPT
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+1;
+__END__
+
+
+=head1 NAME
+
+Pod::Simple::HTMLBatch - convert several Pod files to several HTML files
+
+=head1 SYNOPSIS
+
+ perl -MPod::Simple::HTMLBatch -e 'Pod::Simple::HTMLBatch::go' in out
+
+
+=head1 DESCRIPTION
+
+This module is used for running batch-conversions of a lot of HTML
+documents
+
+This class is NOT a subclass of Pod::Simple::HTML
+(nor of bad old Pod::Html) -- although it uses
+Pod::Simple::HTML for doing the conversion of each document.
+
+The normal use of this class is like so:
+
+ use Pod::Simple::HTMLBatch;
+ my $batchconv = Pod::Simple::HTMLBatch->new;
+ $batchconv->some_option( some_value );
+ $batchconv->some_other_option( some_other_value );
+ $batchconv->batch_convert( \@search_dirs, $output_dir );
+
+=head2 FROM THE COMMAND LINE
+
+Note that this class also provides
+(but does not export) the function Pod::Simple::HTMLBatch::go.
+This is basically just a shortcut for C<<
+Pod::Simple::HTMLBatch->batch_convert(@ARGV) >>.
+It's meant to be handy for calling from the command line.
+
+However, the shortcut requires that you specify exactly two command-line
+arguments, C and C.
+
+Example:
+
+ % mkdir out_html
+ % perl -MPod::Simple::HTMLBatch -e Pod::Simple::HTMLBatch::go @INC out_html
+ (to convert the pod from Perl's @INC
+ files under the directory ../htmlversion)
+
+(Note that the command line there contains a literal atsign-I-N-C. This
+is handled as a special case by batch_convert, in order to save you having
+to enter the odd-looking "" as the first command-line parameter when you
+mean "just use whatever's in @INC".)
+
+Example:
+
+ % mkdir ../seekrut
+ % chmod og-rx ../seekrut
+ % perl -MPod::Simple::HTMLBatch -e Pod::Simple::HTMLBatch::go . ../htmlversion
+ (to convert the pod under the current dir into HTML
+ files under the directory ../htmlversion)
+
+Example:
+
+ % perl -MPod::Simple::HTMLBatch -e Pod::Simple::HTMLBatch::go happydocs .
+ (to convert all pod from happydocs into the current directory)
+
+
+
+=head1 MAIN METHODS
+
+=over
+
+=item $batchconv = Pod::Simple::HTMLBatch->new;
+
+This TODO
+
+
+=item $batchconv->batch_convert( I, I );
+
+this TODO
+
+=item $batchconv->batch_convert( undef , ...);
+
+=item $batchconv->batch_convert( q{@INC}, ...);
+
+These two values for I specify that the normal Perl @INC
+
+=item $batchconv->batch_convert( \@dirs , ...);
+
+This specifies that the input directories are the items in
+the arrayref C<\@dirs>.
+
+=item $batchconv->batch_convert( "somedir" , ...);
+
+This specifies that the director "somedir" is the input.
+(This can be an absolute or relative path, it doesn't matter.)
+
+A common value you might want would be just "." for the current
+directory:
+
+ $batchconv->batch_convert( "." , ...);
+
+
+=item $batchconv->batch_convert( 'somedir:someother:also' , ...);
+
+This specifies that you want the dirs "somedir", "somother", and "also"
+scanned, just as if you'd passed the arrayref
+C<[qw( somedir someother also)]>. Note that a ":"-separator is normal
+under Unix, but Under MSWin, you'll need C<'somedir;someother;also'>
+instead, since the pathsep on MSWin is ";" instead of ":". (And
+I is because ":" often comes up in paths, like
+C<"c:/perl/lib">.)
+
+(Exactly what separator character should be used, is gotten from
+C<$Config::Config{'path_sep'}>, via the L module.)
+
+=item $batchconv->batch_convert( ... , undef );
+
+This specifies that you want the HTML output to go into the current
+directory.
+
+(Note that a missing or undefined value means a different thing in
+the first slot than in the second. That's so that C
+with no arguments (or undef arguments) means "go from @INC, into
+the current directory.)
+
+=item $batchconv->batch_convert( ... , 'somedir' );
+
+This specifies that you want the HTML output to go into the
+directory 'somedir'.
+(This can be an absolute or relative path, it doesn't matter.)
+
+=back
+
+
+Note that you can also call C as a class method,
+like so:
+
+ Pod::Simple::HTMLBatch->batch_convert( ... );
+
+That is just short for this:
+
+ Pod::Simple::HTMLBatch-> new-> batch_convert(...);
+
+That is, it runs a conversion with default options, for
+whatever inputdirs and output dir you specify.
+
+
+=head2 ACCESSOR METHODS
+
+The following are all accessor methods -- that is, they don't do anything
+on their own, but just alter the contents of the conversion object,
+which comprises the options for this particular batch conversion.
+
+We show the "put" form of the accessors below (i.e., the syntax you use
+for setting the accessor to a specific value). But you can also
+call each method with no parameters to get its current value. For
+example, C<< $self->contents_file() >> returns the current value of
+the contents_file attribute.
+
+=over
+
+
+=item $batchconv->verbose( I );
+
+This controls how verbose to be during batch conversion, as far as
+notes to STDOUT (or whatever is C'd) about how the conversion
+is going. If 0, no progress information is printed.
+If 1 (the default value), some progress information is printed.
+Higher values print more information.
+
+
+=item $batchconv->index( I );
+
+This controls whether or not each HTML page is liable to have a little
+table of contents at the top (which we call an "index" for historical
+reasons). This is true by default.
+
+
+=item $batchconv->contents_file( I );
+
+If set, should be the name of a file (in the output directory)
+to write the HTML index to. The default value is "index.html".
+If you set this to a false value, no contents file will be written.
+
+=item $batchconv->contents_page_start( I );
+
+This specifies what string should be put at the beginning of
+the contents page.
+The default is a string more or less like this:
+
+
+ Perl Documentation
+
+ Perl Documentation
+
+=item $batchconv->contents_page_end( I );
+
+This specifies what string should be put at the end of the contents page.
+The default is a string more or less like this:
+
+
+
+
+
+=item $batchconv->add_css( $url );
+
+TODO
+
+=item $batchconv->add_javascript( $url );
+
+TODO
+
+=item $batchconv->css_flurry( I );
+
+If true (the default value), we autogenerate some CSS files in the
+output directory, and set our HTML files to use those.
+TODO: continue
+
+=item $batchconv->javascript_flurry( I );
+
+If true (the default value), we autogenerate a JavaScript in the
+output directory, and set our HTML files to use it. Currently,
+the JavaScript is used only to get the browser to remember what
+stylesheet it prefers.
+TODO: continue
+
+=item $batchconv->no_contents_links( I );
+
+TODO
+
+=item $batchconv->html_render_class( I );
+
+This sets what class is used for rendering the files.
+The default is "Pod::Simple::Search". If you set it to something else,
+it should probably be a subclass of Pod::Simple::Search, and you should
+C or C that class so that's it's loaded before
+Pod::Simple::HTMLBatch tries loading it.
+
+=back
+
+
+
+
+=head1 NOTES ON CUSTOMIZATION
+
+TODO
+
+ call add_css($someurl) to add stylesheet as alternate
+ call add_css($someurl,1) to add as primary stylesheet
+
+ call add_javascript
+
+ subclass Pod::Simple::HTML and set $batchconv->html_render_class to
+ that classname
+ and maybe override
+ $page->batch_mode_page_object_init($self, $module, $infile, $outfile, $depth)
+ or maybe override
+ $batchconv->batch_mode_page_object_init($page, $module, $infile, $outfile, $depth)
+
+
+
+=head1 ASK ME!
+
+If you want to do some kind of big pod-to-HTML version with some
+particular kind of option that you don't see how to achieve using this
+module, email me (C) and I'll probably have a good idea
+how to do it. For reasons of concision and energetic laziness, some
+methods and options in this module (and the dozen modules it depends on)
+are undocumented; but one of those undocumented bits might be just what
+you're looking for.
+
+
+=head1 SEE ALSO
+
+L, L, L, L
+
+
+
+
+=head1 COPYRIGHT AND DISCLAIMERS
+
+Copyright (c) 2004 Sean M. Burke. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+This program is distributed in the hope that it will be useful, but
+without any warranty; without even the implied warranty of
+merchantability or fitness for a particular purpose.
+
+=head1 AUTHOR
+
+Sean M. Burke C
+
+=cut
+
+
+
--- /dev/null 2009-01-07 14:55:01.000000000 +1100
+++ lib/Pod/Simple/HTMLLegacy.pm 2009-01-07 13:36:39.000000000 +1100
@@ -0,0 +1,104 @@
+
+require 5;
+package Pod::Simple::HTMLLegacy;
+use strict;
+
+use vars qw($VERSION);
+use Getopt::Long;
+
+$VERSION = "5.01";
+
+#--------------------------------------------------------------------------
+#
+# This class is meant to thinly emulate bad old Pod::Html
+#
+# TODO: some basic docs
+
+sub pod2html {
+ my @args = (@_);
+
+ my( $verbose, $infile, $outfile, $title );
+ my $index = 1;
+
+ {
+ my($help);
+
+ my($netscape); # dummy
+ local @ARGV = @args;
+ GetOptions(
+ "help" => \$help,
+ "verbose!" => \$verbose,
+ "infile=s" => \$infile,
+ "outfile=s" => \$outfile,
+ "title=s" => \$title,
+ "index!" => \$index,
+
+ "netscape!" => \$netscape,
+ ) or return bad_opts(@args);
+ bad_opts(@args) if @ARGV; # it should be all switches!
+ return help_message() if $help;
+ }
+
+ for($infile, $outfile) { $_ = undef unless defined and length }
+
+ if($verbose) {
+ warn sprintf "%s version %s\n", __PACKAGE__, $VERSION;
+ warn "OK, processed args [@args] ...\n";
+ warn sprintf
+ " Verbose: %s\n Index: %s\n Infile: %s\n Outfile: %s\n Title: %s\n",
+ map defined($_) ? $_ : "(nil)",
+ $verbose, $index, $infile, $outfile, $title,
+ ;
+ *Pod::Simple::HTML::DEBUG = sub(){1};
+ }
+ require Pod::Simple::HTML;
+ Pod::Simple::HTML->VERSION(3);
+
+ die "No such input file as $infile\n"
+ if defined $infile and ! -e $infile;
+
+
+ my $pod = Pod::Simple::HTML->new;
+ $pod->force_title($title) if defined $title;
+ $pod->index($index);
+ return $pod->parse_from_file($infile, $outfile);
+}
+
+#--------------------------------------------------------------------------
+
+sub bad_opts { die _help_message(); }
+sub help_message { print STDOUT _help_message() }
+
+#--------------------------------------------------------------------------
+
+sub _help_message {
+
+ join '',
+
+"[", __PACKAGE__, " version ", $VERSION, qq~]
+Usage: pod2html --help --infile= --outfile=
+ --verbose --index --noindex
+
+Options:
+ --help - prints this message.
+ --[no]index - generate an index at the top of the resulting html
+ (default behavior).
+ --infile - filename for the pod to convert (input taken from stdin
+ by default).
+ --outfile - filename for the resulting html file (output sent to
+ stdout by default).
+ --title - title that will appear in resulting html file.
+ --[no]verbose - self-explanatory (off by default).
+
+Note that pod2html is DEPRECATED, and this version implements only
+ some of the options known to older versions.
+For more information, see 'perldoc pod2html'.
+~;
+
+}
+
+1;
+__END__
+
+OVER the underpass! UNDER the overpass! Around the FUTURE and BEYOND REPAIR!!
+
--- /dev/null 2009-01-07 14:55:01.000000000 +1100
+++ lib/Pod/Simple/XHTML.pm 2009-01-07 13:36:39.000000000 +1100
@@ -0,0 +1,400 @@
+=pod
+
+=head1 NAME
+
+Pod::Simple::XHTML -- format Pod as validating XHTML
+
+=head1 SYNOPSIS
+
+ use Pod::Simple::XHTML;
+
+ my $parser = Pod::Simple::XHTML->new();
+
+ ...
+
+ $parser->parse_file('path/to/file.pod');
+
+=head1 DESCRIPTION
+
+This class is a formatter that takes Pod and renders it as XHTML
+validating HTML.
+
+This is a subclass of L and inherits all its
+methods. The implementation is entirely different than
+L, but it largely preserves the same interface.
+
+=cut
+
+package Pod::Simple::XHTML;
+use strict;
+use vars qw( $VERSION @ISA $HAS_HTML_ENTITIES );
+$VERSION = '3.04';
+use Carp ();
+use Pod::Simple::Methody ();
+@ISA = ('Pod::Simple::Methody');
+
+BEGIN {
+ $HAS_HTML_ENTITIES = eval "require HTML::Entities; 1";
+}
+
+my %entities = (
+ q{>} => 'gt',
+ q{<} => 'lt',
+ q{'} => '#39',
+ q{"} => 'quot',
+ q{&} => 'amp',
+);
+
+sub encode_entities {
+ return HTML::Entities::encode_entities( $_[0] ) if $HAS_HTML_ENTITIES;
+ my $str = $_[0];
+ my $ents = join '', keys %entities;
+ $str =~ s/([$ents])/'&' . $entities{$1} . ';'/ge;
+ return $str;
+}
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+=head1 METHODS
+
+Pod::Simple::XHTML offers a number of methods that modify the format of
+the HTML output. Call these after creating the parser object, but before
+the call to C:
+
+ my $parser = Pod::PseudoPod::HTML->new();
+ $parser->set_optional_param("value");
+ $parser->parse_file($file);
+
+=head2 perldoc_url_prefix
+
+In turning L into http://whatever/Foo%3a%3aBar, what
+to put before the "Foo%3a%3aBar". The default value is
+"http://search.cpan.org/perldoc?".
+
+=head2 perldoc_url_postfix
+
+What to put after "Foo%3a%3aBar" in the URL. This option is not set by
+default.
+
+=head2 title_prefix, title_postfix
+
+What to put before and after the title in the head. The values should
+already be &-escaped.
+
+=head2 html_css
+
+ $parser->html_css('path/to/style.css');
+
+The URL or relative path of a CSS file to include. This option is not
+set by default.
+
+=head2 html_javascript
+
+The URL or relative path of a JavaScript file to pull in. This option is
+not set by default.
+
+=head2 html_doctype
+
+A document type tag for the file. This option is not set by default.
+
+=head2 html_header_tags
+
+Additional arbitrary HTML tags for the header of the document. The
+default value is just a content type header tag:
+
+
+
+Add additional meta tags here, or blocks of inline CSS or JavaScript
+(wrapped in the appropriate tags).
+
+=head2 default_title
+
+Set a default title for the page if no title can be determined from the
+content. The value of this string should already be &-escaped.
+
+=head2 force_title
+
+Force a title for the page (don't try to determine it from the content).
+The value of this string should already be &-escaped.
+
+=head2 html_header, html_footer
+
+Set the HTML output at the beginning and end of each file. The default
+header includes a title, a doctype tag (if C is set), a
+content tag (customized by C), a tag for a CSS file
+(if C is set), and a tag for a Javascript file (if
+C is set). The default footer simply closes the C
+and C tags.
+
+The options listed above customize parts of the default header, but
+setting C or C completely overrides the
+built-in header or footer. These may be useful if you want to use
+template tags instead of literal HTML headers and footers or are
+integrating converted POD pages in a larger website.
+
+If you want no headers or footers output in the HTML, set these options
+to the empty string.
+
+=head2 index
+
+TODO -- Not implemented.
+
+Whether to add a table-of-contents at the top of each page (called an
+index for the sake of tradition).
+
+
+=cut
+
+__PACKAGE__->_accessorize(
+ 'perldoc_url_prefix',
+ 'perldoc_url_postfix',
+ 'title_prefix', 'title_postfix',
+ 'html_css',
+ 'html_javascript',
+ 'html_doctype',
+ 'html_header_tags',
+ 'title', # Used internally for the title extracted from the content
+ 'default_title',
+ 'force_title',
+ 'html_header',
+ 'html_footer',
+ 'index',
+ 'batch_mode', # whether we're in batch mode
+ 'batch_mode_current_level',
+ # When in batch mode, how deep the current module is: 1 for "LWP",
+ # 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc
+);
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+=head1 SUBCLASSING
+
+If the standard options aren't enough, you may want to subclass
+Pod::Simple::XHMTL. These are the most likely candidates for methods
+you'll want to override when subclassing.
+
+=cut
+
+sub new {
+ my $self = shift;
+ my $new = $self->SUPER::new(@_);
+ $new->{'output_fh'} ||= *STDOUT{IO};
+ $new->accept_targets( 'html', 'HTML' );
+ $new->perldoc_url_prefix('http://search.cpan.org/perldoc?');
+ $new->html_header_tags(' ');
+ $new->nix_X_codes(1);
+ $new->codes_in_verbatim(1);
+ $new->{'scratch'} = '';
+ return $new;
+}
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+=head2 handle_text
+
+This method handles the body of text within any element: it's the body
+of a paragraph, or everything between a "=begin" tag and the
+corresponding "=end" tag, or the text within an L entity, etc. You would
+want to override this if you are adding a custom element type that does
+more than just display formatted text. Perhaps adding a way to generate
+HTML tables from an extended version of POD.
+
+So, let's say you want add a custom element called 'foo'. In your
+subclass's C method, after calling C you'd call:
+
+ $new->accept_targets_as_text( 'foo' );
+
+Then override the C method in the subclass to check for when
+"$flags->{'target'}" is equal to 'foo' and set a flag that marks that
+you're in a foo block (maybe "$self->{'in_foo'} = 1"). Then override the
+C method to check for the flag, and pass $text to your
+custom subroutine to construct the HTML output for 'foo' elements,
+something like:
+
+ sub handle_text {
+ my ($self, $text) = @_;
+ if ($self->{'in_foo'}) {
+ $self->{'scratch'} .= build_foo_html($text);
+ } else {
+ $self->{'scratch'} .= $text;
+ }
+ }
+
+=cut
+
+sub handle_text {
+ # escape special characters in HTML (<, >, &, etc)
+ $_[0]{'scratch'} .= $_[0]{'in_verbatim'} ? encode_entities( $_[1] ) : $_[1]
+}
+
+sub start_Para { $_[0]{'scratch'} = '' }
+sub start_Verbatim { $_[0]{'scratch'} = '
'; $_[0]{'in_verbatim'} = 1}
+
+sub start_head1 { $_[0]{'scratch'} = '' }
+sub start_head2 { $_[0]{'scratch'} = '' }
+sub start_head3 { $_[0]{'scratch'} = '' }
+sub start_head4 { $_[0]{'scratch'} = '' }
+
+sub start_item_bullet { $_[0]{'scratch'} = ' ' }
+sub start_item_number { $_[0]{'scratch'} = " $_[1]{'number'}. " }
+sub start_item_text { $_[0]{'scratch'} = ' ' }
+
+sub start_over_bullet { $_[0]{'scratch'} = ''; $_[0]->emit }
+sub start_over_text { $_[0]{'scratch'} = ''; $_[0]->emit }
+sub start_over_block { $_[0]{'scratch'} = ''; $_[0]->emit }
+sub start_over_number { $_[0]{'scratch'} = ''; $_[0]->emit }
+
+sub end_over_bullet { $_[0]{'scratch'} .= ' '; $_[0]->emit }
+sub end_over_text { $_[0]{'scratch'} .= ' '; $_[0]->emit }
+sub end_over_block { $_[0]{'scratch'} .= ' '; $_[0]->emit }
+sub end_over_number { $_[0]{'scratch'} .= ''; $_[0]->emit }
+
+# . . . . . Now the actual formatters:
+
+sub end_Para { $_[0]{'scratch'} .= '
'; $_[0]->emit }
+sub end_Verbatim {
+ $_[0]{'scratch'} .= '';
+ $_[0]{'in_verbatim'} = 0;
+ $_[0]->emit;
+}
+
+sub end_head1 { $_[0]{'scratch'} .= ''; $_[0]->emit }
+sub end_head2 { $_[0]{'scratch'} .= ''; $_[0]->emit }
+sub end_head3 { $_[0]{'scratch'} .= ''; $_[0]->emit }
+sub end_head4 { $_[0]{'scratch'} .= ''; $_[0]->emit }
+
+sub end_item_bullet { $_[0]{'scratch'} .= ''; $_[0]->emit }
+sub end_item_number { $_[0]{'scratch'} .= ''; $_[0]->emit }
+sub end_item_text { $_[0]->emit }
+
+# This handles =begin and =for blocks of all kinds.
+sub start_for {
+ my ($self, $flags) = @_;
+ $self->{'scratch'} .= '{'scratch'} .= ' class="'.$flags->{'target'}.'"' if ($flags->{'target'});
+ $self->{'scratch'} .= '>';
+ $self->emit;
+
+}
+sub end_for {
+ my ($self) = @_;
+ $self->{'scratch'} .= '
';
+ $self->emit;
+}
+
+sub start_Document {
+ my ($self) = @_;
+ if (defined $self->html_header) {
+ $self->{'scratch'} .= $self->html_header;
+ $self->emit unless $self->html_header eq "";
+ } else {
+ my ($doctype, $title, $metatags);
+ $doctype = $self->html_doctype || '';
+ $title = $self->force_title || $self->title || $self->default_title || '';
+ $metatags = $self->html_header_tags || '';
+ if ($self->html_css) {
+ $metatags .= "\n ";
+ }
+ if ($self->html_javascript) {
+ $metatags .= "\n";
+ }
+ $self->{'scratch'} .= <<"HTML";
+$doctype
+
+
+$title
+$metatags
+
+
+HTML
+ $self->emit;
+ }
+}
+
+sub end_Document {
+ my ($self) = @_;
+ if (defined $self->html_footer) {
+ $self->{'scratch'} .= $self->html_footer;
+ $self->emit unless $self->html_footer eq "";
+ } else {
+ $self->{'scratch'} .= "\n";
+ $self->emit;
+ }
+}
+
+# Handling code tags
+sub start_B { $_[0]{'scratch'} .= '' }
+sub end_B { $_[0]{'scratch'} .= ' ' }
+
+sub start_C { $_[0]{'scratch'} .= '' }
+sub end_C { $_[0]{'scratch'} .= '
' }
+
+sub start_E { $_[0]{'scratch'} .= '&' }
+sub end_E { $_[0]{'scratch'} .= ';' }
+
+sub start_F { $_[0]{'scratch'} .= '' }
+sub end_F { $_[0]{'scratch'} .= ' ' }
+
+sub start_I { $_[0]{'scratch'} .= '' }
+sub end_I { $_[0]{'scratch'} .= ' ' }
+
+sub start_L {
+ my ($self, $flags) = @_;
+ my $url;
+ if ($flags->{'type'} eq 'url') {
+ $url = $flags->{'to'};
+ } elsif ($flags->{'type'} eq 'pod') {
+ $url .= $self->perldoc_url_prefix || '';
+ $url .= $flags->{'to'} || '';
+ $url .= '/' . $flags->{'section'} if ($flags->{'section'});
+ $url .= $self->perldoc_url_postfix || '';
+# require Data::Dumper;
+# print STDERR Data::Dumper->Dump([$flags]);
+ }
+
+ $self->{'scratch'} .= '';
+}
+sub end_L { $_[0]{'scratch'} .= ' ' }
+
+sub start_S { $_[0]{'scratch'} .= '' }
+sub end_S { $_[0]{'scratch'} .= ' ' }
+
+sub emit {
+ my($self) = @_;
+ my $out = $self->{'scratch'} . "\n";
+ print {$self->{'output_fh'}} $out, "\n";
+ $self->{'scratch'} = '';
+ return;
+}
+
+# Bypass built-in E<> handling to preserve entity encoding
+sub _treat_Es {}
+
+1;
+
+__END__
+
+=head1 SEE ALSO
+
+L, L
+
+=head1 COPYRIGHT
+
+Copyright (c) 2003-2005 Allison Randal.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself. The full text of the license
+can be found in the LICENSE file included with this module.
+
+This library is distributed in the hope that it will be useful, but
+without any warranty; without even the implied warranty of
+merchantability or fitness for a particular purpose.
+
+=head1 AUTHOR
+
+Allison Randal
+
+=cut
+