summaryrefslogtreecommitdiffstats
path: root/emacs.d/nxhtml/nxhtml/html-chklnk
diff options
context:
space:
mode:
Diffstat (limited to 'emacs.d/nxhtml/nxhtml/html-chklnk')
-rw-r--r--emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/HTML/LinkWalker.pm774
-rw-r--r--emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/HTML/ParserTagEnd.pm448
-rw-r--r--emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/HTML/datadir.txt1
-rw-r--r--emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/PathSubs.pm207
-rw-r--r--emacs.d/nxhtml/nxhtml/html-chklnk/link_checker.pl328
5 files changed, 1758 insertions, 0 deletions
diff --git a/emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/HTML/LinkWalker.pm b/emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/HTML/LinkWalker.pm
new file mode 100644
index 0000000..14b0ccb
--- /dev/null
+++ b/emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/HTML/LinkWalker.pm
@@ -0,0 +1,774 @@
+### File: LinkWalker.pm
+### Author: Lennart Borgman
+### All rights reserved
+
+##########################################################
+### UserAgent module
+##########################################################
+package LWP::WalkerUA;
+require LWP::UserAgent;
+@ISA = qw(LWP::UserAgent);
+
+### Mirror to another file (why???)
+sub mirror
+{
+ my($self, $url, $file, $mirr_tmp) = @_;
+ die "no mirr_tmp" unless defined $mirr_tmp;
+
+ LWP::Debug::trace('()');
+ my $request = new HTTP::Request('GET', $url);
+
+ if (-e $file) {
+ my($mtime) = (stat($file))[9];
+ if($mtime) {
+ $request->header('If-Modified-Since' =>
+ HTTP::Date::time2str($mtime));
+ }
+ }
+ my $tmpfile = "$file-$$";
+
+ my $response = $self->request($request, $tmpfile);
+ if ($response->is_success) {
+
+ my $file_length = (stat($tmpfile))[7];
+ my($content_length) = $response->header('Content-length');
+
+ if (defined $content_length and $file_length < $content_length) {
+ unlink($tmpfile);
+ die "Transfer truncated: " .
+ "only $file_length out of $content_length bytes received\n";
+ } elsif (defined $content_length and $file_length > $content_length) {
+ unlink($tmpfile);
+ die "Content-length mismatch: " .
+ "expected $content_length bytes, got $file_length\n";
+ } else {
+ # OK
+ if (-e $mirr_tmp) {
+ # Some dosish systems fail to rename if the target exists
+ chmod 0777, $mirr_tmp;
+ unlink $mirr_tmp;
+ }
+ rename($tmpfile, $mirr_tmp) or
+ die "Cannot rename '$tmpfile' to '$mirr_tmp': $!\n";
+
+ if (my $lm = $response->last_modified) {
+ # make sure the file has the same last modification time
+ utime $lm, $lm, $mirr_tmp;
+ }
+ }
+ } else {
+ unlink($tmpfile);
+ }
+ return $response;
+}
+
+
+##########################################################
+### Parser module
+##########################################################
+package HTML::WalkerParser;
+require HTML::ParserTagEnd;
+@ISA = qw(HTML::ParserTagEnd);
+use strict;
+use vars qw(%LINK_ELEMENT);
+
+# Elements that might contain links and the name of the link attribute
+%LINK_ELEMENT =
+(
+ body => 'background',
+ base => 'href',
+ a => 'href',
+ img => [qw(src lowsrc usemap)], # 'lowsrc' is a Netscape invention
+ form => 'action',
+ input => 'src',
+'link' => 'href', # need quoting since link is a perl builtin
+ frame => 'src',
+ applet => [qw(codebase code)],
+ area => 'href',
+ iframe => 'src', # Netscape 2.0 extention
+ embed => 'src', # used in Netscape 2.0 for Shockwave and things like that
+);
+
+my %LINKATTRIBS = (
+ "href" => 1,
+ "src" => 1,
+ "action" => 1,
+ "background" => 1,
+ "usemap" => 1,
+ "code" => 1,
+ "codebase" => 1,
+ "lowsrc" => 1,
+ );
+my %MAYBECONT = (
+ a => 'href',
+ area => 'href',
+ form => 'action',
+ frame => 'src',
+ iframe => 'src',
+ );
+
+sub maybecont($$) {
+ my $tag = shift;
+ my $att = shift;
+ return unless exists $MAYBECONT{$tag};
+ return ($MAYBECONT{$tag} eq $att);
+}
+
+sub new {
+ my($class, $parsed_fh) = @_;
+ my $self = $class->SUPER::new;
+ $self->{parsed_fh} = $parsed_fh;
+ $self;
+}
+
+
+
+
+
+
+
+##########################################################
+### Walker module
+##########################################################
+package HTML::LinkWalker;
+use strict;
+
+use IO::File;
+use File::Copy qw();
+use File::Path qw();
+use PathSubs qw();
+use HTML::Entities;
+use FindBin qw();
+
+
+##########################################################
+### Globals
+##########################################################
+my $ua;
+my $m_ua_personality = "LinkWalker/0.9";
+my %m_is_outside;
+my %m_is_container;
+my $m_bOnlyCont;
+my @m_sLinkRoots;
+my $m_subReport;
+my $m_subAction;
+my $m_subMirrorAction;
+
+
+#############################
+### Collecting info
+#############################
+my %m_CheckedLinks;
+my %m_MissedLinks;
+
+sub tell_bad_link($$$$$) {
+ my $what = shift;
+ my $file = shift;
+ my $lnum = shift;
+ my $link = shift;
+ my $line = shift;
+ $file = "START" unless defined $file;
+ $lnum = "(start)" unless defined $lnum;
+ my $longMsg = "<<$what>>";
+ my $shortMsg = $what;
+ if (defined $link) {
+ my @lines = split("\\s+", $line);
+ my $disp_line = join("\n\t\t ", @lines);
+ $longMsg .= ",\n\t\tlink=$link\n\t\t$disp_line";
+ }
+ my @msg = ($shortMsg, $longMsg);
+ $m_CheckedLinks{$file}->{ERR}->{$lnum} = \@msg;
+ &$m_subReport("\t* Error * " . $what . "\n");
+} # tell_bad_link
+
+
+#############################
+### Helpers
+#############################
+
+sub get_contenttype($) {
+ my $response = shift;
+ my @rh = $response->header("Content-Type");
+ for my $r (@rh) {
+ my $c = $r;
+ if ((my $iPos = index($r, ";")) > -1) {
+ $c = substr($r, 0, $iPos);
+ }
+ return $c;
+ }
+}
+sub is_linked_contenttype($) {
+ my $response = shift;
+ return (get_contenttype($response) eq "text/html");
+}
+
+sub ending_is_container($) {
+ my $link_addr = shift;
+ $link_addr =~ s!#.*$!!;
+ $link_addr =~ s!\?.*$!!;
+ return (($link_addr =~ m!\.s?html?$!i) ? 1 : 0);
+}
+
+my $m_sMirrorRoot;
+my $m_bMirror = 1;
+
+sub mk_mirror_name($) {
+ my $orig_name = shift;
+ $orig_name =~ tr!\\!/!;
+ my $mirr_name = $orig_name;
+ my ($orig_host) = ($orig_name =~ m!(^https?://[^/]*)!i);
+ if (defined $orig_host) {
+ my $host = $orig_host;
+ $host =~ tr!:!_!;
+ $host =~ tr!/!_!;
+ $mirr_name =~ s!^$orig_host!$host!;
+ if (substr($mirr_name, -1) eq "/") { $mirr_name .= "default.html"; }
+ } else {
+ die "Can't find host in $orig_name\n";
+ }
+ my $mirr_full = sMirrorRoot() . $mirr_name;
+ if (!$m_bMirror) {
+ my $sExt = $mirr_name; $sExt =~ s!.*\.([^\.]*$)!$1!;
+ $mirr_full = sMirrorRoot() . "temp.$sExt";
+ }
+ my $mirr_fold = $mirr_full;
+ $mirr_fold =~ s![^/]*$!!;
+ File::Path::mkpath($mirr_fold, 0, 0777);
+ return $mirr_full;
+}
+
+#############################
+### Checks
+#############################
+sub is_outside($) {
+ my $uq_link_addr = shift;
+ if (!exists $m_is_outside{$uq_link_addr}) {
+ $m_is_outside{$uq_link_addr} = test_is_outside($uq_link_addr, \@m_sLinkRoots);
+ }
+ return $m_is_outside{$uq_link_addr};
+}
+sub set_is_container($$) {
+ my $uq_link_addr = shift;
+ return if exists $m_is_container{$uq_link_addr};
+ $m_is_container{$uq_link_addr} = shift;
+}
+sub is_outside_container($) {
+ my $uq_link_addr = shift;
+ if (exists $m_is_container{$uq_link_addr}) {
+ if ($m_is_container{$uq_link_addr}) {
+ return is_outside($uq_link_addr);
+ }
+ }
+}
+sub test_is_outside($$) {
+ my $uq_link_addr = shift;
+ my $link_roots = shift;
+ if (defined $link_roots) {
+ my $in_roots;
+ for my $link_root (@$link_roots) {
+ if (substr($uq_link_addr, 0, length($link_root)) eq $link_root) {
+ return 0;
+ }
+ }
+ return 1;
+ }
+} # is_outside
+
+
+
+##########################################################
+### Parsing
+##########################################################
+
+
+### Parser subs
+sub HTML::WalkerParser::declaration {
+ my($self, $decl) = @_;
+ return unless defined $self->{parsed_fh};
+ my $fh = $self->{parsed_fh};
+ print $fh "<!" . $decl . ">";
+}
+my $m_start_cb;
+sub HTML::WalkerParser::start {
+ my($self, $tag, $attr, $ended) = @_;
+ &$m_start_cb($tag, $attr);
+ return unless defined $self->{parsed_fh};
+ my $t = "<$tag";
+ for my $k (keys %$attr) {
+ my $encoded = encode_entities($$attr{$k});
+ $t .= qq( $k="$encoded");
+ }
+ if ($ended) {
+ $t .= " />";
+ } else {
+ $t .= ">";
+ }
+ my $fh = $self->{parsed_fh};
+ print $fh $t;
+}
+sub HTML::WalkerParser::end {
+ my ($self, $tag) = @_;
+ return unless defined $self->{parsed_fh};
+ my $fh = $self->{parsed_fh};
+ print $fh "</" . $tag . ">";
+}
+sub HTML::WalkerParser::text {
+ my ($self, $txt) = @_;
+ return unless defined $self->{parsed_fh};
+ my $fh = $self->{parsed_fh};
+ print $fh $txt;
+}
+sub HTML::WalkerParser::comment {
+ my($self, $comment) = @_;
+ return unless defined $self->{parsed_fh};
+ my $fh = $self->{parsed_fh};
+ print $fh "<!--" . $comment . "-->";
+}
+
+
+
+
+### Main parsing routine
+
+sub parse_file($$$$$$$$$) {
+ my ($file_name, $parsed_fh, $uq_link_addr, $link_roots,
+ $ref_links, $ref_anchs, $ref_lines, $ref_tagname, $ref_attname) = @_;
+ my $fh;
+ if (-d $file_name) {
+ $file_name = PathSubs::uniq_dir($file_name) . "default.html";
+ $uq_link_addr .= "/" unless substr($uq_link_addr, -1) eq "/";
+ $uq_link_addr .= "default.html";
+ &$m_subReport("dir => $file_name\n");
+ }
+ $fh = new IO::File($file_name);
+ die "Can't read $file_name: $!\n" unless defined $fh;
+ my $base_href;
+ my $n;
+ my $line;
+ my $uq_link_fold = $uq_link_addr; $uq_link_fold =~ s![^/]*$!!;
+
+ my $start_cb =
+ sub {
+ my ($tag, $attr_hash) = @_;
+ for my $k (keys %$attr_hash) {
+ if (($k eq "id") || ($k eq "name")) {
+ my $v = $$attr_hash{$k};
+ $$ref_anchs{$v} = $n;
+ $$ref_lines{$n} = $line;
+ } elsif (exists $LINKATTRIBS{$k}) {
+ my $v = $$attr_hash{$k};
+ next if $v =~ m!^javascript:!;
+ next if $v =~ m!^ftp://!;
+ next if $v =~ m!^mailto://!;
+ if ($tag eq "base") { $base_href = $v if $k eq "href"; next; }
+ my $v_abs; my $v_rel;
+ my $v_is_abs = PathSubs::is_abs_path($v);
+ if ($v_is_abs) {
+ $v_abs = $v;
+ $v_rel = PathSubs::mk_relative_link($uq_link_addr, $v_abs);
+ } else {
+ $v_rel = $v;
+ if (defined $base_href) {
+ $v_abs = PathSubs::mk_abs_link($base_href, $v);
+ } else {
+ if (substr($v_rel, 0, 1) ne "#") {
+ $v_abs = $uq_link_fold . $v_rel;
+ } else {
+ $v_abs = $uq_link_addr . $v_rel;
+ }
+ $v_abs = PathSubs::resolve_dotdot($v_abs);
+ }
+ }
+ next if exists $m_CheckedLinks{$v_abs};
+ if (is_outside($v_abs)) {
+ if (!$v_is_abs) {
+ if (ending_is_container($v_abs)) {
+ $m_CheckedLinks{$v_abs} = {};
+ tell_bad_link("Outside relative link ($v_rel)",
+ $uq_link_addr, $n, $v, $line);
+ }
+ }
+ ### Skip outside absolute links
+ ### Could be things like banners etc...
+ next;
+ }
+ $$ref_links{$v_rel} = $n;
+ $$ref_lines{$n} = $line;
+ if (substr($v_rel, 0, 1) ne "#") {
+ my $v_rel_name = $v_rel;
+ $v_rel_name =~ s!#.*$!!;
+ $v_rel_name =~ s!\?.*$!!;
+ $$ref_tagname{$v_rel_name} = $tag;
+ $$ref_attname{$v_rel_name} = $k;
+ }
+ if ($v_is_abs && ($v_rel ne $v)) { $$attr_hash{$k} = $v_rel; }
+ }
+ }
+ }; # $start_cb
+
+ $m_start_cb = $start_cb;
+ my $p = HTML::WalkerParser->new($parsed_fh);
+ while ($line = <$fh>) {
+ $n++;
+ $p->parse($line);
+ }
+ $fh->close();
+} # parse_file
+
+
+
+##########################################################
+### Do the walk...
+##########################################################
+sub walk_link($$;$$$$) {
+ die "$#_" unless ($#_ == 1 || $#_ == 5);
+ my $link_fold = shift;
+ my $link_file = shift;
+ my $parent_url = shift;
+ my $parent_lnum = shift;
+ my $parent_link = shift;
+ my $parent_line = shift;
+
+ my $link_addr = $link_fold . $link_file;
+ my $uq_link_addr;
+ my $is_file = ($link_addr !~ m!^https?://!i);
+ if ($is_file) {
+ $uq_link_addr = PathSubs::uniq_file($link_addr);
+ } else {
+ $uq_link_addr = PathSubs::resolve_dotdot($link_addr);
+ }
+ return if exists $m_CheckedLinks{$uq_link_addr};
+ return if exists $m_MissedLinks{$uq_link_addr};
+ $m_CheckedLinks{$uq_link_addr} = {};
+ my $link_is_container = ending_is_container($uq_link_addr);
+ if ($link_is_container) {
+ set_is_container($uq_link_addr, 1);
+ return if is_outside($uq_link_addr);
+ } else {
+ return if $m_bOnlyCont;
+ }
+ my $response;
+ my $contenttype;
+ my $bDoRewrite;
+ my $file_name;
+ if ($is_file) {
+ if (!-r $uq_link_addr) {
+ tell_bad_link("Can't read file ($uq_link_addr)",
+ $parent_url, $parent_lnum, $parent_link, $parent_line);
+ $m_MissedLinks{$uq_link_addr} = 1;
+ return;
+ }
+ $file_name = $uq_link_addr;
+ } else {
+ $file_name = mk_mirror_name($uq_link_addr);
+ if (!defined $ua) {
+ $ua = new LWP::UserAgent;
+ $ua->agent($m_ua_personality);
+ #$ua->delay(0.1);
+ }
+ if ($m_bMirror) {
+ $response = $ua->mirror($uq_link_addr, $file_name);
+ &$m_subMirrorAction($uq_link_addr, $file_name, $response);
+ } else {
+ my $request = new HTTP::Request('GET', $uq_link_addr);
+ $response = $ua->request($request, $file_name);
+ }
+ #dump_response($response); exit;
+ if ($response->code != 304) {
+ if (!$response->is_success) {
+ tell_bad_link($response->status_line . " ($uq_link_addr)",
+ $parent_url, $parent_lnum, $parent_link, $parent_line);
+ $m_MissedLinks{$uq_link_addr} = 1;
+ return;
+ }
+ $bDoRewrite = $m_bMirror;
+ $contenttype = get_contenttype($response);
+ $link_is_container = is_linked_contenttype($response);
+ }
+ if ($uq_link_addr ne $response->base) {
+ if ($m_bMirror) {
+ my $base_file = mk_mirror_name($response->base);
+ if (!File::Copy::copy($file_name, $base_file)) {
+ die "Can't copy($file_name, $base_file): $!\n";
+ }
+ if (my $lm = $response->last_modified) { utime $lm, $lm, $base_file; }
+ $file_name = $base_file;
+ }
+ $uq_link_addr = $response->base;
+ }
+ }
+ ### Test again, could be new info from net!
+ if ($link_is_container) {
+ set_is_container($uq_link_addr, 1);
+ return if is_outside($uq_link_addr);
+ } else {
+ return if $m_bOnlyCont;
+ return;
+ }
+ &$m_subReport("$uq_link_addr ...");
+
+ my %links;
+ my %anchs;
+ my %lines;
+ my %tagname;
+ my %attname;
+ my $parsed_fh;
+ my $parsed_file;
+ my $file_to_parse = $file_name;
+ if ($bDoRewrite) {
+ $parsed_file = $file_to_parse . "-p$$";
+ &$m_subReport(" <<GET");
+ $parsed_fh = new IO::File("> $parsed_file");
+ die "Can't create $parsed_file: $!\n" unless defined $parsed_fh;
+ print $parsed_fh "<!-- parsed version -->\n";
+ }
+ &$m_subReport("\n");
+ parse_file($file_to_parse, $parsed_fh, $uq_link_addr,
+ \@m_sLinkRoots,
+ \%links, \%anchs, \%lines, \%tagname, \%attname);
+ if (defined $parsed_fh) {
+ $parsed_fh->close();
+ if (-e $file_name) { unlink $file_name or die "Can't unlink $file_name: $!"; }
+ rename($parsed_file, $file_name) or die "Can't rename($parsed_file, $file_name): $!\n";
+ if (my $lm = $response->last_modified) { utime $lm, $lm, $file_name; }
+ }
+ ### Now we know...
+ if ($link_is_container) { return if is_outside($uq_link_addr); }
+
+ $m_CheckedLinks{$uq_link_addr}->{ANC} = \%anchs;
+ my $file_dir;
+ if ($is_file) {
+ $file_dir = $uq_link_addr;
+ $file_dir =~ s![^/]*$!!;
+ #chdir $file_dir;
+ }
+ my $container_folder = $uq_link_addr; $container_folder =~ s![^/]*$!!;
+ &$m_subAction($uq_link_addr, $file_name, $contenttype);
+ for my $link (sort keys %links) {
+ # Next line is for onclick lines in prepared docs
+ next if ($link eq "#");
+ my $lnum = $links{$link};
+ my $line = $lines{$lnum};
+ if ($link eq "") {
+ tell_bad_link("Empty link", $uq_link_addr, $lnum, $link, $line);
+ next;
+ }
+ if ($link =~ m!(.*)\?!) { $link = $1; }
+ my $anchor;
+ if ($link =~ m!(.*)#(.*)!) { $link = $1; $anchor = $2; }
+ if ($link eq "") {
+ if (!exists $anchs{$anchor}) {
+ tell_bad_link("Anchor not found ($anchor)", $uq_link_addr, $lnum, $link, $line);
+ }
+ next;
+ }
+ my $sub_fold;
+ my $sub_file;
+ my $uq_sublink;
+ if ($link =~ m!^https?://!i) {
+ $sub_fold = "";
+ $sub_file = $link;
+ $uq_sublink = $link;
+ } else {
+ $sub_file = $link;
+ if ($is_file) {
+ $sub_fold = $file_dir;
+ $uq_sublink = PathSubs::uniq_file($sub_fold . $sub_file);
+ } else {
+ $sub_fold = $container_folder;
+ $uq_sublink = $sub_fold . $sub_file;
+ }
+ }
+ next if (exists $m_CheckedLinks{$uq_sublink});
+ if (defined $anchor) {
+ $m_CheckedLinks{$uq_link_addr}->{EXTANC}->{$uq_sublink} =
+ { ANC=> $anchor, LINE=>$line, LNUM=>$lnum};
+ }
+ if ($m_bOnlyCont) {
+ die "link=$link\tattr=$tagname{$link}\n" unless exists $tagname{$link};
+ next unless maybecont($tagname{$link}, $attname{$link});
+ }
+ if (is_outside($uq_link_addr)) {
+ if (maybecont($tagname{$link}, $attname{$link}) ) {
+ next;
+ }
+ }
+ walk_link($sub_fold, $sub_file, $uq_link_addr, $lnum, $link, $line);
+ }
+} # walk_link
+
+
+
+
+############################################
+### Some more checks!
+############################################
+sub check_external_anchors() {
+ &$m_subReport("\nChecking external anchors...\n");
+ for my $f (sort keys %m_CheckedLinks) {
+ my $fnode = $m_CheckedLinks{$f};
+ if (exists ${$fnode}{"EXTANC"}) {
+ my $extanc_hash = ${$fnode}{"EXTANC"};
+ for my $fx (keys %$extanc_hash) {
+ next unless (exists $m_CheckedLinks{$fx});
+ my $ea_hash = ${$extanc_hash}{$fx};
+ my $ea = ${$ea_hash}{ANC};
+ my $fxnode = $m_CheckedLinks{$fx};
+ my $fx_anc_hash = ${$fxnode}{"ANC"};
+ if (!exists ${$fx_anc_hash}{$ea}) {
+ my $line = ${$ea_hash}{LINE};
+ my $lnum = ${$ea_hash}{LNUM};
+ &$m_subReport("From $f\n");
+ tell_bad_link("Ext anchor not found ($fx#$ea)",
+ $f, $lnum, "$fx#$ea", $line);
+ }
+ }
+ }
+ }
+} # check_external_anchors
+
+
+
+#############################
+### Reporting
+#############################
+sub report_errors($$) {
+ my $bSum = shift;
+ my $bDet = shift;
+ my $errors_reported;
+ my $errors_found;
+ for my $f (sort keys %m_CheckedLinks) {
+ my $fnode = $m_CheckedLinks{$f};
+ if (exists ${$fnode}{ERR}) {
+ $errors_found = 1;
+ last unless $bSum;
+ if (!defined $errors_reported) {
+ $errors_reported = 1;
+ &$m_subReport("\n\n*********** Summary ERRORS and WARNINGS **********\n");
+ }
+ &$m_subReport("$f\n");
+ my $err_hash = ${$fnode}{ERR};
+ for my $e (sort keys %$err_hash) {
+ my $refE = ${$err_hash}{$e};
+ &$m_subReport("\t" . ${$refE}[0] . "\n");
+ }
+ }
+ }
+ undef $errors_reported;
+ if ($bDet) {
+ for my $f (sort keys %m_CheckedLinks) {
+ my $fnode = $m_CheckedLinks{$f};
+ if (exists ${$fnode}{ERR}) {
+ if (!defined $errors_reported) {
+ $errors_reported = 1;
+ &$m_subReport("\n\n*********** Detailed ERRORS and WARNINGS **********\n");
+ }
+ &$m_subReport("$f\n");
+ my $err_hash = ${$fnode}{ERR};
+ for my $e (sort keys %$err_hash) {
+ my $refE = ${$err_hash}{$e};
+ &$m_subReport("\tat line $e: " . ${$refE}[1] . "\n");
+ }
+ }
+ }
+ }
+ if ($errors_found) {
+ die "\n*** There where errors ***\n";
+ } else {
+ &$m_subReport("No errors found\n");
+ }
+} # report_errors
+
+sub dump_response($) {
+ my $response = shift;
+ &$m_subReport( $response->code . " " . $response->message . "\n");
+ &$m_subReport( "****************************************\n");
+ #&$m_subReport( $response->request . "\n");
+ #&$m_subReport( "****************************************\n");
+ #&$m_subReport( $response->previous . "\n");
+ #&$m_subReport( "****************************************\n");
+ &$m_subReport( " i=" . $response->is_info .
+ ", s=" . $response->is_success .
+ ", r=" . $response->is_redirect .
+ ", e=" . $response->is_error . "\n");
+ &$m_subReport( "****************************************\n");
+ &$m_subReport( "content: " . $response->content . "\n");
+ &$m_subReport( "****************************************\n");
+ &$m_subReport( "base: " . $response->base . "\n");
+ &$m_subReport( "****************************************\n");
+ &$m_subReport( $response->as_string);
+ &$m_subReport( "****************************************\n");
+ &$m_subReport( $response->current_age . "\n");
+ &$m_subReport( "****************************************\n");
+ my @rh = $response->header("Content-Type");
+ for my $r (@rh) { &$m_subReport( "ct: $r\n"); }
+ &$m_subReport( "****************************************\n");
+} # dump_response
+
+
+#############################
+### Parameters
+#############################
+sub sMirrorRoot() {
+ my $val = shift;
+ $m_sMirrorRoot = PathSubs::get_temp_path() . "LinkWalker/" unless defined $m_sMirrorRoot;
+ my $old = $m_sMirrorRoot;
+ $m_sMirrorRoot = PathSubs::uniq_dir($val) if defined $val;
+ return $old;
+}
+sub bMirror(;$) {
+ my $val = shift;
+ my $old = $m_bMirror;
+ $m_bMirror = $val if defined $val;
+ $old;
+}
+
+sub subReporter(;$) {
+ my $val = shift;
+ my $old = $m_subReport;
+ $m_subReport = $val if defined $val;
+ $old
+}
+sub subAction(;$) {
+ my $val = shift;
+ my $old = $m_subAction;
+ $m_subAction = $val if defined $val;
+ $old
+}
+sub bOnlyCont(;$) {
+ my $val = shift;
+ my $old = $m_bOnlyCont;
+ $m_bOnlyCont = $val if defined $val;
+ $old
+}
+sub ua_personality(;$) {
+ my $val = shift;
+ my $old = $m_ua_personality;
+ $m_ua_personality = $val if defined $val;
+ $old
+}
+
+sub clear_roots() { @m_sLinkRoots = (); }
+sub get_roots() { return \@m_sLinkRoots; }
+sub add_root($) { push @m_sLinkRoots, shift; }
+sub add_files_root($) {
+ my $file = shift;
+ my $default_root;
+ my ($host) = ($file =~ m!(^https?://[^/]*)!i);
+ if (defined $host) {
+ $default_root = $file;
+ } else {
+ die "Can't find $file\n" unless -e $file;
+ $default_root = PathSubs::uniq_file($file);
+ }
+ $default_root =~ s![^/]*$!!;
+ add_root($default_root);
+}
+
+### Default actions
+sub default_sub {}
+$m_subReport = \&default_sub;
+$m_subAction = \&default_sub;
+$m_subMirrorAction = \&default_sub;
+
+1;
diff --git a/emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/HTML/ParserTagEnd.pm b/emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/HTML/ParserTagEnd.pm
new file mode 100644
index 0000000..32407d6
--- /dev/null
+++ b/emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/HTML/ParserTagEnd.pm
@@ -0,0 +1,448 @@
+package HTML::ParserTagEnd;
+
+# Author address: <gisle@aas.no>
+### Modified for <tag />, Lennart
+
+use strict;
+use HTML::Entities ();
+
+use vars qw($VERSION);
+$VERSION = "2.23"; # $Date: 1999/06/09 10:27:16 $
+
+
+sub new
+{
+ my $class = shift;
+ my $self = bless { '_buf' => '',
+ '_strict_comment' => 0,
+ }, $class;
+ $self;
+}
+
+
+# A little note about the observed Netscape behaviour:
+#
+# It parse <xmp> in the depreceated 'literal' mode, i.e. no tags are
+# recognized until a </xmp> is found.
+#
+# <listing> is parsed like <pre>, i.e. tags are recognized. <listing>
+# are presentend in smaller font than <pre>
+#
+# Netscape does not parse this comment correctly (it terminates the comment
+# too early):
+#
+# <! -- comment -- --> more comment -->
+#
+# Netscape ignores '<!--' and '-->' within the <SCRIPT> and <STYLE> tag.
+# This is used as a trick to make non-script-aware browsers ignore
+# the scripts.
+
+
+sub parse
+{
+ my $self = shift;
+ my $buf = \ $self->{'_buf'};
+ unless (defined $_[0]) {
+ # signals EOF (assume rest is plain text)
+ $self->text($$buf) if length $$buf;
+ $$buf = '';
+ return $self;
+ }
+ $$buf .= $_[0];
+ my $netscape_comment = !$self->{'_strict_comment'};
+
+ # Parse html text in $$buf. The strategy is to remove complete
+ # tokens from the beginning of $$buf until we can't deside whether
+ # it is a token or not, or the $$buf is empty.
+
+ TOKEN:
+ while (1) {
+
+ # First we try to pull off any plain text (anything before a "<" char)
+ if ($$buf =~ s|^([^<]+)||) {
+ if (length $$buf) {
+ $self->text($1);
+ } else {
+ my $text = $1;
+ # At the end of the buffer, we should not parse white space
+ # but leave it for parsing on the next round.
+ if ($text =~ s|(\s+)$||) {
+ $$buf = $1;
+ # Same treatment for chopped up entites and words.
+ # We must wait until we have it all.
+ } elsif ($text =~ s|(\s*\S+)$||) {
+ $$buf = $1;
+ };
+ $self->text($text) if length $text;
+ last TOKEN;
+ }
+
+ # Netscapes buggy comments are easy to handle
+ } elsif ($netscape_comment && $$buf =~ m|^<!\s*--|) {
+ if ($$buf =~ s|^<!\s*--(.*?)--\s*>||s) {
+ $self->comment($1);
+ } else {
+ last TOKEN; # must wait until we see the end of it
+ }
+
+ # Then, markup declarations (usually either <!DOCTYPE...> or a comment)
+ } elsif ($$buf =~ s|^(<!)||) {
+ my $eaten = $1;
+ my $text = '';
+ my @com = (); # keeps comments until we have seen the end
+ # Eat text and beginning of comment
+ while ($$buf =~ s|^(([^>]*?)--)||) {
+ $eaten .= $1;
+ $text .= $2;
+ # Look for end of comment
+ if ($$buf =~ s|^((.*?)--)||s) {
+ $eaten .= $1;
+ push(@com, $2);
+ } else {
+ # Need more data to get all comment text.
+ $$buf = $eaten . $$buf;
+ last TOKEN;
+ }
+ }
+ # Can we finish the tag
+ if ($$buf =~ s|^([^>]*)>||) {
+ $text .= $1;
+ $self->declaration($text) if $text =~ /\S/;
+ # then tell about all the comments we found
+ for (@com) { $self->comment($_); }
+ } else {
+ $$buf = $eaten . $$buf; # must start with it all next time
+ last TOKEN;
+ }
+
+ # Should we look for 'processing instructions' <? ...> ??
+ #} elsif ($$buf =~ s|<\?||) {
+ # ...
+
+ # Then, look for a end tag
+ } elsif ($$buf =~ s|^</||) {
+ # end tag
+ if ($$buf =~ s|^([a-zA-Z][a-zA-Z0-9\.\-]*)(\s*>)||) {
+ $self->end(lc($1), "</$1$2");
+ } elsif ($$buf =~ m|^[a-zA-Z]*[a-zA-Z0-9\.\-]*\s*$|) {
+ $$buf = "</" . $$buf; # need more data to be sure
+ last TOKEN;
+ } else {
+ # it is plain text after all
+ $self->text("</");
+ }
+
+ # Then, finally we look for a start tag
+ } elsif ($$buf =~ s|^(<([a-zA-Z]+)>)||) {
+ # special case plain start tags for slight speed-up (2.5%)
+ ### mod Lennart
+ $self->start(lc($2), {}, 0, [], $1);
+
+ } elsif ($$buf =~ s|^<||) {
+ # start tag
+ my $eaten = '<';
+
+ # This first thing we must find is a tag name. RFC1866 says:
+ # A name consists of a letter followed by letters,
+ # digits, periods, or hyphens. The length of a name is
+ # limited to 72 characters by the `NAMELEN' parameter in
+ # the SGML declaration for HTML, 9.5, "SGML Declaration
+ # for HTML". In a start-tag, the element name must
+ # immediately follow the tag open delimiter `<'.
+ if ($$buf =~ s|^(([a-zA-Z][a-zA-Z0-9\.\-]*)\s*)||) {
+ $eaten .= $1;
+ my $tag = lc $2;
+ my %attr;
+ my @attrseq;
+
+ # Then we would like to find some attributes
+ #
+ # Arrgh!! Since stupid Netscape violates RCF1866 by
+ # using "_" in attribute names (like "ADD_DATE") of
+ # their bookmarks.html, we allow this too.
+ while ($$buf =~ s|^(([a-zA-Z][a-zA-Z0-9\.\-_]*)\s*)||) {
+ $eaten .= $1;
+ my $attr = lc $2;
+ my $val;
+ # The attribute might take an optional value (first we
+ # check for an unquoted value)
+ if ($$buf =~ s|(^=\s*([^\"\'>\s][^>\s]*)\s*)||) {
+ $eaten .= $1;
+ $val = $2;
+ HTML::Entities::decode($val);
+ # or quoted by " or '
+ } elsif ($$buf =~ s|(^=\s*([\"\'])(.*?)\2\s*)||s) {
+ $eaten .= $1;
+ $val = $3;
+ HTML::Entities::decode($val);
+ # truncated just after the '=' or inside the attribute
+ } elsif ($$buf =~ m|^(=\s*)$| or
+ $$buf =~ m|^(=\s*[\"\'].*)|s) {
+ $$buf = "$eaten$1";
+ last TOKEN;
+ } else {
+ # assume attribute with implicit value
+ $val = $attr;
+ }
+ $attr{$attr} = $val;
+ push(@attrseq, $attr);
+ }
+
+ # At the end there should be a closing ">"
+### Modified for <tag />, Lennart
+ if ($$buf =~ s|^/>||) {
+ $self->start($tag, \%attr, 1, \@attrseq, "$eaten>");
+ } elsif ($$buf =~ s|^>||) {
+ #if ($$buf =~ s|^>||) {
+ $self->start($tag, \%attr, 0, \@attrseq, "$eaten>");
+ } elsif (length $$buf) {
+ # Not a conforming start tag, regard it as normal text
+ $self->text($eaten);
+ } else {
+ $$buf = $eaten; # need more data to know
+ last TOKEN;
+ }
+
+ } elsif (length $$buf) {
+ $self->text($eaten);
+ } else {
+ $$buf = $eaten . $$buf; # need more data to parse
+ last TOKEN;
+ }
+
+ } else {
+ #die if length($$buf); # This should never happen
+ last TOKEN; # The buffer should be empty now
+ }
+ }
+
+ $self;
+}
+
+
+sub eof
+{
+ shift->parse(undef);
+}
+
+
+sub parse_file
+{
+ my($self, $file) = @_;
+ no strict 'refs'; # so that a symbol ref as $file works
+ local(*F);
+ unless (ref($file) || $file =~ /^\*[\w:]+$/) {
+ # Assume $file is a filename
+ open(F, $file) || die "Can't open $file: $!";
+ $file = \*F;
+ }
+ my $chunk = '';
+ while(read($file, $chunk, 512)) {
+ $self->parse($chunk);
+ }
+ close($file);
+ $self->eof;
+}
+
+
+sub strict_comment
+{
+ my $self = shift;
+ my $old = $self->{'_strict_comment'};
+ $self->{'_strict_comment'} = shift if @_;
+ return $old;
+}
+
+
+sub netscape_buggy_comment # legacy
+{
+ my $self = shift;
+ my $old = !$self->strict_comment;
+ $self->strict_comment(!shift) if @_;
+ return $old;
+}
+
+
+sub text
+{
+ # my($self, $text) = @_;
+}
+
+sub declaration
+{
+ # my($self, $decl) = @_;
+}
+
+sub comment
+{
+ # my($self, $comment) = @_;
+}
+
+sub start
+{
+die "hie";
+ # my($self, $tag, $attr, $attrseq, $origtext) = @_;
+ # $attr is reference to a HASH, $attrseq is reference to an ARRAY
+}
+
+sub end
+{
+ # my($self, $tag, $origtext) = @_;
+}
+
+1;
+
+
+__END__
+
+
+=head1 NAME
+
+HTML::Parser - SGML parser class
+
+=head1 SYNOPSIS
+
+ require HTML::Parser;
+ $p = HTML::Parser->new; # should really a be subclass
+ $p->parse($chunk1);
+ $p->parse($chunk2);
+ #...
+ $p->eof; # signal end of document
+
+ # Parse directly from file
+ $p->parse_file("foo.html");
+ # or
+ open(F, "foo.html") || die;
+ $p->parse_file(\*F);
+
+=head1 DESCRIPTION
+
+The C<HTML::Parser> will tokenize an HTML document when the parse()
+method is called by invoking various callback methods. The document to
+be parsed can be supplied in arbitrary chunks.
+
+The external interface the an I<HTML::Parser> is:
+
+=over 4
+
+=item $p = HTML::Parser->new
+
+The object constructor takes no arguments.
+
+=item $p->parse( $string );
+
+Parse the $string as an HTML document. Can be called multiple times.
+The return value is a reference to the parser object.
+
+=item $p->eof
+
+Signals end of document. Call eof() to flush any remaining buffered
+text. The return value is a reference to the parser object.
+
+=item $p->parse_file( $file );
+
+This method can be called to parse text from a file. The argument can
+be a filename or an already opened file handle. The return value from
+parse_file() is a reference to the parser object.
+
+=item $p->strict_comment( [$bool] )
+
+By default we parse comments similar to how the popular browsers (like
+Netscape and MSIE) do it. This means that comments will always be
+terminated by the first occurrence of "-->". This is not correct
+according to the "official" HTML standards. The official behaviour
+can be enabled by calling the strict_comment() method with a TRUE
+argument.
+
+The return value from strict_comment() is the old attribute value.
+
+=back
+
+
+
+In order to make the parser do anything interesting, you must make a
+subclass where you override one or more of the following methods as
+appropriate:
+
+=over 4
+
+=item $self->declaration($decl)
+
+This method is called when a I<markup declaration> has been
+recognized. For typical HTML documents, the only declaration you are
+likely to find is <!DOCTYPE ...>. The initial "<!" and ending ">" is
+not part of the string passed as argument. Comments are removed and
+entities will B<not> be expanded.
+
+=item $self->start($tag, $attr, $attrseq, $origtext)
+
+This method is called when a complete start tag has been recognized.
+The first argument is the tag name (in lower case) and the second
+argument is a reference to a hash that contain all attributes found
+within the start tag. The attribute keys are converted to lower case.
+Entities found in the attribute values are already expanded. The
+third argument is a reference to an array with the lower case
+attribute keys in the original order. The fourth argument is the
+original HTML text.
+
+
+=item $self->end($tag, $origtext)
+
+This method is called when an end tag has been recognized. The
+first argument is the lower case tag name, the second the original
+HTML text of the tag.
+
+=item $self->text($text)
+
+This method is called when plain text in the document is recognized.
+The text is passed on unmodified and might contain multiple lines.
+Note that for efficiency reasons entities in the text are B<not>
+expanded. You should call HTML::Entities::decode($text) before you
+process the text any further.
+
+A sequence of text in the HTML document can be broken between several
+invocations of $self->text. The parser will make sure that it does
+not break a word or a sequence of spaces between two invocations of
+$self->text().
+
+=item $self->comment($comment)
+
+This method is called as comments are recognized. The leading and
+trailing "--" sequences have been stripped off the comment text.
+
+=back
+
+The default implementation of these methods do nothing, i.e., the
+tokens are just ignored.
+
+There is really nothing in the basic parser that is HTML specific, so
+it is likely that the parser can parse other kinds of SGML documents.
+SGML has many obscure features (not implemented by this module) that
+prevent us from renaming this module as C<SGML::Parser>.
+
+=head1 EFFICIENCY
+
+The parser is fairly inefficient if the chunks passed to $p->parse()
+are too big. The reason is probably that perl ends up with a lot of
+character copying when tokens are removed from the beginning of the
+strings. A chunk size of about 256-512 bytes was optimal in a test I
+made with some real world HTML documents. (The parser was about 3
+times slower with a chunk size of 20K).
+
+=head1 SEE ALSO
+
+L<HTML::Entities>, L<HTML::TokeParser>, L<HTML::Filter>,
+L<HTML::HeadParser>, L<HTML::LinkExtor>
+
+L<HTML::TreeBuilder> (part of the I<HTML-Tree> distribution)
+
+=head1 COPYRIGHT
+
+Copyright 1996-1999 Gisle Aas. All rights reserved.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+=cut
+
+
diff --git a/emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/HTML/datadir.txt b/emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/HTML/datadir.txt
new file mode 100644
index 0000000..1ba751d
--- /dev/null
+++ b/emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/HTML/datadir.txt
@@ -0,0 +1 @@
+C:/TEMP/i2data
diff --git a/emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/PathSubs.pm b/emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/PathSubs.pm
new file mode 100644
index 0000000..e95b8d5
--- /dev/null
+++ b/emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/PathSubs.pm
@@ -0,0 +1,207 @@
+# Copyright 2006 Lennart Borgman, http://OurComments.org/. All rights
+# reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; see the file COPYING. If not, write to the
+# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+
+package PathSubs;
+
+#####################################################
+### This package contains general path handling
+### routines and some win32 specific dito.
+### The latter should ev be moved to a new module!
+#####################################################
+use strict;
+
+use File::Spec;
+
+### Absolute path names
+
+sub is_abs_path ($) {
+ my $path = shift;
+ return 0 if $path eq "";
+ return 1 if File::Spec->file_name_is_absolute($path);
+ #return 1 if substr($path, 1, 1) eq ":"; # MSWin32
+ #return 1 if substr($path, 0, 1) eq "/";
+ return 1 if $path =~ /^https?:/i;
+ return 1 if $path =~ /^file:/i;
+ return 1 if $path =~ /^javascript:/i;
+ return 1 if $path =~ /^mailto:/i;
+}
+sub is_abs_netpath($) {
+ my $path = shift;
+ return 1 if $path =~ /^https?:/i;
+ # New
+ return 1 if $path =~ /^ftp:/i;
+ return 1 if $path =~ /^mailto:/i;
+}
+
+
+sub uniq_file($) {
+ my $fname = shift;
+ $fname =~ s!^\s+|\s+$!!g;
+ return "" if ($fname eq "");
+ $fname = File::Spec->rel2abs($fname);
+ if (!File::Spec->file_name_is_absolute($fname)) {
+ die "File name is not absolute: $fname";
+ }
+ #print STDERR "uniq_file($fname)\n";
+ $fname =~ tr!\\!/!;
+ if (-e $fname) {
+ #print STDERR "exists $fname\n";
+ ### There is an error in 522, compensate for this!
+ #die substr($fname, -1);
+ if (substr($fname, -1) eq "/") { chop $fname; }
+ #print STDERR "exists $fname\n";
+ ### Translate ..
+ if (substr($fname, 1, 1) eq ":") {
+ my $ffname = Win32::GetFullPathName($fname);
+ ### Get case
+ my $lfname = Win32::GetLongPathName($ffname);
+ #print STDERR "lexists $lfname\n";
+ $fname = $lfname if ($lfname ne "");
+ }
+ } else {
+ #print STDERR "NOT exists $fname\n";
+ if (substr($fname, -1) eq "/") { chop $fname; }
+ my $head = "";
+ if (substr($fname, 0, 2) eq "//") {
+ $head = "//";
+ $fname = substr($fname, 2);
+ }
+ my @fname = split("/", $fname);
+ my $tail = pop @fname;
+ $fname = uniq_dir($head . join("/", @fname)) . $tail;
+ }
+ if (substr($fname, 1, 1) eq ":") {
+ $fname = uc(substr($fname, 0, 1)) . substr($fname, 1);
+ #print STDERR "fname $fname\n";
+ }
+ $fname =~ tr!\\!/!;
+ #print STDERR "fname ($fname)\n";
+ return $fname;
+}
+sub uniq_dir($) {
+ my $dir = shift;
+ my $uq_dir = uniq_file($dir);
+ if (substr($uq_dir, -1) ne "/") { $uq_dir .= "/"; }
+ return $uq_dir;
+}
+
+
+
+### Relative paths
+sub _get_link_root($) {
+ my $lnk = shift;
+ if ($lnk =~ m!^(/|ftp://[^/]*|https?://[^/]*|[a-z]:/)!i) {
+ return $1;
+ } else {
+ return "";
+ }
+}
+
+sub resolve_dotdot($) {
+ my $orig_url = shift;
+ my $root = _get_link_root($orig_url);
+ return $orig_url if length($root) == length($orig_url);
+ my $url = substr($orig_url, length($root));
+ if (substr($root, -1) eq "/") {
+ chop $root;
+ $url = "/$url";
+ }
+ #die "$root\n$url";
+ my $iPosSearch = 2;
+ #print "url=$url\n";
+ while ((my $iPos = index($url, "/../", $iPosSearch)) > -1) {
+ my $sLeft = substr($url, 0, $iPos);
+ if (substr($sLeft, -2) eq "..") {
+ $iPosSearch += 3;
+ next;
+ }
+ my $sRight = substr($url, $iPos+3);
+ #print "url=$url\n";
+ #print "iPos=$iPos\n";
+ #print "sLeft=$sLeft\n";
+ $sLeft =~ s!/[^/]*$!!;
+ #print "sLeft=$sLeft\n";
+ #print "sRight=$sRight\n";
+ $url = $sLeft . $sRight;
+ #print "\t***url=$url\n";
+ #print "url=$url\n";
+ }
+ if (index($url, "../") > -1) {
+ return $orig_url;
+ }
+ return $root . $url;
+}
+
+sub mk_relative_link($$;$) {
+ my $from = shift;
+ my $to = shift;
+ my $norm = shift;
+ if ($norm) {
+ $from = uniq_file($from);
+ $to = uniq_file($to);
+ }
+ if (-e $from) {
+ $from = uniq_file($from);
+ } else {
+ $from = resolve_dotdot($from);
+ }
+ if (-e $to) {
+ $to = uniq_file($to);
+ } else {
+ $to = resolve_dotdot($to);
+ }
+ my $root_from = _get_link_root($from);
+ my $root_to = _get_link_root($to );
+ if ($root_from ne $root_to) {
+ return $to;
+ }
+ my @from = split "/", $from;
+ my @to = split "/", $to;
+ while (@to) {
+ last if ($to[0] ne $from[0]);
+ shift @to;
+ shift @from;
+ }
+ if (@to == 1 && @from == 1) {
+ if (length($to[0]) > length($from[0])) {
+ if (substr($to[0], 0, length($from[0])+1) eq ($from[0] . "#")) {
+ return substr($to[0], length($from[0]));
+ }
+ }
+ }
+ my $rl;
+ for (1..$#from) { $rl .= "../"; }
+ $rl .= join("/", @to);
+
+ return $rl;
+}
+
+
+
+sub mk_absolute_link($$) {
+ my $from = shift;
+ my $rel_to = shift;
+ my $abs = $from;
+ $abs =~ s![^/]*$!!;
+ $abs .= $rel_to;
+ if (!is_abs_netpath($abs)) { $abs = uniq_file($abs); }
+ $abs;
+}
+
+
+1;
diff --git a/emacs.d/nxhtml/nxhtml/html-chklnk/link_checker.pl b/emacs.d/nxhtml/nxhtml/html-chklnk/link_checker.pl
new file mode 100644
index 0000000..0925b1c
--- /dev/null
+++ b/emacs.d/nxhtml/nxhtml/html-chklnk/link_checker.pl
@@ -0,0 +1,328 @@
+#! perl
+
+# Copyright 2006, 2007 Lennart Borgman, http://OurComments.org/. All
+# rights reserved.
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This file is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+
+use strict;
+use warnings;
+
+use IO::File;
+use File::Spec;
+use File::Find;
+
+sub check_file($);
+
+#############################
+### Collecting info
+#############################
+my $m_site_dir; # Site root directory (every file should be in this)
+my %m_CheckedFiles;
+my %m_FilesToCheck;
+my %m_MissedFiles;
+my $m_errors_found;
+
+sub tell_bad_link($$$$$) {
+ my $what = shift;
+ my $file = shift;
+ my $lnum = shift;
+ my $link = shift;
+ my $line = shift;
+ $line =~ s/^\s+|\s+$//g;
+ $m_CheckedFiles{$file}->{"ERR"}->{$lnum} = "$what\n Link: \"$link\"\n";
+ #$line";
+}
+
+#############################
+### Helpers
+#############################
+sub add_file_to_check($) {
+ $m_FilesToCheck{File::Spec->canonpath(shift)} = 1;
+}
+# sub full_uq_file($) {
+# my $file = shift;
+# my $full_file = $file;
+# if (! File::Spec->file_name_is_absolute($full_file)) {
+# #$full_file = Win32::GetFullPathName($file);
+# $full_file = File::Spec->rel2abs($full_file, $m_site_dir);
+# }
+# if (($^O eq "MSWin32") || ($^O eq "cygwin")) {
+# $full_file =~ tr!A-Z!a-z!;
+# }
+# #print "ull_uq_file: full_file=$file\n";
+# return $full_file;
+# }
+
+#############################
+### Checks
+#############################
+sub check_next_file() {
+ if (scalar(keys %m_FilesToCheck) > 0) {
+ my @FilesToCheck = sort keys %m_FilesToCheck;
+ my $next_file = $FilesToCheck[0];
+ delete $m_FilesToCheck{$next_file};
+ check_file($next_file);
+ }
+}
+sub not_a_local_file($) {
+ my $url = shift;
+ (
+ $url =~ m!^javascript:!
+ ||
+ $url =~ m!^mailto:!
+ ||
+ $url =~ m!^[a-z]+://!
+ );
+}
+
+sub check_file($) {
+ my $fname = shift;
+ if (! File::Spec->file_name_is_absolute($fname)) {
+ die "check_file: File is not abs: $fname";
+ }
+ my $only_name = (File::Spec->splitpath($fname))[2];
+ print "Checking $fname ... ";
+ sleep 0.5;
+ $m_CheckedFiles{$fname} = {};
+ my %links;
+ my %anchs;
+ my %lines;
+ my $fh = new IO::File($fname);
+ die "Can't read $fname: $!\n" unless defined $fh;
+ my $whole;
+ my $n;
+ my $found_errors = 0;
+ while (my $line = <$fh>) {
+ $n++;
+ chomp $line;
+ $whole = $line;
+ while ($whole =~ m!(?:\s|^)id="(.*?)"!g) {
+ $anchs{$1} = $n;
+ $lines{$n} = $line;
+ }
+ while ($whole =~ m!(?:\s|^)name="(.*?)"!g) {
+ $anchs{$1} = $n;
+ $lines{$n} = $line;
+ }
+ while ($whole =~ m!(?:\s|^)href="(.*?)"!g) {
+ my $l = $1;
+ next if not_a_local_file($l);
+ if ($l =~ m!^#!) {
+ $l = $only_name . $l;
+ }
+ $links{$l} = $n;
+ $lines{$n} = $line;
+ }
+ while ($whole =~ m!(?:\s|^)src="(.*?)"!g) {
+ my $l = $1; $l =~ tr!A-Z!a-z!;
+ $links{$l} = $n;
+ $lines{$n} = $line;
+ }
+ }
+ $fh->close();
+ $m_CheckedFiles{$fname}->{ANC} = \%anchs;
+ my ($fv, $fd, $ff) = File::Spec->splitpath($fname);
+ my $fdir = File::Spec->catpath($fv, $fd, "");
+ for my $link (sort keys %links) {
+ # Next line is for onclick lines
+ next if ($link eq "#");
+ my $lnum = $links{$link};
+ my $line = $lines{$lnum};
+ if ($link eq "") {
+ tell_bad_link("empty link", $fname, $lnum, $link, $line);
+ $found_errors = 1;
+ next;
+ }
+ if ($link =~ m!(.*)\?!) { $link = $1; }
+ my $anchor;
+ if ($link =~ m!(.*)#(.*)!) { $link = $1; $anchor = $2; }
+ if ($link eq "") {
+ if (!exists $anchs{$anchor}) {
+ tell_bad_link("bad internal anchor ref ($anchor)", $fname, $lnum, $link, $line);
+ $found_errors = 1;
+ }
+ next;
+ }
+ $link =~ m!([^\.]*)$!;
+ my $link_file_type = $1;
+ my $subfile = $link;
+ if (!File::Spec->file_name_is_absolute($subfile)) {
+ $subfile = File::Spec->catpath($fv, $fd, $link);
+ }
+ $subfile = File::Spec->canonpath($subfile);
+ die "Contained .." if $subfile =~ m/\.\./;
+ next if (exists $m_MissedFiles{$subfile});
+ if (! -r $subfile) {
+ tell_bad_link("Can't read linked file: $!", $fname, $lnum, $link, $line);
+ $found_errors = 1;
+ $m_MissedFiles{$subfile} = 1;
+ next;
+ }
+ next unless $link_file_type =~ m!^html?$!i;
+ if (defined $anchor) {
+ $m_CheckedFiles{$fname}->{EXTANC}->{$subfile} =
+ { ANC=> $anchor, LINE=>$line, LNUM=>$lnum};
+ }
+ next if (exists $m_CheckedFiles{$subfile});
+ #check_file($subfile);
+ my $rel_root = File::Spec->abs2rel($subfile, $m_site_dir);
+ if (substr($rel_root, 0, 2) eq "..") {
+ tell_bad_link("Reference to file outside site", $fname, $lnum, $link, $line);
+ $found_errors = 1;
+ } else {
+ #$m_FilesToCheck{$subfile} = 1;
+ add_file_to_check($subfile);
+ }
+ }
+ if ($found_errors) {
+ print "Errors found\n";
+ } else {
+ print "Ok\n";
+ }
+ sleep 0.5;
+ check_next_file();
+} # check_file
+
+
+sub check_external_anchors() {
+ for my $f (sort keys %m_CheckedFiles) {
+ my $fnode = $m_CheckedFiles{$f};
+ if (exists ${$fnode}{"EXTANC"}) {
+ my $extanc_hash = ${$fnode}{"EXTANC"};
+ for my $fx (keys %$extanc_hash) {
+ next unless (exists $m_CheckedFiles{$fx});
+ my $ea_hash = ${$extanc_hash}{$fx};
+ my $ea = ${$ea_hash}{ANC};
+ my $fxnode = $m_CheckedFiles{$fx};
+ my $fx_anc_hash = ${$fxnode}{"ANC"};
+ if (!exists ${$fx_anc_hash}{$ea}) {
+ my $line = ${$ea_hash}{LINE};
+ my $lnum = ${$ea_hash}{LNUM};
+ tell_bad_link("Hash not found", $f, $lnum, "$fx#$ea", $line);
+ }
+ }
+ }
+ }
+} # check_external_anchors
+
+
+
+#############################
+### Reporting
+#############################
+sub report_errors() {
+ for my $f (sort keys %m_CheckedFiles) {
+ my $fnode = $m_CheckedFiles{$f};
+ if (exists ${$fnode}{"ERR"}) {
+ if (!defined $m_errors_found) {
+ $m_errors_found = 1;
+ print "\n\n*********** Error details: **********\n";
+ sleep 0.5;
+ }
+ #print "\n$f";
+ my $err_hash = ${$fnode}{"ERR"};
+ for my $e (sort keys %$err_hash) {
+ print "\n$f";
+ print " at line $e:\n " . ${$err_hash}{$e} . "\n";
+ sleep 0.5;
+ }
+ }
+ }
+ if ($m_errors_found) {
+ die "\n*** There where errors ***\n";
+ } else {
+ print "Everything that was checked is ok\n";
+ }
+} # report_errors
+
+#############################
+### Help
+#############################
+sub usage() {
+ die "Usage: $0 --site=SITE-DIR --start=START-FILE\n";
+}
+
+#############################
+### Parameters
+#############################
+#my $m_start_file; # File to start checking in
+sub get_params() {
+ usage() unless $#ARGV > -1;
+ for (my $i = 0; $i <= $#ARGV; $i++) {
+ my ($k, $v) = ($ARGV[$i] =~ m!-?-?(.*?)=(.*)!);
+ if ($k eq "site") {
+ $m_site_dir = $v;
+ } elsif( $k eq "start") {
+ #$m_FilesToCheck{$v} = 1;
+ add_file_to_check($v);
+ } else {
+ print STDERR "Unknown parameter: $ARGV[$i]\n";
+ usage();
+ }
+ }
+ foreach my $key (keys %m_FilesToCheck) {
+ die "Can't find $key\n" unless -e $key;
+ }
+ if (! $m_site_dir) {
+ print STDERR "No site directory given\n";
+ usage();
+ }
+ die "Can't find $m_site_dir\n" unless -d $m_site_dir;
+ if ((scalar keys %m_FilesToCheck) == 0) {
+ my $add_files =
+ sub {
+ return unless m/.html?$/i;
+ return if -d $_;
+ #$m_FilesToCheck{$File::Find::name} = 1;
+ add_file_to_check($File::Find::name);
+ };
+ File::Find::find($add_files, $m_site_dir);
+ }
+}
+
+sub check_canonpath() {
+ my $testpath = "/test/../some.txt";
+ if ($testpath eq File::Spec->canonpath($testpath)) {
+ my $errmsg = <<_BADCANON_
+
+** Fatal Error:
+
+ File::Spec->canonpath does not clean up path.
+
+ If you are doing this from Emacs with html-chklnk-check-site-links
+ it may be because you are using Cygwin as your shell. You can cure
+ this in the following ways:
+
+ 1) Use w32shell.el - this will temporary switch to "cmd" as shell.
+ 2) Use the default shell on w32.
+
+_BADCANON_
+;
+
+ die $errmsg;
+ }
+}
+
+#############################
+### Main
+#############################
+
+check_canonpath();
+
+$| = 1; # flush or blush!
+
+print "\n";
+get_params();
+
+check_next_file();
+check_external_anchors();
+report_errors();