5 files changed, 1758 insertions, 0 deletions
diff --git a/emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/HTML/LinkWalker.pm b/emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/HTML/LinkWalker.pm
new file mode 100644
index 0000000..14b0ccb
--- /dev/null
+++ b/emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/HTML/LinkWalker.pm
@@ -0,0 +1,774 @@
+### File:	LinkWalker.pm
+### Author:	Lennart Borgman
+###	All rights reserved
+
+##########################################################
+### UserAgent module
+##########################################################
+package LWP::WalkerUA;
+require LWP::UserAgent;
+@ISA = qw(LWP::UserAgent);
+
+### Mirror to another file (why???)
+sub mirror
+{
+    my($self, $url, $file, $mirr_tmp) = @_;
+    die "no mirr_tmp" unless defined $mirr_tmp;
+
+    LWP::Debug::trace('()');
+    my $request = new HTTP::Request('GET', $url);
+
+    if (-e $file) {
+	my($mtime) = (stat($file))[9];
+	if($mtime) {
+	    $request->header('If-Modified-Since' =>
+			     HTTP::Date::time2str($mtime));
+	}
+    }
+    my $tmpfile = "$file-$$";
+
+    my $response = $self->request($request, $tmpfile);
+    if ($response->is_success) {
+
+	my $file_length = (stat($tmpfile))[7];
+	my($content_length) = $response->header('Content-length');
+
+	if (defined $content_length and $file_length < $content_length) {
+	    unlink($tmpfile);
+	    die "Transfer truncated: " .
+		"only $file_length out of $content_length bytes received\n";
+	} elsif (defined $content_length and $file_length > $content_length) {
+	    unlink($tmpfile);
+	    die "Content-length mismatch: " .
+		"expected $content_length bytes, got $file_length\n";
+	} else {
+	    # OK
+	    if (-e $mirr_tmp) {
+		# Some dosish systems fail to rename if the target exists
+		chmod 0777, $mirr_tmp;
+		unlink $mirr_tmp;
+	    }
+	    rename($tmpfile, $mirr_tmp) or
+		die "Cannot rename '$tmpfile' to '$mirr_tmp': $!\n";
+
+	    if (my $lm = $response->last_modified) {
+		# make sure the file has the same last modification time
+		utime $lm, $lm, $mirr_tmp;
+	    }
+	}
+    } else {
+	unlink($tmpfile);
+    }
+    return $response;
+}
+
+
+##########################################################
+### Parser module
+##########################################################
+package HTML::WalkerParser;
+require HTML::ParserTagEnd;
+@ISA = qw(HTML::ParserTagEnd);
+use strict;
+use vars qw(%LINK_ELEMENT);
+
+# Elements that might contain links and the name of the link attribute
+%LINK_ELEMENT =
+(
+ body   => 'background',
+ base   => 'href',
+ a      => 'href',
+ img    => [qw(src lowsrc usemap)],   # 'lowsrc' is a Netscape invention
+ form   => 'action',
+ input  => 'src',
+'link'  => 'href',          # need quoting since link is a perl builtin
+ frame  => 'src',
+ applet => [qw(codebase code)],
+ area   => 'href',
+ iframe  => 'src',   # Netscape 2.0 extention
+ embed  => 'src',   # used in Netscape 2.0 for Shockwave and things like that
+);
+
+my %LINKATTRIBS = (
+	"href" 			=> 1,
+	"src"			=> 1,
+	"action"		=> 1,
+	"background" 	=> 1,
+	"usemap"		=> 1,
+	"code"			=> 1,
+	"codebase"		=> 1,
+	"lowsrc"		=> 1,
+	);
+my %MAYBECONT = (
+	a => 'href',
+	area   => 'href',
+	form   => 'action',
+	frame  => 'src',
+	iframe  => 'src',
+	);
+
+sub maybecont($$) {
+	my $tag = shift;
+	my $att = shift;
+	return unless exists $MAYBECONT{$tag};
+	return ($MAYBECONT{$tag} eq $att);
+}
+
+sub new {
+    my($class, $parsed_fh) = @_;
+    my $self = $class->SUPER::new;
+    $self->{parsed_fh} = $parsed_fh;
+    $self;
+}
+
+
+
+
+
+
+
+##########################################################
+### Walker module
+##########################################################
+package HTML::LinkWalker;
+use strict;
+
+use IO::File;
+use File::Copy qw();
+use File::Path qw();
+use PathSubs qw();
+use HTML::Entities;
+use FindBin qw();
+
+
+##########################################################
+### Globals
+##########################################################
+my $ua;
+my $m_ua_personality = "LinkWalker/0.9";
+my %m_is_outside;
+my %m_is_container;
+my $m_bOnlyCont;
+my @m_sLinkRoots;
+my $m_subReport;
+my $m_subAction;
+my $m_subMirrorAction;
+
+
+#############################
+### Collecting info
+#############################
+my %m_CheckedLinks;
+my %m_MissedLinks;
+
+sub tell_bad_link($$$$$) {
+	my $what = shift;
+	my $file = shift;
+	my $lnum = shift;
+	my $link = shift;
+	my $line = shift;
+	$file = "START" unless defined $file;
+	$lnum = "(start)" unless defined $lnum;
+	my $longMsg = "<<$what>>";
+	my $shortMsg = $what;
+	if (defined $link) {
+		my @lines = split("\\s+", $line);
+		my $disp_line = join("\n\t\t  ", @lines);
+		$longMsg .= ",\n\t\tlink=$link\n\t\t$disp_line";
+	}
+	my @msg = ($shortMsg, $longMsg);
+	$m_CheckedLinks{$file}->{ERR}->{$lnum} = \@msg;
+	&$m_subReport("\t* Error * " . $what . "\n");
+} # tell_bad_link
+
+
+#############################
+### Helpers
+#############################
+
+sub get_contenttype($) {
+	my $response = shift;
+	my @rh = $response->header("Content-Type");
+	for my $r (@rh) {
+		my $c = $r;
+		if ((my $iPos = index($r, ";")) > -1) {
+			$c = substr($r, 0, $iPos);
+		}
+		return $c;
+	}
+}
+sub is_linked_contenttype($) {
+	my $response = shift;
+	return (get_contenttype($response) eq "text/html");
+}
+
+sub ending_is_container($) {
+	my $link_addr = shift;
+	$link_addr =~ s!#.*$!!;
+	$link_addr =~ s!\?.*$!!;
+	return (($link_addr =~ m!\.s?html?$!i) ? 1 : 0);
+}
+
+my $m_sMirrorRoot;
+my $m_bMirror = 1;
+
+sub mk_mirror_name($) {
+	my $orig_name = shift;
+	$orig_name =~ tr!\\!/!;
+	my $mirr_name = $orig_name;
+	my ($orig_host) = ($orig_name =~ m!(^https?://[^/]*)!i);
+	if (defined $orig_host) {
+		my $host = $orig_host;
+		$host =~ tr!:!_!;
+		$host =~ tr!/!_!;
+		$mirr_name =~ s!^$orig_host!$host!;
+		if (substr($mirr_name, -1) eq "/") { $mirr_name .= "default.html"; }
+	} else {
+		die "Can't find host in $orig_name\n";
+	}
+	my $mirr_full = sMirrorRoot() . $mirr_name;
+	if (!$m_bMirror) {
+		my $sExt = $mirr_name; $sExt =~ s!.*\.([^\.]*$)!$1!;
+		$mirr_full = sMirrorRoot() . "temp.$sExt";
+	}
+	my $mirr_fold = $mirr_full;
+	$mirr_fold =~ s![^/]*$!!;
+	File::Path::mkpath($mirr_fold, 0, 0777);
+	return $mirr_full;
+}
+
+#############################
+### Checks
+#############################
+sub is_outside($) {
+	my $uq_link_addr = shift;
+	if (!exists $m_is_outside{$uq_link_addr}) {
+		$m_is_outside{$uq_link_addr} = test_is_outside($uq_link_addr, \@m_sLinkRoots);
+	}
+	return $m_is_outside{$uq_link_addr};
+}
+sub set_is_container($$) {
+	my $uq_link_addr = shift;
+	return if exists $m_is_container{$uq_link_addr};
+	$m_is_container{$uq_link_addr} = shift;
+}
+sub is_outside_container($) {
+	my $uq_link_addr = shift;
+	if (exists $m_is_container{$uq_link_addr}) {
+		if ($m_is_container{$uq_link_addr}) {
+			return is_outside($uq_link_addr);
+		}
+	}
+}
+sub test_is_outside($$) {
+	my $uq_link_addr = shift;
+	my $link_roots   = shift;
+	if (defined $link_roots) {
+		my $in_roots;
+		for my $link_root (@$link_roots) {
+			if (substr($uq_link_addr, 0, length($link_root)) eq $link_root) {
+				return 0;
+			}
+		}
+		return 1;
+	}
+} # is_outside
+
+
+
+##########################################################
+### Parsing
+##########################################################
+
+
+### Parser subs
+sub HTML::WalkerParser::declaration {
+    my($self, $decl) = @_;
+	return unless defined $self->{parsed_fh};
+	my $fh = $self->{parsed_fh};
+	print $fh "<!" . $decl . ">";
+}
+my $m_start_cb;
+sub HTML::WalkerParser::start {
+    my($self, $tag, $attr, $ended) = @_;
+	&$m_start_cb($tag, $attr);
+	return unless defined $self->{parsed_fh};
+	my $t = "<$tag";
+	for my $k (keys %$attr) {
+		my $encoded = encode_entities($$attr{$k});
+		$t .= qq( $k="$encoded");
+	}
+	if ($ended) {
+		$t .= " />";
+	} else {
+		$t .= ">";
+	}
+	my $fh = $self->{parsed_fh};
+	print $fh $t;
+}
+sub HTML::WalkerParser::end {
+	my ($self, $tag) = @_;
+	return unless defined $self->{parsed_fh};
+	my $fh = $self->{parsed_fh};
+	print $fh "</" . $tag . ">";
+}
+sub HTML::WalkerParser::text {
+	my ($self, $txt) = @_;
+	return unless defined $self->{parsed_fh};
+	my $fh = $self->{parsed_fh};
+	print $fh $txt;
+}
+sub HTML::WalkerParser::comment {
+    my($self, $comment) = @_;
+	return unless defined $self->{parsed_fh};
+	my $fh = $self->{parsed_fh};
+	print $fh "<!--" . $comment . "-->";
+}
+
+
+
+
+### Main parsing routine
+
+sub parse_file($$$$$$$$$) {
+	my ($file_name, $parsed_fh, $uq_link_addr, $link_roots,
+		$ref_links, $ref_anchs, $ref_lines, $ref_tagname, $ref_attname) = @_;
+	my $fh;
+	if (-d $file_name) {
+		$file_name = PathSubs::uniq_dir($file_name) . "default.html";
+		$uq_link_addr .= "/" unless substr($uq_link_addr, -1) eq "/";
+		$uq_link_addr .= "default.html";
+		&$m_subReport("dir => $file_name\n");
+	}
+	$fh = new IO::File($file_name);
+	die "Can't read $file_name: $!\n" unless defined $fh;
+	my $base_href;
+	my $n;
+	my $line;
+	my $uq_link_fold = $uq_link_addr; $uq_link_fold =~ s![^/]*$!!;
+
+	my $start_cb =
+		sub {
+			my ($tag, $attr_hash) = @_;
+			for my $k (keys %$attr_hash) {
+				if (($k eq "id") || ($k eq "name")) {
+					my $v = $$attr_hash{$k};
+					$$ref_anchs{$v} = $n;
+					$$ref_lines{$n} = $line;
+				} elsif (exists $LINKATTRIBS{$k}) {
+					my $v = $$attr_hash{$k};
+					next if $v =~ m!^javascript:!;
+					next if $v =~ m!^ftp://!;
+					next if $v =~ m!^mailto://!;
+					if ($tag eq "base") { $base_href = $v if $k eq "href"; next; }
+					my $v_abs; my $v_rel;
+					my $v_is_abs = PathSubs::is_abs_path($v);
+					if ($v_is_abs) {
+						$v_abs = $v;
+						$v_rel = PathSubs::mk_relative_link($uq_link_addr, $v_abs);
+					} else {
+						$v_rel = $v;
+						if (defined $base_href) {
+							$v_abs = PathSubs::mk_abs_link($base_href, $v);
+						} else {
+							if (substr($v_rel, 0, 1) ne "#") {
+								$v_abs = $uq_link_fold . $v_rel;
+							} else {
+								$v_abs = $uq_link_addr . $v_rel;
+							}
+							$v_abs = PathSubs::resolve_dotdot($v_abs);
+						}
+					}
+					next if exists $m_CheckedLinks{$v_abs};
+					if (is_outside($v_abs)) {
+						if (!$v_is_abs) {
+							if (ending_is_container($v_abs)) {
+								$m_CheckedLinks{$v_abs} = {};
+								tell_bad_link("Outside relative link ($v_rel)",
+									$uq_link_addr, $n, $v, $line);
+							}
+						}
+						### Skip outside absolute links
+						### Could be things like banners etc...
+						next;
+					}
+					$$ref_links{$v_rel} = $n;
+					$$ref_lines{$n} = $line;
+					if (substr($v_rel, 0, 1) ne "#") {
+						my $v_rel_name = $v_rel;
+						$v_rel_name =~ s!#.*$!!;
+						$v_rel_name =~ s!\?.*$!!;
+						$$ref_tagname{$v_rel_name} = $tag;
+						$$ref_attname{$v_rel_name} = $k;
+					}
+					if ($v_is_abs && ($v_rel ne $v)) { $$attr_hash{$k} = $v_rel; }
+				}
+			}
+		}; # $start_cb
+
+	$m_start_cb = $start_cb;
+	my $p = HTML::WalkerParser->new($parsed_fh);
+	while ($line = <$fh>) {
+		$n++;
+		$p->parse($line);
+	}
+	$fh->close();
+} # parse_file
+
+
+
+##########################################################
+### Do the walk...
+##########################################################
+sub walk_link($$;$$$$) {
+	die "$#_" unless ($#_ == 1 || $#_ == 5);
+	my $link_fold   = shift;
+	my $link_file   = shift;
+	my $parent_url  = shift;
+	my $parent_lnum = shift;
+	my $parent_link = shift;
+	my $parent_line = shift;
+
+	my $link_addr   = $link_fold . $link_file;
+	my $uq_link_addr;
+	my $is_file = ($link_addr !~ m!^https?://!i);
+	if ($is_file) {
+		$uq_link_addr = PathSubs::uniq_file($link_addr);
+	} else {
+		$uq_link_addr = PathSubs::resolve_dotdot($link_addr);
+	}
+	return if exists $m_CheckedLinks{$uq_link_addr};
+	return if exists  $m_MissedLinks{$uq_link_addr};
+	$m_CheckedLinks{$uq_link_addr} = {};
+	my $link_is_container = ending_is_container($uq_link_addr);
+	if ($link_is_container) {
+		set_is_container($uq_link_addr, 1);
+		return if is_outside($uq_link_addr);
+	} else {
+		return if $m_bOnlyCont;
+	}
+	my $response;
+	my $contenttype;
+	my $bDoRewrite;
+	my $file_name;
+	if ($is_file) {
+		if (!-r $uq_link_addr) {
+			tell_bad_link("Can't read file ($uq_link_addr)",
+				$parent_url, $parent_lnum, $parent_link, $parent_line);
+			$m_MissedLinks{$uq_link_addr} = 1;
+			return;
+		}
+		$file_name = $uq_link_addr;
+	} else {
+		$file_name = mk_mirror_name($uq_link_addr);
+		if (!defined $ua) {
+			$ua = new LWP::UserAgent;
+			$ua->agent($m_ua_personality);
+			#$ua->delay(0.1);
+		}
+		if ($m_bMirror) {
+			$response = $ua->mirror($uq_link_addr, $file_name);
+			&$m_subMirrorAction($uq_link_addr, $file_name, $response);
+		} else {
+			my $request = new HTTP::Request('GET', $uq_link_addr);
+			$response = $ua->request($request, $file_name);
+		}
+		#dump_response($response); exit;
+		if ($response->code != 304) {
+			if (!$response->is_success) {
+				tell_bad_link($response->status_line . " ($uq_link_addr)",
+					$parent_url, $parent_lnum, $parent_link, $parent_line);
+				$m_MissedLinks{$uq_link_addr} = 1;
+				return;
+			}
+			$bDoRewrite = $m_bMirror;
+			$contenttype = get_contenttype($response);
+			$link_is_container = is_linked_contenttype($response);
+		}
+		if ($uq_link_addr ne $response->base) {
+			if ($m_bMirror) {
+				my $base_file = mk_mirror_name($response->base);
+				if (!File::Copy::copy($file_name, $base_file)) {
+					die "Can't copy($file_name, $base_file): $!\n";
+				}
+				if (my $lm = $response->last_modified) { utime $lm, $lm, $base_file; }
+				$file_name = $base_file;
+			}
+			$uq_link_addr = $response->base;
+		}
+	}
+	### Test again, could be new info from net!
+	if ($link_is_container) {
+		set_is_container($uq_link_addr, 1);
+		return if is_outside($uq_link_addr);
+	} else {
+		return if $m_bOnlyCont;
+		return;
+	}
+	&$m_subReport("$uq_link_addr ...");
+
+	my %links;
+	my %anchs;
+	my %lines;
+	my %tagname;
+	my %attname;
+	my $parsed_fh;
+	my $parsed_file;
+	my $file_to_parse = $file_name;
+	if ($bDoRewrite) {
+		$parsed_file = $file_to_parse . "-p$$";
+		&$m_subReport(" <<GET");
+		$parsed_fh = new IO::File("> $parsed_file");
+		die "Can't create $parsed_file: $!\n" unless defined $parsed_fh;
+		print $parsed_fh "<!-- parsed version -->\n";
+	}
+	&$m_subReport("\n");
+	parse_file($file_to_parse, $parsed_fh, $uq_link_addr,
+		\@m_sLinkRoots,
+		\%links, \%anchs, \%lines, \%tagname, \%attname);
+	if (defined $parsed_fh) {
+		$parsed_fh->close();
+		if (-e $file_name) { unlink $file_name or die "Can't unlink $file_name: $!"; }
+		rename($parsed_file, $file_name) or die "Can't rename($parsed_file, $file_name): $!\n";
+	    if (my $lm = $response->last_modified) { utime $lm, $lm, $file_name; }
+	}
+	### Now we know...
+	if ($link_is_container) { return if is_outside($uq_link_addr); }
+
+	$m_CheckedLinks{$uq_link_addr}->{ANC} = \%anchs;
+	my $file_dir;
+	if ($is_file) {
+		$file_dir = $uq_link_addr;
+		$file_dir =~ s![^/]*$!!;
+		#chdir $file_dir;
+	}
+	my $container_folder = $uq_link_addr; $container_folder =~ s![^/]*$!!;
+	&$m_subAction($uq_link_addr, $file_name, $contenttype);
+	for my $link (sort keys %links) {
+		# Next line is for onclick lines in prepared docs
+		next if ($link eq "#");
+		my $lnum = $links{$link};
+		my $line = $lines{$lnum};
+		if ($link eq "") {
+			tell_bad_link("Empty link", $uq_link_addr, $lnum, $link, $line);
+			next;
+		}
+		if ($link =~ m!(.*)\?!) { $link = $1; }
+		my $anchor;
+		if ($link =~ m!(.*)#(.*)!) { $link = $1; $anchor = $2; }
+		if ($link eq "") {
+			if (!exists $anchs{$anchor}) {
+				tell_bad_link("Anchor not found ($anchor)", $uq_link_addr, $lnum, $link, $line);
+			}
+			next;
+		}
+		my $sub_fold;
+		my $sub_file;
+		my $uq_sublink; 
+		if ($link =~ m!^https?://!i) {
+			$sub_fold = "";
+			$sub_file = $link;
+			$uq_sublink = $link;
+		} else {
+			$sub_file = $link;
+			if ($is_file) {
+				$sub_fold = $file_dir;
+				$uq_sublink = PathSubs::uniq_file($sub_fold . $sub_file);
+			} else {
+				$sub_fold = $container_folder;
+				$uq_sublink = $sub_fold . $sub_file;
+			}
+		}
+		next if (exists $m_CheckedLinks{$uq_sublink});
+		if (defined $anchor) {
+			$m_CheckedLinks{$uq_link_addr}->{EXTANC}->{$uq_sublink} =
+					{ ANC=> $anchor, LINE=>$line, LNUM=>$lnum};
+		}
+		if ($m_bOnlyCont) {
+			die "link=$link\tattr=$tagname{$link}\n" unless exists $tagname{$link};
+			next unless maybecont($tagname{$link}, $attname{$link});
+		}
+		if (is_outside($uq_link_addr)) {
+			if (maybecont($tagname{$link}, $attname{$link}) ) {
+				next;
+			}
+		}
+		walk_link($sub_fold, $sub_file, $uq_link_addr, $lnum, $link, $line);
+	}
+} # walk_link
+
+
+
+
+############################################
+### Some more checks!
+############################################
+sub check_external_anchors() {
+	&$m_subReport("\nChecking external anchors...\n");
+	for my $f (sort keys %m_CheckedLinks) {
+		my $fnode = $m_CheckedLinks{$f};
+		if (exists ${$fnode}{"EXTANC"}) {
+			my $extanc_hash = ${$fnode}{"EXTANC"};
+			for my $fx (keys %$extanc_hash) {
+				next unless (exists $m_CheckedLinks{$fx});
+				my $ea_hash = ${$extanc_hash}{$fx};
+				my $ea = ${$ea_hash}{ANC};
+				my $fxnode = $m_CheckedLinks{$fx};
+				my $fx_anc_hash = ${$fxnode}{"ANC"};
+				if (!exists ${$fx_anc_hash}{$ea}) {
+					my $line = ${$ea_hash}{LINE};
+					my $lnum = ${$ea_hash}{LNUM};
+					&$m_subReport("From $f\n");
+					tell_bad_link("Ext anchor not found ($fx#$ea)",
+						$f, $lnum, "$fx#$ea", $line);
+				}
+			}
+		}
+	}
+} # check_external_anchors
+
+
+
+#############################
+### Reporting
+#############################
+sub report_errors($$) {
+	my $bSum = shift;
+	my $bDet = shift;
+	my $errors_reported;
+	my $errors_found;
+	for my $f (sort keys %m_CheckedLinks) {
+		my $fnode = $m_CheckedLinks{$f};
+		if (exists ${$fnode}{ERR}) {
+			$errors_found = 1;
+			last unless $bSum;
+			if (!defined $errors_reported) {
+				$errors_reported = 1;
+				&$m_subReport("\n\n*********** Summary ERRORS and WARNINGS **********\n");
+			}
+			&$m_subReport("$f\n");
+			my $err_hash = ${$fnode}{ERR};
+			for my $e (sort keys %$err_hash) {
+				my $refE = ${$err_hash}{$e};
+				&$m_subReport("\t" . ${$refE}[0] . "\n");
+			}
+		}
+	}
+	undef $errors_reported;
+	if ($bDet) {
+		for my $f (sort keys %m_CheckedLinks) {
+			my $fnode = $m_CheckedLinks{$f};
+			if (exists ${$fnode}{ERR}) {
+				if (!defined $errors_reported) {
+					$errors_reported = 1;
+					&$m_subReport("\n\n*********** Detailed ERRORS and WARNINGS **********\n");
+				}
+				&$m_subReport("$f\n");
+				my $err_hash = ${$fnode}{ERR};
+				for my $e (sort keys %$err_hash) {
+					my $refE = ${$err_hash}{$e};
+					&$m_subReport("\tat line $e: " .  ${$refE}[1] . "\n");
+				}
+			}
+		}
+	}
+	if ($errors_found) {
+		die "\n*** There where errors ***\n";
+	} else {
+		&$m_subReport("No errors found\n");
+	}
+} # report_errors
+
+sub dump_response($) {
+	my $response = shift;
+		&$m_subReport( $response->code . " " . $response->message . "\n");
+		&$m_subReport( "****************************************\n");
+		#&$m_subReport( $response->request . "\n");
+		#&$m_subReport( "****************************************\n");
+		#&$m_subReport( $response->previous . "\n");
+		#&$m_subReport( "****************************************\n");
+		&$m_subReport( 	"  i=" . $response->is_info .
+				", s=" . $response->is_success .
+				", r=" . $response->is_redirect .
+				", e=" . $response->is_error . "\n");
+		&$m_subReport( "****************************************\n");
+		&$m_subReport( "content: " . $response->content . "\n");
+		&$m_subReport( "****************************************\n");
+		&$m_subReport( "base: " . $response->base . "\n");
+		&$m_subReport( "****************************************\n");
+		&$m_subReport( $response->as_string);
+		&$m_subReport( "****************************************\n");
+		&$m_subReport( $response->current_age . "\n");
+		&$m_subReport( "****************************************\n");
+		my @rh = $response->header("Content-Type");
+		for my $r (@rh) { &$m_subReport( "ct: $r\n"); }
+		&$m_subReport( "****************************************\n");
+} # dump_response
+
+
+#############################
+### Parameters
+#############################
+sub sMirrorRoot() {
+	my $val = shift;
+	$m_sMirrorRoot = PathSubs::get_temp_path() . "LinkWalker/" unless defined $m_sMirrorRoot;
+	my $old = $m_sMirrorRoot;
+	$m_sMirrorRoot = PathSubs::uniq_dir($val) if defined $val;
+	return $old;
+}
+sub bMirror(;$) {
+	my $val = shift;
+	my $old = $m_bMirror;
+	$m_bMirror = $val if defined $val;
+	$old;
+}
+
+sub subReporter(;$) {
+	my $val = shift;
+	my $old = $m_subReport;
+	$m_subReport = $val if defined $val;
+	$old
+}
+sub subAction(;$) {
+	my $val = shift;
+	my $old = $m_subAction;
+	$m_subAction = $val if defined $val;
+	$old
+}
+sub bOnlyCont(;$) {
+	my $val = shift;
+	my $old = $m_bOnlyCont;
+	$m_bOnlyCont = $val if defined $val;
+	$old
+}
+sub ua_personality(;$) {
+	my $val = shift;
+	my $old = $m_ua_personality;
+	$m_ua_personality = $val if defined $val;
+	$old
+}
+
+sub clear_roots() { @m_sLinkRoots = (); }
+sub get_roots() { return \@m_sLinkRoots; }
+sub add_root($) { push @m_sLinkRoots, shift; }
+sub add_files_root($) {
+	my $file = shift;
+	my $default_root;
+	my ($host) = ($file =~ m!(^https?://[^/]*)!i);
+	if (defined $host) {
+		$default_root = $file;
+	} else {
+		die "Can't find $file\n" unless -e $file;
+		$default_root = PathSubs::uniq_file($file);
+	}
+	$default_root =~ s![^/]*$!!;
+	add_root($default_root);
+}
+
+### Default actions
+sub default_sub {}
+$m_subReport 		= \&default_sub;
+$m_subAction 		= \&default_sub;
+$m_subMirrorAction 	= \&default_sub;
+
+1;
diff --git a/emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/HTML/ParserTagEnd.pm b/emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/HTML/ParserTagEnd.pm
new file mode 100644
index 0000000..32407d6
--- /dev/null
+++ b/emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/HTML/ParserTagEnd.pm
@@ -0,0 +1,448 @@
+package HTML::ParserTagEnd;
+
+# Author address: <gisle@aas.no>
+### Modified for <tag />, Lennart
+
+use strict;
+use HTML::Entities ();
+
+use vars qw($VERSION);
+$VERSION = "2.23";  # $Date: 1999/06/09 10:27:16 $
+
+
+sub new
+{
+    my $class = shift;
+    my $self = bless { '_buf'            => '',
+		       '_strict_comment' => 0,
+		     }, $class;
+    $self;
+}
+
+
+# A little note about the observed Netscape behaviour:
+#
+# It parse <xmp> in the depreceated 'literal' mode, i.e. no tags are
+# recognized until a </xmp> is found.
+# 
+# <listing> is parsed like <pre>, i.e. tags are recognized.  <listing>
+# are presentend in smaller font than <pre>
+#
+# Netscape does not parse this comment correctly (it terminates the comment
+# too early):
+#
+#    <! -- comment -- --> more comment -->
+#
+# Netscape ignores '<!--' and '-->' within the <SCRIPT> and <STYLE> tag.
+# This is used as a trick to make non-script-aware browsers ignore
+# the scripts.
+
+
+sub parse
+{
+    my $self = shift;
+    my $buf = \ $self->{'_buf'};
+    unless (defined $_[0]) {
+	# signals EOF (assume rest is plain text)
+	$self->text($$buf) if length $$buf;
+	$$buf = '';
+	return $self;
+    }
+    $$buf .= $_[0];
+    my $netscape_comment = !$self->{'_strict_comment'};
+
+    # Parse html text in $$buf.  The strategy is to remove complete
+    # tokens from the beginning of $$buf until we can't deside whether
+    # it is a token or not, or the $$buf is empty.
+
+  TOKEN:
+    while (1) {
+
+	# First we try to pull off any plain text (anything before a "<" char)
+	if ($$buf =~ s|^([^<]+)||) {
+	    if (length $$buf) {
+		$self->text($1);
+	    } else {
+		my $text = $1;
+		# At the end of the buffer, we should not parse white space
+		# but leave it for parsing on the next round.
+		if ($text =~ s|(\s+)$||) {
+		    $$buf = $1;
+                # Same treatment for chopped up entites and words.
+		# We must wait until we have it all.
+		} elsif ($text =~ s|(\s*\S+)$||) {
+		    $$buf = $1;
+		};
+		$self->text($text) if length $text;
+		last TOKEN;
+	    }
+
+	# Netscapes buggy comments are easy to handle
+	} elsif ($netscape_comment && $$buf =~ m|^<!\s*--|) {
+	    if ($$buf =~ s|^<!\s*--(.*?)--\s*>||s) {
+		$self->comment($1);
+	    } else {
+		last TOKEN;  # must wait until we see the end of it
+	    }
+
+	# Then, markup declarations (usually either <!DOCTYPE...> or a comment)
+	} elsif ($$buf =~ s|^(<!)||) {
+	    my $eaten = $1;
+	    my $text = '';
+	    my @com = ();  # keeps comments until we have seen the end
+	    # Eat text and beginning of comment
+	    while ($$buf =~ s|^(([^>]*?)--)||) {
+		$eaten .= $1;
+		$text .= $2;
+		# Look for end of comment
+		if ($$buf =~ s|^((.*?)--)||s) {
+		    $eaten .= $1;
+		    push(@com, $2);
+		} else {
+		    # Need more data to get all comment text.
+		    $$buf = $eaten . $$buf;
+		    last TOKEN;
+		}
+	    }
+	    # Can we finish the tag
+	    if ($$buf =~ s|^([^>]*)>||) {
+		$text .= $1;
+		$self->declaration($text) if $text =~ /\S/;
+		# then tell about all the comments we found
+		for (@com) { $self->comment($_); }
+	    } else {
+		$$buf = $eaten . $$buf;  # must start with it all next time
+		last TOKEN;
+	    }
+
+        # Should we look for 'processing instructions' <? ...> ??
+	#} elsif ($$buf =~ s|<\?||) {
+	    # ...
+
+	# Then, look for a end tag
+	} elsif ($$buf =~ s|^</||) {
+	    # end tag
+	    if ($$buf =~ s|^([a-zA-Z][a-zA-Z0-9\.\-]*)(\s*>)||) {
+		$self->end(lc($1), "</$1$2");
+	    } elsif ($$buf =~ m|^[a-zA-Z]*[a-zA-Z0-9\.\-]*\s*$|) {
+		$$buf = "</" . $$buf;  # need more data to be sure
+		last TOKEN;
+	    } else {
+		# it is plain text after all
+		$self->text("</");
+	    }
+
+	# Then, finally we look for a start tag
+	} elsif ($$buf =~ s|^(<([a-zA-Z]+)>)||) {
+	    # special case plain start tags for slight speed-up (2.5%)
+	    ### mod Lennart
+	    $self->start(lc($2), {}, 0, [], $1);
+
+	} elsif ($$buf =~ s|^<||) {
+	    # start tag
+	    my $eaten = '<';
+
+	    # This first thing we must find is a tag name.  RFC1866 says:
+	    #   A name consists of a letter followed by letters,
+	    #   digits, periods, or hyphens. The length of a name is
+	    #   limited to 72 characters by the `NAMELEN' parameter in
+	    #   the SGML declaration for HTML, 9.5, "SGML Declaration
+	    #   for HTML".  In a start-tag, the element name must
+	    #   immediately follow the tag open delimiter `<'.
+	    if ($$buf =~ s|^(([a-zA-Z][a-zA-Z0-9\.\-]*)\s*)||) {
+		$eaten .= $1;
+		my $tag = lc $2;
+		my %attr;
+		my @attrseq;
+
+		# Then we would like to find some attributes
+                #
+                # Arrgh!! Since stupid Netscape violates RCF1866 by
+                # using "_" in attribute names (like "ADD_DATE") of
+                # their bookmarks.html, we allow this too.
+		while ($$buf =~ s|^(([a-zA-Z][a-zA-Z0-9\.\-_]*)\s*)||) {
+		    $eaten .= $1;
+		    my $attr = lc $2;
+		    my $val;
+		    # The attribute might take an optional value (first we
+		    # check for an unquoted value)
+		    if ($$buf =~ s|(^=\s*([^\"\'>\s][^>\s]*)\s*)||) {
+			$eaten .= $1;
+			$val = $2;
+			HTML::Entities::decode($val);
+		    # or quoted by " or '
+		    } elsif ($$buf =~ s|(^=\s*([\"\'])(.*?)\2\s*)||s) {
+			$eaten .= $1;
+			$val = $3;
+			HTML::Entities::decode($val);
+                    # truncated just after the '=' or inside the attribute
+		    } elsif ($$buf =~ m|^(=\s*)$| or
+			     $$buf =~ m|^(=\s*[\"\'].*)|s) {
+			$$buf = "$eaten$1";
+			last TOKEN;
+		    } else {
+			# assume attribute with implicit value
+			$val = $attr;
+		    }
+		    $attr{$attr} = $val;
+		    push(@attrseq, $attr);
+		}
+
+		# At the end there should be a closing ">"
+### Modified for <tag />, Lennart
+		if ($$buf =~ s|^/>||) {
+		    $self->start($tag, \%attr, 1, \@attrseq, "$eaten>");
+		} elsif ($$buf =~ s|^>||) {
+		#if ($$buf =~ s|^>||) {
+		    $self->start($tag, \%attr, 0, \@attrseq, "$eaten>");
+		} elsif (length $$buf) {
+		    # Not a conforming start tag, regard it as normal text
+		    $self->text($eaten);
+		} else {
+		    $$buf = $eaten;  # need more data to know
+		    last TOKEN;
+		}
+
+	    } elsif (length $$buf) {
+		$self->text($eaten);
+	    } else {
+		$$buf = $eaten . $$buf;  # need more data to parse
+		last TOKEN;
+	    }
+
+	} else {
+	    #die if length($$buf);  # This should never happen
+	    last TOKEN; 	    # The buffer should be empty now
+	}
+    }
+
+    $self;
+}
+
+
+sub eof
+{
+    shift->parse(undef);
+}
+
+
+sub parse_file
+{
+    my($self, $file) = @_;
+    no strict 'refs';  # so that a symbol ref as $file works
+    local(*F);
+    unless (ref($file) || $file =~ /^\*[\w:]+$/) {
+	# Assume $file is a filename
+	open(F, $file) || die "Can't open $file: $!";
+	$file = \*F;
+    }
+    my $chunk = '';
+    while(read($file, $chunk, 512)) {
+	$self->parse($chunk);
+    }
+    close($file);
+    $self->eof;
+}
+
+
+sub strict_comment
+{
+    my $self = shift;
+    my $old = $self->{'_strict_comment'};
+    $self->{'_strict_comment'} = shift if @_;
+    return $old;
+}
+
+
+sub netscape_buggy_comment  # legacy
+{
+    my $self = shift;
+    my $old = !$self->strict_comment;
+    $self->strict_comment(!shift) if @_;
+    return $old;
+}
+
+
+sub text
+{
+    # my($self, $text) = @_;
+}
+
+sub declaration
+{
+    # my($self, $decl) = @_;
+}
+
+sub comment
+{
+    # my($self, $comment) = @_;
+}
+
+sub start
+{
+die "hie";
+    # my($self, $tag, $attr, $attrseq, $origtext) = @_;
+    # $attr is reference to a HASH, $attrseq is reference to an ARRAY
+}
+
+sub end
+{
+    # my($self, $tag, $origtext) = @_;
+}
+
+1;
+
+
+__END__
+
+
+=head1 NAME
+
+HTML::Parser - SGML parser class
+
+=head1 SYNOPSIS
+
+ require HTML::Parser;
+ $p = HTML::Parser->new;  # should really a be subclass
+ $p->parse($chunk1);
+ $p->parse($chunk2);
+ #...
+ $p->eof;                 # signal end of document
+
+ # Parse directly from file
+ $p->parse_file("foo.html");
+ # or
+ open(F, "foo.html") || die;
+ $p->parse_file(\*F);
+
+=head1 DESCRIPTION
+
+The C<HTML::Parser> will tokenize an HTML document when the parse()
+method is called by invoking various callback methods.  The document to
+be parsed can be supplied in arbitrary chunks.
+
+The external interface the an I<HTML::Parser> is:
+
+=over 4
+
+=item $p = HTML::Parser->new
+
+The object constructor takes no arguments.
+
+=item $p->parse( $string );
+
+Parse the $string as an HTML document.  Can be called multiple times.
+The return value is a reference to the parser object.
+
+=item $p->eof
+
+Signals end of document.  Call eof() to flush any remaining buffered
+text.  The return value is a reference to the parser object.
+
+=item $p->parse_file( $file );
+
+This method can be called to parse text from a file.  The argument can
+be a filename or an already opened file handle. The return value from
+parse_file() is a reference to the parser object.
+
+=item $p->strict_comment( [$bool] )
+
+By default we parse comments similar to how the popular browsers (like
+Netscape and MSIE) do it.  This means that comments will always be
+terminated by the first occurrence of "-->".  This is not correct
+according to the "official" HTML standards.  The official behaviour
+can be enabled by calling the strict_comment() method with a TRUE
+argument.
+
+The return value from strict_comment() is the old attribute value.
+
+=back
+
+
+
+In order to make the parser do anything interesting, you must make a
+subclass where you override one or more of the following methods as
+appropriate:
+
+=over 4
+
+=item $self->declaration($decl)
+
+This method is called when a I<markup declaration> has been
+recognized.  For typical HTML documents, the only declaration you are
+likely to find is <!DOCTYPE ...>.  The initial "<!" and ending ">" is
+not part of the string passed as argument.  Comments are removed and
+entities will B<not> be expanded.
+
+=item $self->start($tag, $attr, $attrseq, $origtext)
+
+This method is called when a complete start tag has been recognized.
+The first argument is the tag name (in lower case) and the second
+argument is a reference to a hash that contain all attributes found
+within the start tag.  The attribute keys are converted to lower case.
+Entities found in the attribute values are already expanded.  The
+third argument is a reference to an array with the lower case
+attribute keys in the original order.  The fourth argument is the
+original HTML text.
+
+
+=item $self->end($tag, $origtext)
+
+This method is called when an end tag has been recognized.  The
+first argument is the lower case tag name, the second the original
+HTML text of the tag.
+
+=item $self->text($text)
+
+This method is called when plain text in the document is recognized.
+The text is passed on unmodified and might contain multiple lines.
+Note that for efficiency reasons entities in the text are B<not>
+expanded.  You should call HTML::Entities::decode($text) before you
+process the text any further.
+
+A sequence of text in the HTML document can be broken between several
+invocations of $self->text.  The parser will make sure that it does
+not break a word or a sequence of spaces between two invocations of
+$self->text().
+
+=item $self->comment($comment)
+
+This method is called as comments are recognized.  The leading and
+trailing "--" sequences have been stripped off the comment text.
+
+=back
+
+The default implementation of these methods do nothing, i.e., the
+tokens are just ignored.
+
+There is really nothing in the basic parser that is HTML specific, so
+it is likely that the parser can parse other kinds of SGML documents.
+SGML has many obscure features (not implemented by this module) that
+prevent us from renaming this module as C<SGML::Parser>.
+
+=head1 EFFICIENCY
+
+The parser is fairly inefficient if the chunks passed to $p->parse()
+are too big.  The reason is probably that perl ends up with a lot of
+character copying when tokens are removed from the beginning of the
+strings.  A chunk size of about 256-512 bytes was optimal in a test I
+made with some real world HTML documents.  (The parser was about 3
+times slower with a chunk size of 20K).
+
+=head1 SEE ALSO
+
+L<HTML::Entities>, L<HTML::TokeParser>, L<HTML::Filter>,
+L<HTML::HeadParser>, L<HTML::LinkExtor>
+
+L<HTML::TreeBuilder> (part of the I<HTML-Tree> distribution)
+
+=head1 COPYRIGHT
+
+Copyright 1996-1999 Gisle Aas. All rights reserved.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+=cut
+
+
diff --git a/emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/HTML/datadir.txt b/emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/HTML/datadir.txt
new file mode 100644
index 0000000..1ba751d
--- /dev/null
+++ b/emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/HTML/datadir.txt
@@ -0,0 +1 @@
+C:/TEMP/i2data
diff --git a/emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/PathSubs.pm b/emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/PathSubs.pm
new file mode 100644
index 0000000..e95b8d5
--- /dev/null
+++ b/emacs.d/nxhtml/nxhtml/html-chklnk/PerlLib/PathSubs.pm
@@ -0,0 +1,207 @@
+# Copyright 2006 Lennart Borgman, http://OurComments.org/. All rights
+# reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; see the file COPYING.  If not, write to the
+# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+
+package PathSubs;
+
+#####################################################
+###     This package contains general path handling
+###     routines and some win32 specific dito.
+### The latter should ev be moved to a new module!
+#####################################################
+use strict;
+
+use File::Spec;
+
+### Absolute path names
+
+sub is_abs_path ($) {
+   my $path = shift;
+   return 0 if $path eq "";
+   return 1 if File::Spec->file_name_is_absolute($path);
+   #return 1 if substr($path, 1, 1) eq ":";     # MSWin32
+   #return 1 if substr($path, 0, 1) eq "/";
+   return 1 if $path =~ /^https?:/i;
+   return 1 if $path =~ /^file:/i;
+   return 1 if $path =~ /^javascript:/i;
+   return 1 if $path =~ /^mailto:/i;
+}
+sub is_abs_netpath($) {
+   my $path = shift;
+   return 1 if $path =~ /^https?:/i;
+   # New
+   return 1 if $path =~ /^ftp:/i;
+   return 1 if $path =~ /^mailto:/i;
+}
+
+
+sub uniq_file($) {
+        my $fname = shift;
+        $fname =~ s!^\s+|\s+$!!g;
+        return "" if ($fname eq "");
+        $fname = File::Spec->rel2abs($fname);
+        if (!File::Spec->file_name_is_absolute($fname)) {
+            die "File name is not absolute: $fname";
+        }
+        #print STDERR "uniq_file($fname)\n";
+        $fname =~ tr!\\!/!;
+        if (-e $fname)  {
+                #print STDERR "exists $fname\n";
+                ### There is an error in 522, compensate for this!
+                #die substr($fname, -1);
+                if (substr($fname, -1) eq "/") { chop $fname; }
+                #print STDERR "exists $fname\n";
+                ### Translate ..
+                if (substr($fname, 1, 1) eq ":") {
+                    my $ffname = Win32::GetFullPathName($fname);
+                    ### Get case
+                    my $lfname = Win32::GetLongPathName($ffname);
+                    #print STDERR "lexists $lfname\n";
+                    $fname = $lfname if ($lfname ne "");
+                }
+        } else {
+                #print STDERR "NOT exists $fname\n";
+                if (substr($fname, -1) eq "/") { chop $fname; }
+                my $head = "";
+                if (substr($fname, 0, 2) eq "//") {
+                        $head = "//";
+                        $fname = substr($fname, 2);
+                }
+                my @fname = split("/", $fname);
+                my $tail = pop @fname;
+                $fname = uniq_dir($head . join("/", @fname)) . $tail;
+        }
+        if (substr($fname, 1, 1) eq ":") {
+                $fname = uc(substr($fname, 0, 1)) . substr($fname, 1);
+                #print STDERR "fname $fname\n";
+        }
+        $fname =~ tr!\\!/!;
+                #print STDERR "fname ($fname)\n";
+        return $fname;
+}
+sub uniq_dir($) {
+        my $dir = shift;
+        my $uq_dir = uniq_file($dir);
+        if (substr($uq_dir, -1) ne "/") { $uq_dir .= "/"; }
+        return $uq_dir;
+}
+
+
+
+### Relative paths
+sub _get_link_root($) {
+        my $lnk = shift;
+        if ($lnk =~ m!^(/|ftp://[^/]*|https?://[^/]*|[a-z]:/)!i) {
+                return $1;
+        } else {
+                return "";
+        }
+}
+
+sub resolve_dotdot($) {
+        my $orig_url = shift;
+        my $root = _get_link_root($orig_url);
+        return $orig_url if length($root) == length($orig_url);
+        my $url = substr($orig_url, length($root));
+        if (substr($root, -1) eq "/") {
+                chop $root;
+                $url = "/$url";
+        }
+        #die "$root\n$url";
+        my $iPosSearch = 2;
+        #print "url=$url\n";
+        while ((my $iPos = index($url, "/../", $iPosSearch)) > -1) {
+                my $sLeft = substr($url, 0, $iPos);
+                if (substr($sLeft, -2) eq "..") {
+                        $iPosSearch += 3;
+                        next;
+                }
+                my $sRight = substr($url, $iPos+3);
+                #print "url=$url\n";
+                #print "iPos=$iPos\n";
+                #print "sLeft=$sLeft\n";
+                $sLeft =~ s!/[^/]*$!!;
+                #print "sLeft=$sLeft\n";
+                #print "sRight=$sRight\n";
+                $url = $sLeft . $sRight;
+                #print "\t***url=$url\n";
+                #print "url=$url\n";
+        }
+        if (index($url, "../") > -1) {
+                return $orig_url;
+        }
+        return $root . $url;
+}
+
+sub mk_relative_link($$;$) {
+    my $from = shift;
+    my $to   = shift;
+    my $norm = shift;
+    if ($norm) {
+        $from = uniq_file($from);
+        $to   = uniq_file($to);
+    }
+    if (-e $from) {
+        $from = uniq_file($from);
+    } else {
+        $from = resolve_dotdot($from);
+    }
+    if (-e $to) {
+        $to   = uniq_file($to);
+    } else {
+        $to = resolve_dotdot($to);
+    }
+    my $root_from = _get_link_root($from);
+    my $root_to   = _get_link_root($to  );
+    if ($root_from ne $root_to) {
+        return $to;
+    }
+    my @from = split "/", $from;
+    my @to   = split "/", $to;
+    while (@to) {
+        last if ($to[0] ne $from[0]);
+        shift @to;
+        shift @from;
+    }
+    if (@to == 1 && @from == 1) {
+        if (length($to[0]) > length($from[0])) {
+                        if (substr($to[0], 0, length($from[0])+1) eq ($from[0] . "#")) {
+                                return substr($to[0], length($from[0]));
+                        }
+        }
+    }
+    my $rl;
+    for (1..$#from) { $rl .= "../"; }
+    $rl .= join("/", @to);
+
+    return $rl;
+}
+
+
+
+sub mk_absolute_link($$) {
+        my $from   = shift;
+        my $rel_to = shift;
+        my $abs = $from;
+        $abs =~ s![^/]*$!!;
+        $abs .= $rel_to;
+        if (!is_abs_netpath($abs)) { $abs = uniq_file($abs); }
+        $abs;
+}
+
+
+1;
diff --git a/emacs.d/nxhtml/nxhtml/html-chklnk/link_checker.pl b/emacs.d/nxhtml/nxhtml/html-chklnk/link_checker.pl
new file mode 100644
index 0000000..0925b1c
--- /dev/null
+++ b/emacs.d/nxhtml/nxhtml/html-chklnk/link_checker.pl
@@ -0,0 +1,328 @@
+#! perl
+
+# Copyright 2006, 2007 Lennart Borgman, http://OurComments.org/. All
+# rights reserved.
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This file is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+
+use strict;
+use warnings;
+
+use IO::File;
+use File::Spec;
+use File::Find;
+
+sub check_file($);
+
+#############################
+### Collecting info
+#############################
+my $m_site_dir;   # Site root directory (every file should be in this)
+my %m_CheckedFiles;
+my %m_FilesToCheck;
+my %m_MissedFiles;
+my $m_errors_found;
+
+sub tell_bad_link($$$$$) {
+	my $what = shift;
+	my $file = shift;
+	my $lnum = shift;
+	my $link = shift;
+	my $line = shift;
+        $line =~ s/^\s+|\s+$//g;
+	$m_CheckedFiles{$file}->{"ERR"}->{$lnum} = "$what\n    Link: \"$link\"\n";
+        #$line";
+}
+
+#############################
+### Helpers
+#############################
+sub add_file_to_check($) {
+    $m_FilesToCheck{File::Spec->canonpath(shift)} = 1;
+}
+# sub full_uq_file($) {
+# 	my $file = shift;
+# 	my $full_file = $file;
+#         if (! File::Spec->file_name_is_absolute($full_file)) {
+#             #$full_file = Win32::GetFullPathName($file);
+#             $full_file = File::Spec->rel2abs($full_file, $m_site_dir);
+#         }
+#         if (($^O eq "MSWin32") || ($^O eq "cygwin")) {
+#             $full_file =~ tr!A-Z!a-z!;
+#         }
+#         #print "ull_uq_file: full_file=$file\n";
+# 	return $full_file;
+# }
+
+#############################
+### Checks
+#############################
+sub check_next_file() {
+    if (scalar(keys %m_FilesToCheck) > 0) {
+        my @FilesToCheck = sort keys %m_FilesToCheck;
+        my $next_file = $FilesToCheck[0];
+        delete $m_FilesToCheck{$next_file};
+        check_file($next_file);
+    }
+}
+sub not_a_local_file($) {
+    my $url = shift;
+    (
+     $url =~ m!^javascript:!
+     ||
+     $url =~ m!^mailto:!
+     ||
+     $url =~ m!^[a-z]+://!
+    );
+} 
+
+sub check_file($) {
+	my $fname = shift;
+        if (! File::Spec->file_name_is_absolute($fname)) {
+            die "check_file: File is not abs: $fname";
+        }
+        my $only_name = (File::Spec->splitpath($fname))[2];
+	print "Checking $fname ... ";
+        sleep 0.5;
+	$m_CheckedFiles{$fname} = {};
+	my %links;
+	my %anchs;
+	my %lines;
+	my $fh = new IO::File($fname);
+	die "Can't read $fname: $!\n" unless defined $fh;
+	my $whole;
+	my $n;
+        my $found_errors = 0;
+	while (my $line = <$fh>) {
+		$n++;
+		chomp $line;
+		$whole = $line;
+		while ($whole =~ m!(?:\s|^)id="(.*?)"!g) {
+			$anchs{$1} = $n;
+			$lines{$n} = $line;
+		}
+		while ($whole =~ m!(?:\s|^)name="(.*?)"!g) {
+			$anchs{$1} = $n;
+			$lines{$n} = $line;
+		}
+		while ($whole =~ m!(?:\s|^)href="(.*?)"!g) {
+			my $l = $1;
+			next if not_a_local_file($l);
+                        if ($l =~ m!^#!) {
+                            $l = $only_name . $l;
+                        }
+			$links{$l} = $n;
+			$lines{$n} = $line;
+		}
+		while ($whole =~ m!(?:\s|^)src="(.*?)"!g) {
+			my $l = $1; $l =~ tr!A-Z!a-z!;
+			$links{$l} = $n;
+			$lines{$n} = $line;
+		}
+	}
+	$fh->close();
+	$m_CheckedFiles{$fname}->{ANC} = \%anchs;
+        my ($fv, $fd, $ff) = File::Spec->splitpath($fname);
+        my $fdir = File::Spec->catpath($fv, $fd, "");
+	for my $link (sort keys %links) {
+		# Next line is for onclick lines
+		next if ($link eq "#");
+		my $lnum = $links{$link};
+		my $line = $lines{$lnum};
+		if ($link eq "") {
+			tell_bad_link("empty link", $fname, $lnum, $link, $line);
+                        $found_errors = 1;
+			next;
+		}
+		if ($link =~ m!(.*)\?!) { $link = $1; }
+		my $anchor;
+		if ($link =~ m!(.*)#(.*)!) { $link = $1; $anchor = $2; }
+		if ($link eq "") {
+			if (!exists $anchs{$anchor}) {
+				tell_bad_link("bad internal anchor ref ($anchor)", $fname, $lnum, $link, $line);
+                                $found_errors = 1;
+			}
+			next;
+		}
+		$link =~ m!([^\.]*)$!;
+		my $link_file_type = $1;
+		my $subfile = $link;
+                if (!File::Spec->file_name_is_absolute($subfile)) {
+                    $subfile = File::Spec->catpath($fv, $fd, $link);
+                }
+                $subfile = File::Spec->canonpath($subfile);
+                die "Contained .." if $subfile =~ m/\.\./;
+		next if (exists $m_MissedFiles{$subfile});
+		if (! -r $subfile) {
+			tell_bad_link("Can't read linked file: $!", $fname, $lnum, $link, $line);
+                        $found_errors = 1;
+			$m_MissedFiles{$subfile} = 1;
+			next;
+		}
+		next unless $link_file_type =~ m!^html?$!i;
+		if (defined $anchor) {
+			$m_CheckedFiles{$fname}->{EXTANC}->{$subfile} =
+					{ ANC=> $anchor, LINE=>$line, LNUM=>$lnum};
+		}
+		next if (exists $m_CheckedFiles{$subfile});
+		#check_file($subfile);
+                my $rel_root = File::Spec->abs2rel($subfile, $m_site_dir);
+                if (substr($rel_root, 0, 2) eq "..") {
+                    tell_bad_link("Reference to file outside site", $fname, $lnum, $link, $line);
+                    $found_errors = 1;
+                } else {
+                    #$m_FilesToCheck{$subfile} = 1;
+                    add_file_to_check($subfile);
+                }
+	}
+        if ($found_errors) {
+            print "Errors found\n";
+        } else {
+            print "Ok\n";
+        }
+        sleep 0.5;
+        check_next_file();
+} # check_file
+
+
+sub check_external_anchors() {
+	for my $f (sort keys %m_CheckedFiles) {
+		my $fnode = $m_CheckedFiles{$f};
+		if (exists ${$fnode}{"EXTANC"}) {
+			my $extanc_hash = ${$fnode}{"EXTANC"};
+			for my $fx (keys %$extanc_hash) {
+				next unless (exists $m_CheckedFiles{$fx});
+				my $ea_hash = ${$extanc_hash}{$fx};
+				my $ea = ${$ea_hash}{ANC};
+				my $fxnode = $m_CheckedFiles{$fx};
+				my $fx_anc_hash = ${$fxnode}{"ANC"};
+				if (!exists ${$fx_anc_hash}{$ea}) {
+					my $line = ${$ea_hash}{LINE};
+					my $lnum = ${$ea_hash}{LNUM};
+					tell_bad_link("Hash not found", $f, $lnum, "$fx#$ea", $line);
+				}
+			}
+		}
+	}
+} # check_external_anchors
+
+
+
+#############################
+### Reporting
+#############################
+sub report_errors() {
+	for my $f (sort keys %m_CheckedFiles) {
+		my $fnode = $m_CheckedFiles{$f};
+		if (exists ${$fnode}{"ERR"}) {
+			if (!defined $m_errors_found) {
+				$m_errors_found = 1;
+				print "\n\n*********** Error details: **********\n";
+                                sleep 0.5;
+			}
+			#print "\n$f";
+			my $err_hash = ${$fnode}{"ERR"};
+			for my $e (sort keys %$err_hash) {
+                            print "\n$f";
+                            print " at line $e:\n    " . ${$err_hash}{$e} . "\n";
+                            sleep 0.5;
+			}
+		}
+	}
+	if ($m_errors_found) {
+		die "\n*** There where errors ***\n";
+	} else {
+		print "Everything that was checked is ok\n";
+	}
+} # report_errors
+
+#############################
+### Help
+#############################
+sub usage() {
+    die "Usage: $0 --site=SITE-DIR --start=START-FILE\n";
+}
+
+#############################
+### Parameters
+#############################
+#my $m_start_file; # File to start checking in
+sub get_params() {
+    usage() unless $#ARGV > -1;
+    for (my $i = 0; $i <= $#ARGV; $i++) {
+        my ($k, $v) = ($ARGV[$i] =~ m!-?-?(.*?)=(.*)!);
+        if ($k eq "site") {
+            $m_site_dir = $v;
+        } elsif( $k eq "start") {
+            #$m_FilesToCheck{$v} = 1;
+            add_file_to_check($v);
+        } else {
+            print STDERR "Unknown parameter: $ARGV[$i]\n";
+            usage();
+        }
+    }
+    foreach my $key (keys %m_FilesToCheck) {
+        die "Can't find $key\n" unless -e $key;
+    }
+    if (! $m_site_dir) {
+        print STDERR "No site directory given\n";
+        usage();
+    }
+    die "Can't find $m_site_dir\n" unless -d $m_site_dir;
+    if ((scalar keys %m_FilesToCheck) == 0) {
+        my $add_files =
+            sub {
+                return unless m/.html?$/i;
+                return if -d $_;
+                #$m_FilesToCheck{$File::Find::name} = 1;
+                add_file_to_check($File::Find::name);
+            };
+        File::Find::find($add_files, $m_site_dir);
+    }
+}
+
+sub check_canonpath() {
+    my $testpath = "/test/../some.txt";
+    if ($testpath eq File::Spec->canonpath($testpath)) {
+        my $errmsg = <<_BADCANON_
+
+** Fatal Error:
+
+   File::Spec->canonpath does not clean up path.
+
+   If you are doing this from Emacs with html-chklnk-check-site-links
+   it may be because you are using Cygwin as your shell.  You can cure
+   this in the following ways:
+
+   1) Use w32shell.el - this will temporary switch to "cmd" as shell.
+   2) Use the default shell on w32.
+
+_BADCANON_
+;
+
+        die $errmsg;
+    }
+}
+
+#############################
+### Main
+#############################
+
+check_canonpath();
+
+$| = 1; # flush or blush!
+
+print "\n";
+get_params();
+
+check_next_file();
+check_external_anchors();
+report_errors();