#!/usr/bin/perl
# syn analyzes the syntax of HTML pages. It outputs a list of orphan tags
# or an indented list of a single tag type for debugging.
# It accepts file wildcards and outputs a list of all file errors.
# Written by P. Lutus Ashland, Oregon lutusp@arachnoid.com 6/3/96
#
# Command line (options apply to the file names that FOLLOW them):
#   -c          continuous listing, never pause for Enter
#   -v          verbose: print the summary line even for clean files
#   -i NAME     print an indented list of tag NAME (or "all" for every tag)
#   FILE ...    HTML files to check

use strict;
use warnings;

# Tags that may legitimately appear without a closing partner.
my @orphan = ( # these tags can be unpaired
    "!--", "p", "br", "img", "input", "bgsound", "embed", "hr", "base",
    "li", "!doctype", "option", "isindex", "dt", "dd", "link", "meta",
    "nextid",
);
my %is_orphan = map { $_ => 1 } @orphan;

# Pager state shared between main() and check_line().
my $continuous = 0;    # non-zero once -c has been seen: never pause
my $pline      = 0;    # lines printed since the last pause

# Run as a program; stay quiet when the file is loaded by other code.
main(@ARGV) unless caller;

# main(@args): walk the argument list in order, updating option state as
# options are met and checking every other argument as a file.
sub main {
    my @args = @_;

    unless (@args) {
        print "usage: -c continuous listing (no page stops) -v (verbose)\n";
        print " -i tagname/\"all\" (indented list of a chosen tag or all)\n";
        print " names of HTML files inc. wildcards.\n";
        return;
    }

    my $verbose    = 0;
    my $list_tag;          # tag name given after -i; undef = no listing
    my $expect_tag = 0;    # true when the next argument is the -i value

    foreach my $arg (@args) {
        if ($expect_tag) {
            $list_tag   = $arg;
            $expect_tag = 0;
        }
        elsif ($arg eq "-v") { $verbose++; }
        elsif ($arg eq "-i") { $expect_tag = 1; }
        elsif ($arg eq "-c") { $continuous++; }
        else {
            process_file($arg, $verbose, $list_tag);
        }
    }
    return;
}

# process_file($fn, $verbose, $list_tag): check one file and print its
# optional indented tag list, its unbalanced tags and its summary line.
# All counters are local to the call, so nothing leaks from one file
# into the report of the next.
sub process_file {
    my ($fn, $verbose, $list_tag) = @_;

    unless (-e $fn) {
        warn "$fn: no such file\n";
        return;
    }
    return unless -T $fn;    # quietly skip directories and binary files

    my $text = slurp_file($fn);
    unless (defined $text) {
        warn "$fn: cannot open: $!\n";
        return;
    }

    # tr/// both counts the newlines and turns them into spaces, so a tag
    # broken across lines is still seen as a single piece.
    my $lines = ($text =~ tr/\n/ /);

    my ($balance, $listing) = scan_tags($text, $list_tag);

    foreach my $entry (@$listing) {
        print "$entry\n";
        check_line();
    }

    my $err = 0;
    foreach my $tag (sort keys %$balance) {    # now print the errors
        my $count = $balance->{$tag};
        next if $count == 0;
        if ($err == 0) {
            print "$fn:\n";
            check_line();
        }
        # A negative balance means surplus closing tags.
        my $shown = ($count < 0) ? "</$tag>" : "<$tag>";
        $count = abs($count);
        $err += $count;
        print "$shown $count\n";
        check_line();
    }

    if ($verbose || $err > 0) {
        print "$fn: $lines lines, $err errors.\n";
        check_line();
    }
    return;
}

# slurp_file($path): return the whole file as one string ("" for an empty
# file), or undef with $! set when the file cannot be opened.
sub slurp_file {
    my ($path) = @_;
    open(my $fh, '<', $path) or return;
    local $/;    # grab the entire file at once
    my $text = <$fh>;
    close($fh);
    return defined $text ? $text : "";
}

# scan_tags($text, $wanted): split $text at every '<' and tally the tags.
#   $wanted  tag name to list, "all" for every tag, undef for no listing.
# Returns two references:
#   \%balance  opens minus closes for every tag not in the orphan list
#   \@listing  the selected pieces, each prefixed by one space per open
#              level of the selected tag(s)
# NOTE: a '<' inside an HTML comment is still treated as a tag start.
sub scan_tags {
    my ($text, $wanted) = @_;
    my (%balance, @listing);
    my $indent = 0;

    my @pieces = split(/</, $text);
    shift @pieces;    # whatever precedes the first '<' is text, not a tag

    foreach my $piece (@pieces) {
        next if $piece eq "";

        # find the token up to but not including ' ','=','>'
        my $name = $piece;
        $name =~ s/[\s=>].*//s;
        $name = lc $name;
        # strip and note the '/' delimiter
        my $closing = ($name =~ s{^/}{}) ? 1 : 0;
        # "<!--text" has no blank after the opener; it is still a comment
        $name = "!--" if index($name, "!--") == 0;

        my $selected = defined($wanted)
            && ($name eq $wanted || $wanted eq "all");

        unless ($is_orphan{$name}) {    # not OK to be single
            my $step = $closing ? -1 : 1;
            $balance{$name} += $step;
            $indent += $step if $selected;
        }

        if ($selected) {
            # Surplus closing tags can push the level below zero.
            my $tab = " " x ($indent > 0 ? $indent : 0);
            push @listing, "$tab$piece";
        }
    }
    return (\%balance, \@listing);
}

# check_line(): call after every printed line. Unless -c was given, it
# pauses for Enter once more than 20 lines have gone by since the last
# pause.
sub check_line {
    return if $continuous != 0;
    $pline++;
    if ($pline > 20) {
        $pline = 0;
        local $| = 1;    # the prompt has no newline; flush before reading
        print "(Press Enter for more):";
        scalar <STDIN>;
    }
    return;
}

1;