#!/usr/bin/perl
# syn analyzes the syntax of HTML pages. It outputs a list of orphan tags
# or an indented list of a single tag type for debugging.
# It accepts file wildcards and outputs a list of all file errors.
# Written by P. Lutus Ashland, Oregon lutusp@arachnoid.com 6/3/96
# Tag names (lower case, without angle brackets) that may legitimately appear
# without a matching closing tag. The checker never counts these as errors.
@orphan = qw(
    !--
    p
    br
    img
    input
    bgsound
    embed
    hr
    base
    li
    !doctype
    option
    isindex
    dt
    dd
    link
    meta
    nextid
);
# Main program: walk the command line in order, handling options and checking
# every named text file for unbalanced HTML tags.
#
# Options may be mixed with file names and take effect for the files that
# follow them:
#   -v       verbose: print the summary line even for files with no errors
#   -c       continuous: never pause the output (see check_line)
#   -i NAME  also print an indented listing of tag NAME, or of every tag
#            when NAME is "all"
#
# For each file the tags that are not in @orphan are counted (+1 for an
# opening tag, -1 for a closing tag); any tag whose count is not zero is
# reported together with the size of the imbalance.
#
# $continuous and $pline are intentionally left as package globals because
# check_line reads and updates them.
if (@ARGV) {
    $continuous = 0;
    $pline      = 0;

    my $expect_tag = 0;     # true when the next argument is the NAME for -i
    my $listed_tag = "";    # tag chosen with -i, or "all"
    my $verbose    = 0;
    my $indentlist = 0;
    my $indent     = 0;     # running nesting depth of the listed tag(s)

    # Hash lookup instead of scanning @orphan for every tag.
    my %is_orphan = map { $_ => 1 } @orphan;

    foreach my $fn (@ARGV) {
        if ($expect_tag != 0) {
            $listed_tag = $fn;
            $expect_tag = 0;
            next;
        }
        if ($fn eq "-v") {
            $verbose++;
            next;
        }
        if ($fn eq "-i") {
            $indentlist++;
            $expect_tag++;
            next;
        }
        if ($fn eq "-c") {
            $continuous++;
            next;
        }

        # Only text files are analysed. Anything else is skipped entirely so
        # that no report is produced from a previous file's counters.
        next unless -T $fn;

        # Three-argument open: the file name can never be taken as a mode.
        my $fh;
        unless (open($fh, '<', $fn)) {
            warn "$fn: cannot open: $!\n";
            next;
        }
        # Grab the entire file at once; $/ is only undefined inside this block.
        my $file = do { local $/; readline($fh) };
        close $fh;
        $file = "" unless defined $file;

        # Turn newlines into spaces so a tag may span lines; tr returns the
        # number of characters replaced, i.e. the line count (0 if none).
        my $lines = ($file =~ tr/\n/ /);

        # Each piece after a '<' begins with a tag name. The first field is
        # the text in front of the first '<' and therefore not a tag.
        my @pcs = split(/</, $file);
        shift @pcs;

        my %count;    # tag name => opening tags seen minus closing tags seen
        foreach my $piece (@pcs) {
            next if $piece eq "";

            # find the token up to but not including ' ','=','>'
            my $tag = $piece;
            $tag =~ s/[\s=>].*//;
            # make it lower case
            $tag =~ tr/A-Z/a-z/;
            # strip and count the '/' delimiter
            my $closing = ($tag =~ s/^\///);

            my $listed = (($tag eq $listed_tag) || ($listed_tag eq "all"));

            unless ($is_orphan{$tag}) {    # not OK to be single
                my $step = $closing ? -1 : 1;
                $count{$tag} += $step;
                $indent += $step if $listed;
            }

            if ($indentlist && $listed) {
                my $tab = " " x $indent;
                print "$tab$piece\n";
                check_line();
            }
        }

        # now print the errors (sorted so the report order is stable)
        my $err = 0;
        foreach my $k (sort keys %count) {
            my $n = $count{$k};
            next if $n == 0;
            if ($err == 0) {
                print "$fn:\n";
                check_line();
            }
            # A negative count means surplus closing tags: show the tag
            # with its slash.
            my $slash = ($n < 0) ? "/" : "";
            $n = abs($n);
            $err += $n;
            print "<$slash$k> $n\n";
            check_line();
        }
        if ($verbose || ($err > 0)) {
            print "$fn: $lines lines, $err errors.\n";
            check_line();
        }
    }
}
else {
    print "usage: -c continuous listing (no page stops) -v (verbose)\n";
    print " -i tagname/\"all\" (indented list of a chosen tag or all)\n";
    print " names of HTML files inc. wildcards.\n";
}
# check_line - pause the output after every screenful of lines.
#
# Call once after each line printed. Takes no arguments and its return value
# is not meaningful; it works through two package globals:
#   $continuous  when non-zero, paging is disabled and nothing happens
#   $pline       number of lines counted since the last pause
# Once more than 20 lines have been counted, the counter is reset, a prompt
# is printed and the script waits for one line on standard input.
sub check_line {
    return if $continuous != 0;

    $pline++;
    if ($pline > 20) {
        $pline = 0;
        print "(Press Enter for more):";
        # Block until a line arrives on standard input. Previously this
        # statement was empty, so the prompt appeared but nothing paused.
        scalar readline(*STDIN);
    }
    return;
}