#!/usr/bin/perl -w # sitegen.pl populates a website's pages with drop-down menus. # this script relies on these two custom Meta tags: # # # the "group" tag should be in each page # the "groupTitle" tag only needs to be in # one of each group's pages. # This scheme also relies on pages consistently having page titles, # and unique page titles within each group's pages. # set group to "NOSHOW" for pages not to appear in the menus. # also, each named group must have at least one associated page # so it can be displayed in the menus # if an argument of "-z" is provided to this script, # it will remove all its content from the # set of Web pages # WARNING: this script replaces pages en masse. # ALWAYS maintain a full backup of your page set. # Copyright (c) 2005, P. Lutus, released under the GPL. # BEGIN user settings $source = ($ARGV[0])?$ARGV[0]:"/path/to/Web/pages"; $plainTextIndexPath = "/path/siteIndex.txt"; $rightArrowPath = "$source/images/rightarrow.png"; $leftArrowPath = "$source/images/leftarrow.png"; # END user settings $startBlockTag = ""; $endBlockTag = ""; $idTag = ""; $scriptTag = "\n"; use File::Find; sub processDir { my $path = $File::Find::name; # if it's a file if ( -f $path ) { # if it has an apparent HTML/PHP page suffix # and doesn't have a tilde at the start of its name if ( $path =~ /\.(htm[l|]|php)$/ && !( $path =~ /\/\~.*/ ) ) { push( @fileList, $path ); } } } sub readFile { local $/; open( FILE, $_[0] ); my $data = ; close FILE; return $data; } sub writeFile { open( FILE, ">$_[0]" ); print FILE $_[1]; close FILE; } sub readMetaTag { my ( $dataRef, $tag ) = @_; my ($value) = ${$dataRef} =~ /meta\s*name\s*=\s*\"$tag\"\s*content\s*=\s*\"(.*?)\"/is; return $value || ""; } sub readPageTitle { my $dataRef = $_[0]; my ($title) = ${$dataRef} =~ /\s*(.*?)\s*<\/title>/is; if ( !$title ) { $path = $_[1]; if ( !$path =~ /\.php$/i ) { print "Error: HTML page without a title: \"$path\"\n"; } $title = "No Title"; } return $title; } # relative path for HTML links sub makeRelPath { my ( $from, $to ) = @_; my @fromlist = split( "/", $from ); my @tolist = split( "/", $to ); # are the paths identical? if ( $from eq $to ) { $result = pop @tolist; } else { # drop path elements until unequal while ( $fromlist[0] eq $tolist[0] ) { shift @fromlist; shift @tolist; } # create relative path from -> to $result = "../" x $#fromlist; $result .= join( "/", @tolist ); } return $result; } # use ' members' and ' grouptitle' # (note leading spaces) # to assure that these items appear # before the subgroup list sub create_siteindex { my @fileList = @{ $_[0] }; for my $file (@fileList) { my $data = readFile($file); my $groupName = readMetaTag( \$data, "group" ); my $groupTitle = readMetaTag( \$data, "groupTitle" ); my $pageTitle = readPageTitle( \$data, $file ); if ( length($groupName) > 0 ) { $hasgroup++; if ( $groupName eq "NOSHOW" ) { # exclude these pages $noshow++; } else { my $tree = $_[1]; my @items = split( "/", $groupName ); for my $item (@items) { $tree->{$item} = {} unless defined $tree->{$item}; $tree = $tree->{$item}; } $tree->{' members'}{$pageTitle} = $file; if ( length($groupTitle) > 0 ) { $tree->{' grouptitle'} = $groupTitle; } } } $total++; } } # just for debugging sub debug_dump_hash { my ( $treeRef, $path ) = @_; for my $key ( sort keys %{$treeRef} ) { my $value = $treeRef->{$key}; if ( ref($value) eq "HASH" ) { debug_dump_hash( \%{$value}, "$path|$key" ); } else { print "$path|$key = $value\n"; } } } sub makeSiteIndexBlock { my ( $parentRef, $keysetRef, $treeRef, $i, $sourcePath ) = @_; my $result = $startBlockTag . "\n" . $idTag . "\n<table><tr>"; for ( $j = 1 ; $j <= $i ; $j++ ) { my $destPath = $pagePaths[$j]; my $destName = $groupTitles[$j]; $result .= "<td><a href=\"" . makeRelPath( $sourcePath, $destPath ) . "\">" . $destName . "</a> | </td>"; } $result .= "<td>\n" . $scriptTag; $result .= "<select onChange=\"toNewPage(this)\" title=\"Open this list to choose a page\">\n"; # make a list of members of this grup # and the first members of any sibling groups my %optionHash = (); # look for members of this group # $key = page title, $value = page path for my $key ( @{$keysetRef} ) { my $value = $treeRef->{$key}; $optionHash{$key} = $value; } # now look for sibling groups and # locate their first member pages for my $key ( %{$parentRef} ) { my $value = $parentRef->{$key}; if ( ref($value) eq "HASH" ) { if ( $key ne ' members' && $key ne ' grouptitle' ) { $groupTitle = $value->{' grouptitle'}; my $memlist = $value->{' members'}; my @keyset = ( sort keys %{$memlist} ); my $firstkey = $keyset[0]; if ( !$firstkey || !defined $memlist->{$firstkey} ) { print "Fatal Error: no index page for group $key.\n"; print "Be sure there is at least one page\n"; print "for each named group. Quitting now.\n"; exit 1; } my $firstvalue = $memlist->{$firstkey}; $optionHash{$groupTitle} = $firstvalue; } } } # now compile the option drop-down list $selIndex = 0; $k = 0; for my $key ( sort keys %{optionHash} ) { my $value = $optionHash{$key}; my $relPath = makeRelPath( $sourcePath, $value ); $sel = ""; if ( $value eq $sourcePath ) { $sel = " selected"; $selIndex = $k; } $result .= "<option value=\"" . $relPath . "\"" . $sel . ">" . $key . "\n"; $k++; } $result .= "</select>\n</td>"; # now rescan the option array # to create the arrowed items $k = 0; for my $key ( sort keys %{optionHash} ) { my $value = $optionHash{$key}; my $relPath = makeRelPath( $sourcePath, $value ); if ( $k == $selIndex - 1 ) { # left arrow $result .= "<td><a href=\"" . $relPath . "\" title=\"Click for prior page\">"; $result .= "<img src=\"" . makeRelPath( $sourcePath, $leftArrowPath ) . "\" border=\"0\"></a></td>"; } elsif ( $k == $selIndex + 1 ) { # right arrow $result .= "<td><a href=\"" . $relPath . "\" title=\"Click for next page\">"; $result .= "<img src=\"" . makeRelPath( $sourcePath, $rightArrowPath ) . "\" border=\"0\"></a></td>"; } $k++; } $result .= "</tr></table>\n" . $endBlockTag; } # process all the member pages of a named group sub process_group { my ( $parentRef, $keysetRef, $treeRef, $i ) = @_; my $groupPath = ""; my $pagePath = ""; for ( $j = 1 ; $j <= $i ; $j++ ) { $groupPath .= "$groupTitles[$j]|"; $pagePath .= "$pagePaths[$j]|"; } # $key = page title, $value = page path for my $key ( @{$keysetRef} ) { my $value = $treeRef->{$key}; print INDEXFILE "$groupPath$key\t$value\n"; my $pageData = readFile($value); $changed = 0; if ($zap) { # if only remove index content if ( $pageData =~ /$startBlockTag/ ) { $pageData =~ s/($startBlockTag.*?$endBlockTag\s*)//isg; $changed = 1; } } else { # the site index block is unique to each page # because it uses relative page addressing # so it must be regenerated for each page my $topIndexBlock = makeSiteIndexBlock( $parentRef, $keysetRef, $treeRef, $i, $value ); my $bottomIndexBlock = $topIndexBlock; # the bottom index doesn't need the Javascript code $bottomIndexBlock =~ s/<script.*?<\/script>\n//sg; # get the original blocks from the page data my ( $originalTop, $originalBottom ) = $pageData =~ /($startBlockTag.*?$endBlockTag)/isg; # test: have there been any changes? # if not, there's no need to rewrite this page $changed = ( !$originalTop || !$originalBottom || $topIndexBlock ne $originalTop || $bottomIndexBlock ne $originalBottom ) ? 1 : 0; if ($changed) { # zap existing blocks if ( $pageData =~ /$startBlockTag/ ) { $pageData =~ s/($startBlockTag.*?$endBlockTag\s*)//isg; } # put in new blocks $pageData =~ s/(<body.*?>)(\s*)/$1\n$topIndexBlock\n/is; $pageData =~ s/(.*?)(\s*)(<\/body.*?>)/$1\n$bottomIndexBlock\n$3/is; } } if ($changed) { writeFile( $value, $pageData ); $changedPages++; } } } sub process_tree { my ( $treeRef, $i ) = @_; for my $key ( sort keys %{$treeRef} ) { my $value = $treeRef->{$key}; if ( ref($value) eq "HASH" ) { if ( $key eq " members" ) { # it's a hash of member pages @keyset = ( sort keys %{$value} ); my $firstkey = $keyset[0]; my $firstvalue = $value->{$firstkey}; $pageTitles[$i] = $firstkey; $pagePaths[$i] = $firstvalue; process_group( $treeRef, \@keyset, $value, $i ); } else { # it's a subgroup hash $groupNames[$i] = $key; process_tree( \%{$value}, $i + 1 ); } } else { # it contains the group's title if ( $key eq " grouptitle" ) { $groupTitles[$i] = $value; } } } } # the zap option removes all this script's # content from the page set $zap = ( $ARGV[0] && $ARGV[0] eq "-z" ) ? 1 : 0; @fileList = (); find( { wanted => \&processDir, no_chdir => 1 }, $source ); %tree = (); $hasgroup = 0; $total = 0; $noshow = 0; $changedPages = 0; create_siteindex( \@fileList, \%tree ); @groupTitles = (); @groupNames = (); @pageTitles = (); @pagePaths = (); # debug_dump_hash(\%tree,""); open( INDEXFILE, ">$plainTextIndexPath" ); process_tree( \%tree, 0 ); close INDEXFILE; print "total pages = $total, changed = $changedPages, noshow = $noshow\n"; # use this for "if ./sitegen.pl; then do something; fi # when pages are changed exit( $changedPages == 0 );