#!/usr/bin/perl
#
# secnum	This script adds section numbering to headings in HTML files.
#		Section numbers and depth are increased based on the heading
#		tags and heading depth.
#
# Revision History
#	1.0	Initial revision.					150113
#	1.1	Added the -daemon option.				150117
#	1.2	Added the -depth, -start, and -toc options.		150125
#	1.3	Run the process as a daemon if -daemon is given.	150325
#	1.4	Added licensing info.					180531
#
# Future Plans:
#	New options:
#		-permissive	allow
#
#				foo
#
#				NOTE(review): the markup that surrounded "foo"
#				in this note has been lost; presumably it showed
#				a heading split across several lines -- confirm.
#
#		-noimplicit	I don't remember what this should do
#
#	Written by Wayne Morrison, 150125.
#
# Copyright 2015 Wayne Morrison
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#	http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

use strict;
use warnings;

use Getopt::Long qw(:config no_ignore_case_always);
use POSIX qw(setsid);

#
# Version information.
#
my $NAME = "secnum";
my $VERS = "$NAME version: 1.4";

############################################################################
#
# Options fields.
#

my %opts = ();					# Options.

#
# Command line arguments.
#
my @opts =
(
	'daemon:i',				# Daemon mode.
	'depth=i',				# Maximum section depth.
	'overwrite',				# Overwrite the output file.
	'section=i',				# Heading/section map.
	'start=i',				# Starting section number.
	'toc',					# Give a table of contents.

	'verbose',				# Give verbose output.
	'help',					# Give a help message.
	'Version',				# Display the program version.
);

my $verbose   = 0;				# Verbose flag.
my $overwrite = 0;				# Overwrite flag.
my $daemon    = 0;				# Daemon-mode flag (sleep time).
my $depth;					# Maximum depth of sections.
my $section;					# Heading number start.
my $start;					# Section numbering start.
my $toc	      = 0;				# Table-of-contents flag.

my $infile  = '';				# Input HTML file.
my $outfile = '';				# Output HTML file.

############################################################################

my $SUFFIX    = '-numbered';		# Suffix for output HTML files.
my $TOCSUFFIX = '.toc';			# Suffix for output TOC files.

#
# Indices into @tags.
#
my $SECTION  = 0;			# Section	    -- 1
my $SUB1SECT = 1;			# Subsection	    -- 1.3
my $SUB2SECT = 2;			# Subsubsection	    -- 1.3.2
my $SUB3SECT = 3;			# Sub 3 section	    -- 1.3.2.3
my $SUB4SECT = 4;			# Sub 4 section	    -- 1.3.2.1.5
my $SUB5SECT = 5;			# Sub 5 section	    -- 1.3.2.1.5.1

my @tags = ();				# Tags for numbering.

my $DEFAULT_SECTION = 2;		# Default section tag.  (h2)

my $NAPTIME  = 10;			# Time to sleep in daemon mode.
my $MINSLEEP = 10;			# Minimum sleep time.

main();
exit(0);

#-----------------------------------------------------------------------------
# Routine:	main()
#
# Purpose:	Parse the command line, optionally become a daemon, and then
#		number the input file.  In daemon mode the file is renumbered
#		each time its modification time advances; otherwise it is
#		numbered once.
#
sub main
{
	$| = 1;

	#
	# Munch on the options and arguments.
	#
	optsandargs();

	#
	# Maybe create a daemon process to run forever.
	#
	daemonize() if($daemon);

	#
	# Map the tags to their proper sections.
	#
	maptags();

	#
	# Add section numbers to heading-tag text.
	#
	while(42)
	{
		my @stats;			# Input-file statistics.

		#
		# Get the last modification time of the input file.  This
		# is taken before numbering so a change made while we are
		# working is noticed on the next pass.
		#
		@stats = stat($infile);

		#
		# Add section numbers to the input file.
		#
		addnumbering();

		#
		# Stop running if we aren't in daemon mode.
		#
		last if(! $daemon);

		#
		# Go to sleep for a bit.
		#
		sleepytime($stats[9]);
	}
}

#----------------------------------------------------------------------
# Routine:	optsandargs()
#
# Purpose:	Parse the command line for options and arguments.
#		Sets the option globals ($verbose, $overwrite, $toc, $daemon,
#		$depth, $start, $section) and the file names ($infile,
#		$outfile).  Exits with status 1 on any invalid value or
#		option combination.
#
sub optsandargs
{
	#
	# Parse the options.  A bad option is a usage error.
	#
	GetOptions(\%opts,@opts) || usage(1);

	#
	# Check for some immediate-action options.
	#
	usage()	  if(defined($opts{'help'}));
	version() if(defined($opts{'Version'}));

	$verbose   = $opts{'verbose'}	? 1 : 0;
	$overwrite = $opts{'overwrite'}	? 1 : 0;
	$toc	   = $opts{'toc'}	? 1 : 0;

	#
	# Set the daemon's sleep time.  If it wasn't specified, then we'll
	# leave it as zero (daemon mode off.)  If the option was specified
	# without a sleep time, we'll use the default.  If it was specified
	# with a sleep time, we'll use it.
	#
	$daemon = 0;
	if(defined($opts{'daemon'}))
	{
		$daemon = ($opts{'daemon'} == 0) ? $NAPTIME : $opts{'daemon'};

		if($daemon < $MINSLEEP)
		{
			print STDERR "daemon sleep time cannot be less than $MINSLEEP\n";
			exit(1);
		}
	}

	#
	# Set the depth of the headers given.  If it wasn't specified, then
	# we'll go up to the maximum (depending on the section value.)
	#
	$depth = (defined($opts{'depth'})) ? $opts{'depth'} : 6;
	if(($depth < 1) || ($depth > 6))
	{
		print STDERR "maximum depth must be between 1 and 6\n";
		exit(1);
	}

	#
	# Set the starting section number.
	#
	$start = (defined($opts{'start'})) ? $opts{'start'} : 1;
	if($start < 1)
	{
		print STDERR "starting section number must be greater than 0\n";
		exit(1);
	}

	#
	# Due to numbering logic in addnumbering(), we must subtract 1
	# from the starting number.
	#
	$start--;

	#
	# Get the heading number to be used for sections.
	#
	$section = defined($opts{'section'}) ? $opts{'section'} : $DEFAULT_SECTION;
	if($section < 1)
	{
		print STDERR "heading tag number must be a positive number\n";
		exit(1);
	}

	#
	# Get the names of the input and output files.
	#
	if(@ARGV == 1)
	{
		$infile	 = $ARGV[0];
		$outfile = $infile;

		#
		# Add the suffix for an HTML or TOC file.
		#
		if($toc)
		{
			$outfile .= $TOCSUFFIX;
		}
		elsif($outfile !~ s/(\.html?)$/$SUFFIX$1/i)
		{
			#
			# No .htm/.html extension to insert the suffix in
			# front of, so the suffix goes on the end.
			#
			$outfile .= $SUFFIX;
		}
	}
	elsif(@ARGV == 2)
	{
		$infile	 = $ARGV[0];
		$outfile = $ARGV[1];
	}
	else
	{
		usage(1);
	}

	#
	# Only allow the input and output files to be the same if the
	# overwrite flag was also given.
	#
	if($infile eq $outfile)
	{
		if(! $overwrite)
		{
			print STDERR "input and output files are the same\n";
			print STDERR "cannot overwrite the input file without the -overwrite flag\n";
			exit(1);
		}

		print "overwriting the input file\n" if($verbose);
	}

	#
	# Ensure -daemon mode isn't being used with overwriting.
	#
	if($daemon && ($infile eq $outfile))
	{
		print STDERR "daemon mode may only be used when the input and output filenames differ\n";
		exit(1);
	}

	#
	# Check for option incompatibilities
	#
	if($daemon && $overwrite)
	{
		print STDERR "-daemon may not be used with -overwrite\n";
		exit(1);
	}
	if($toc && $overwrite)
	{
		print STDERR "-toc may not be used with -overwrite\n";
		exit(1);
	}
}

#----------------------------------------------------------------------
# Routine:	daemonize()
#
# Purpose:	Create a daemon process that'll keep running.
#		The parent exits with status 0; the child starts a new
#		session and returns to the caller.  Exits with status 1 if
#		the child cannot be created.
#
sub daemonize
{
	my $cpid;					# Child process id.

	#
	# Create the child process that we will become.
	#
	$cpid = fork();

	#
	# fork() returns undef (not a negative number) on failure.
	# There are much greater problems if this ever happens.
	#
	if(! defined($cpid))
	{
		print STDERR "unable to create child process:  $!\n";
		exit(1);
	}

	#
	# The parent process will exit.
	#
	exit(0) if($cpid > 0);

	#
	# Create the new session for this process.
	#
	POSIX::setsid();
}

#----------------------------------------------------------------------
# Routine:	maptags()
#
# Purpose:	Map the heading tags to the appropriate sections.
#		$tags[N] holds the heading-tag number used for numbering
#		level N, starting from the -section heading number.
#
sub maptags
{
	for my $level ($SECTION .. $SUB5SECT)
	{
		$tags[$level] = $section + $level;
	}
}

#----------------------------------------------------------------------
# Routine:	addnumbering()
#
# Purpose:	The contents of an HTML file are scanned and section numbers
#		are added to the heading tags.
#		Lines without headings are only added to the output array.
#		Lines with headings have the appropriate section numbers
#		added at the start of the heading text.
#		After going through the whole file, the modified contents
#		are written to the output file (or, with -toc, the numbered
#		headings are written as a table of contents.)
#
#		Nothing is written if the input file can't be read or if
#		any mismatched heading tags are found; the problems are
#		reported on STDERR.
#
sub addnumbering
{
	my @lines    = ();	# Lines from input HTML file.
	my @outlines = ();	# Output lines with numbered HTML headings.
	my @toclines = ();	# Lines for table of contents.
	my @errors   = ();	# Error lines.

	my $html;		# Input file handle.

	#
	# Counters for each numbering level, indexed like @tags.
	# The section counter starts one below the first section number.
	#
	my @counts = ($start, 0, 0, 0, 0, 0);

	print "renumbering $infile\n" if($verbose);

	#
	# Get the contents of the input file.
	#
	if(! open($html, '<', $infile))
	{
		print STDERR "unable to read \"$infile\":  $!\n";
		return;
	}
	@lines = <$html>;
	close($html);

	#
	# Go through the file contents and add section numbers to the heading
	# tags.  Lines without headings are only added to the output array.
	# Lines with headings have the appropriate section numbers added at
	# the start of the heading text.
	#
	for(my $ind=0; $ind < @lines; $ind++)
	{
		my $ln = $lines[$ind];		# Line from file.
		my $hopen;			# Heading opener tag number.
		my $hclose;			# Heading closer tag number.
		my $level;			# Numbering level of heading.
		my $sectstr;			# Section-number string.

		#
		# No heading, so we'll save the line and go to the next.
		#
		if($ln !~ /<h[123456]>/i)
		{
			push @outlines, $ln;
			next;
		}

		#
		# Get the heading tags from the line.  If the closing tag
		# isn't on this line, the line is saved as it is.
		#
		if($ln !~ /<h([123456])>.*<\/h([123456])>/i)
		{
			push @outlines, $ln;
			next;
		}
		$hopen	= $1;
		$hclose = $2;

		#
		# If the tag fields don't match, we'll save the problem.
		# Line numbers in the message start at 1.
		#
		if($hopen != $hclose)
		{
			my $errln = $ln;

			chomp($errln);
			push @errors, "heading-tag mismatch on line " . ($ind + 1) . ":  $errln";
			push @outlines, $ln;
			next;
		}

		#
		# If this heading number is not in the range of the numbered
		# sections, or it is deeper than the -depth limit, we'll save
		# the line unchanged and go to the next.
		#
		$level = $hopen - $tags[$SECTION];
		if(($level < $SECTION) || ($level > $SUB5SECT) || ($level >= $depth))
		{
			push @outlines, $ln;
			next;
		}

		#
		# Skipped heading levels are implied:  any ancestor level
		# that hasn't been seen yet is bumped to its first number.
		#
		if($level > $SECTION)
		{
			$counts[$SECTION]++ if($counts[$SECTION] == $start);

			for my $anc ($SUB1SECT .. ($level - 1))
			{
				$counts[$anc]++ if($counts[$anc] == 0);
			}
		}

		#
		# Count this heading and restart numbering of everything
		# below it.
		#
		$counts[$level]++;
		for my $sub (($level + 1) .. $SUB5SECT)
		{
			$counts[$sub] = 0;
		}

		$sectstr = join('.', @counts[$SECTION .. $level]);

		#
		# Add the section-number string back to the line, and put
		# it immediately after the heading-opener tag.
		# The modified line is then added to the end of the output
		# array.
		#
		$ln =~ s/(<h[123456]>)/$1$sectstr /i;
		push @outlines, $ln;

		#
		# Add the numbered heading to the list of TOC lines.
		#
		push @toclines, $ln if($toc);
	}

	#
	# If we hit any errors, we'll print them now and return.
	# We will not save the numbered output file.
	#
	if(@errors > 0)
	{
		for my $err (@errors)
		{
			print STDERR "$err\n";
		}

		return;
	}

	#
	# Create and write the output file.
	#
	if($toc)
	{
		#
		# Print a table of contents.
		#
		tocker(@toclines);
	}
	else
	{
		my $out;			# Output file handle.

		if(! open($out, '>', $outfile))
		{
			print STDERR "unable to write \"$outfile\":  $!\n";
			return;
		}

		print $out @outlines;

		#
		# Buffered write errors only show up at close time.
		#
		if(! close($out))
		{
			print STDERR "unable to write \"$outfile\":  $!\n";
		}
	}
}

#----------------------------------------------------------------------
# Routine:	sleepytime()
#
# Purpose:	Run a rudimentary daemon loop.  We'll sleep for a bit,
#		then we'll see if the input file has been modified recently.
#		If so, we'll return and let the magic happen once more.
#
#		The argument is the input file's last known modification
#		time.  The sleep time is the -daemon sleep time.
#
sub sleepytime
{
	my $lasttime = shift;		# Last modification of input file.
	my $nap;			# Time to sleep between checks.

	$lasttime = 0 if(! defined($lasttime));
	$nap = ($daemon > 0) ? $daemon : $NAPTIME;

	while(42)
	{
		my $mtime;		# Modification time of input file.
		my @stats;		# Input-file statistics.

		sleep($nap);

		#
		# The file may be missing for a moment while it is being
		# saved; we'll just look again after the next nap.
		#
		@stats = stat($infile);
		$mtime = $stats[9];
		next if(! defined($mtime));

		last if($mtime > $lasttime);
	}
}

#----------------------------------------------------------------------
# Routine:	tocker()
#
# Purpose:	Print a table of contents for the renumbered lines.  This
#		shows the text for the file's heading tags.  addnumbering()
#		only saved the headings that were given section numbers,
#		so we don't have to handle that here.
#
sub tocker
{
	my @toclines = @_;		# Lines for table of contents.
	my $tocfh;			# TOC file handle.

	#
	# We won't keep on here if we're not to do a table of contents.
	#
	return if($toc == 0);

	if(! open($tocfh, '>', $outfile))
	{
		print STDERR "unable to write \"$outfile\":  $!\n";
		return;
	}

	#
	# Go through the TOC lines and display the heading text.
	#
	for my $ln (@toclines)
	{
		#
		# Go to the next line if there's no heading.
		#
		next if($ln !~ /<h[123456]>(.*)<\/h[123456]>/i);

		print $tocfh "$1\n";
	}

	if(! close($tocfh))
	{
		print STDERR "unable to write \"$outfile\":  $!\n";
	}
}

#----------------------------------------------------------------------
# Routine:	version()
#
# Purpose:	Print the version number(s) and exit.
#
sub version
{
	print STDERR "$VERS\n";
	exit(0);
}

#----------------------------------------------------------------------
# Routine:	usage()
#
# Purpose:	Give usage message and exit.
#		An optional argument gives the exit status; it defaults
#		to 0 (used for -help.)
#
sub usage
{
	my $rc = shift;				# Exit status.

	$rc = 0 if(! defined($rc));

	print STDERR "usage:  secnum [options] <infile> [outfile]\n";
	print STDERR "\n";
	print STDERR "\twhere [options] are:\n";
	print STDERR "\t\t-daemon [sleep-time]\n";
	print STDERR "\t\t-depth max-depth\n";
	print STDERR "\t\t-overwrite\n";
	print STDERR "\t\t-section heading-number\n";
	print STDERR "\t\t-start section-number\n";
	print STDERR "\t\t-toc\n";
	print STDERR "\n";
	print STDERR "\t\t-verbose\n";
	print STDERR "\t\t-Version\n";
	print STDERR "\t\t-help\n";

	exit($rc);
}

1;

##############################################################################

=pod

=head1 NAME

B<secnum> - Section numbering to headings in HTML files

=head1 SYNOPSIS

  secnum [options] <infile> [outfile]

=head1 DESCRIPTION

B<secnum> adds section numbering to HTML files.  Section numbers are added
to existing heading tags.  The numbers are calculated automatically based on
the type of heading tag and the number of each type of heading already seen.
No additional options or tags are required for B<secnum> to work.

Section numbers will be added to the headings of a specified HTML file, and
the whole file (with numbered sections) written to a new file.  If given the
I<-toc> option, B<secnum> will create a table of contents, rather than a
numbered HTML file.

B<secnum> is a preprocessor.  It must be run on the HTML file prior to making
the HTML available for display.

By default, numbering starts with the E<lt>H2E<gt> tag and goes through
E<lt>H6E<gt>.  No numbering is added for the E<lt>H1E<gt> tag.  The
I<-section> option allows section numbering to start with heading tag
E<lt>H1E<gt>, E<lt>H4E<gt>, or whatever heading tag the user wants.

If any heading levels are skipped, then B<secnum> assumes that the skipped
heading levels should be implied.  For example, take the case of the next
heading after an E<lt>H2E<gt> heading being an E<lt>H4E<gt> heading.  The
missing E<lt>H3E<gt> heading may be handled explicitly or implicitly.
The explicit handling would skip the numbering, giving a section number
like "1.0.1".  Implicit handling for the missing heading level would result
in a section number like "1.1.1".  B<secnum> provides implicit handling for
missing heading levels.  If the missing heading is later inserted, then all
will be well, since B<secnum> functions as a pre-processor and will account
for the new header.

Given this set of heading tags:

    <h1>A Document</h1>
    <h2>A Section</h2>
    <h3>A Subsection</h3>
    <h4>A Subsubsection</h4>
    <h4>Another Subsubsection</h4>
    <h3>Another Subsection</h3>
    <h4>And Still Another Subsubsection</h4>
    <h6>An Ill-advised, Skipped Sub6section</h6>
    <h2>Another Section</h2>

B<secnum> will rewrite the heading text to be:

    <h1>A Document</h1>
    <h2>1 A Section</h2>
    <h3>1.1 A Subsection</h3>
    <h4>1.1.1 A Subsubsection</h4>
    <h4>1.1.2 Another Subsubsection</h4>
    <h3>1.2 Another Subsection</h3>
    <h4>1.2.1 And Still Another Subsubsection</h4>
    <h6>1.2.1.1.1 An Ill-advised, Skipped Sub6section</h6>
    <h2>2 Another Section</h2>

The input file (I<infile>) must be specified.  The output file (I<outfile>)
is optional.  If the I<outfile> is not given, then the output filename is
constructed from the input filename.  If I<infile> ends in ".html", ".HTML",
".htm", or ".HTM", then the B<secnum> suffix is inserted before that dotted
file extension.  Otherwise, the B<secnum> suffix is appended to the filename.
The B<secnum> suffix is I<-numbered>.  The command "I<secnum example.html>"
will result in B<example-numbered.html> as the output file.

The input HTML file may be modified in place if the I<-overwrite> option is
given and an output file is not specified.  The command
"I<secnum -overwrite example.html>" will result in the section-numbered HTML
being written to B<example.html>.  This is irreversible and should be used
with caution.  Once a file is overwritten, the section numbers are embedded
in the original file.

B<secnum> can be run in a simple daemon mode.  This is useful while writing
new HTML content.  B<secnum> will run continuously and periodically check if
the specified file has been updated.  If so, B<secnum> will perform its
regular header numbering and write the modified HTML to the specified output
file.  This may B<only> be used when the input file and output files have
different names.

=head2 Why A Preprocessor?

HTML does not provide a nice, easy method for providing automatically
renumbered section headings.  A variety of methods may be used to
automatically add section numbers, each method with its own advantages and
disadvantages.

- CSS can automatically add section numbers, but the section numbers aren't
selectable in the browser, and skipped heading levels aren't handled nicely.

- Javascript and PHP can be added to handle automatic section numbers, but
it is likely to add complexity to each numbered heading in each file.
(The author is not a Javascript or PHP developer, so this assessment should
be taken with a grain of salt.)

- B<secnum> handles automatic section numbering, the resulting section
numbers are selectable in the browser, and it does "The Right Thing" for
skipped heading levels.  However, it is a pre-processor and this isn't
useful for live HTML files.
(The author uses B<secnum> during the writing of HTML-based documentation.
After the documentation is finished, the unnumbered HTML is saved for later
modification and the B<secnum>-modified HTML is the version available for
general use.)

Each method is useful in different situations.  Use the method that best
suits your own needs.

=head1 WARNINGS

Please be aware of the following when using B<secnum>:

=over 4

=item *

Care must be taken when using B<secnum> as it is a simple parser.  It only
looks for the presence of heading tags and it ignores all other parts of the
file contents.  B<secnum> does not ensure the heading tags are not in quotes
or comments.

=item *

B<secnum> assumes the opening and closing heading tags are on the same line.

=item *

B<secnum> assumes only one heading-tag group will be found on a line.

=item *

If errors are encountered, the output file will not be written.  Currently,
the only parsing error that B<secnum> recognizes is when it finds mismatched
heading tags (e.g., E<lt>H2E<gt> closed by E<lt>/H4E<gt>.)

=back

In time, B<secnum> may be modified so the warnings will no longer be of
concern.

=head1 OPTIONS

B<secnum> takes the following options:

=over 4

=item I<-daemon [sleeptime]>

Run B<secnum> in a simple daemon mode.  This means that B<secnum> will run
continuously in a background process and watch the specified file to see
when it has been updated.  At that point, it will perform its regular header
numbering.  This is useful when an HTML file is in active development.

This may B<only> be used when the input file and output files have different
names.

The I<sleeptime> value is optional.  If it isn't specified, then the default
sleep time (10 seconds) will be used.  If it is specified, then that will be
used for the sleep time.

I<-daemon> may not be used with the I<-overwrite> option.

=item I<-depth max-depth>

Specify the maximum depth of the heading section numbers.  This is B<not>
the maximum heading tag number, but the maximum numbering depth.  If
I<-depth 3> is specified, then a section number with three places may be
given; a section number of four, five, or six places will not be given.
For example, I<-depth 3> may give 1.3.2, but it will not give 1.3.2.1.

The I<max-depth> may be from 1 to 6; the default is 6.

=item I<-overwrite>

Overwrite the input file with the section-numbered file contents.  This is
irreversible and should be used with caution.

I<-overwrite> may not be used with the I<-daemon> or I<-toc> options.

=item I<-section heading-number>

Heading tag number at which to start section numbering.  The default is to
start with E<lt>H2E<gt>.  This option allows section numbering to start with
heading tag E<lt>H1E<gt>, E<lt>H4E<gt>, or whatever heading tag the user
wants.

The I<heading-number> must be numeric.

=item I<-start section-number>

Set the section number for the first section in the HTML file.  This allows
multiple HTML files to be used rather than a single, monolithic file.

Currently, this is the initial section number only.  Starting subsections
are not modified by this option.  This may change in the future.

=item I<-toc>

This flag tells B<secnum> to produce a table of contents of the given HTML
file's heading tags with section numbers.  A table of contents (TOC) file is
created, but the renumbered HTML file is not created.

The heading text is given exactly as it is found in the input file (with the
inclusion of section numbering.)  Any HTML -- links, bold tags, special
characters, etc. -- are left as is and are not cleaned up for display.

I<-toc> may not be used with the I<-overwrite> option.

=item I<-verbose>

This option turns on verbose output.

=item I<-Version>

Display the version information for B<secnum>.

=item I<-help>

Display a help message.

=back

=head1 AUTHOR

Wayne Morrison, wayne@waynemorrison.com

=head1 LICENSE

Copyright 2015 Wayne Morrison

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

=cut