#!/usr/bin/perl
#
# spell-squirrel
#       This script is used in conjunction with the aspell command.
#       It allows a file-specific word list to be defined that will
#       be considered to be spelled correctly.
#
#       The command is used in this way:
#
#               spell-squirrel [options] <file>
#
# Revision History
#       1.0     Initial revision.                               120414
#       2.0     Added indexing of filenames.                    130405
#               Added -create option.
#               Added -defedit option.
#               Added -edit option.
#               Added -listdicts option.
#               Added -update option.
#       2.1     Added licensing info.                           180531
#
# Things to add:
#       - -verify option:       check index against dictionaries        not now
#       - -dictfile option:     additional (eg, project) dict;          not now
#       - -groupdict option:    files associated with one dict;         not now
#       - -deletedict option                                            not now
#
# Copyright 2012 Wayne Morrison
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

use strict;
use warnings;

use Cwd 'abs_path';
use File::Spec;
use Getopt::Long qw(:config no_ignore_case_always);
use POSIX;

#
# Version information.
#
my $NAME = "spell-squirrel";
my $VERS = "$NAME version: 2.1";

############################################################################
#
# Options fields.
#

my %opts = ();                          # Options.

#
# Command line arguments.
#
# To be implemented:
#       'dictfile=s',
#
my @opts =
(
    'create',                           # Create a new dictionary.
    'defedit',                          # Edit the default dictionary.
    'edit',                             # Edit the file-specific dictionary.
    'nodefault',                        # Don't use default dictionary.
    'listdicts',                        # Show dictionaries that would be used.
    'update',                           # Update dictionary with new entries.

    'verbose',                          # Give verbose output.
    'help',                             # Give a help message
    'Version',                          # Display the program version.
);

my $creator   = 0;                      # Create-new-dictionary flag.
my $defeditor = 0;                      # Edit-default-dictionary flag.
my $editor    = 0;                      # Edit-dictionary flag.
my $lister    = 0;                      # Show-dictionaries flag.
my $nodefault = 0;                      # No-default flag.
my $updater   = 0;                      # Update-dictionary flag.
my $verbose   = 0;                      # Verbose flag.

############################################################################

my $CAT   = "/bin/cat";
my $EGREP = "/usr/bin/egrep";
my $SPELL = "/opt/local/bin/aspell list";

my $DEF_EDITOR = '/usr/bin/vi';

my $MAXINDEX = 999999;                  # Largest six-digit dictionary index.

my $spelldir  = ".spell-squirrel";          # Directory of word files.
my $default   = "spell-squirrel-default";   # Default word file.
my $fileindex = "spell-squirrel-index";     # Index file.

my $fnindex   = '';             # spell-squirrel's filename index.
my $checkfile = '';             # Name of file to be spelling-checked.
my $filedict  = '';             # Name of file's dictionary.
my $tmpsp     = '';             # Name of temporary word file.
my $newdict   = '';             # Name of new dictionary for file.

############################################################################
#
# Do our work.
#

my $status;

$status = main();
exit($status);

#----------------------------------------------------------------------
# Routine:      main()
#
# Purpose:      Drive a complete run:  parse the command line, set up
#               the dictionary paths, build the temporary dictionary,
#               run the spelling check, and create or update the
#               file's dictionary when asked to.
#
#               Returns 0.  Several of the routines called from here
#               exit directly instead of returning.
#
sub main
{
    my $spdir;                          # Directory for spelling files.

    $| = 1;

    #
    # Munch on the options and arguments.
    #
    optsandargs();

    #
    # Build the directories and paths we'll need.
    #
    $spdir = buildpaths();

    #
    # Build the word file to use.
    #
    buildspell($spdir,$checkfile);

    #
    # Display the dictionary paths.
    #
    showpaths();

    #
    # Run the spelling check.
    #
    system(spellcmd());

    #
    # Create a new dictionary file, if needed.
    #
    createdict();

    #
    # Clean up.
    #
    unlink($tmpsp);

    return(0);
}

#----------------------------------------------------------------------
# Routine:      optsandargs()
#
# Purpose:      Parse the command line for options and arguments.
#               The option flags and $checkfile are set from the
#               command line.  Exits with status 7 if more than one
#               of -create, -defedit, -edit, and -update was given.
#
sub optsandargs
{
    my @exclusive;                      # Mutually exclusive options given.

    #
    # Parse the options.
    #
    GetOptions(\%opts,@opts) || usage();

    #
    # Check for some immediate-action options.
    #
    usage()   if(defined($opts{'help'}));
    version() if(defined($opts{'Version'}));

    #
    # Set up flags for the options we were given.
    #
    $creator   = $opts{'create'};
    $defeditor = $opts{'defedit'};
    $editor    = $opts{'edit'};
    $lister    = $opts{'listdicts'};
    $nodefault = $opts{'nodefault'};
    $updater   = $opts{'update'};
    $verbose   = $opts{'verbose'};

    #
    # Ensure that mutually exclusive options weren't given.  The
    # options are examined in a fixed order, so the first conflicting
    # pair found is the one reported.
    #
    @exclusive = grep { $opts{$_} } qw(create defedit edit update);
    if(@exclusive > 1)
    {
        print STDERR "-$exclusive[0] and -$exclusive[1] are mutually exclusive\n";
        exit(7);
    }

    #
    # Check for a file to check.  -defedit doesn't need one.
    #
    return if($defeditor);
    usage() if(@ARGV == 0);
    $checkfile = $ARGV[0];
    $checkfile =~ s/\/*$//;
}

#----------------------------------------------------------------------
# Routine:      buildpaths()
#
# Purpose:      Build the spelling directory and default-file names.
#               The directory, the default dictionary, and the index
#               file are created if they don't exist yet.
#
#               Returns the path of the spelling directory.
#               Exits with status 1 if the directory can't be created.
#
sub buildpaths
{
    my $home;                           # User's home directory.
    my $dir;                            # Spelling directory.

    #
    # Get our file directory.
    #
    ($home) = glob("~");
    $dir = "$home/$spelldir";

    #
    # Get our default dictionary and index files.
    #
    $default = "$dir/$default";
    $fnindex = "$dir/$fileindex";

    #
    # Create the file directory and the basic files if they don't exist.
    #
    if(! -e $dir)
    {
        print "creating new spelling-squirrel directory\n" if($verbose);
        if(! mkdir($dir))
        {
            print STDERR "unable to create $dir:  $!\n";
            exit(1);
        }
    }
    if(! -e $default)
    {
        print "creating new spelling-squirrel default dictionary\n" if($verbose);
        touchfile($default);
    }
    if(! -e $fnindex)
    {
        print "creating new spelling-squirrel index file\n" if($verbose);
        touchfile($fnindex);
    }

    #
    # Edit the default dictionary if -defedit was given.  This is done
    # after the directory has been created so the editor always has a
    # place to save the file.
    #
    editor($default,$defeditor);

    #
    # Get the file's dictionary.
    #
    getdictfile($dir);

    #
    # Get our temporary file.
    #
    $tmpsp = "$dir/tmp-$$";

    return($dir);
}

#----------------------------------------------------------------------
# Routine:      touchfile()
#
# Purpose:      Create an empty file if it doesn't exist.  Contents of
#               an existing file are left alone.
#               Exits with status 1 if the file can't be opened.
#
sub touchfile
{
    my $path = shift;                   # File to create.
    my $fh;                             # Handle for the new file.

    if(! open($fh,'>>',$path))
    {
        print STDERR "unable to create $path:  $!\n";
        exit(1);
    }
    close($fh);
}

#----------------------------------------------------------------------
# Routine:      spellcmd()
#
# Purpose:      Build the shell pipeline that lists the misspelled
#               words in $checkfile, leaving out any line matched by
#               a pattern in the temporary dictionary.
#
sub spellcmd
{
    return("$CAT \"$checkfile\" | $SPELL | $EGREP -v -f \"$tmpsp\" | sort -u");
}

#----------------------------------------------------------------------
# Routine:      getdictfile()
#
# Purpose:      Find the dictionary for the file being checked by
#               looking up the file's absolute path in the index.
#               $filedict is set to the dictionary's path, or to
#               /dev/null if the file has no dictionary.
#
#               If -edit was given and the file has a dictionary, the
#               editor is run on it.  If -create was given and the
#               file has no dictionary, $newdict is set to the name
#               of the dictionary to be created.
#
#               Index line format:
#                       dictfile.nnnnnn/ abspath
#
sub getdictfile
{
    my $spdir = shift;                  # Spelling directory.

    my $abs;                            # Absolute path to file.
    my @indexlines;                     # Contents of index file.
    my $checkfilenode;                  # Node of $checkfile.
    my $newind = -1;                    # Largest index in use.
    my $spind;                          # Index file handle.
    my $dh;                             # Spelling directory handle.

    #
    # Get the absolute path of the file to check.
    #
    $abs = abs_path($checkfile);

    #
    # Read our index file.
    #
    if(! open($spind,'<',$fnindex))
    {
        print STDERR "unable to open index file $fnindex:  $!\n";
        exit(1);
    }
    @indexlines = <$spind>;
    close($spind);

    foreach my $line (@indexlines)
    {
        my $dictfn;                     # Dictionary from index.
        my $fnpath;                     # Pathname from index.

        #
        # Get the pieces of the index line.  Dictionary names never
        # contain a slash, so the first slash-space pair is the
        # separator.  Malformed lines are skipped.
        #
        next if($line !~ /^([^\/]+)\/ (.*)$/);
        $dictfn = $1;
        $fnpath = $2;

        #
        # If the index entry's path matches the path we're looking
        # for, then we'll use the dictionary file from this entry.
        # If -edit was given, we'll edit the existing dictionary.
        #
        # An existing dictionary is not deleted here for -create.
        # buildspell() leaves it out of the temporary dictionary and
        # createdict() truncates it, so the old contents survive if
        # we exit before the new dictionary is written.
        #
        if(defined($abs) && ($abs eq $fnpath))
        {
            $filedict = "$spdir/$dictfn";

            editor($filedict,$editor);
            return;
        }
    }

    #
    # We won't use a file-specific dictionary if this file doesn't have
    # one already.
    #
    $filedict = '/dev/null';

    #
    # Return if we aren't to create a new one.
    #
    return if(! $creator);

    #
    # Get the node name of the file we're looking for.
    #
    $checkfilenode = $checkfile;
    $checkfilenode =~ s/^.*\///;

    #
    # Look for the dictionary file with the largest index number.
    # The node name is quoted so that it is matched literally.
    #
    if(! opendir($dh,$spdir))
    {
        print STDERR "unable to read $spdir:  $!\n";
        exit(1);
    }
    foreach my $fn (readdir($dh))
    {
        next if($fn !~ /^\Q$checkfilenode\E\.([0-9]{6})$/);
        $newind = $1 + 0 if($newind < $1);
    }
    closedir($dh);

    #
    # Ensure we haven't exceeded the range of name collisions.
    #
    if($newind >= $MAXINDEX)
    {
        print STDERR "new dictionary file will exceed name index\n";
        exit(3);
    }

    #
    # Build the new dictionary file name.
    #
    $newdict = sprintf("%s/%s.%06d", $spdir, $checkfilenode, $newind + 1);
}

#----------------------------------------------------------------------
# Routine:      buildspell()
#
# Purpose:      This routine builds the big temporary dictionary we'll
#               be using.  It holds the file's dictionary followed by
#               the default dictionary, without the default if
#               -nodefault was given.
#
#               The file's dictionary is left out when -create was
#               given, so that the replacement dictionary is built
#               from scratch.  Empty lines are dropped, since an empty
#               pattern would match every word.
#
#               The arguments passed by main() are not used.
#
sub buildspell
{
    my @files = ();                     # Word files.
    my $out;                            # Temporary dictionary handle.

    #
    # Don't actually build the temporary dictionary if the user
    # only wants paths shown.
    #
    return if($lister);

    #
    # Add the file's dictionary and the default dictionary to our
    # list of dictionaries.
    #
    push @files, $filedict if((! $creator) && (-r $filedict) && (-s $filedict));
    push @files, $default  if(! $nodefault);

    #
    # Concatenate all the dictionaries into a single file.  The file
    # is created even when there are no dictionaries to copy.
    #
    if(! open($out,'>',$tmpsp))
    {
        print STDERR "unable to create $tmpsp:  $!\n";
        exit(1);
    }
    foreach my $dict (@files)
    {
        my $in;                         # Dictionary handle.

        if(! open($in,'<',$dict))
        {
            print STDERR "unable to read dictionary $dict:  $!\n";
            next;
        }
        while(my $word = <$in>)
        {
            chomp($word);
            next if($word =~ /^\s*$/);
            print $out "$word\n";
        }
        close($in);
    }
    close($out);
}

#----------------------------------------------------------------------
# Routine:      createdict()
#
# Purpose:      Create a new dictionary for the specified file,
#               if -create was given.
#               Update an existing dictionary for the specified file,
#               if -update was given.
#
#               STDOUT is redirected to the dictionary and is not
#               restored, so this must be the last routine that
#               writes to STDOUT.
#
sub createdict
{
    my $abs;                            # Absolute path of file.
    my $overwrite = 0;                  # Overwrite flag for -create and existing dicts.
    my $mode;                           # Open mode for the dictionary.
    my $spind;                          # Index file handle.

    #
    # Return if -create or -update wasn't given.
    #
    return if((! $creator) && (! $updater));

    #
    # If we were given -create but the new dictionary name is null,
    # that means this file already has a dictionary.  We'll arrange
    # to zap the file, but won't add a new index entry.
    #
    if($newdict eq '')
    {
        $newdict = $filedict;
        $overwrite = 1;
    }

    #
    # -update needs an existing dictionary to append to.
    #
    if($newdict eq '/dev/null')
    {
        print STDERR "no file-specific dictionary exists for $checkfile\n";
        return;
    }

    #
    # Create the new dictionary.  (or...)  Open the existing dictionary.
    # Re-run the spelling check, but this time the misspelled words
    # will be either written to a new dictionary or appended to an
    # existing dictionary.
    #
    $mode = $creator ? '>' : '>>';
    if(! open(STDOUT,$mode,$newdict))
    {
        print STDERR "unable to open dictionary $newdict:  $!\n";
        unlink($tmpsp);
        exit(1);
    }
    system(spellcmd());

    #
    # Add the new dictionary to our index file.  This is not done
    # when updating an existing dictionary or when we're overwriting
    # an existing dictionary.
    #
    if($creator && (! $overwrite))
    {
        #
        # Get the node of the new dictionary.
        #
        $newdict =~ s/^.*\///;

        #
        # Get the absolute path of the file.
        #
        $abs = abs_path($checkfile);
        if(! defined($abs))
        {
            print STDERR "unable to find the absolute path of $checkfile\n";
            return;
        }

        #
        # Add the new dictionary to the index.
        #
        if(! open($spind,'>>',$fnindex))
        {
            print STDERR "unable to open index file $fnindex:  $!\n";
            return;
        }
        print $spind "$newdict/ $abs\n";
        close($spind);
    }
}

#----------------------------------------------------------------------
# Routine:      editor()
#
# Purpose:      This routine runs an editor on a dictionary if the
#               given flag is set.  It is used for both -edit and
#               -defedit.  The editor replaces this process, so
#               the routine only returns when the flag is not set.
#               Exits with status 7 if the editor can't be run.
#
sub editor
{
    my $editfile = shift;               # File to edit.
    my $editflag = shift;               # Should-we-edit? flag.

    my $editcmd = $DEF_EDITOR;          # Editor to use.

    return if(! $editflag);

    $editcmd = $ENV{'EDITOR'} if(defined($ENV{'EDITOR'}) && ($ENV{'EDITOR'} ne ''));

    #
    # The command goes through the shell so that an EDITOR value
    # holding arguments still works.
    #
    { exec("$editcmd \"$editfile\"") };

    print STDERR "unable to run $editcmd\n";
    exit(7);
}

#----------------------------------------------------------------------
# Routine:      showpaths()
#
# Purpose:      Display the dictionary paths if -verbose or -listdicts
#               was given.  Exits with status 0 after the display if
#               -listdicts was given.
#
sub showpaths
{
    return if((! $verbose) && (! $lister));

    print "default dictionary:   $default\n";

    if($filedict eq '/dev/null')
    {
        print "file's dictionary:    no file-specific dictionary exists\n";
    }
    else
    {
        print "file's dictionary:    $filedict\n";

        #
        # NOTE(review): the manual says -create replaces an existing
        # dictionary, but this exit prevents that whenever -verbose
        # or -listdicts is given.  Confirm which behavior is intended.
        #
        if($creator)
        {
            print STDERR "-create given and a file-specific dictionary already exists\n";
            unlink($tmpsp);
            exit(2);
        }
    }

    if($newdict ne '')
    {
        print "new file dictionary:  $newdict\n";
    }

    #
    # If the user only wanted the paths, then we'll exit now.
    #
    exit(0) if($lister);

    print "temporary dictionary: $tmpsp\n\n";
}

#----------------------------------------------------------------------
# Routine:      version()
#
# Purpose:      Print the version number(s) and exit.
#
sub version
{
    print STDERR "$VERS\n";
    exit(0);
}

#----------------------------------------------------------------------
# Routine:      usage()
#
# Purpose:      Give usage message and exit.
#
sub usage
{
    print STDERR "usage:  spell-squirrel [options] <file>\n";
    print STDERR "\n";
    print STDERR "\t-create\n";
    print STDERR "\t-defedit\n";
    print STDERR "\t-edit\n";
    print STDERR "\t-listdicts\n";
    print STDERR "\t-nodefault\n";
    print STDERR "\t-update\n";
    print STDERR "\n";
    print STDERR "\t-verbose\n";
    print STDERR "\t-help\n";
    print STDERR "\t-Version\n";
    exit(0);
}

1;

##############################################################################

=pod

=head1 NAME

spell-squirrel - spelling checker with file-specific word lists

=head1 SYNOPSIS

  spell-squirrel [options] <file>

=head1 DESCRIPTION

B<spell-squirrel> is a wrapper for spelling checkers.  It provides for
file-specific dictionaries to be used in conjunction with the spelling
checkers' usual dictionaries.  This is useful for such things as variable
names and reserved words in source files or project-specific words and terms
in text files.

A file's file-specific dictionary will contain a list of words that will be
considered to be spelled correctly for that particular file.  Words on this
list are not reported as misspelled for that file when B<spell-squirrel> is
run on it.  These dictionaries are created with the B<-create> option and may
be edited by using the B<-edit> option.

A default dictionary can be created that contains words common to a broad
range of a user's files.  The default dictionary will be applied to any
B<spell-squirrel> check, unless the B<-nodefault> option is given.

File-specific dictionaries are kept distinct even when they are for files
whose actual names are the same.  So, B</project/one/README> and
B</project/two/README> will have different dictionaries.
This is done by indexing the dictionary files.  More details are available
in the IMPLEMENTATION DETAILS section below.  That section is not required
reading in order to use B<spell-squirrel>, but it will be helpful in
understanding how this program works.

This version of B<spell-squirrel> uses the B<aspell> command to perform the
actual spelling checks, but that can be adjusted on a site-by-site basis.
The only requirement is that the spelling checker be able to act as a filter;
i.e., it must be able to check text taken from standard input and write
misspelled words to standard output.

B<spell-squirrel> is not for use with an interactive program, such as within
emacs.

=head1 OPTIONS

B<spell-squirrel> takes the following options.  Only one of the B<-create>,
B<-defedit>, B<-edit>, and B<-update> options may be used at a time.

=over 4

=item B<-create>

Create a new file-specific dictionary for the given file.  The new dictionary
will be created in the dictionary directory and it will be added to the
dictionary index file.  If this option is given for a file that already has a
dictionary, then the old dictionary will be deleted and replaced with the new.

=item B<-defedit>

Edit the default dictionary.  If the I<EDITOR> environment variable is
defined, then the program named by it will be used as the editor.  If that
environment variable is not defined, then B<vi> will be used.

=item B<-edit>

Edit the dictionary for the specified file.  If the I<EDITOR> environment
variable is defined, then the program named by it will be used as the editor.
If that environment variable is not defined, then B<vi> will be used.

=item B<-listdicts>

List the dictionaries that would be used for the given file.  None of the
spelling checks are actually run nor are new dictionaries created.

=item B<-nodefault>

Do not include the default dictionary in the referenced collection of
dictionaries.

=item B<-update>

Update an existing file-specific dictionary for the given file.  The new
dictionary entries will be appended to the dictionary.

=item B<-verbose>

Display the verbose output.

=item B<-Version>

Display the version information for B<spell-squirrel>.

=item B<-help>

Display a help message.

=back

=head1 IMPLEMENTATION DETAILS

This section contains implementation details for B<spell-squirrel>.  It is
not required reading in order to use B<spell-squirrel>, but it may be helpful
if you want to understand how B<spell-squirrel> works.

=head2 Dictionaries and the Dictionary Index

B<spell-squirrel> stores its files in B<~/.spell-squirrel>, a directory
dedicated to use by this program.  The default dictionary, the file-specific
dictionaries, the index file, and temporary files are all stored in this
directory.

The dictionary index maps absolute paths to dictionary files.
B<spell-squirrel> uses this index since files in different directories can
have the same node name.  Each entry in the index file contains the name of
the file-specific dictionary and the file's absolute path.  The lines have
this format:

    <nodename>.<index>/ <absolute path>

The I<nodename> portion is the same as the node name in the absolute path.
The I<index> portion is a numeric index, which allows multiple files with the
same node name to have their own dictionary files.  There will always be six
digits in the index, giving a million possible different dictionary files for
each nodename.  It is expected that this will be sufficient for most purposes.

A slash-space pair is used to separate the dictionary names from the absolute
paths since node names can contain spaces, but not slashes.

You I<may> edit dictionary files manually, but you must use the B<-listdicts>
option to ensure you're looking at the correct dictionary file for the file.
The B<-edit> option can be used to edit the dictionary file, if the entries
need to be adjusted.

=head2 Spelling Checker Programs

The actual spelling checks are performed by a real spelling-checker program,
and not by B<spell-squirrel>.  The current implementation uses B<aspell>.
Any spelling checker may be used, but only if it supports the following:

    - the spelling checker must be able to read its standard input for
      the data to check,

    - the spelling checker must write misspelled words to its standard
      output.

The actual spelling checking is done using this command:

    cat <file> | aspell list | egrep -v -f <tmpfile> | sort -u

I<file> is the file whose spelling will be checked.  I<tmpfile> is a
temporary file that contains the default dictionary and the file-specific
dictionary.

=head1 FILES

    ~/.spell-squirrel/                          directory that holds
                                                file-specific dictionaries

    ~/.spell-squirrel/spell-squirrel-default    default dictionary

=head1 AUTHOR

Wayne Morrison, wayne@waynemorrison.com

=head1 LICENSE

Copyright 2012 Wayne Morrison

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

=head1 SEE ALSO

B<aspell(1)>

=cut