#!/usr/bin/perl
#
# archon	This script backs up a specific set of files and directories.
#		It is intended to be used as a periodic, partial backup.
#		It is not intended to replace full monthly (biweekly) dumps.
#
# Revision History
#	1.0	Initial revision.					2008
#	1.1	Ensure output file doesn't exist;			131202
#		proper removal of trailing slashes.
#	1.2	Added the -build, -keep, -latest, -noeject,		141124
#		-noumount, -nounmount, and -usearch options.
#	1.3	Moved build set to .archon.				141125
#	1.4	Added the -skip option and file checking.		150202
#	2.0	Added the device list and named archives to		150329
#		the .archon file.
#	3.0	Added logging.						150414
#	3.1	Added licensing info.					180526
#
#	Copyright 2008-2018 Wayne Morrison
#
#	Licensed under the Apache License, Version 2.0 (the "License");
#	you may not use this file except in compliance with the License.
#	You may obtain a copy of the License at
#
#		http://www.apache.org/licenses/LICENSE-2.0
#
#	Unless required by applicable law or agreed to in writing, software
#	distributed under the License is distributed on an "AS IS" BASIS,
#	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#	See the License for the specific language governing permissions and
#	limitations under the License.
#
#
# The following test cases must be checked prior to release:
#	* -archives option
#	* -devices option
#	* -files option
#	* -config option
#	* -build option
#	* -keep option
#	* -latest option
#	* -noeject option
#	* -skip option
#	* -select option w/ good device name
#	* -select option w/ good device name and no backups directory
#	* -select option w/ good device path
#	* -select option w/ bad device name
#	* -select option w/ bad device path
#	* -usearchive option w/ good archive
#	* -usearchive option w/ bad archive
#	* -usedevice option w/ good path
#	* -usedevice option w/ bad path
#	* default archive
#	* named archive
#	* bad named archive
#	* no archives in file
#	* no devices in file
#	* default device
#	* missing default device
#	* bad default device
#	* comments on archset lines
#	* comments on group lines
#	* comments on file lines
#	* archset names w/ bad characters
#	* device names w/ bad characters
#	* file names w/ bad characters
#

use strict;
use warnings;

use Getopt::Long qw(:config no_ignore_case_always);

#
# Version information.
#
my $NAME = "archon";
my $VERS = "$NAME version: 3.1";

############################################################################
#
# Options fields.
#

my %opts = ();					# Options.

#
# Command line arguments.  This is the option specification handed to
# GetOptions(); the parsed values land in %opts.
#
my @opts =
(
	'build',			# Build but don't copy the archive file.
	'keep',				# Don't delete archive file after copy.
	'latest',			# Use latest archfile; don't build one.
	'noeject|noumount|nounmount',	# Don't eject thumb drive after archive.
	'skip',				# Skip existence checks.
	'usearchive=s',			# Use the specified archive file.

	'config=s',			# Specify name of archive config file.
	'select=s',			# Specify device name to use.
	'usedevice=s',			# Use this specific device.

	'archives',			# Show the list of archive names.
	'devices',			# Show the list of devices.
	'files',			# Show the list of files in an archive.

	'history:i',			# Show the archiving history.
	'logfile=s',			# Use a different log file.

	'verbose',			# Give verbose output.
	'help',				# Give a help message.
	'Version',			# Display the program version.
);

#
# Values derived from the command line; they are filled in by optsandargs().
#
my $archdev	 = '';			# Archive device to use.
my $build	 = 0;			# Build-nocopy flag.
my $config;				# Configuration file.
my $keep	 = 0;			# Keep-archive flag.
my $latest	 = 0;			# Latest-archive-file flag.
my $listarchives = 0;			# List-archives flag.
my $listdevices	 = 0;			# List-devices flag.
my $listfiles	 = 0;			# List-files flag.
my $noeject	 = 0;			# No-unmount flag.
my $skip	 = 0;			# Skip-checks flag.
my $usearch	 = '';			# Archive file to use.
my $usedev	 = '';			# Exact archive device to use.
my $verbose	 = 0;			# Verbose flag.

my $logfile	 = '';			# Log file.
my $lastlogcnt	 = -1;			# Last N log entries.

############################################################################

my $backdir = 'backups';			# Backup directory.

my $EXT = 'arch';				# Archive name extension.

my $CP	     = '/bin/cp';			# File-copying program.
my $DF	     = '/bin/df -lPh';			# Disk-space program.
my $DISKUTIL = '/usr/sbin/diskutil';		# Volume-ejector program.
my $MV	     = '/bin/mv';			# File-moving program.
my $TAR	     = '/usr/bin/tar';			# Archiving program.
my $TARARGS  = '--bzip2';			# Arguments to archiver.
my $EXCLUDES = '--exclude rollmgr.socket';	# Exclusion arguments.

my $DEFCONFIG = glob("~/.archon");		# Default configuration file.

# wgm open(ARCHON, "< $config");

############################################################################

#
# Data collected from the configuration file.  The device names and paths
# are kept in parallel arrays: entry N of @devnames goes with entry N of
# @devpaths.
#
my @devnames  = ();		# List of user-defined device names.
my @devpaths  = ();		# List of user-defined device paths.
my $devcnt    = 0;		# Count of user-defined device entries.
my $defdevice = '';		# Default device name (not path.)

my @archives = ();		# List of user-defined archive names.
my %archives = ();		# Hash of user-defined archive names.

my $DEF_ARCHSET = 'default';	# Default archive name to use.
my $archset	= '';		# Archive name to use.
my $userset;			# Archive name if specified by user.
my $firstset	= '';		# First archive set listed.

my $listcmds = 0;		# Count of list options given.

my @logmsgs = ();		# Stored log messages.

############################################################################ main(); exit(0); #------------------------------------------------------------------------ # Routine: main() # # Purpose: Do Everything. # sub main { my $archfile; # Archive filename. my $avol; # Archive volume. my $hdir; # Home directory. my $saveme; # List of files and directories to archive. # # Munch on the options and arguments. # optsandargs(); # # Go to our home directory before doing anything. # $hdir = glob("~"); chdir($hdir); # # Build the list of files and directories we'll be archiving. # readconfig(); # # Build the list of files and directories we'll be archiving. # histlist() if($lastlogcnt >= 0); # # Build the archive set from the config file and the selected # archive name. # $saveme = getarchset(); # # Give requested lists and exit (maybe.) # lister(); # # Check the archive list to see if they all exist and are readable. # checklist($saveme); # # Build the name of the archive file. # $archfile = getarchive(); # # Create the archive file. # archit($archfile,$saveme); if($build) { logger('built, not archived', $archset, ''); exit(0); } # # Try to move the archive file. # $avol = moveit($archfile); logger('archived', $archset, $avol); # # Unmount the archive volume. # unmounter($avol); } #---------------------------------------------------------------------- # Routine: optsandargs() # # Purpose: Parse the command line for options and arguments. # sub optsandargs { # # Parse the options. # GetOptions(\%opts,@opts) || usage(); # # Check for some immediate-action options. # usage() if(defined($opts{'help'})); version() if(defined($opts{'Version'})); # # Get values for non-immediate options. 
# $archdev = $opts{'select'}; $build = $opts{'build'} || 0; $keep = $opts{'keep'} || 0; $latest = $opts{'latest'} || 0; $noeject = $opts{'noeject'} || 0; $skip = $opts{'skip'} || 0; $usearch = $opts{'usearchive'} || ''; $usedev = $opts{'usedevice'} || ''; $verbose = $opts{'verbose'} || 0; $listarchives = $opts{'archives'} || 0; $listdevices = $opts{'devices'} || 0; $listfiles = $opts{'files'} || 0; $logfile = $opts{'logfile'} || $DEFCONFIG; $lastlogcnt = $opts{'history'} if(defined($opts{'history'})); $config = defined($opts{'config'}) ? $opts{'config'} : $DEFCONFIG; # # Complain if mutually exclusive options were specified. # if($build && $latest) { print STDERR "-build and -latest are mutually exclusive; stopping\n"; exit(2); } if($build && $usearch) { print STDERR "-build and -usearch are mutually exclusive; stopping\n"; exit(2); } if($latest && $usearch) { print STDERR "-latest and -usearch are mutually exclusive; stopping\n"; exit(2); } # # Complain if mutually exclusive options were specified. # if(($archdev ne '') && ($usedev ne '')) { print STDERR "-select and -usedevice are mutually exclusive; stopping\n"; exit(2); } # # Set a flag indicating if we were given any of the list options. # $listcmds = $listarchives + $listdevices + $listfiles; # # Lop off trailing slashes from the archive device. # $archdev =~ s/\/+$//; # # Get the name of the archive set to use. If one wasn't given, # we'll use the default archive set. # $archset = (@ARGV == 0) ? $DEF_ARCHSET : $ARGV[0]; if(@ARGV == 0) { $archset = $DEF_ARCHSET; $userset = ''; } else { $archset = $ARGV[0]; $userset = $archset; } # # Ensure the archive filename doesn't have problems. # if((my $bc=badname($archset,0)) ne '') { print STDERR "archive set name \"$archset\" contains invalid character - \"$bc\"\n"; exit(1); } # # Convert spaces in the archive file to underscores. # $archset =~ tr/ \t/_/; # # Existence/readability checks on the config file. # if(! 
-r $config) { print "configuration file $config is not readable; stopping\n"; exit(3); } if(! -f $config) { print "configuration file $config is not a regular file; stopping\n"; exit(3); } } #---------------------------------------------------------------------- # Routine: readconfig() # # Purpose: Read the configuration file and build the the device list # and archive lists. # sub readconfig { my @alllines = (); # All the lines in .archon. my @lines = (); # Usable lines in .archon. my $bc; # Bad character in line. my $curgroup = ''; # Current group. my $indevices = 0; # Flag for being in device list. my %deventries = (); # Hash of device entries. my $defdev = -1; # Default device name. my $devsections = 0; # Count of device sections. my @filelist; # Files in a particular archive. # # If the config file exists, we'll read All The Lines! # If it doesn't exist, we'll use a default configuration built-in # to archon. # if(-e $config) { # # Get the contents of the archon config file. # open(ARCHON, "< $config"); @alllines = ; close(ARCHON); } else { print "$config doesn't exist; using archon-internal list\n" if($verbose); # # Get the default configuration. # @alllines = ; } # # Copy the useful lines (non-empty, non-comment lines) into @lines. # for(my $ind = 0; $ind < @alllines; $ind++) { my $ln = $alllines[$ind]; # Line from usable list. # # Get rid of newlines from this line. # Get rid of leading and trailing spaces from this line. # Get rid of comments from this line. # Then skip empty lines lines. # chomp $ln; $ln =~ s/^\s+|\s+$|\s*\#.*$//g; next if($ln =~ /^$/); # # Complain if we find an invalid character. # if(($ln !~ /^>>>/) && (($bc = badname($ln,0)) ne '')) { print STDERR "invalid character on line " . ($ind + 1) . " - \"$bc\"\n"; exit(1); } # # Add this (presumably) valid line to our sub-list. # push @lines, $ln; } # # Go through the list and figure out what's going on with each. 
# for(my $ind = 0; $ind < @lines; $ind++) { my $ln = $lines[$ind]; # Line from usable list. my $groupname; # Name of a config group. # # If this is a section divider, we'll handle it as needed. # (Device lists and archive lists are the only two sections # recognize right now.) # if($ln =~ /^>>>\s*(.*)$/) { # # If we're in the midst of building a file list, # this section divider means we've hit the end of # this list and we're starting a new section. We'll # save this list and start a new one. # if(@filelist > 0) { my @flist = @filelist; # Copy of files in list. $archives{$curgroup} = \@flist; @filelist = (); } # # Save the new section's name. # $groupname = $1; # # If this is a device list, we'll set a flag # indicating that's where we are. # If this is file list, we'll turn off the devices # flag and mess about with the archive name. # (Ensure it isn't already in use, save the name # in an archive list and archive hash, and zap the # file list.) # If this is the first archive defined in the file, # we'll save its name specially. # if($groupname =~ /^devices$/i) { $indevices = 1; $devsections++; } elsif($groupname =~ /^log\s+(.*)$/i) { $indevices = 0; push @logmsgs, $1; next; } elsif($groupname =~ /^logfile\s+(.*)$/i) { $indevices = 0; $logfile = $1; # # Ensure the log file doesn't have # problem characters in its name. # if(($bc = badname($logfile,0)) ne '') { print STDERR "logfile name \"$logfile\" contains an invalid character - \"$bc\"\n"; exit(1); } next; } else { my $bc; # Bad character in name. $indevices = 0; # # Ensure the archive file doesn't have # problem characters in its name. # if(($bc = badname($groupname,1)) ne '') { print STDERR "archive set name \"$archset\" contains an invalid character - \"$bc\"\n"; exit(1); } # # Convert spaces in the archive file to # underscores. # $groupname =~ tr/ \t/_/; # # If this archive name has already been used, # we'll complain and exit. 
# if(exists($archives{$groupname})) { print STDERR "archive $groupname is already defined\n"; exit(2); } # # Save the name of this archive set. # push @archives, $groupname; $archives{$groupname} = 1; # # Save the name of the first archive set found. # if($firstset eq '') { $firstset = $groupname; } # # Zap the archive file list. # @filelist = (); } # # Save the new section as the current group. # $curgroup = $groupname; # # Go on to the next line. # next; } # # If we're in a device list, we'll save the device name and # path into the appropriate lists. We are *not* using a # hash for these lists so that a device name can actually # list multiple devices and a device path can be listed by # several device names. # if($indevices) { my $key; # Key to device entries. $ln =~ s/\/*$//; $ln =~ /(\S+)\s+(\S+)/; # # Save the device name and path. # push @devnames, $1; push @devpaths, $2; $devcnt++; # # If the device name is "default" and the device # path is not "default", then this device path is # saved as the default device. # if(($1 eq 'default') && ($2 !~ /^default$/i)) { $defdev = $2; } } else { # # Save this line into the current file list. # push @filelist, $ln; } } # # Save the final archive set. # if(@filelist > 0) { my @flist = @filelist; # Copy of files in list. $archives{$curgroup} = \@flist; @filelist = (); } # # Use the default if no groups were defined. # if($curgroup eq '') { $curgroup = 'default'; } # # Complain and exit if no archive sets were defined. # if(keys(%archives) == 0) { print "no archive sets defined\n"; exit(1); } # # Complain and exit if no device lists were defined. If no device # paths were defined, this is likely a pre-v2.0 config file. 
# if(($devsections == 0) && (@devpaths == 0) && ($usedev eq '')) { print "no device lists defined\n"; print "\nIf you're using a pre-v2.0 .archon file, use the -usedevice option\nto specify the archive target.\n"; print "Better yet, you might want to consider updating your .archon file\nto the newer, more useful version.\n"; exit(1); } # # If no default set was given but the default set should be used, # then we'll use the first group defined in the file. # if((! defined($archives{'default'})) && ($archset eq 'default')) { $archset = $firstset; if($verbose && (! $listcmds)) { print "no default archive set defined, using archive \"$archset\"\n"; } } # # If no archive sets were defined, this is likely the old-format # config file. # if(@archives == 0) { print "no named archives in the .archon file, but we're able to continue\n"; print "\nYou're probably using a pre-v2.0 .archon file; you might want to consider\nupdating your .archon file to the newer, more useful version.\n\n"; } # # If a default device has been specified, we'll save its name. # if($defdev != -1) { for(my $ind = 0; $ind < @devnames; $ind++) { if($devnames[$ind] eq $defdev) { $defdevice = $defdev; last; } } } # # Sort the archive names. # @archives = sort(@archives); # # Ensure the archive file isn't a problem. # if(($bc = badname($archset,1)) ne '') { print STDERR "archive set name \"$archset\" contains invalid character - \"$bc\"\n"; exit(1); } # # Convert spaces in the archive file to underscores. # $archset =~ tr/ \t/_/; } #---------------------------------------------------------------------- # Routine: getarchset() # # Purpose: Build the string of the selected archive list. # sub getarchset { my $savelist = ''; # String list of files to save. my @flist; # Array list of files to save. my $flist; # Reference to array list. my @badnames = (); # List of bad names. # # Ensure the requested archive set is defined in the config file. if(! 
defined($archives{$archset})) { print STDERR "archive set \"$archset\" is not defined in config file \"$config\"\n"; exit(4); } # # Get the array of files to save. # $flist = $archives{$archset}; @flist = @$flist; # # Do another filename check of the whole archive set to ensure # the names are all lovely. This shouldn't ever fail here, but # it's better to be safe. # foreach my $fn (@flist) { if((my $bc = badname($fn,0)) ne '') { push @badnames, "$fn\t\"$bc\""; } } # # If we found any filenames with invalid characters, we'll print # them all out here in one swell foop. # if(@badnames > 0) { print STDERR "archive set name \"$archset\" contains entries with invalid characters\n"; foreach my $bfn (@badnames) { print "\t$bfn\n"; } exit(1); } # # Build the string of files to save. # $savelist = join ' ', @flist; return($savelist); } #------------------------------------------------------------------------ # Routine: checklist() # # Purpose: Check the list of files and directories we'll be archiving. # Conditions checked are: # - existence # - readability # - searchability (for directories) # - valid type (regular file, directory, symlink) # sub checklist { my $savelist = shift; # Files and directories in backup-set. my @list; # Broken-out name of save list. my @newlist = (); # *Real* broken-out name of save list. my $errs = 0; # Error count. # # Don't check anything if -skip was given. # return if($skip); # # Split our file list into its constituent parts. # @list = split / /, $savelist; # # Build a list of actual filename -- joining up quoted names -- # from the split list. # while(@list) { my $fn; # Name from list. my $qstr = ''; # Quoted string. # # If this name doesn't have any quotes, we'll add it to # our new list of names. # $fn = shift @list; if($fn !~ /['"]/) { push @newlist, $fn; next; } # # This entry is quoted, so we'll build the full name. # $qstr = $fn; while(@list) { # # Add this list entry to the quoted filename. 
# $fn = shift @list; $qstr .= " $fn"; # # We'll drop out if this latest name has a quote. # last if($fn =~ /['"]/); } # # Add the constructed filename to the new list of names. # push @newlist, $qstr; } # # Go through our new list of names and check to see if the # file exists and is readable. # foreach my $fn (@newlist) { my @fns; # List of globbed filenames. # # Get the expansion of this filename. # @fns = glob($fn); # # Check each file in the name expansion to ensure that # we can (most likely) archive it. # foreach my $nfn (@fns) { if(chkfile($nfn) == 0) { $errs++; } } } # # If there weren't any errors, we'll continue onwards. # if($errs) { print STDERR "archon not continuing\n" if($verbose); exit(1); } } #------------------------------------------------------------------------ # Routine: chkfile() # # Purpose: Check a file for "okayness". Any type of file must exist and # be readable. Only regular files, symlinks, and directories # are valid. Directories must also be searchable. # # A boolean is returned indicating the file's okayness. # sub chkfile { my $nfn = shift; # Name of file to check. # # Check for existence and readability. # if(! -e $nfn) { print STDERR "$nfn does not exist\n"; return(0); } if(! -r $nfn) { print STDERR "$nfn is not readable\n"; return(0); } # # Type specific checks. # if(-d $nfn) { if(! -x $nfn) { print STDERR "$nfn is not searchable\n"; return(0); } } elsif(-f $nfn) { # No other checks for plain files right now. } elsif(-l $nfn) { # No other checks for symbolic links right now. } else { print STDERR "$nfn is not a file, directory, or symlink\n"; return(0); } # # File is okay. # return(1); } #------------------------------------------------------------------------ # Routine: getarchive() # # Purpose: Build the name of the archive file. # # If -usearch was given, then that will be used as the name # of an existing archive file. If there isn't a file of that # name, an error exit will happen. 
# # The kronos[5] calculation is to get the two-digit year. # This is a *little* sloppy, but I'm really not expecting # to live past 2099, so I'm not too worried about it. # # Also, it would be easy enough to just subtract 100 from # that year, but this way shows clearly what's happening. # I guess the other way does too, but this is *my* code, # not yours. # sub getarchive { my $user; # Name of user. my @kronos; # Timestamp. my $dir; # Directory for output file. my $outfile; # Output file name. my $archnode; # Archive node name. # # Set up for the user-specified archive -- if one was given. # Error and exit if the file exists already. # if($usearch ne '') { if(! -e $usearch) { print STDERR "archive file \"$usearch\" does not exist; stopping\n"; exit(1); } return($usearch); } # # We'll write the archive directly to the user's home directory. # We might move it to the memory stick later. # $dir = glob("~$user"); # # Get the user's name. # ($user) = getpwuid($<); # # Set up for using the most recent timestamped archive file in the # user's home directory. If we find a timestamped archive file, # we'll set $usearch to that and return. # if($latest) { my $fmask; # File mask to find. my @files; # First-match files. my @flist = (); # Second-match files. # # Build the file mask and get a list of matching files. # We'll allow for compressed and uncompressed archive files. # $fmask = "$dir/*-$user-$archset.$EXT"; @files = sort(glob($fmask), glob("$fmask.bz2")); # # If there were no matching files, complain and exit. # if(@files == 0) { print STDERR "no archive files in $dir; stopping\n"; exit(1); } # # Save the files that match our timestamp in the name. # foreach my $fn (@files) { push @flist, $fn if($fn =~ /^$dir\/\d{6}-\d{4}-/); } # # If there were no matching files, complain and exit. # if(@flist == 0) { print STDERR "no dated archive files in $dir; stopping\n"; exit(1); } # # Save the most recent as the archive file to use and # return the name. 
# $usearch = $flist[-1]; return($usearch); } # # Get the current time and adjust the year and month. # @kronos = localtime(time); $kronos[5] += 1900 - 2000; $kronos[4] += 1; # # Get the archive's node name. # $archnode = sprintf("%02d%02d%02d-%02d%02d-$user-$archset.$EXT",$kronos[5],$kronos[4],$kronos[3],$kronos[2],$kronos[1]); # # Build the output filename and return it. # $outfile = "$dir/$archnode"; return($outfile); } #------------------------------------------------------------------------ # Routine: archit() # # Purpose: Create the archive file. The local copy of the archive # file, if such exists, will be deleted. # sub archit { my $archfile = shift; # Archive file. my $archlist = shift; # Archive list. my $cmd; # Archive command. # # If we've been given the archive file, we'll just return. # if($usearch ne '') { print "using archive \"$usearch\"\n"; return; } print "creating archive\n"; unlink($archfile); $archfile = "$archfile.bz2" if($TARARGS =~ /bzip2/); $cmd = "$TAR -cf $archfile $TARARGS $EXCLUDES $archlist"; system($cmd); } #------------------------------------------------------------------------ # Routine: moveit() # # Purpose: Move the archive file to the memory stick iff the stick # has been inserted. # sub moveit { my $archfile = shift; # Archive file. my $cmd; # Move command. my $archvol; # Archive volume. my $archdir; # Archive directory. # # Select an archive volume. # ($archvol, $archdir) = getdevice(); # # Return if the archive volume hasn't been inserted. # if($archvol eq '') { print STDERR "no archive volumes are mounted\n"; return(''); } # # Exit if the archive volume doesn't have a backup directory. # if(! -e $archdir) { print STDERR "backup path \"$archdir\" does not exist\n"; exit(5); } # # Adjust for file extension -- if the user didn't give us the # archive name. # if(! $usearch) { $archfile = "$archfile.bz2" if($TARARGS =~ /bzip2/); } # # Move the archive file to the archive volume. 
# if($keep) { print "copying $archfile.bz2 to $archdir\n" if($verbose); $cmd = "$CP $archfile $archdir"; } else { print "saving $archfile.bz2 to $archdir\n" if($verbose); $cmd = "$MV $archfile $archdir"; } system($cmd); return($archvol); } #---------------------------------------------------------------------- # Routine: getdevice() # # Purpose: Select an archive volume to use. If the user specified an # archive device, we'll search the device tables (names and # paths) for to just use that device. The device-name table # will be searched for exact (though case-insensitive) matches. # If none are found, the device-paths table is searched for # case-insensitive matches. # # If the user did not specify an archive device, we'll try to # use any of the devices defined in the config file. # # The device path and actual backup directory on that device # are returned to the caller. # # sub getdevice { my $archvol = ''; # Archive volume. my $archdir = ''; # Archive directory. my @devlist = (); # Devices indices to check. # # If the user specified an archive device, we'll search the device # tables for to just use that device. Otherwise, we'll try to use # any of the devices defined in the config file. # if($archdev ne '') { my $defflag = 0; # Default-device flag. # # If the user wants the default device, we'll search the # device list for the default. If we find it, we'll use # the corresponding device path as the actual device name # to search for. If we don't find it, we'll give an # error and exit. # if($archdev eq 'default') { my $found = 0; # Default-found flag. $defflag = 1; for(my $ind = 0; $ind < @devnames; $ind++) { if($devnames[$ind] eq 'default') { $archdev = $devpaths[$ind]; $found = 1; last; } } if(! $found) { print STDERR "no default device specified\n"; exit(5); } } # # Search the defined device-name list for a case-ignored # exact match. We'll save the array indices of the matches. 
# for(my $ind = 0; $ind < @devnames; $ind++) { if($devnames[$ind] =~ /$archdev/i) { push @devlist, $ind; } } # # If we didn't find any matches in the device-name list, we'll # search the device-path list. In this case, we won't look # for exact matches, just for a path that contains the user's # requested device. Again, the index of matches will be saved. # # Unless the user wants the default device, in which case we'll # give an error and exit. # if(@devlist == 0) { if($defflag) { print STDERR "default device \"$archdev\" not defined\n"; exit(5); } # # Save the matching paths. # for(my $ind = 0; $ind < @devpaths; $ind++) { if($devpaths[$ind] =~ /$archdev/i) { push @devlist, $ind; } } } # # If the device list is *still* empty, the specified archive # device is not defined. # if(@devlist == 0) { print STDERR "archive device \"$archdev\" not defined\n"; exit(6); } } else { # # If the user gave the exact device to use, we'll just use it. # Otherwise, we'll use the whole list of devices and add the # default to the beginning of the list. # if($usedev ne '') { return($usedev, $usedev); } else { # # Save the indices of all the names in the device-name # list. # for(my $ind = 0; $ind < @devnames; $ind++) { $devlist[$ind] = $ind; } # # If the device list contained a default device, we'll # now move it to the beginning of the device list. # unshift @devlist, $defdevice if($defdevice ne ''); } } # # Go through each of our matching archive volumes. # We'll use the first volume we find mounted. # foreach my $ind (@devlist) { my $avol = $devpaths[$ind]; # Archive volume to check. my $ret; # df return code. # # Quietly see if this volume is mounted and save # the retcode. # system("$DF $avol > /dev/null 2>&1"); $ret = $? >> 8; # # If the volume's mounted, build the backup path. 
# if($ret == 0) { $archvol = "$avol"; $archdir = "$avol/backups"; last; } } return($archvol, $archdir); } #---------------------------------------------------------------------- # Routine: unmounter() # # Purpose: Unmount the archive volume. Unless -noeject was given. # Or if no archive volume was given. Or if the user specified # the exact path of the archive volume. # sub unmounter { my $avol = shift; # Archive volume. my $ret = 1; # Return code from diskutil. # # Don't try to unmount if no archive volume was given or if # the user specified the exact path. # return if(($avol eq '') || ($usedev ne '')); # # Save disk buffers and get the archive volume info. # system("sync"); system("$DF $avol"); # # Handle the -noeject option. # if($noeject) { print "not unmounting $avol\n" if($noeject); $ret = 0; } # # Attempt to eject the archive volume. # while ($ret != 0) { # # Ensure the archive volume is an actual volume. # system("$DISKUTIL info $avol > /dev/null"); $ret = $? >> 8; last if($ret != 0); # # Eject the archive volume. # system("sync"); system("$DISKUTIL eject $avol"); $ret = $? >> 8; } } #---------------------------------------------------------------------- # Routine: lister() # # Purpose: Front-end for listing contents of the config file. # A blank will be put between each listing group. # If none of the list options were specified, we'll # return without doing anything. # sub lister { my $listcnt = $listcmds; # Number of lists to give. return if($listcnt == 0); if($listarchives) { listarchives(); $listcnt--; print "\n" if($listcnt); } if($listdevices) { listdevs(); $listcnt--; print "\n" if($listcnt); } if($listfiles) { listfiles($archset, 0); } exit(0); } #---------------------------------------------------------------------- # Routine: listarchives() # # Purpose: List the archive sets defined in the config file. 
#
sub listarchives
{
    #
    # Nothing to list if the config file defined no archive sets.
    #
    if(@archives == 0)
    {
        print "no archives defined in archive config file $config\n";
        return;
    }

    #
    # In verbose mode each archive's file list is shown; otherwise only
    # the archive names are printed, one per line.
    #
    foreach my $ind (0 .. $#archives)
    {
        if($verbose)
        {
            listfiles($archives[$ind], $ind);
        }
        else
        {
            print "$archives[$ind]\n";
        }
    }
}

#----------------------------------------------------------------------
# Routine:      listdevs()
#
# Purpose:      Print device entries in order given in the archive config file.
#               The name/path pairs come from the parallel @devnames and
#               @devpaths arrays; the default device is printed last.
#
sub listdevs
{
    if($devcnt == 0)
    {
        print "no devices defined\n";
        return;
    }

    print "\n";
    print "Devices:\n";
    printf("\t%-20s\tPaths\n", "Names");

    foreach my $ind (0 .. $devcnt - 1)
    {
        printf("\t%-20s\t%s\n", $devnames[$ind], $devpaths[$ind]);
    }

    print "\n";
    print "Default device - $defdevice\n";
}

#----------------------------------------------------------------------
# Routine:      listfiles()
#
# Purpose:      Print the files listed for the specified archive.
#
#               The first argument is the archive name, used as a key
#               into %archives.  The second is the archive's index in
#               @archives; a blank separator line is printed before every
#               archive except the one at index 0.
#
sub listfiles
{
    my $archname = shift;                   # Archive to list.
    my $index    = shift;                   # Archive's @archives index.

    print "\n" if($index);

    #
    # Ensure this archive has been defined.
    #
    if(! defined($archives{$archname}))
    {
        print "archive \"$archname\" is not defined\n";
        return;
    }

    #
    # Get this archive's list of files.
    #
    my @filelist = @{$archives{$archname}};

    #
    # Print this archive's list of files.
    #
    print "$archname: " . scalar(@filelist) . " file entries\n";
    foreach my $fn (@filelist)
    {
        print "\t$fn\n";
    }
}

#----------------------------------------------------------------------
# Routine:      badname()
#
# Purpose:      Return an indicator showing if the given name contains
#               an unacceptable character.
#               If the second argument is true, we won't allow slashes.
#               If not, we will.
#
#               If an invalid character is found, we'll return the offender.
#               If no invalid characters were found, we'll return ''.
#
sub badname
{
    my $name      = shift;                  # Name to check.
    my $slashflag = shift;                  # Check for slashes.

    #
    # The two character classes differ only in the slash.
    #
    my $badre = $slashflag ? qr/([;\$\<\>\[\]\{\}\(\)\&\?\/'`|!])/
                           : qr/([;\$\<\>\[\]\{\}\(\)\&\?'`|!])/;

    return($1) if($name =~ $badre);
    return('');
}

#----------------------------------------------------------------------
# Routine:      _logfield()
#
# Purpose:      Return one field of a log entry.  An entry has four
#               fields separated by " ; ":
#                       timestamp ; archive set ; archive device ; message
#               The field index is zero-based.  An empty string is
#               returned for an entry that does not have four fields.
#
sub _logfield
{
    my $entry = shift;                      # Log entry to examine.
    my $field = shift;                      # Index of the field wanted.

    #
    # The limit keeps any " ; " inside the message in the last field.
    #
    my @fields = split(/ ; /, $entry, 4);

    return('') if(@fields != 4);
    return($fields[$field]);
}

#----------------------------------------------------------------------
# Routine:      histlist()
#
# Purpose:      Show some log messages, then exit.
#
#               The entries in @logmsgs are narrowed by $usedev and
#               $userset when those are non-empty, and the last
#               $lastlogcnt of the remaining entries are printed.
#               @logmsgs is modified by the filtering.
#
sub histlist
{
    my $logcnt;                             # Number of log messages saved.
    my $firstind;                           # Index of first message to print.
    my $selected = 0;                       # Selections-requested flag.

    #
    # Ensure we're looking at the correct set of log messages.
    #
    if($logfile ne $config)
    {
        readlog();
    }

    #
    # If an archive device was specified, we'll only consider entries
    # for archives saved to that device.  Malformed entries give an
    # empty field and so never match.
    #
    if($usedev ne '')
    {
        @logmsgs = grep { _logfield($_, 2) eq $usedev } @logmsgs;
        $selected++;
    }

    #
    # If an archive set was specified, we'll only consider entries
    # for archives of that archive set.
    #
    if($userset ne '')
    {
        @logmsgs = grep { _logfield($_, 1) eq $userset } @logmsgs;
        $selected++;
    }

    #
    # Ensure there are log messages to show.  If not, we'll give an
    # appropriate message based on whether or not any selection criteria
    # were specified.
    #
    $logcnt = @logmsgs;
    if($logcnt == 0)
    {
        if($selected)
        {
            print "no log messages in log file \"$logfile\" matching selection criteria\n";
        }
        else
        {
            print "no log messages in log file \"$logfile\"\n";
        }

        exit(0);
    }

    #
    # Show all the log messages if more entries were requested than
    # there are or if a specific (positive) number wasn't requested.
    #
    if(($lastlogcnt >= $logcnt) || ($lastlogcnt <= 0))
    {
        $lastlogcnt = $logcnt;
    }

    #
    # Figure out the first entry index to show.
    #
    $firstind = $logcnt - $lastlogcnt;

    #
    # Now we'll print the log entries.
    #
    print "log messages:\n";
    foreach my $ind ($firstind .. $logcnt - 1)
    {
        print "\t$logmsgs[$ind]\n";
    }

    #
    # And now we'll stop.
    #
    exit(0);
}

#----------------------------------------------------------------------
# Routine:      readlog()
#
# Purpose:      Read a non-standard log file.  The stored log messages are
#               saved in the @logmsgs array.
#
#               @logmsgs is always reset.  If the log file can't be
#               opened, a warning is printed and @logmsgs is left empty.
#
sub readlog
{
    my @msgs;                               # Log's messages.
    my $logfh;                              # Log file handle.

    #
    # Reset the collected log messages.
    #
    @logmsgs = ();

    #
    # Get the log messages.
    #
    if(! open($logfh, '<', $logfile))
    {
        print STDERR "unable to open log file \"$logfile\":  $!\n";
        return;
    }
    @msgs = <$logfh>;
    close($logfh);

    #
    # Read the log file and save its log messages.  Lines which
    # don't contain log entries are skipped entirely.
    #
    foreach my $ln (@msgs)
    {
        #
        # Get rid of newlines from this line.
        # Get rid of leading and trailing spaces from this line.
        # Get rid of comments from this line.
        # Then skip empty lines.
        #
        chomp $ln;
        $ln =~ s/^\s+|\s+$|\s*\#.*$//g;
        next if($ln =~ /^$/);

        #
        # Skip non-command lines.
        #
        next if($ln !~ /^>>>/);

        #
        # Save the line if it's a log entry.
        #
        if($ln =~ /^>>>\s*log\s+(.*)$/)
        {
            push @logmsgs, $1;
        }
    }
}

#----------------------------------------------------------------------
# Routine:      logger()
#
# Purpose:      Write a log message to the log file.
#
#               The entry is appended to $logfile as:
#                   >>> log YYMMDD hh:mm:ss ; archive set ; volume ; message
#
#               Returns true if the entry was written, false if the log
#               file couldn't be opened or closed.
#
sub logger
{
    my $msg   = shift;                      # Caller's message.
    my $arset = shift;                      # Archive name.
    my $arvol = shift;                      # Archive volume.

    my @chronos;                            # Log times.
    my $logmsg = '';                        # Constructed log message.
    my $logfh;                              # Log file handle.

    #
    # The caller's strings are passed as sprintf() arguments, not put
    # in the format, so that a '%' in them is written literally.
    #
    @chronos = localtime;
    $logmsg = sprintf("%02d%02d%02d %02d:%02d:%02d ; %s ; %s ; %s",
                      $chronos[5] % 100, $chronos[4] + 1, $chronos[3],
                      $chronos[2], $chronos[1], $chronos[0],
                      $arset, $arvol, $msg);

    if(! open($logfh, '>>', $logfile))
    {
        print STDERR "unable to append to log file \"$logfile\":  $!\n";
        return(0);
    }

    print $logfh ">>> log $logmsg\n";

    #
    # Buffered write errors show up when the file is closed.
    #
    if(! close($logfh))
    {
        print STDERR "unable to write log file \"$logfile\":  $!\n";
        return(0);
    }

    return(1);
}

#----------------------------------------------------------------------
# Routine:      version()
#
# Purpose:      Print the version number(s) and exit.
#
sub version
{
    print STDERR "$VERS\n";
    exit(0);
}

#----------------------------------------------------------------------
# Routine:      usage()
#
# Purpose:      Give usage message and exit.
#
sub usage
{
    #
    # One line for each option; the argument-taking options show
    # the name of their argument.
    #
    my $usagestr = "usage:  archon [options] [archive-set]

    where [options] are:

        -build                  build but don't copy the archive file
        -keep                   don't delete archive file after copy
        -latest                 use latest archfile; don't build one
        -noeject                don't eject thumb drive after archive
        -noumount               don't eject thumb drive after archive
        -nounmount              don't eject thumb drive after archive
        -skip                   skip existence checks
        -usearchive archive     use the specified archive file

        -config conffile        specify name of archive config file
        -select devname         specify device to use
        -usedevice devpath      use this specific device/directory

        -archives               show the list of archive names
        -devices                show the list of devices
        -files                  show the list of files in an archive
        -history [count]        show the archiving history
        -logfile file           use a different log file

        -verbose                give verbose output
        -help                   give a help message
        -Version                display the program version
";

    print STDERR $usagestr;
    exit(0);
}

1;

##############################################################################

=pod

=head1 NAME

B<archon> - targeted archiving program

=head1 SYNOPSIS

  archon [options] [archive-set]

=head1 DESCRIPTION

B<archon> archives a specific set of files and directories.  It is intended
to be used as a periodic, partial backup.  It is not intended to replace
full or incremental dumps.  The archive will be copied to removable media,
such as a USB drive or a removable hard disk.

An archive is built according to the B<.archon> configuration file.  This
file defines a number of named archive sets (lists of files to be archived)
and has a list of named devices to use as possible archive locations.  Using
the archive sets and the device lists in conjunction with B<archon>'s
options and arguments, it is easy to have a flexible, diverse set of
archives.

Archive sets should be reviewed periodically to ensure that the files being
archived are still the required set of files.

The B<.archon> file is relatively straightforward, but it is very important
that it be defined correctly so that the archives are built as needed.
The format of the file changed in B<archon> version 2.0, and it is highly
recommended that you read the "ARCHON CONFIGURATION FILES" section below in
order to create your configuration file properly.

A small example configuration is given in B<archon>'s __DATA__ section at
the end of the program file.  If the B<.archon> file does not exist, that
very small default archive set will be backed up.  This is a rudimentary
configuration and the devices defined there are unlikely to work anywhere.
This configuration is primarily intended as an example.

Relative paths in an archive set are assumed to be relative to the home
directory of the user running B<archon>.  Absolute paths may be given, but
modern B<tar> implementations typically strip off the leading slash.

Several checks will be performed on the file names in the archive sets, the
archive-set names, and the device names.  The conditions checked are:

    * existence
    * readability
    * searchability (for directories)
    * valid type (regular file, directory, symlink)
    * invalid characters in the name

The first four checks will not be run if the I<-skip> option is given.

The B<tar> command is used to create the tarfile; the I<--bzip2> option is
used to have B<tar> compress the tarfile.  Compression is done with the
B<bzip2> compression format.  When installing B<archon> on a new system,
ensure that its B<tar> program has the I<--bzip2> option.  If not, you'll
need to adjust the subroutine that builds the archive to:

    * not use --bzip2
    * not append ".bz2" to the archive filename
    * run bzip2 (or some other file compressor) on the archive file

The I<$TARARGS> variable may also need some adjustment.

=head2 Archive Names

The archive files will be created with a specific filename pattern.  The
pattern includes a timestamp, the user's name, and the name of the archive
set.  When spaces are used in archive-set names, they are converted to
underscores for use in the name of the actual archive file.
The following pattern is used:

    YYMMDD-hhmm-username-archiveset.arch.bz2

where:

    YYMMDD          date of archive (YY - year, MM - month, DD - day)
    hhmm            time of archive (hh - hour, mm - minute)
    username        user's login name
    archiveset      name of the archive set
    arch            archon archive file (doesn't change)
    bz2             indicates bzip2 compression (doesn't change)

A few example archive names are:

    150309-0836-suzee-inbox.arch.bz2
    150210-2021-bobb-project_data.arch.bz2
    150120-2310-chowder-homedir.arch.bz2

If an archive is started within the same minute as another archive using the
same archive set and username, the archive names will exactly match.  The
first archive will be overwritten by the second, unless the two archives are
saved to different volumes.

=head1 OPTIONS

B<archon> takes the following options:

=over 4

=item I<-archives>

Show the names of the archive sets defined in the configuration file.

=item I<-build>

Build the archive file, but do not copy it anywhere.

=item I<-config conffile>

I<conffile> will be used as the configuration file, rather than the default
B<.archon> file in the user's home directory.

=item I<-devices>

Show the list of devices in the configuration file.

=item I<-files>

Show the list of files in the archive set named on the command line.

=item I<-history count>

Show log entries from the current configuration file.  If the optional
I<count> field is given, then that is the number of the most recent entries
that will be displayed.  Otherwise, all the entries will be shown.

=item I<-logfile file>

Specify the file to which log entries will be written.  This must be a
regular file and it must be writable.

=item I<-keep>

The source archive file will not be deleted after it has been copied to the
target drive.

=item I<-latest>

An archive file will not be built, but the most recent archive file will be
copied to the target drive.  This archive file is the most recent file in
the user's home directory which matches the standard archive file naming
format.
B<archon> will attempt to find a valid file, but no effort will be made in
verifying the file's contents.

=item I<-noeject>

=item I<-noumount>

=item I<-nounmount>

The target drive will not be ejected after the archive file has been copied.

=item I<-select device>

I<device> will be used for the archive device.  Device names from the
configuration's device list will be matched against I<device>.  The first
matching device that is found to be mounted will be used as the archive
device.  If no matches are found, then the device paths from the device list
will be checked for matches.

=item I<-skip>

The existence and readability checks will not be performed on the archive
list.

=item I<-usearchive archive-file>

The existing file named I<archive-file> will be copied to the chosen archive
device.  B<archon> will archive this file as is, without attempting to
rename the file, compress the file, or verify its contents.

=item I<-usedevice volume>

I<volume> will be used for the ultimate location of the archive.  I<volume>
is the exact path to be used; B<archon> will not look there for a B<backups>
directory.  This is not the archive filename; it is a volume name or a
directory.  After copying the archive to I<volume>, the given path will not
be unmounted.

=item I<-verbose>

Give verbose output.

=item I<-Version>

Display the version information for B<archon>.

=item I<-help>

Display a help message.

=back

=head1 ARCHON LOGGING

B<archon> logs successful archive executions.  By default, the log entries
are added to the end of the selected B<.archon> file.  However, an alternate
log file may be specified in the B<.archon> file or with the I<-logfile>
option.  The '>>>logfile' directive is used to specify an alternate log file
in the configuration file.

Log entries may be displayed by using the I<-history> option.  The most
recent I<count> entries may be displayed by giving a numeric argument with
I<-history>.  If an archive set or an archive device is specified on the
command line, then only entries matching those selection criteria will be
displayed.
Each log entry consists of a date and time timestamp, the archive set, the
archive device, and a brief message indicating the status of that execution.
The fields are separated by semicolons.  The '>>>log' directive is used to
mark a log entry in the configuration file.

This is an example logging entry:

    >>>log 150405 18:29:31 ; daily ; /Volumes/archer ; archived

Only a few status messages are currently defined.  This is expected to
change in the future.

=head2 Logging Examples

The B<.archon> file contains these logging entries:

    >>>log 150415 01:24:49 ; tiny ; /Volumes/archer ; built, not archived
    >>>log 150415 13:10:01 ; daily ; /Volumes/archer ; archived
    >>>log 150416 13:10:01 ; daily ; /Volumes/archer ; archived
    >>>log 150417 13:10:00 ; daily ; /Volumes/thumb1 ; archived
    >>>log 150418 17:30:39 ; tiny ; /Volumes/archer ; archived
    >>>log 150418 13:10:01 ; daily ; /Volumes/thumb1 ; archived
    >>>log 150419 13:10:02 ; daily ; /Volumes/serenity ; archived

If this command is given "archon -history", then all seven log entries will
be displayed.

If this command is given "archon -history 1", then this entry will be
displayed:

    150419 13:10:02 ; daily ; /Volumes/serenity ; archived

If this command is given "archon -history tiny", then these entries will be
displayed:

    150415 01:24:49 ; tiny ; /Volumes/archer ; built, not archived
    150418 17:30:39 ; tiny ; /Volumes/archer ; archived

If this command is given "archon -history 1 tiny", then this entry will be
displayed:

    150418 17:30:39 ; tiny ; /Volumes/archer ; archived

If this command is given "archon -history 1 /Volumes/thumb1", then this
entry will be displayed:

    150418 13:10:01 ; daily ; /Volumes/thumb1 ; archived

=head1 ARCHON CONFIGURATION FILES

The original version of the B<.archon> configuration file had a simple
format; it was just a list of files to be archived.  Version 2.0 of
B<archon> uses an enhanced, more flexible format.  The old B<.archon> files
will continue to work, at least as far as archived files are concerned.
However, the old files will require the I<-usedevice> option.

The new B<.archon> files are divided into sections of archive devices and
archive sets.  The device lists provide named devices that may be selected
as the destination of archives.  Archive sets are named lists of files that
will be archived to the archive devices.

The new B<.archon> configuration files provide several benefits.  Moving
device lists into the configuration file makes B<archon> more flexible for
individual users, since there is no longer a need to modify the source code
just to change the available devices.  Putting the names of files to be
archived into named lists provides an easy way to have different archive
sets defined for different uses.

The configuration sections are marked by a line starting with ">>>",
followed by either the word "devices" (for a device list) or a label (for an
archive set).  The archive-set label can consist of alphabetic characters,
numerals, and the following punctuation:  ","  "-"  "_"  "." and spaces.  No
other characters are allowed.

Comment lines start with a sharp sign.  Blank lines are allowed anywhere,
even within an archive set or a device list.  Comment lines and blank lines
are ignored.

This is a very simple B<.archon> file.  It defines a single device and an
archive set with two entries:

    # A simple .archon file.

    >>> devices
    thumb           /Volumes/thumb1

    >>> smallset
    .cshrc
    mail

=head2 Device Section

Device entries consist of two fields:  a device name and a device path.  The
device name serves as a simple label for the device.  The device path is the
absolute path to the device.  B<archon> expects the device path to contain a
writable directory named B<backups>.  In the example above, there should be
a directory named B</Volumes/thumb1/backups>.

An archive device can actually be a directory and not removable media.
Archive devices are discussed here as if they are removable media (thumb
drive, hard drive, etc.) because it is generally safer to archive files on
something that isn't tied to a specific machine.
However, this is not required.

The device name serves several purposes.  At heart, the name is a simple
label for the device.  If all the device names are unique, then the
I<-select> option allows a specific device to be used as the archive device.

The device name may also be used to name a group of devices.  If multiple
devices are given the same name, then the I<-select> option will attempt to
use each of the devices in the group until it finds one that is mounted.

A device path may be listed with multiple device names, allowing it to be
included in multiple groups.  There is no inherent or required one-to-one
correspondence between device names and device paths, though the
configuration file may be written that way.

There are no default devices, except in one situation described below.  If
no device is named in the options, then the whole device list will be tried
until a named device is found to be mounted.

The one time a default device will be used is when the B<.archon> file does
not exist.  In this case, a rudimentary configuration will be used that is
stored at the end of the B<archon> program file.  However, the devices in
this rudimentary configuration are unlikely to work anywhere.  This
configuration is primarily intended as an example.

The "default" device name is a special name.  The device path for the
I<default> device is not an actual path, but rather another device name.
When trying to determine the set of archive devices to try, B<archon> will
use the default device's "path" -- another device name -- as the first
device to try using.  The I<default> device won't necessarily always work;
it is just the first device that will be tried.  If none of the associated
device paths are mounted, then B<archon> will not be able to work.
The example device section below contains all the possibilities discussed:

    >>> devices
    thumb           /Volumes/thumb1
    thumb           /Volumes/thumb2
    thumb           /Volumes/thumb3
    serenity        /Volumes/firefly
    firefly         /Volumes/firefly
    firefly         /Volumes/thumb3
    tmp             /tmp/data-archives
    default         firefly

The I<thumb> device name is used to refer to three devices:
B</Volumes/thumb1>, B</Volumes/thumb2>, and B</Volumes/thumb3>.

The I<serenity> device name is used to refer to a single device:
B</Volumes/firefly>.

The I<firefly> device name is used to refer to two devices:
B</Volumes/firefly> and B</Volumes/thumb3>.  Both of those devices are
already in other groups.

The I<tmp> device name is used to refer to a single device:
B</tmp/data-archives>.  In most Unix systems, this is either a directory on
a hard disk or an in-memory virtual disk.

The I<default> device name points back into the device list to refer to the
I<firefly> device name.  If the default device is specified, then the two
I<firefly> devices will be checked.

=head2 Archive Set Section

In addition to a device section, a B<.archon> file has one or more archive
sets.  An archive set has an identifying name, and contains a list of files
and directories to be archived for that set.  The first archive set will be
used by default if an archive set is not specified on the command line.

Archive sets in B<.archon> are distinct, even though the contents of the
sets may overlap.  Specifying one set later (or earlier) in a file will not
cause any other archive sets to be included.

The filenames listed in the archive set are somewhat restricted in the
characters they may contain.  The following characters cannot be used in
filenames:

    ; $ < > [ ] { } ( ) & ? ' ` | !

The name of an archive set is also unable to use that same set of
characters, and in addition the set names cannot contain slashes.

An archive-set name can contain spaces and tabs, but they will be converted
to underscores.

A filename can contain spaces, but in this case the filename must be
surrounded by double-quotes.

=head2 Example B<.archon> File

This example B<.archon> file contains several archive sets and a pair of
device sections.

    # The devices we'll want to use.
    >>> devices
    archer          /Volumes/archer
    archer          /Volumes/centaur
    thumbs          /Volumes/thumb1
    thumbs          /Volumes/thumb2
    thumbs          /Volumes/thumb3

    # Absolute essentials that we must archive.  Everything else is extra.
    >>> essentials
    .cshrc
    .login
    .mailrc
    .ssh
    bin
    data/sciuridae

    # The email folders we can't lose.
    >>> required-mail
    mail/inbox
    mail/projects
    mail/hunnybunny

    # All our email folders.
    >>> all-mail
    mail

    >>> devices
    firefly         /Volumes/serenity
    default         thumbs

    # Data for current projects.
    >>> data
    "data/file index"
    data/sciuridae
    data/ailuradae
    data/procyonidae

    # Everything we want to back up with some frequency.
    >>> everything
    .cshrc
    .login
    .mailrc
    .ssh
    bin
    mail
    "data/file index"
    data/sciuridae
    data/ailuradae
    data/procyonidae
    "source files/modules"

    >>>logfile /opt/logs/archon.log

    >>>log 150418 17:30:39 ; tiny ; /Volumes/archer ; archived
    >>>log 150418 13:10:01 ; daily ; /Volumes/thumb1 ; archived
    >>>log 150419 13:10:02 ; daily ; /Volumes/serenity ; archived

=head1 PORTABILITY NOTE

This script was written for Mac OS X, but it should be easily portable to
other Unix-like systems.  The important thing to watch for is how a non-OS X
system names and manages removable media.

=head1 FUTURE POSSIBILITIES

The following possibilities I<may> be included in a future version of
B<archon>:

=over 4

=item * specifying multiple archive sets on a command line

=item * allowing an archive set to include another archive set

=item * specifying a default device for a particular archive set

=back

Inclusion of these possible features will depend on time availability and
interest.

=head1 AUTHOR

Wayne Morrison, wayne@waynemorrison.com

=head1 LICENSE

Copyright 2008-2018 Wayne Morrison

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. =head1 SEE ALSO B, B =cut #------------------------------------------------------------------------ # # The names in the _DATA_ section are a default set of files and directories # to be backed up. # __DATA__ >>> devices thumbs /Volumes/thumb1 thumbs /Volumes/thumb2 extdrive /Volumes/gatodelmar0 >>> default .cshrc .fetchmailrc .login .logout .mail_aliases .mailrc .mh_profile .ssh .tcshrc bin