#!/usr/bin/perl

# chkpdssum
#
# Routine to check the MD5 checksums in a PDS checksum table file against the 
# contents of the PDS volume directory tree.  The PDS label is not actually
# required or referenced.  The program can also be run against a non-PDS
# volume directory tree, assuming the checksum table is appropriate.
#
# Format: % chkpdssum [-i] [-f file] [-o outfile] [-x filepart] [ root_dir ]
#
#  where  -i          Ignore case in file names in the checksum table
#         -f file     Name of the checksum table file
#         -o outfile  Output file name
#         -x filepart Exclude files matching the 'filepart' string [may be
#                       repeated].  These are ignored in both the checksum
#                       file and in the directory tree.
#         root_dir    root directory of the tree to be checked.
#
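# The default name of the checksum file is "checksum.tab" (or "CHECKSUM.TAB"),
# looked for first in the index/ (or INDEX/) directory under root_dir, then
# in the current working directory, and finally in root_dir itself.  Output
# is to STDOUT unless an alternate output file is indicated.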
#
# 19 Jul 2006, A.C.Raugh
# 20 Sep 2006, ACR: Added "-x" option and changed to Getopt::Long interface
#
#============================================================================

use File::Find;
use Digest::MD5;
use Getopt::Long;

# Collect command line options:

GetOptions("i"   => \$opt_i,
           "f=s" => \$opt_f,
           "o=s" => \$opt_o,
           "x=s" => \@exclude);

# If we're ignoring case we force the match case to lower as well:

if ($opt_i && @exclude)
  { for ($i=0; $i<@exclude; $i++)
      { $exclude[$i] =~ tr/A-Z/a-z/; }
  }

# Set defaults:

$ROOT = ".";

if (@ARGV > 0)
  { $ROOT = $ARGV[0]; 
    $ROOT =~ s/\/$//;
  }
if (@ARGV > 1)
  { die "Usage: chkpdssum [-i] [-f file] [-o outfile] [-x string] [ root_dir ]\n"; }

$outfile = ($opt_o)? $opt_o : "-";
open (OUT,">$outfile") || die "Could not open output file '$outfile' ($!)";

# Find the checksum file:

if ($opt_f)
  { if (-f $opt_f)
      { $sumfile = $opt_f; }
    elsif (-f "$ROOT/$opt_f")
      { $sumfile = "$ROOT/$opt_f"; }
    else
      { die "Could not find checksum file '$opt_f'.\n"; }
  }
else
  { # Check first for an index/ directory with a checksum.tab file (since 
    # this is mainly for PDS volume environments):

    if (-f "$ROOT/index/checksum.tab")
      { $sumfile = "$ROOT/index/checksum.tab"; }
    elsif (-f "$ROOT/INDEX/CHECKSUM.TAB")
      { $sumfile = "$ROOT/INDEX/CHECKSUM.TAB"; }

    # Then look in the current working directory:

    elsif (-f "checksum.tab")
      { $sumfile = "checksum.tab"; }
    elsif (-f "CHECKSUM.TAB")
      { $sumfile = "CHECKSUM.TAB"; }

    # And finally check in the given root directory:

    elsif (-f "$ROOT/checksum.tab")
      { $sumfile = "$ROOT/checksum.tab"; }
    elsif (-f "$ROOT/CHECKSUM.TAB")
      { $sumfile = "$ROOT/CHECKSUM.TAB"; }

    # If it's not in any of those places, we fail:

    else
      { die "Could not find checksum.tab file.\n"; }

    # Otherwise, we'll save the name of the label file as well by substituting
    # the extension in the same case:

    $lblfile = $sumfile;
    $lblfile =~ s/\.tab$/.lbl/;
    $lblfile =~ s/\.TAB$/.LBL/;
  }

# Initialize the error counter:

$errors = 0;

# Set up the reusable checksum object:

$md5 = Digest::MD5->new;

# OK, we'll begin by collecting checksums for the files that actually are 
# in the root_dir directory and saving them into a hash.  We'll do this 
# the same way we did it in the 'mksum' routine:

find({wanted=>\&diskfiles, no_chdir=>1},$ROOT);

# Now we open the checksum file and start a loop through it, looking for a
# corresponding element in the %disksum hash for each checksum file line.

open(SUM,$sumfile) || die "Could not open $sumfile for reading ($!)";

while ($line=<SUM>)
  { $line =~ /^([0-9a-f]+) +(\S+)\s*$/;
    $sum  = $1;
    $file = $2;
    $file =~ s/^\*//;
    $save = $file;

    # If we're ignoring case, force the file name to lowercase:

    $file =~ tr/A-Z/a-z/ if ($opt_i);

    # Check for an exclusion match:

    foreach $string (@exclude)
      { next if ($file =~ /$string/); }

    # Now look for a corresponding entry in the disk file hash:

    if (! $disksum{$file})
      { printf OUT "$save from $sumfile not found in $ROOT/\n"; 
        $errors++;
      }
    elsif ($disksum{$file} ne $sum)
      { printf OUT "$save checksum does not match $diskname{$file}\n"; 
        $disksum{$file} = 1;
        $errors++;
      }

    # Otherwise we just reset the matched hash entry and carry on with 
    # the next line:

    $disksum{$file} = 1;
  }
close(SUM);

# Once we've finished with the files actually listed in the checksum 
# table, we check to see if there are any more unmatched entries in 
# the disk file hash:

foreach $file (sort (keys %disksum))
  { if ($disksum{$file} != 1)
      { printf OUT "$diskname{$file} found in $ROOT/ has no ";
        printf OUT "entry in $sumfile\n"; 
        $errors++;
      }
  }

# And we're done.  Print a success message if we've encountered no problems:

if ($errors == 0)
  { printf OUT "No discrepancies found.\n"; }

close(OUT);

#============================================================================

sub diskfiles

  # Routine to collect checksums for the files found in the root_dir
  # tree:

  { my ($sum,$file);
    my ($name);
    my ($lc_file);

    $file = $File::Find::name;

    # If we're ignoring case we'll need a lower-case version of this name:

    if ($opt_i)
      { $lc_file = $file;
        $lc_file =~ tr/A-Z/a-z/;
      }

    # No action if this is a directory:

    return if (-d $File::Find::name);

    # No action if the file name matches the exclusion string.  If we're
    # ignoring case, we'll check the forced-lower version:

    if ($opt_i)
      { foreach $string (@exclude)
          { return if ($lc_file =~ /$string/); }
      }
    else
      { foreach $string (@exclude)
          { return if ($File::Find::name =~ /$string/); }
      }

    # Logically we can never have an accurate checksum for the checksum
    # file itself, nor (probably) for its label.  So if we'll ignore these
    # files in the output as well:

    return if ($file eq $sumfile);
    return if ($file eq $lblfile);

    # Otherwise get the MD5 checksum:

    $md5->new;
    open(CHK,$File::Find::name) || 
      die "Couldn't open $File::Find::name for reading ($!)";
    binmode(CHK);
    $sum = $md5->addfile(*CHK)->hexdigest;
    close(CHK);

    # Clip the root directory name from the output file name and save the
    # result for future reference:

    $file =~ s/^$ROOT\///;
    $name = $file;

    # If we're ignoring case we force everything to lowercase before 
    # creating the hash entry:

    $file =~ tr/A-Z/a-z/ if ($opt_i);
    $disksum{$file}  = $sum;
    $diskname{$file} = $name;

    # And we're done:

    return;
  }
