#!/usr/local/bin/perl
# ncc - news consistency checker

# Version banner shown by the -V option.
# The RCS keywords are kept in a single-quoted string: inside double quotes
# Perl would interpolate $Revision, $Date and $State (and "$)") as variables
# and garble the banner.  The "$Keyword: value $" wrappers are then stripped
# so that only the values remain.
$VersionID =
  'v$Revision: 1.14 $ rev. $Date: 1995/09/15 19:36:51 $ GMT ($State: Exp $)'
  . "\n";
$VersionID =~ s/\$\w+: ([^\$]*) \$/$1/g;

# findmissing
# scan news spool, and report articles that are not in the history file.

# Written by Harry Bochner, 3/95
# This program may be distributed freely, but please annotate any changes,
# and please leave these comments in place.
#
# Modified 9/95 by Bill Davidsen (davidsen@tmr.com)
# Slower but more checking of non-newsgroup directories, files
# other than articles, etc.

# Default locations of the history file, the spool directory and the
# active file.  Adjust these three paths to the local configuration.
($history, $spool, $active) = (
  '/usr/news/history',
  '/usr/spool/news',
  '/usr/news/active',
);

# Command-line options (processed until the first non-option argument):
#   -v   increase verbosity; $verbose > 1 enables per-group/per-directory
#        detail in the scanning routines
#   -V   die with the version string
#   --   stop option processing
# Anything else starting with "-" is a fatal error.

$verbose = 0;
# "getopts.pl" doesn't really do what I want...
#   neither does this, but it's better
while (@ARGV && $ARGV[0] =~ /^-/) {
  $_ = shift(@ARGV);
  /^-v/ && (++$verbose, next);
  /^-V/ && die $VersionID;
  /^--/ && last;
  die "$0: unknown option ($_)";
}

# If a .ncc_config file exists in the current directory, its first three
# lines (history file, spool directory, active file -- in that order)
# replace the built-in default paths.  Any further lines are ignored.
if (-f ".ncc_config") {
  open(CONFIG, "<.ncc_config") || die "$0: .ncc_config: $!";
  ($history, $spool, $active) = (<CONFIG>);
  close(CONFIG);
  foreach ($history, $spool, $active) {
    # A short file would otherwise leave a path undefined and fail much
    # later with a confusing message.
    die "$0: .ncc_config: expected three lines (history, spool, active)\n"
      unless defined $_;
    # chomp, not chop: a last line without a trailing newline must not
    # lose its final character.
    chomp;
  }
  print STDERR "$history $spool $active\n" if $verbose;
}

# Open the three report files.  The names are relative, so they are created
# in the directory ncc was started from; this has to happen before the
# chdir into the spool below.
#   ncc:coredumps  spool files whose names do not look like articles
#   ncc:bogus      spool directories not matching any active group
#   ncc:missing    articles found in the spool but not in history
open(CORE, ">ncc:coredumps") || die "$0: ncc:coredumps: $!";
open(BOGUS, ">ncc:bogus") || die "$0: ncc:bogus: $!";
open(MISSING, ">ncc:missing") || die "$0: ncc:missing: $!";

# All spool paths handled from here on are relative to the spool root.
chdir($spool) || die "$0: $spool: $!";

# Read the active file and record every newsgroup in %is_group and every
# parent hierarchy of a newsgroup in %is_hier (comp.lang.perl marks
# comp.lang and comp as hierarchies).  A spool directory that appears in
# neither table is reported as bogus by scan().
# Lines whose fourth field starts with "=" and blank lines are ignored.
# Uses the globals $active and $verbose; dies if the file cannot be opened.

sub scan_active {
  my (@actparts, $group);
  open(ACT, "<${active}") || die "$0: $active: $!";
  print STDERR "scanning active\n" if $verbose;

  while (<ACT>) {
    @actparts = split;
    # a blank line must not register an empty group name
    next unless @actparts;
    # if the 4th field is =other.group skip this
    next if defined $actparts[3] && $actparts[3] =~ /^=/;
    $group = shift @actparts;
    # Set this as a group
    $is_group{$group} = 1;
    print STDERR "G: $group\n" if $verbose > 1;
    # Now define the hierarchy above the group: drop the last level each
    # time round.  Stop at the top, or at a level that is already known,
    # because everything above it was recorded when it was first seen.
    while ($group =~ s/\.[^.]+$//) {
      last if $is_hier{$group};
      $is_hier{$group} = 1;
      print STDERR "H: $group\n" if $verbose > 1;
    }
  }
  close(ACT);
}

# Walk the news spool recursively, relative to the current directory (the
# caller has already changed into the spool root).  For every directory that
# holds articles, store the lowest article number in $min{$group} and a bit
# string in $arts{$group} with bit (N - lowest) set for each article file N.
# check_history later clears the bits of articles listed in history.
#
# Arguments:
#   $group  dotted newsgroup name of the directory ("" for the spool root)
#   $path   slash-separated path of the directory ("" for the spool root)
#   $depth  recursion depth, 0 for the spool root
#
# Side effects:
#   - subdirectories found in neither %is_group nor %is_hier are written to
#     BOGUS and not descended into;
#   - files that are neither article numbers nor .overview go to CORE;
#   - $0 is rewritten for the first levels (on most systems this changes
#     the process title, giving a cheap progress indicator).

sub scan {
  my ($group, $path, $depth) = @_;
  my ($fn, @dirs, @nums, $fullname, $str);
  my $min = 0;
  my $max = 0;

  $0 = "scanning $group" if $depth <= 2;
  # progress output only with -v, like every other progress message
  print STDERR "scanning spool\n" if $verbose && $depth == 0;

  my $dir = $path || ".";
  unless (opendir(DIR, $dir)) {
    warn("$0: $dir: $!");
    return;
  }
  print STDERR "scanning $path\n" if $verbose > 1;

  # explicit defined(): an entry named "0" must not end the loop
  while (defined($fn = readdir(DIR))) {
    if (-d "$dir/$fn") {
      # Names containing a dot (this covers "." and "..") cannot be a
      # newsgroup component; lost+found belongs to the filesystem.
      next if $fn =~ /\./ || $fn eq "lost+found";
      # turn the directory path into a dotted group name
      ($fullname = "$dir/$fn") =~ s@^\./@@;
      $fullname =~ s@/@.@g;
      unless ($is_group{$fullname} || $is_hier{$fullname}) {
        # this dir is not part of the groups
        print BOGUS "$dir/$fn\n";
        next;
      }
      push(@dirs, $fn);
      next;
    }

    # .overview is legitimate here but is not an article: it must not
    # reach the min/max bookkeeping or the bit string below.
    next if $fn eq ".overview";

    # The pattern is anchored so that names which merely contain a digit
    # (core.123, 12.bak, ...) are not mistaken for article numbers.
    unless ($fn =~ /^[1-9][0-9]*$/) {
      # any filename other than numeric is bogus (ie. core)
      printf(CORE "%s/%s\n", $dir, $fn);
      next;
    }

    $min = $fn if $min == 0 || $min > $fn;
    $max = $fn if $fn > $max;
    push(@nums, $fn);
  }
  # DIR is a global handle; it is closed before recursing so that the
  # nested calls can reuse it.
  closedir(DIR);

  if ($min) {
    $min{$group} = $min;
    # one bit per article number in $min..$max, rounded up to whole bytes
    $str = "\0" x (int(($max - $min) / 8) + 1);
    foreach (@nums) {
      vec($str, $_ - $min, 1) = 1;
    }
    $arts{$group} = $str;
  }

  $group .= "." if $group;
  $path  .= "/" if $path;
  $depth++;
  foreach $fn (@dirs) {
    &scan("$group$fn", "$path$fn", $depth);
  }
}

# Read the history file and clear, in the per-group bit strings of %arts,
# the bit of every article that history lists.  Bits still set afterwards
# belong to articles that are in the spool but not in history.
# Each line is split on whitespace; the first two fields are skipped and
# every remaining field is read as a "group/number" location.
# Uses the globals $history, $verbose, %arts and %min; dies if the history
# file cannot be opened.

sub check_history {
  my (@locs, $group, $num, $offset);
  my $count = 0;		# locations seen, for the progress messages

  print STDERR "scanning history\n"
    if $verbose;

  open(IN, "<$history") || die "$0: $history: $!";
  while (<IN>) {
    @locs = split;
    splice(@locs, 0, 2);	# skip first two fields
    foreach (@locs) {
      ($group, $num) = split(m,/,);
      next unless $num;		# sanity check
      $count++;
      # nothing recorded for this group in the spool
      next unless defined $min{$group};
      $offset = $num - $min{$group};
      # Only clear bits that exist.  Assigning through vec() past the end
      # would grow the string, wasting memory on an article that is not
      # in the spool anyway.
      next if $offset < 0 || $offset >= 8 * length($arts{$group});
      vec($arts{$group}, $offset, 1) = 0;
    }
    print STDERR "$. lines, $count articles so far\n"
      if $verbose && $. % 50000 == 0;
    $0 = "history line $." if $. % 50000 == 0;
  }
  close(IN);

  print STDERR "done reading history, $count articles\n" if $verbose;
}

# Main sequence: learn the valid groups, index the spool, then cancel out
# everything the history file accounts for.
&scan_active;
&scan("", "", 0);
&check_history;

# Any bit still set in a group's bit string marks an article that is in the
# spool but not in history; write the path of each one to the MISSING report.

print STDERR "checking for missing articles\n" if $verbose;
foreach $group (sort keys %arts) {
  $min = $min{$group};
  # expand to one "0"/"1" character per article number, starting at $min
  $str = unpack("b*", $arts{$group});
  # recover memory?
  delete $arts{$group};
  delete $min{$group};
  next if $str !~ /1/;

  $path = "";
  # $min is advanced in step with the bit position, so inside the loop it
  # is always the article number that the current bit stands for.
  for ($i = 0; $i < length($str); $i++, $min++) {
    next unless substr($str, $i, 1);
    if (!$path) {
      # first hit in this group: derive its directory from the name
      ($path = $group) =~ s,\.,/,g;
      $0 = "findmissing $group";
    }
    print MISSING "$path/$min\n";
  }
}
