#!/usr/bin/perl -w

### retrieve_with_disk_based_LSA.pl

#use lib '../blib/lib', '../blib/arch';

use strict;
use Algorithm::VSM;

#   Remind the user of this script's precondition: it only reads a model
#   that an earlier run of retrieve_with_LSA.pl left on disk, so those
#   database files must still be the ones written for the same corpus.
#   (The last line used to name a "retrieve_with_LDA()" script, which
#   matches no other script name mentioned in this file; it now names
#   retrieve_with_LSA.pl.)
print "\nIMPORTANT:  We assume that you have previously called\n\n" .
      "                   retrieve_with_LSA.pl              \n\n" .
      "on the same corpus and that the database files generated by\n" .
      "that call were not overwritten by intervening calls to either\n" .
      "the retrieve_with_LSA.pl script or the retrieve_with_VSM.pl script.\n\n";

#   Words of the query that is run against the stored model.  A few other
#   sample queries are kept here for experimentation:
#       qw/ yobj0 yobj1 /
#       qw/ scope declaration assign member local test void static /
#       qw/ program listiterator add arraylist args /
my @query_words = qw(
    string getallchars throw ioexception
    distinct treemap histogram map
);

#   The constructor is handed the names of three databases that, per the
#   notes that accompany this script, are created by calling the
#   retrieve_with_LSA.pl script:
#
#       corpus_vocab_db      the corpus vocabulary and the term frequencies
#                            for the vocabulary words
#       doc_vectors_db       the term frequency vectors for the individual
#                            documents in the corpus
#       lsa_doc_vectors_db   the reduced dimensionality doc vectors of the
#                            LSA model
#
#   max_number_retrievals presumably limits how many documents are
#   reported -- confirm against the Algorithm::VSM documentation.
my $lsa_engine = Algorithm::VSM->new(
    corpus_vocab_db       => 'corpus_vocab_db',
    doc_vectors_db        => 'doc_vectors_db',
    lsa_doc_vectors_db    => 'lsa_doc_vectors_db',
    max_number_retrievals => 10,
#   debug                 => 1,
);

#   Load the previously stored model instead of re-reading the corpus.
$lsa_engine->upload_lsa_model_from_disk();

#   To see the corpus vocabulary, enable the next statement:
#$lsa_engine->display_corpus_vocab();

#   To see the doc vectors for each of the documents in the corpus,
#   enable the next statement:
#$lsa_engine->display_doc_vectors();

#   NOTE(review): the LSA model is constructed here even though one was
#   just uploaded from disk; whether both steps are required depends on the
#   installed Algorithm::VSM version -- TODO confirm before removing either.
$lsa_engine->construct_lsa_model();

#   Run the query (the words are passed as an array reference) and print
#   whatever the module returns for it.
my $hits = $lsa_engine->retrieve_with_lsa( \@query_words );
$lsa_engine->display_retrievals( $hits );

