Example usage for org.apache.lucene.index IndexReader close

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexReader close.

Prototype

@Override
public final synchronized void close() throws IOException

Source Link

Document

Closes files associated with this index.

Usage

From source file:ca.gnewton.lusql.core.ViewIndex.java

License:Apache License

/**
 * Describe <code>main</code> method here.
 *
 * @param args a <code>String</code> value
 *//*from w  w w. java  2s  .com*/
public static final void main(final String[] args) {
    try {
        IndexReader reader = IndexReader.open(FSDirectory.open(new File(args[0])));
        System.out.println("# of documents indexed: " + (reader.maxDoc() - 1));

        int maxDoc = reader.maxDoc();
        for (int i = 0; i < maxDoc; i++) {
            Document doc = reader.document(i);
            printDoc(doc, i);

        }

        reader.close();
    } catch (Throwable t) {
        t.printStackTrace();
    }

}

From source file:cc.twittertools.index.ExtractTermStatisticsFromIndex.java

License:Apache License

@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("dir").hasArg().withDescription("index").create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("min").create(MIN_OPTION));

    CommandLine cmdline = null;//from ww w  . j  a  va  2  s. c o m
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(ExtractTermStatisticsFromIndex.class.getName(), options);
        System.exit(-1);
    }

    String indexLocation = cmdline.getOptionValue(INDEX_OPTION);
    int min = cmdline.hasOption(MIN_OPTION) ? Integer.parseInt(cmdline.getOptionValue(MIN_OPTION)) : 1;

    PrintStream out = new PrintStream(System.out, true, "UTF-8");

    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation)));
    Terms terms = SlowCompositeReaderWrapper.wrap(reader).terms(StatusField.TEXT.name);
    TermsEnum termsEnum = terms.iterator(TermsEnum.EMPTY);

    long missingCnt = 0;
    int skippedTerms = 0;
    BytesRef bytes = new BytesRef();
    while ((bytes = termsEnum.next()) != null) {
        byte[] buf = new byte[bytes.length];
        System.arraycopy(bytes.bytes, 0, buf, 0, bytes.length);
        String term = new String(buf, "UTF-8");
        int df = termsEnum.docFreq();
        long cf = termsEnum.totalTermFreq();

        if (df < min) {
            skippedTerms++;
            missingCnt += cf;
            continue;
        }

        out.println(term + "\t" + df + "\t" + cf);
    }

    reader.close();
    out.close();
    System.err.println("skipped terms: " + skippedTerms + ", cnt: " + missingCnt);
}

From source file:cc.twittertools.index.ExtractTweetidsFromIndex.java

License:Apache License

@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(/*from   w w w.  j  a  v a  2  s.c o  m*/
            OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(ExtractTweetidsFromIndex.class.getName(), options);
        System.exit(-1);
    }

    File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION));
    if (!indexLocation.exists()) {
        System.err.println("Error: " + indexLocation + " does not exist!");
        System.exit(-1);
    }

    IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation));
    PrintStream out = new PrintStream(System.out, true, "UTF-8");
    for (int i = 0; i < reader.maxDoc(); i++) {
        Document doc = reader.document(i);
        out.println(doc.getField(StatusField.ID.name).stringValue() + "\t"
                + doc.getField(StatusField.SCREEN_NAME.name).stringValue());
    }
    out.close();
    reader.close();
}

From source file:cc.twittertools.search.local.RunQueries.java

License:Apache License

@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(//from  w w  w  .j  a v a 2 s. c  om
            OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return")
            .create(NUM_RESULTS_OPTION));
    options.addOption(OptionBuilder.withArgName("file").hasArg()
            .withDescription("file containing topics in TREC format").create(QUERIES_OPTION));
    options.addOption(OptionBuilder.withArgName("similarity").hasArg()
            .withDescription("similarity to use (BM25, LM)").create(SIMILARITY_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION));
    options.addOption(new Option(VERBOSE_OPTION, "print out complete document"));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(QUERIES_OPTION) || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(RunQueries.class.getName(), options);
        System.exit(-1);
    }

    File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION));
    if (!indexLocation.exists()) {
        System.err.println("Error: " + indexLocation + " does not exist!");
        System.exit(-1);
    }

    String runtag = cmdline.hasOption(RUNTAG_OPTION) ? cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG;

    String topicsFile = cmdline.getOptionValue(QUERIES_OPTION);

    int numResults = 1000;
    try {
        if (cmdline.hasOption(NUM_RESULTS_OPTION)) {
            numResults = Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION));
        }
    } catch (NumberFormatException e) {
        System.err.println("Invalid " + NUM_RESULTS_OPTION + ": " + cmdline.getOptionValue(NUM_RESULTS_OPTION));
        System.exit(-1);
    }

    String similarity = "LM";
    if (cmdline.hasOption(SIMILARITY_OPTION)) {
        similarity = cmdline.getOptionValue(SIMILARITY_OPTION);
    }

    boolean verbose = cmdline.hasOption(VERBOSE_OPTION);

    PrintStream out = new PrintStream(System.out, true, "UTF-8");

    IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation));
    IndexSearcher searcher = new IndexSearcher(reader);

    if (similarity.equalsIgnoreCase("BM25")) {
        searcher.setSimilarity(new BM25Similarity());
    } else if (similarity.equalsIgnoreCase("LM")) {
        searcher.setSimilarity(new LMDirichletSimilarity(2500.0f));
    }

    QueryParser p = new QueryParser(Version.LUCENE_43, StatusField.TEXT.name, IndexStatuses.ANALYZER);

    TrecTopicSet topics = TrecTopicSet.fromFile(new File(topicsFile));
    for (TrecTopic topic : topics) {
        Query query = p.parse(topic.getQuery());
        Filter filter = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, topic.getQueryTweetTime(),
                true, true);

        TopDocs rs = searcher.search(query, filter, numResults);

        int i = 1;
        for (ScoreDoc scoreDoc : rs.scoreDocs) {
            Document hit = searcher.doc(scoreDoc.doc);
            out.println(String.format("%s Q0 %s %d %f %s", topic.getId(),
                    hit.getField(StatusField.ID.name).numericValue(), i, scoreDoc.score, runtag));
            if (verbose) {
                out.println("# " + hit.toString().replaceAll("[\\n\\r]+", " "));
            }
            i++;
        }
    }
    reader.close();
    out.close();
}

From source file:cc.twittertools.search.local.SearchStatuses.java

License:Apache License

@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(/*  w  w w  .ja  v a 2 s.  c  o m*/
            OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("query id").create(QID_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("query text").create(QUERY_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("maxid").create(MAX_ID_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return")
            .create(NUM_RESULTS_OPTION));
    options.addOption(OptionBuilder.withArgName("similarity").hasArg()
            .withDescription("similarity to use (BM25, LM)").create(SIMILARITY_OPTION));
    options.addOption(new Option(VERBOSE_OPTION, "print out complete document"));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(QUERY_OPTION) || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(SearchStatuses.class.getName(), options);
        System.exit(-1);
    }

    File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION));
    if (!indexLocation.exists()) {
        System.err.println("Error: " + indexLocation + " does not exist!");
        System.exit(-1);
    }

    String qid = cmdline.hasOption(QID_OPTION) ? cmdline.getOptionValue(QID_OPTION) : DEFAULT_QID;
    String queryText = cmdline.hasOption(QUERY_OPTION) ? cmdline.getOptionValue(QUERY_OPTION) : DEFAULT_Q;
    String runtag = cmdline.hasOption(RUNTAG_OPTION) ? cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG;
    long maxId = cmdline.hasOption(MAX_ID_OPTION) ? Long.parseLong(cmdline.getOptionValue(MAX_ID_OPTION))
            : DEFAULT_MAX_ID;
    int numResults = cmdline.hasOption(NUM_RESULTS_OPTION)
            ? Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION))
            : DEFAULT_NUM_RESULTS;
    boolean verbose = cmdline.hasOption(VERBOSE_OPTION);

    String similarity = "LM";
    if (cmdline.hasOption(SIMILARITY_OPTION)) {
        similarity = cmdline.getOptionValue(SIMILARITY_OPTION);
    }

    PrintStream out = new PrintStream(System.out, true, "UTF-8");

    IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation));
    IndexSearcher searcher = new IndexSearcher(reader);

    if (similarity.equalsIgnoreCase("BM25")) {
        searcher.setSimilarity(new BM25Similarity());
    } else if (similarity.equalsIgnoreCase("LM")) {
        searcher.setSimilarity(new LMDirichletSimilarity(2500.0f));
    }

    QueryParser p = new QueryParser(Version.LUCENE_43, IndexStatuses.StatusField.TEXT.name,
            IndexStatuses.ANALYZER);
    Query query = p.parse(queryText);
    Filter filter = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, maxId, true, true);

    TopDocs rs = searcher.search(query, filter, numResults);

    int i = 1;
    for (ScoreDoc scoreDoc : rs.scoreDocs) {
        Document hit = searcher.doc(scoreDoc.doc);

        out.println(String.format("%s Q0 %s %d %f %s", qid, hit.getField(StatusField.ID.name).numericValue(), i,
                scoreDoc.score, runtag));
        if (verbose) {
            out.println("# " + hit.toString().replaceAll("[\\n\\r]+", " "));
        }
        i++;
    }

    reader.close();
    out.close();
}

From source file:ccc.plugins.search.lucene.SimpleLuceneFS.java

License:Open Source License

private void similar(final String uuid, final int maxHits, final CapturingHandler ch) {
    if (uuid == null) {
        return;//w w w.j  a  v  a 2  s.  c om
    }
    IndexReader ir = null;
    IndexSearcher searcher = null;
    try {
        ir = IndexReader.open(createDirectory());
        searcher = new IndexSearcher(ir);
        final int docNum = docNumber(uuid, searcher);

        if (docNum == -1) {
            return;
        }
        final MoreLikeThis mlt = new MoreLikeThis(ir);
        mlt.setFieldNames(new String[] { DEFAULT_FIELD });
        mlt.setMinDocFreq(2);
        final Query query = mlt.like(docNum);
        ch.handle(searcher, searcher.search(query, maxHits));
    } catch (final IOException e) {
        LOG.warn("Error performing query.", e);
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (final IOException e) {
                Exceptions.swallow(e);
            }
        }
        if (ir != null) {
            try {
                ir.close();
            } catch (final IOException e) {
                Exceptions.swallow(e);
            }
        }
    }
}

From source file:choco.lucene.IKAnalyzerDemo.java

License:Apache License

public static void main(String[] args) {
    //Lucene Document?? 
    String fieldName = "text";
    // //  w w  w . j  a v a2  s  . c o m
    String text = "IK Analyzer???????";
    //IKAnalyzer? 
    Analyzer analyzer = new IKAnalyzer();
    Directory directory = null;
    IndexWriter iwriter = null;
    IndexReader ireader = null;
    IndexSearcher isearcher = null;
    try {
        // 
        directory = new RAMDirectory();
        //?IndexWriterConfig 
        IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_34, analyzer);
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwriter = new IndexWriter(directory, iwConfig);
        //          
        Document doc = new Document();
        doc.add(new Field("ID", "10000", Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field(fieldName, text, Field.Store.YES, Field.Index.ANALYZED));
        iwriter.addDocument(doc);
        iwriter.close();
        //?********************************** 
        //? 
        ireader = IndexReader.open(directory);
        isearcher = new IndexSearcher(ireader);
        String keyword = "?";
        //QueryParser?Query 
        QueryParser qp = new QueryParser(Version.LUCENE_34, fieldName, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);
        //?5? 
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("" + topDocs.totalHits);
        // 
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < topDocs.totalHits; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println("" + targetDoc.toString());
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:ci6226.eval_index_reader.java

public eval_index_reader(Analyzer _analyzer, String _dir, String[] _searchList, int _topn)
        throws IOException, org.apache.lucene.queryparser.classic.ParseException, InvalidTokenOffsetsException {
    String indexdir = "./" + _dir;
    String field = "text";
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexdir)));
    IndexSearcher searcher = new IndexSearcher(reader);
    PrintWriter writer = new PrintWriter(_dir + ".csv", "UTF-8");

    Searchit(reader, searcher, _analyzer, field, _searchList, _topn, writer);

    searcher.setSimilarity(new similarity_tf_rm());
    Searchit(reader, searcher, _analyzer, field, _searchList, _topn, writer);

    searcher.setSimilarity(new similiarty_queryNorm());
    Searchit(reader, searcher, _analyzer, field, _searchList, _topn, writer);

    writer.close();//w w w. j av  a2s  .  c om
    reader.close();
    /// searcher.setSimilarity(null);

}

From source file:ci6226.facetsearch.java

public static void main(String[] args) throws Exception {
    String index = "./myindex";
    String field = "text";
    String queries = null;/*from   ww  w .  j a  va  2s . c  om*/
    int hitsPerPage = 10;
    boolean raw = false;

    //http://lucene.apache.org/core/4_0_0/facet/org/apache/lucene/facet/doc-files/userguide.html#facet_accumulation

    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
    IndexSearcher searcher = new IndexSearcher(reader);
    // :Post-Release-Update-Version.LUCENE_XY:

    //TODO: SAME AS HOW U BUILD INDEX
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
    BufferedReader in = null;
    if (queries != null) {
        in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
    } else {
        in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
    }
    // :Post-Release-Update-Version.LUCENE_XY:
    QueryParser parser = new QueryParser(Version.LUCENE_47, field, analyzer);
    while (true) {

        System.out.println("Enter query: ");
        String line = in.readLine();
        line = line.trim();
        if (line.length() == 0) {
            break;
        }
        Query query = parser.parse(line);
        System.out.println("Searching for: " + query.toString(field));
        Date start = new Date();
        searcher.search(query, null, 100);
        Date end = new Date();
        System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
        TopDocs results = searcher.search(query, 5 * hitsPerPage);
        ScoreDoc[] hits = results.scoreDocs;
        int numTotalHits = results.totalHits;

        //N= max docs
        //df = totoal matched doc
        //idf=log(N/df)

        for (int i = 0; i < hits.length; i++) {
            Document doc = searcher.doc(hits[i].doc);
            System.out.println(ANSI_BLUE + (i + 1) + ANSI_RESET + "\nScore=\t" + hits[i].score);
            String rtext = doc.get(field);
            System.out.println("Text=\t" + rtext);

            Terms vector = reader.getTermVector(i, "text");
            if (vector == null)
                continue;
            // System.out.println(vector.getSumDocFreq());

            // Terms vector = reader.getTermVector(hits[i].doc, field);  //hits[i].doc=docID
            TermsEnum termsEnum = vector.iterator(null);
            termsEnum = vector.iterator(termsEnum);
            Map<String, Integer> frequencies = new HashMap<>();
            BytesRef text = null;
            while ((text = termsEnum.next()) != null) {
                String term = text.utf8ToString();
                int freq = (int) termsEnum.totalTermFreq();
                frequencies.put(term, freq);
                // System.out.println("Time: "+term + " idef "+freq);
            }

        }

        //   String[] facetCatlog={""};

        System.out.println(numTotalHits + " total matching documents");

    }

    reader.close();
}

From source file:ci6226.loadIndex.java

public static void main(String[] args) throws Exception {
    String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details.";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);/*from  ww  w .j av a2 s  .c  o  m*/
    }
    String index = "./myindex";
    String field = "text";
    String queries = null;
    int hitsPerPage = 10;
    boolean raw = false;

    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
    IndexSearcher searcher = new IndexSearcher(reader);
    // :Post-Release-Update-Version.LUCENE_XY:

    //TODO: SAME AS HOW U BUILD INDEX
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
    BufferedReader in = null;
    if (queries != null) {
        in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
    } else {
        in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
    }
    // :Post-Release-Update-Version.LUCENE_XY:
    QueryParser parser = new QueryParser(Version.LUCENE_47, field, analyzer);
    while (true) {

        System.out.println("Enter query: ");
        String line = in.readLine();
        line = line.trim();
        if (line.length() == 0) {
            break;
        }
        Query query = parser.parse(line);
        System.out.println("Searching for: " + query.toString(field));
        Date start = new Date();
        searcher.search(query, null, 100);
        Date end = new Date();
        System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
        doPagingSearch(in, searcher, query, hitsPerPage, raw, true, analyzer);
    }

    reader.close();
}