Example usage for org.apache.lucene.search IndexSearcher doc

List of usage examples for org.apache.lucene.search IndexSearcher doc

Introduction

In this page you can find the example usage for org.apache.lucene.search IndexSearcher doc.

Prototype

public Document doc(int docID) throws IOException 

Source Link

Document

Sugar for .getIndexReader().document(docID)

Usage

From source file:com.mathworks.xzheng.analysis.codec.MetaphoneAnalyzerTest.java

License:Apache License

public void testKoolKat() throws Exception {
    RAMDirectory directory = new RAMDirectory();
    Analyzer analyzer = new MetaphoneReplacementAnalyzer();

    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    IndexWriter writer = new IndexWriter(directory, config);

    Document doc = new Document();
    doc.add(new Field("contents", //#A
            "cool cat", Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc);/*  ww w. ja va2  s. c o  m*/
    writer.close();

    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory));

    Query query = new QueryParser(Version.LUCENE_46, //#B
            "contents", analyzer) //#B
                    .parse("kool kat"); //#B

    TopDocs hits = searcher.search(query, 1);
    assertEquals(1, hits.totalHits); //#C
    int docID = hits.scoreDocs[0].doc;
    doc = searcher.doc(docID);
    assertEquals("cool cat", doc.get("contents")); //#D

}

From source file:com.mathworks.xzheng.extsearch.payloads.PayloadsTest.java

License:Apache License

public void testPayloadTermQuery() throws Throwable {
    addDoc("Hurricane warning",
            "Bulletin: A hurricane warning was issued at " + "6 AM for the outer great banks");
    addDoc("Warning label maker", "The warning label maker is a delightful toy for "
            + "your precocious seven year old's warning needs");
    addDoc("Tornado warning",
            "Bulletin: There is a tornado warning for " + "Worcester county until 6 PM today");

    IndexReader r = writer.getReader();//from ww  w . j  a  v a  2s  .  co  m
    writer.close();

    IndexSearcher searcher = new IndexSearcher(r);

    searcher.setSimilarity(new BoostingSimilarity());

    Term warning = new Term("contents", "warning");

    Query query1 = new TermQuery(warning);
    System.out.println("\nTermQuery results:");
    TopDocs hits = searcher.search(query1, 10);
    TestUtil.dumpHits(searcher, hits);

    assertEquals("Warning label maker", // #B
            searcher.doc(hits.scoreDocs[0].doc).get("title")); // #B

    Query query2 = new PayloadTermQuery(warning, new AveragePayloadFunction());
    System.out.println("\nPayloadTermQuery results:");
    hits = searcher.search(query2, 10);
    TestUtil.dumpHits(searcher, hits);

    assertEquals("Warning label maker", // #C
            searcher.doc(hits.scoreDocs[2].doc).get("title")); // #C
    r.close();

}

From source file:com.mathworks.xzheng.meetlucene.Searcher.java

License:Apache License

public static void search(String indexDir, String q) throws IOException, ParseException {

    Directory dir = FSDirectory.open(new File(indexDir)); //3
    IndexSearcher is = new IndexSearcher(DirectoryReader.open(dir)); //3

    QueryParser parser = new QueryParser(Version.LUCENE_46, // 4
            "contents", //4
            new StandardAnalyzer( //4
                    Version.LUCENE_46)); //4
    Query query = parser.parse(q); //4
    long start = System.currentTimeMillis();
    TopDocs hits = is.search(query, 10); //5
    long end = System.currentTimeMillis();

    System.err.println("Found " + hits.totalHits + //6
            " document(s) (in " + (end - start) + // 6
            " milliseconds) that matched query '" + // 6
            q + "':"); // 6

    for (ScoreDoc scoreDoc : hits.scoreDocs) {
        Document doc = is.doc(scoreDoc.doc); //7
        System.out.println(doc.get("fullpath")); //8
    }/*from w  w  w .j ava 2  s  . c o  m*/

    is.getIndexReader().close(); //9
}

From source file:com.mathworks.xzheng.searching.BasicSearchingTest.java

License:Apache License

public void testQueryParser() throws Exception {
    Directory dir = TestUtil.getBookIndexDirectory();
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));

    QueryParser parser = new QueryParser(Version.LUCENE_46, //A
            "contents", //A
            new SimpleAnalyzer(Version.LUCENE_46)); //A

    Query query = parser.parse("+JUNIT +ANT -MOCK"); //B
    TopDocs docs = searcher.search(query, 10);
    assertEquals(1, docs.totalHits);/*from w  ww. j a v  a2s  .  co m*/
    Document d = searcher.doc(docs.scoreDocs[0].doc);
    assertEquals("Ant in Action", d.get("title"));

    query = parser.parse("mock OR junit"); //B
    docs = searcher.search(query, 10);
    assertEquals("Ant in Action, " + "JUnit in Action, Second Edition", 2, docs.totalHits);

    dir.close();
}

From source file:com.mathworks.xzheng.searching.Explainer.java

License:Apache License

public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: Explainer <index dir> <query>");
        System.exit(1);//from  www  . j  a  va  2s.  c  o m
    }

    String indexDir = args[0];
    String queryExpression = args[1];

    Directory directory = FSDirectory.open(new File(indexDir));
    QueryParser parser = new QueryParser(Version.LUCENE_46, "contents", new SimpleAnalyzer(Version.LUCENE_46));
    Query query = parser.parse(queryExpression);

    System.out.println("Query: " + queryExpression);

    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory));
    TopDocs topDocs = searcher.search(query, 10);

    for (ScoreDoc match : topDocs.scoreDocs) {
        Explanation explanation = searcher.explain(query, match.doc); //#A

        System.out.println("----------");
        Document doc = searcher.doc(match.doc);
        System.out.println(doc.get("title"));
        System.out.println(explanation.toString()); //#B
    }

    directory.close();
}

From source file:com.mathworks.xzheng.searching.ScoreTest.java

License:Apache License

public void testFuzzy() throws Exception {
    indexSingleFieldDocs(new Field[] { new Field("contents", "fuzzy", Field.Store.YES, Field.Index.ANALYZED),
            new Field("contents", "wuzzy", Field.Store.YES, Field.Index.ANALYZED) });

    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory));
    Query query = new FuzzyQuery(new Term("contents", "wuzza"));
    TopDocs matches = searcher.search(query, 10);
    assertEquals("both close enough", 2, matches.totalHits);

    assertTrue("wuzzy closer than fuzzy", matches.scoreDocs[0].score != matches.scoreDocs[1].score);

    Document doc = searcher.doc(matches.scoreDocs[0].doc);
    assertEquals("wuzza bear", "wuzzy", doc.get("contents"));

}

From source file:com.mathworks.xzheng.tools.HighlightTest.java

License:Apache License

public void testHits() throws Exception {
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(TestUtil.getBookIndexDirectory()));
    TermQuery query = new TermQuery(new Term("title", "action"));
    TopDocs hits = searcher.search(query, 10);

    QueryScorer scorer = new QueryScorer(query, "title");
    Highlighter highlighter = new Highlighter(scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));

    Analyzer analyzer = new SimpleAnalyzer(Version.LUCENE_46);

    for (ScoreDoc sd : hits.scoreDocs) {
        Document doc = searcher.doc(sd.doc);
        String title = doc.get("title");

        TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), sd.doc, "title", doc,
                analyzer);/* ww  w.  j av  a  2s .c  om*/
        String fragment = highlighter.getBestFragment(stream, title);

        System.out.println(fragment);
    }
}

From source file:com.mathworks.xzheng.tools.remote.SearchClient.java

License:Apache License

private static void search(String name, String word) throws Exception {
    TermQuery query = new TermQuery(new Term("word", word));

    IndexSearcher searcher = //2
            (IndexSearcher) searcherCache.get(name); //2

    if (searcher == null) {
        searcher = new IndexSearcher(new MultiReader[] { lookupRemote(name) }); //3
        searcherCache.put(name, searcher);
    }/*from   w w  w .j ava2s.  c om*/

    long begin = new Date().getTime(); //4
    TopDocs hits = searcher.search(query, 10); //4
    long end = new Date().getTime(); //4

    System.out.print("Searched " + name + " for '" + word + "' (" + (end - begin) + " ms): ");

    if (hits.scoreDocs.length == 0) {
        System.out.print("<NONE FOUND>");
    }

    for (ScoreDoc sd : hits.scoreDocs) {
        Document doc = searcher.doc(sd.doc);
        String[] values = doc.getValues("syn");
        for (String syn : values) {
            System.out.print(syn + " ");
        }
    }
    System.out.println();
    System.out.println();
    // 5
}

From source file:com.mathworks.xzheng.tools.SpatialLuceneExample.java

License:Apache License

public void findNear(String what, double latitude, double longitude, double radius)
        throws CorruptIndexException, IOException {
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory));

    DistanceQueryBuilder dq;/*  w  w w.j av  a 2 s  .  c o m*/
    dq = new DistanceQueryBuilder(latitude, // #A
            longitude, // #A
            radius, // #A
            latField, // #A
            lngField, // #A
            tierPrefix, // #A
            true); // #A

    Query tq;
    if (what == null)
        tq = new TermQuery(new Term("metafile", "doc")); // #B
    else
        tq = new TermQuery(new Term("name", what));

    DistanceFieldComparatorSource dsort; // #C
    dsort = new DistanceFieldComparatorSource( // #C
            dq.getDistanceFilter()); // #C
    Sort sort = new Sort(new SortField("foo", dsort)); // #C

    TopDocs hits = searcher.search(tq, dq.getFilter(), 10, sort);

    Map<Integer, Double> distances = // #D
            dq.getDistanceFilter().getDistances(); // #D

    System.out.println("Number of results: " + hits.totalHits);
    System.out.println("Found:");
    for (ScoreDoc sd : hits.scoreDocs) {
        int docID = sd.doc;
        Document d = searcher.doc(docID);

        String name = d.get("name");
        double rsLat = NumericUtils.prefixCodedToDouble(d.get(latField));
        double rsLng = NumericUtils.prefixCodedToDouble(d.get(lngField));
        Double geo_distance = distances.get(docID);

        System.out.printf(name + ": %.2f Miles\n", geo_distance);
        System.out.println("\t\t(" + rsLat + "," + rsLng + ")");
    }
}

From source file:com.meizu.nlp.classification.BooleanPerceptronClassifier.java

License:Apache License

/**
 * {@inheritDoc}/* ww w .  j  ava  2  s .c o  m*/
 */
@Override
public void train(LeafReader leafReader, String textFieldName, String classFieldName, Analyzer analyzer,
        Query query) throws IOException {
    this.textTerms = MultiFields.getTerms(leafReader, textFieldName);

    if (textTerms == null) {
        throw new IOException("term vectors need to be available for field " + textFieldName);
    }

    this.analyzer = analyzer;
    this.textFieldName = textFieldName;

    if (threshold == null || threshold == 0d) {
        // automatic assign a threshold
        long sumDocFreq = leafReader.getSumDocFreq(textFieldName);
        if (sumDocFreq != -1) {
            this.threshold = (double) sumDocFreq / 2d;
        } else {
            throw new IOException("threshold cannot be assigned since term vectors for field " + textFieldName
                    + " do not exist");
        }
    }

    // TODO : remove this map as soon as we have a writable FST
    SortedMap<String, Double> weights = new TreeMap<>();

    TermsEnum termsEnum = textTerms.iterator();
    BytesRef textTerm;
    while ((textTerm = termsEnum.next()) != null) {
        weights.put(textTerm.utf8ToString(), (double) termsEnum.totalTermFreq());
    }
    updateFST(weights);

    IndexSearcher indexSearcher = new IndexSearcher(leafReader);

    int batchCount = 0;

    BooleanQuery q = new BooleanQuery();
    q.add(new BooleanClause(new WildcardQuery(new Term(classFieldName, "*")), BooleanClause.Occur.MUST));
    if (query != null) {
        q.add(new BooleanClause(query, BooleanClause.Occur.MUST));
    }
    // run the search and use stored field values
    for (ScoreDoc scoreDoc : indexSearcher.search(q, Integer.MAX_VALUE).scoreDocs) {
        Document doc = indexSearcher.doc(scoreDoc.doc);

        IndexableField textField = doc.getField(textFieldName);

        // get the expected result
        IndexableField classField = doc.getField(classFieldName);

        if (textField != null && classField != null) {
            // assign class to the doc
            ClassificationResult<Boolean> classificationResult = assignClass(textField.stringValue());
            Boolean assignedClass = classificationResult.getAssignedClass();

            Boolean correctClass = Boolean.valueOf(classField.stringValue());
            long modifier = correctClass.compareTo(assignedClass);
            if (modifier != 0) {
                updateWeights(leafReader, scoreDoc.doc, assignedClass, weights, modifier,
                        batchCount % batchSize == 0);
            }
            batchCount++;
        }
    }
    weights.clear(); // free memory while waiting for GC
}