Example usage for org.apache.lucene.search IndexSearcher search

Introduction

On this page you can find usage examples for the search method of org.apache.lucene.search.IndexSearcher.

Prototype

public <C extends Collector, T> T search(Query query, CollectorManager<C, T> collectorManager)
        throws IOException 

Document

Lower-level search API.
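Note that all usage examples below call the convenience overload search(Query query, int n), which collects the top n hits into a TopDocs. The search(Query, CollectorManager) prototype documented above is the lower-level entry point: the searcher obtains one Collector per index slice from newCollector() and merges the per-slice results with reduce(). The following is a minimal sketch of that overload, assuming a Lucene version that provides it (5.1 or later); the directory, field name, and term are placeholders.

import java.io.IOException;
import java.util.Collection;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.store.Directory;

public class CollectorManagerSketch {

    /** Counts all documents matching a term query via search(Query, CollectorManager). */
    public static int countHits(Directory dir) throws IOException {
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Query query = new TermQuery(new Term("field", "value")); // placeholder field/term

            CollectorManager<TotalHitCountCollector, Integer> manager =
                    new CollectorManager<TotalHitCountCollector, Integer>() {
                        @Override
                        public TotalHitCountCollector newCollector() {
                            // one collector per index slice (a single one when no executor is set)
                            return new TotalHitCountCollector();
                        }

                        @Override
                        public Integer reduce(Collection<TotalHitCountCollector> collectors) {
                            // merge per-slice counts into the final result
                            int total = 0;
                            for (TotalHitCountCollector c : collectors) {
                                total += c.getTotalHits();
                            }
                            return total;
                        }
                    };

            return searcher.search(query, manager); // the result type T is Integer here
        }
    }
}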

Usage

From source file:com.mathworks.xzheng.tools.BooksMoreLikeThis.java

License:Apache License

public static void main(String[] args) throws Throwable {

    String indexDir = System.getProperty("index.dir");
    FSDirectory directory = FSDirectory.open(new File(indexDir));
    IndexReader reader = DirectoryReader.open(directory);

    IndexSearcher searcher = new IndexSearcher(reader);

    int numDocs = reader.maxDoc();

    MoreLikeThis mlt = new MoreLikeThis(reader); // builds "more like this" queries from the reader
    mlt.setFieldNames(new String[] { "title", "author" });
    mlt.setMinTermFreq(1); // include terms that occur even once
    mlt.setMinDocFreq(1);

    for (int docID = 0; docID < numDocs; docID++) { // iterate over every document in the index
        System.out.println();
        Document doc = reader.document(docID);
        System.out.println(doc.get("title"));

        Query query = mlt.like(docID); // build a similarity query from this document's terms
        System.out.println("  query=" + query);

        TopDocs similarDocs = searcher.search(query, 10);
        if (similarDocs.totalHits == 0)
            System.out.println("  None like this");
        for (int i = 0; i < similarDocs.scoreDocs.length; i++) {
            if (similarDocs.scoreDocs[i].doc != docID) { // skip the source document itself
                doc = reader.document(similarDocs.scoreDocs[i].doc);
                System.out.println("  -> " + doc.getField("title").stringValue());
            }
        }
    }

    reader.close();
    directory.close();
}

From source file:com.mathworks.xzheng.tools.FastVectorHighlighterSample.java

License:Apache License

static void searchIndex(String filename) throws Exception {
    QueryParser parser = new QueryParser(Version.LUCENE_46, F, analyzer);
    Query query = parser.parse(QUERY);
    FastVectorHighlighter highlighter = getHighlighter(); // obtain a configured highlighter
    FieldQuery fieldQuery = highlighter.getFieldQuery(query); // derive the highlighting query
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
    TopDocs docs = searcher.search(query, 10);

    FileWriter writer = new FileWriter(filename);
    writer.write("<html>");
    writer.write("<body>");
    writer.write("<p>QUERY : " + QUERY + "</p>");
    for (ScoreDoc scoreDoc : docs.scoreDocs) {
        String snippet = highlighter.getBestFragment( // best fragment for this hit,
                fieldQuery, searcher.getIndexReader(), // built from the stored term vectors,
                scoreDoc.doc, F, 100); // at most 100 characters long
        if (snippet != null) {
            writer.write(scoreDoc.doc + " : " + snippet + "<br/>");
        }
    }
    writer.write("</body></html>");
    writer.close();

}

From source file:com.mathworks.xzheng.tools.HighlightTest.java

License:Apache License

public void testHits() throws Exception {
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(TestUtil.getBookIndexDirectory()));
    TermQuery query = new TermQuery(new Term("title", "action"));
    TopDocs hits = searcher.search(query, 10);

    QueryScorer scorer = new QueryScorer(query, "title");
    Highlighter highlighter = new Highlighter(scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));

    Analyzer analyzer = new SimpleAnalyzer(Version.LUCENE_46);

    for (ScoreDoc sd : hits.scoreDocs) {
        Document doc = searcher.doc(sd.doc);
        String title = doc.get("title");

        TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), sd.doc, "title", doc,
                analyzer);
        String fragment = highlighter.getBestFragment(stream, title);

        System.out.println(fragment);
    }
}

From source file:com.mathworks.xzheng.tools.RegexQueryTest.java

License:Apache License

public void testRegexQuery() throws Exception {
    Directory directory = TestUtil.getBookIndexDirectory();
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory));
    RegexQuery q = new RegexQuery(new Term("title", ".*st.*"));
    TopDocs hits = searcher.search(q, 10);
    assertEquals(2, hits.totalHits);
    assertTrue(TestUtil.hitsIncludeTitle(searcher, hits, "Tapestry in Action"));
    assertTrue(
            TestUtil.hitsIncludeTitle(searcher, hits, "Mindstorms: Children, Computers, And Powerful Ideas"));

    directory.close();
}

From source file:com.mathworks.xzheng.tools.remote.SearchClient.java

License:Apache License

private static void search(String name, String word) throws Exception {
    TermQuery query = new TermQuery(new Term("word", word));

    IndexSearcher searcher = (IndexSearcher) searcherCache.get(name); // reuse a cached searcher if one exists

    if (searcher == null) {
        searcher = new IndexSearcher(new MultiReader(lookupRemote(name))); // wrap the remote reader
        searcherCache.put(name, searcher);
    }

    long begin = new Date().getTime(); // time the search
    TopDocs hits = searcher.search(query, 10);
    long end = new Date().getTime();

    System.out.print("Searched " + name + " for '" + word + "' (" + (end - begin) + " ms): ");

    if (hits.scoreDocs.length == 0) {
        System.out.print("<NONE FOUND>");
    }

    for (ScoreDoc sd : hits.scoreDocs) {
        Document doc = searcher.doc(sd.doc);
        String[] values = doc.getValues("syn");
        for (String syn : values) {
            System.out.print(syn + " ");
        }
    }
    System.out.println();
    System.out.println();
}

From source file:com.meizu.nlp.classification.BooleanPerceptronClassifier.java

License:Apache License

/**
 * {@inheritDoc}
 */
@Override
public void train(LeafReader leafReader, String textFieldName, String classFieldName, Analyzer analyzer,
        Query query) throws IOException {
    this.textTerms = MultiFields.getTerms(leafReader, textFieldName);

    if (textTerms == null) {
        throw new IOException("term vectors need to be available for field " + textFieldName);
    }

    this.analyzer = analyzer;
    this.textFieldName = textFieldName;

    if (threshold == null || threshold == 0d) {
        // automatically assign a threshold
        long sumDocFreq = leafReader.getSumDocFreq(textFieldName);
        if (sumDocFreq != -1) {
            this.threshold = (double) sumDocFreq / 2d;
        } else {
            throw new IOException("threshold cannot be assigned since term vectors for field " + textFieldName
                    + " do not exist");
        }
    }

    // TODO : remove this map as soon as we have a writable FST
    SortedMap<String, Double> weights = new TreeMap<>();

    TermsEnum termsEnum = textTerms.iterator();
    BytesRef textTerm;
    while ((textTerm = termsEnum.next()) != null) {
        weights.put(textTerm.utf8ToString(), (double) termsEnum.totalTermFreq());
    }
    updateFST(weights);

    IndexSearcher indexSearcher = new IndexSearcher(leafReader);

    int batchCount = 0;

    BooleanQuery q = new BooleanQuery();
    q.add(new BooleanClause(new WildcardQuery(new Term(classFieldName, "*")), BooleanClause.Occur.MUST));
    if (query != null) {
        q.add(new BooleanClause(query, BooleanClause.Occur.MUST));
    }
    // run the search and use stored field values
    for (ScoreDoc scoreDoc : indexSearcher.search(q, Integer.MAX_VALUE).scoreDocs) {
        Document doc = indexSearcher.doc(scoreDoc.doc);

        IndexableField textField = doc.getField(textFieldName);

        // get the expected result
        IndexableField classField = doc.getField(classFieldName);

        if (textField != null && classField != null) {
            // assign class to the doc
            ClassificationResult<Boolean> classificationResult = assignClass(textField.stringValue());
            Boolean assignedClass = classificationResult.getAssignedClass();

            Boolean correctClass = Boolean.valueOf(classField.stringValue());
            long modifier = correctClass.compareTo(assignedClass);
            if (modifier != 0) {
                updateWeights(leafReader, scoreDoc.doc, assignedClass, weights, modifier,
                        batchCount % batchSize == 0);
            }
            batchCount++;
        }
    }
    weights.clear(); // free memory while waiting for GC
}

From source file:com.meizu.nlp.classification.utils.DatasetSplitter.java

License:Apache License

/**
 * Splits a given index into three indexes, for training, test, and cross-validation tasks respectively
 *
 * @param originalIndex        an {@link org.apache.lucene.index.LeafReader} on the source index
 * @param trainingIndex        a {@link Directory} used to write the training index
 * @param testIndex            a {@link Directory} used to write the test index
 * @param crossValidationIndex a {@link Directory} used to write the cross validation index
 * @param analyzer             {@link Analyzer} used to create the new docs
 * @param fieldNames           names of fields that need to be put in the new indexes or <code>null</code> if all should be used
 * @throws IOException if any writing operation fails on any of the indexes
 */
public void split(LeafReader originalIndex, Directory trainingIndex, Directory testIndex,
        Directory crossValidationIndex, Analyzer analyzer, String... fieldNames) throws IOException {

    // create IWs for train / test / cv IDXs
    IndexWriter testWriter = new IndexWriter(testIndex, new IndexWriterConfig(analyzer));
    IndexWriter cvWriter = new IndexWriter(crossValidationIndex, new IndexWriterConfig(analyzer));
    IndexWriter trainingWriter = new IndexWriter(trainingIndex, new IndexWriterConfig(analyzer));

    try {
        int size = originalIndex.maxDoc();

        IndexSearcher indexSearcher = new IndexSearcher(originalIndex);
        TopDocs topDocs = indexSearcher.search(new MatchAllDocsQuery(), Integer.MAX_VALUE);

        // set the type to be indexed, stored, with term vectors
        FieldType ft = new FieldType(TextField.TYPE_STORED);
        ft.setStoreTermVectors(true);
        ft.setStoreTermVectorOffsets(true);
        ft.setStoreTermVectorPositions(true);

        int b = 0;

        // iterate over existing documents
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {

            // create a new document for indexing
            Document doc = new Document();
            if (fieldNames != null && fieldNames.length > 0) {
                for (String fieldName : fieldNames) {
                    doc.add(new Field(fieldName,
                            originalIndex.document(scoreDoc.doc).getField(fieldName).stringValue(), ft));
                }
            } else {
                for (IndexableField storableField : originalIndex.document(scoreDoc.doc).getFields()) {
                    if (storableField.readerValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.readerValue(), ft));
                    } else if (storableField.binaryValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.binaryValue(), ft));
                    } else if (storableField.stringValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.stringValue(), ft));
                    } else if (storableField.numericValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.numericValue().toString(), ft));
                    }
                }
            }

            // add it to one of the IDXs
            if (b % 2 == 0 && testWriter.maxDoc() < size * testRatio) {
                testWriter.addDocument(doc);
            } else if (cvWriter.maxDoc() < size * crossValidationRatio) {
                cvWriter.addDocument(doc);
            } else {
                trainingWriter.addDocument(doc);
            }
            b++;
        }
    } catch (Exception e) {
        throw new IOException(e);
    } finally {
        testWriter.commit();
        cvWriter.commit();
        trainingWriter.commit();
        // close IWs
        testWriter.close();
        cvWriter.close();
        trainingWriter.close();
    }
}

From source file:com.meizu.nlp.classification.utils.DocToDoubleVectorUtilsTest.java

License:Apache License

@Test
public void testDenseFreqDoubleArrayConversion() throws Exception {
    IndexSearcher indexSearcher = new IndexSearcher(index);
    for (ScoreDoc scoreDoc : indexSearcher.search(new MatchAllDocsQuery(), Integer.MAX_VALUE).scoreDocs) {
        Terms docTerms = index.getTermVector(scoreDoc.doc, "text");
        Double[] vector = DocToDoubleVectorUtils.toDenseLocalFreqDoubleArray(docTerms);
        assertNotNull(vector);
        assertTrue(vector.length > 0);
    }
}

From source file:com.meizu.nlp.classification.utils.DocToDoubleVectorUtilsTest.java

License:Apache License

@Test
public void testSparseFreqDoubleArrayConversion() throws Exception {
    Terms fieldTerms = MultiFields.getTerms(index, "text");
    if (fieldTerms != null && fieldTerms.size() != -1) {
        IndexSearcher indexSearcher = new IndexSearcher(index);
        for (ScoreDoc scoreDoc : indexSearcher.search(new MatchAllDocsQuery(), Integer.MAX_VALUE).scoreDocs) {
            Terms docTerms = index.getTermVector(scoreDoc.doc, "text");
            Double[] vector = DocToDoubleVectorUtils.toSparseLocalFreqDoubleArray(docTerms, fieldTerms);
            assertNotNull(vector);
            assertTrue(vector.length > 0);
        }
    }
}

From source file:com.mikeqian.search.SearchFiles.java

License:Apache License

/**
 * Simple command-line based search demo.
 */
public static void main(String[] args) throws Exception {
    String index = System.getProperty("java.io.tmpdir", "tmp") + System.getProperty("file.separator")
            + "index-dir";

    System.out.println(index);
    String field = "contents";
    String queries = null;
    int repeat = 0;
    boolean raw = false;
    String queryString = null;
    int hitsPerPage = 10;

    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
    IndexSearcher searcher = new IndexSearcher(reader);

    WordConfTools.set("dic.path", "classpath:dic.txt,classpath:dic_CN.txt");
    Analyzer analyzer = new ChineseWordAnalyzer();

    BufferedReader in = null;
    if (queries != null) {
        in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8);
    } else {
        in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
    }
    QueryParser parser = new QueryParser(field, analyzer);
    while (true) {
        if (queries == null && queryString == null) { // prompt the user
            System.out.println("Enter query: ");
        }

        String line = queryString != null ? queryString : in.readLine();

        if (line == null) {
            break;
        }

        line = line.trim();
        if (line.length() == 0) {
            break;
        }

        Query query = parser.parse(line);
        System.out.println("Searching for: " + query.toString(field));

        if (repeat > 0) { // repeat & time as benchmark
            Date start = new Date();
            for (int i = 0; i < repeat; i++) {
                searcher.search(query, 100);
            }
            Date end = new Date();
            System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
        }

        doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null);

        if (queryString != null) {
            break;
        }
    }
    reader.close();
}