List of usage examples for org.apache.lucene.search.IndexSearcher.doc

public Document doc(int docID) throws IOException

Sugar for .getIndexReader().document(docID): returns the stored fields of the document with the given doc ID.
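Every example below follows the same pattern: run a search, take the doc IDs from the returned ScoreDocs, and call searcher.doc(docID) to load each hit's stored fields. Here is a minimal, self-contained sketch of that pattern against the Lucene 4.6-era API used throughout this page; the index path and the "contents"/"title" field names are placeholders, not taken from any real index.

import java.io.File;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class IndexSearcherDocExample {
    public static void main(String[] args) throws Exception {
        // open an existing index (path is a placeholder)
        Directory dir = FSDirectory.open(new File("/path/to/index"));
        DirectoryReader reader = DirectoryReader.open(dir);
        IndexSearcher searcher = new IndexSearcher(reader);

        // parse and run a query against a hypothetical "contents" field
        QueryParser parser = new QueryParser(Version.LUCENE_46, "contents",
                new StandardAnalyzer(Version.LUCENE_46));
        Query query = parser.parse("lucene");
        TopDocs hits = searcher.search(query, 10);

        // searcher.doc(docID) loads the stored fields for each matching document
        for (ScoreDoc sd : hits.scoreDocs) {
            Document doc = searcher.doc(sd.doc);
            System.out.println(doc.get("title"));
        }

        reader.close();
        dir.close();
    }
}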
From source file: com.mathworks.xzheng.analysis.codec.MetaphoneAnalyzerTest.java
License: Apache License

public void testKoolKat() throws Exception {
    RAMDirectory directory = new RAMDirectory();
    Analyzer analyzer = new MetaphoneReplacementAnalyzer();
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(directory, config);

    Document doc = new Document();
    doc.add(new Field("contents",                       //#A
            "cool cat",
            Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();

    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory));

    Query query = new QueryParser(Version.LUCENE_46,    //#B
            "contents", analyzer)                       //#B
                    .parse("kool kat");                 //#B

    TopDocs hits = searcher.search(query, 1);
    assertEquals(1, hits.totalHits);                    //#C

    int docID = hits.scoreDocs[0].doc;
    doc = searcher.doc(docID);
    assertEquals("cool cat", doc.get("contents"));      //#D
}
From source file: com.mathworks.xzheng.extsearch.payloads.PayloadsTest.java
License: Apache License

public void testPayloadTermQuery() throws Throwable {
    addDoc("Hurricane warning",
            "Bulletin: A hurricane warning was issued at 6 AM for the outer great banks");
    addDoc("Warning label maker",
            "The warning label maker is a delightful toy for your precocious seven year old's warning needs");
    addDoc("Tornado warning",
            "Bulletin: There is a tornado warning for Worcester county until 6 PM today");

    IndexReader r = writer.getReader();
    writer.close();

    IndexSearcher searcher = new IndexSearcher(r);
    searcher.setSimilarity(new BoostingSimilarity());

    Term warning = new Term("contents", "warning");

    Query query1 = new TermQuery(warning);
    System.out.println("\nTermQuery results:");
    TopDocs hits = searcher.search(query1, 10);
    TestUtil.dumpHits(searcher, hits);

    assertEquals("Warning label maker",                        // #B
            searcher.doc(hits.scoreDocs[0].doc).get("title")); // #B

    Query query2 = new PayloadTermQuery(warning, new AveragePayloadFunction());
    System.out.println("\nPayloadTermQuery results:");
    hits = searcher.search(query2, 10);
    TestUtil.dumpHits(searcher, hits);

    assertEquals("Warning label maker",                        // #C
            searcher.doc(hits.scoreDocs[2].doc).get("title")); // #C

    r.close();
}
From source file: com.mathworks.xzheng.meetlucene.Searcher.java
License: Apache License

public static void search(String indexDir, String q) throws IOException, ParseException {

    Directory dir = FSDirectory.open(new File(indexDir));            //3
    IndexSearcher is = new IndexSearcher(DirectoryReader.open(dir)); //3

    QueryParser parser = new QueryParser(Version.LUCENE_46,          //4
            "contents",                                              //4
            new StandardAnalyzer(Version.LUCENE_46));                //4
    Query query = parser.parse(q);                                   //4
    long start = System.currentTimeMillis();
    TopDocs hits = is.search(query, 10);                             //5
    long end = System.currentTimeMillis();

    System.err.println("Found " + hits.totalHits +                   //6
            " document(s) (in " + (end - start) +                    //6
            " milliseconds) that matched query '" +                  //6
            q + "':");                                               //6

    for (ScoreDoc scoreDoc : hits.scoreDocs) {
        Document doc = is.doc(scoreDoc.doc);                         //7
        System.out.println(doc.get("fullpath"));                     //8
    }

    is.getIndexReader().close();                                     //9
}
From source file: com.mathworks.xzheng.searching.BasicSearchingTest.java
License: Apache License

public void testQueryParser() throws Exception {
    Directory dir = TestUtil.getBookIndexDirectory();
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));

    QueryParser parser = new QueryParser(Version.LUCENE_46,    //A
            "contents",                                        //A
            new SimpleAnalyzer(Version.LUCENE_46));            //A
    Query query = parser.parse("+JUNIT +ANT -MOCK");           //B
    TopDocs docs = searcher.search(query, 10);
    assertEquals(1, docs.totalHits);

    Document d = searcher.doc(docs.scoreDocs[0].doc);
    assertEquals("Ant in Action", d.get("title"));

    query = parser.parse("mock OR junit");                     //B
    docs = searcher.search(query, 10);
    assertEquals("Ant in Action, JUnit in Action, Second Edition", 2, docs.totalHits);

    dir.close();
}
From source file: com.mathworks.xzheng.searching.Explainer.java
License: Apache License

public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: Explainer <index dir> <query>");
        System.exit(1);
    }

    String indexDir = args[0];
    String queryExpression = args[1];

    Directory directory = FSDirectory.open(new File(indexDir));
    QueryParser parser = new QueryParser(Version.LUCENE_46, "contents",
            new SimpleAnalyzer(Version.LUCENE_46));
    Query query = parser.parse(queryExpression);

    System.out.println("Query: " + queryExpression);

    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory));
    TopDocs topDocs = searcher.search(query, 10);

    for (ScoreDoc match : topDocs.scoreDocs) {
        Explanation explanation = searcher.explain(query, match.doc);  //#A

        System.out.println("----------");
        Document doc = searcher.doc(match.doc);
        System.out.println(doc.get("title"));
        System.out.println(explanation.toString());                    //#B
    }
    directory.close();
}
From source file: com.mathworks.xzheng.searching.ScoreTest.java
License: Apache License

public void testFuzzy() throws Exception {
    indexSingleFieldDocs(new Field[] {
            new Field("contents", "fuzzy", Field.Store.YES, Field.Index.ANALYZED),
            new Field("contents", "wuzzy", Field.Store.YES, Field.Index.ANALYZED) });

    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory));

    Query query = new FuzzyQuery(new Term("contents", "wuzza"));
    TopDocs matches = searcher.search(query, 10);
    assertEquals("both close enough", 2, matches.totalHits);
    assertTrue("wuzzy closer than fuzzy",
            matches.scoreDocs[0].score != matches.scoreDocs[1].score);

    Document doc = searcher.doc(matches.scoreDocs[0].doc);
    assertEquals("wuzza bear", "wuzzy", doc.get("contents"));
}
From source file: com.mathworks.xzheng.tools.HighlightTest.java
License: Apache License

public void testHits() throws Exception {
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(TestUtil.getBookIndexDirectory()));
    TermQuery query = new TermQuery(new Term("title", "action"));
    TopDocs hits = searcher.search(query, 10);

    QueryScorer scorer = new QueryScorer(query, "title");
    Highlighter highlighter = new Highlighter(scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));

    Analyzer analyzer = new SimpleAnalyzer(Version.LUCENE_46);

    for (ScoreDoc sd : hits.scoreDocs) {
        Document doc = searcher.doc(sd.doc);
        String title = doc.get("title");

        TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(),
                sd.doc, "title", doc, analyzer);
        String fragment = highlighter.getBestFragment(stream, title);

        System.out.println(fragment);
    }
}
From source file: com.mathworks.xzheng.tools.remote.SearchClient.java
License: Apache License

private static void search(String name, String word) throws Exception {
    TermQuery query = new TermQuery(new Term("word", word));

    IndexSearcher searcher =                         //2
            (IndexSearcher) searcherCache.get(name); //2

    if (searcher == null) {
        searcher = new IndexSearcher(new MultiReader[] { lookupRemote(name) }); //3
        searcherCache.put(name, searcher);
    }

    long begin = new Date().getTime();               //4
    TopDocs hits = searcher.search(query, 10);       //4
    long end = new Date().getTime();                 //4
    System.out.print("Searched " + name + " for '" + word + "' (" + (end - begin) + " ms): ");

    if (hits.scoreDocs.length == 0) {
        System.out.print("<NONE FOUND>");
    }

    for (ScoreDoc sd : hits.scoreDocs) {
        Document doc = searcher.doc(sd.doc);
        String[] values = doc.getValues("syn");
        for (String syn : values) {
            System.out.print(syn + " ");
        }
    }
    System.out.println();
    System.out.println();                            //5
}
From source file: com.mathworks.xzheng.tools.SpatialLuceneExample.java
License: Apache License

public void findNear(String what, double latitude, double longitude, double radius)
        throws CorruptIndexException, IOException {
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory));

    DistanceQueryBuilder dq;
    dq = new DistanceQueryBuilder(latitude,                  // #A
            longitude,                                       // #A
            radius,                                          // #A
            latField,                                        // #A
            lngField,                                        // #A
            tierPrefix,                                      // #A
            true);                                           // #A

    Query tq;
    if (what == null)
        tq = new TermQuery(new Term("metafile", "doc"));     // #B
    else
        tq = new TermQuery(new Term("name", what));

    DistanceFieldComparatorSource dsort;                     // #C
    dsort = new DistanceFieldComparatorSource(               // #C
            dq.getDistanceFilter());                         // #C
    Sort sort = new Sort(new SortField("foo", dsort));       // #C

    TopDocs hits = searcher.search(tq, dq.getFilter(), 10, sort);

    Map<Integer, Double> distances =                         // #D
            dq.getDistanceFilter().getDistances();           // #D

    System.out.println("Number of results: " + hits.totalHits);
    System.out.println("Found:");

    for (ScoreDoc sd : hits.scoreDocs) {
        int docID = sd.doc;
        Document d = searcher.doc(docID);

        String name = d.get("name");
        double rsLat = NumericUtils.prefixCodedToDouble(d.get(latField));
        double rsLng = NumericUtils.prefixCodedToDouble(d.get(lngField));
        Double geo_distance = distances.get(docID);

        System.out.printf(name + ": %.2f Miles\n", geo_distance);
        System.out.println("\t\t(" + rsLat + "," + rsLng + ")");
    }
}
From source file: com.meizu.nlp.classification.BooleanPerceptronClassifier.java
License: Apache License

/**
 * {@inheritDoc}
 */
@Override
public void train(LeafReader leafReader, String textFieldName, String classFieldName,
        Analyzer analyzer, Query query) throws IOException {
    this.textTerms = MultiFields.getTerms(leafReader, textFieldName);
    if (textTerms == null) {
        throw new IOException("term vectors need to be available for field " + textFieldName);
    }
    this.analyzer = analyzer;
    this.textFieldName = textFieldName;

    if (threshold == null || threshold == 0d) {
        // automatically assign a threshold
        long sumDocFreq = leafReader.getSumDocFreq(textFieldName);
        if (sumDocFreq != -1) {
            this.threshold = (double) sumDocFreq / 2d;
        } else {
            throw new IOException("threshold cannot be assigned since term vectors for field "
                    + textFieldName + " do not exist");
        }
    }

    // TODO : remove this map as soon as we have a writable FST
    SortedMap<String, Double> weights = new TreeMap<>();

    TermsEnum termsEnum = textTerms.iterator();
    BytesRef textTerm;
    while ((textTerm = termsEnum.next()) != null) {
        weights.put(textTerm.utf8ToString(), (double) termsEnum.totalTermFreq());
    }
    updateFST(weights);

    IndexSearcher indexSearcher = new IndexSearcher(leafReader);

    int batchCount = 0;

    BooleanQuery q = new BooleanQuery();
    q.add(new BooleanClause(new WildcardQuery(new Term(classFieldName, "*")), BooleanClause.Occur.MUST));
    if (query != null) {
        q.add(new BooleanClause(query, BooleanClause.Occur.MUST));
    }
    // run the search and use stored field values
    for (ScoreDoc scoreDoc : indexSearcher.search(q, Integer.MAX_VALUE).scoreDocs) {
        Document doc = indexSearcher.doc(scoreDoc.doc);

        IndexableField textField = doc.getField(textFieldName);

        // get the expected result
        IndexableField classField = doc.getField(classFieldName);

        if (textField != null && classField != null) {
            // assign class to the doc
            ClassificationResult<Boolean> classificationResult = assignClass(textField.stringValue());
            Boolean assignedClass = classificationResult.getAssignedClass();

            Boolean correctClass = Boolean.valueOf(classField.stringValue());
            long modifier = correctClass.compareTo(assignedClass);
            if (modifier != 0) {
                updateWeights(leafReader, scoreDoc.doc, assignedClass, weights, modifier,
                        batchCount % batchSize == 0);
            }
            batchCount++;
        }
    }
    weights.clear(); // free memory while waiting for GC
}