List of usage examples for org.apache.lucene.search.TopScoreDocCollector.create:
public static TopScoreDocCollector create(int numHits, int totalHitsThreshold)
From source file: org.talend.dataquality.standardization.query.FirstNameStandardize.java
License: Open Source License
@SuppressWarnings("unused") private TopDocsCollector<?> createTopDocsCollector() throws IOException { // TODO the goal is to sort the result in descending order according to the "count" field if (SORT_WITH_COUNT) { // TODO enable this when it works correctly SortField sortfield = new SortField(PluginConstant.FIRST_NAME_STANDARDIZE_COUNT, SortField.INT); Sort sort = new Sort(sortfield); // results are sorted according to a score and then to the count value return TopFieldCollector.create(sort, hitsPerPage, false, false, false, false); } else {// ww w. j a v a 2 s.com return TopScoreDocCollector.create(hitsPerPage, false); } }
From source file: org.tallison.lucene.contrast.QueryToCorpusContraster.java
License: Apache License
public List<TermIDF> contrast(Query query, String fieldName, int numResults) throws IOException { TopScoreDocCollector results = TopScoreDocCollector.create(maxDocs, maxDocs + 10000); searcher.search(query, results);//w ww .j a v a 2 s . com ScoreDoc[] scoreDocs = results.topDocs().scoreDocs; //if there are fewer documents than minTermFreq //return empty list now if (scoreDocs.length < minTermFreq) { return new ArrayList<TermIDF>(); } //total hack int initialSize = scoreDocs.length * 100; CharArrayMap<MutableValueInt> map = new CharArrayMap<MutableValueInt>(initialSize, ignoreCase); CharArraySet tmpSet = new CharArraySet(100, ignoreCase); Set<String> selector = new HashSet<String>(); selector.add(fieldName); for (ScoreDoc scoreDoc : scoreDocs) { //get terms from doc processDoc(scoreDoc.doc, fieldName, selector, tmpSet); //now update global doc freqs Iterator<Object> it = tmpSet.iterator(); while (it.hasNext()) { char[] token = (char[]) it.next(); MutableValueInt docCount = map.get(token, 0, token.length); if (docCount == null) { docCount = new MutableValueInt(); docCount.value = 1; } else { docCount.value++; } map.put(token, docCount); } tmpSet.clear(); } return getResults(fieldName, map, numResults); }
From source file: org.tallison.lucene.queryparser.spans.TestOverallSpanQueryParser.java
License: Apache License
/**
 * Parses {@code s} with the given parser, executes it, and asserts that the set of
 * matching document ids is exactly {@code docids} (order-insensitive, no extras).
 */
private void compareHits(SpanQueryParser p, String s, IndexSearcher searcher, int... docids) throws Exception {
    Query query = p.parse(s);
    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, 10000);
    searcher.search(query, collector);

    Set<Integer> found = new HashSet<>();
    for (ScoreDoc scoreDoc : collector.topDocs().scoreDocs) {
        found.add(scoreDoc.doc);
    }
    // Same cardinality plus per-id membership implies set equality.
    assertEquals(docids.length, found.size());
    for (int docid : docids) {
        assertTrue("couldn't find " + Integer.toString(docid) + " among the hits", found.contains(docid));
    }
}
From source file: org.tallison.lucene.queryparser.spans.TestOverallSpanQueryParser.java
License: Apache License
private void assertHits(String qString, SpanQueryParser p, IndexSearcher s, int expected) throws Exception { Query q = p.parse(qString);//from w ww . j a v a 2 s . c o m TopScoreDocCollector results = TopScoreDocCollector.create(1000, 10000); s.search(q, results); ScoreDoc[] scoreDocs = results.topDocs().scoreDocs; assertEquals(qString, expected, scoreDocs.length); }
From source file: org.uberfire.metadata.io.BatchIndexTest.java
License: Apache License
/**
 * Writes four files (three with Dublin Core attributes, one plain) into a git-backed
 * filesystem, runs a {@code BatchIndex} over the repository root, and then — from the
 * completion callback — verifies hit counts against the resulting Lucene index:
 * 4 documents total, 2 matching author term "name", 1 matching author term "second".
 */
@Test
public void testIndex() throws IOException, InterruptedException {
    {
        // File 1: authored by "My User Name Here".
        final Path file = ioService().get("git://temp-repo-test/path/to/file.txt");
        ioService().write(file, "some content here", Collections.<OpenOption>emptySet(), new FileAttribute<Object>() {
            @Override
            public String name() {
                return "dcore.author";
            }

            @Override
            public Object value() {
                return "My User Name Here";
            }
        }, new FileAttribute<Object>() {
            @Override
            public String name() {
                return "dcore.lastModification";
            }

            @Override
            public Object value() {
                return new Date();
            }
        }, new FileAttribute<Object>() {
            @Override
            public String name() {
                return "dcore.comment";
            }

            @Override
            public Object value() {
                return "initial document version, should be revised later.";
            }
        });
    }
    {
        // File 2: authored by "My Second User Name".
        final Path file = ioService().get("git://temp-repo-test/path/to/some/complex/file.txt");
        ioService().write(file, "some other content here", Collections.<OpenOption>emptySet(), new FileAttribute<Object>() {
            @Override
            public String name() {
                return "dcore.author";
            }

            @Override
            public Object value() {
                return "My Second User Name";
            }
        }, new FileAttribute<Object>() {
            @Override
            public String name() {
                return "dcore.lastModification";
            }

            @Override
            public Object value() {
                return new Date();
            }
        }, new FileAttribute<Object>() {
            @Override
            public String name() {
                return "dcore.comment";
            }

            @Override
            public Object value() {
                return "important document, should be used right now.";
            }
        });
    }
    {
        // File 3: authored by "My Original User".
        final Path file = ioService().get("git://temp-repo-test/simple.doc");
        ioService().write(file, "some doc content here", Collections.<OpenOption>emptySet(), new FileAttribute<Object>() {
            @Override
            public String name() {
                return "dcore.author";
            }

            @Override
            public Object value() {
                return "My Original User";
            }
        }, new FileAttribute<Object>() {
            @Override
            public String name() {
                return "dcore.lastModification";
            }

            @Override
            public Object value() {
                return new Date();
            }
        }, new FileAttribute<Object>() {
            @Override
            public String name() {
                return "dcore.comment";
            }

            @Override
            public Object value() {
                return "unlock document updated, should be checked by boss.";
            }
        });
    }
    {
        // File 4: no attributes at all — still must be picked up by the batch index.
        final Path file = ioService().get("git://temp-repo-test/xxx/simple.xls");
        ioService().write(file, "plans!?");
    }
    // Index the whole repository; assertions run in the completion callback.
    new BatchIndex(config.getIndexEngine(), ioService(), DublinCoreView.class)
            .run(ioService().get("git://temp-repo-test/"), new Runnable() {
                @Override
                public void run() {
                    try {
                        final LuceneIndex index = config.getIndexManager()
                                .get(toKCluster(ioService().get("git://temp-repo-test/").getFileSystem()));
                        final IndexSearcher searcher = index.nrtSearcher();
                        {
                            // All four written files must be present in the index.
                            final TopScoreDocCollector collector = TopScoreDocCollector.create(10, true);
                            searcher.search(new MatchAllDocsQuery(), collector);
                            final ScoreDoc[] hits = collector.topDocs().scoreDocs;
                            assertEquals(4, hits.length);
                        }
                        {
                            // "name" appears in two of the author values.
                            final TopScoreDocCollector collector = TopScoreDocCollector.create(10, true);
                            searcher.search(new TermQuery(new Term("dcore.author", "name")), collector);
                            final ScoreDoc[] hits = collector.topDocs().scoreDocs;
                            assertEquals(2, hits.length);
                        }
                        {
                            // "second" appears in exactly one author value.
                            final TopScoreDocCollector collector = TopScoreDocCollector.create(10, true);
                            searcher.search(new TermQuery(new Term("dcore.author", "second")), collector);
                            final ScoreDoc[] hits = collector.topDocs().scoreDocs;
                            assertEquals(1, hits.length);
                        }
                        index.nrtRelease(searcher);
                    } catch (Exception ex) {
                        // Exceptions inside the callback would otherwise be lost; fail loudly.
                        ex.printStackTrace();
                        fail();
                    }
                }
            });
}
From source file: org.uberfire.metadata.io.IOServiceIndexedGitImplTest.java
License: Apache License
/**
 * Writes two attributed files through the indexed IO service, waits for the async
 * indexing pipeline to drain, then checks (a) the meta-model recorded each property
 * with exactly one type, and (b) term queries against the Lucene index return the
 * expected hit counts.
 */
@Test
public void testIndexedFile() throws IOException, InterruptedException {
    final Path newOtherPath = getDirectoryPath().resolveSibling("someNewOtherPath");
    // "dummy" only forces the directory to exist in the git filesystem.
    ioService().write(newOtherPath.resolve("dummy"), "<none>");
    final Path path = newOtherPath.resolve("myIndexedFile.txt");
    // First file: three attributes of three distinct types (Date, String, Integer).
    ioService().write(path, "ooooo!", Collections.<OpenOption>emptySet(), new FileAttribute<Object>() {
        @Override
        public String name() {
            return "custom";
        }

        @Override
        public Object value() {
            return dateValue;
        }
    }, new FileAttribute<String>() {
        @Override
        public String name() {
            return "int.hello";
        }

        @Override
        public String value() {
            return "hello some world jhere";
        }
    }, new FileAttribute<Integer>() {
        @Override
        public String name() {
            return "int";
        }

        @Override
        public Integer value() {
            return 10;
        }
    });
    // Second file: shares the "int.hello" property, value "jhere".
    ioService().write(newOtherPath.resolve("myOtherIndexedFile.txt"), "ooooo!",
            Collections.<OpenOption>emptySet(), new FileAttribute<String>() {
                @Override
                public String name() {
                    return "int.hello";
                }

                @Override
                public String value() {
                    return "jhere";
                }
            });
    Thread.sleep(5000); //wait for events to be consumed from jgit -> (notify changes -> watcher -> index) -> lucene index
    assertNotNull(config.getMetaModelStore().getMetaObject(Path.class.getName()));
    assertNotNull(config.getMetaModelStore().getMetaObject(Path.class.getName()).getProperty("int"));
    assertNotNull(config.getMetaModelStore().getMetaObject(Path.class.getName()).getProperty("int.hello"));
    assertNotNull(config.getMetaModelStore().getMetaObject(Path.class.getName()).getProperty("custom"));
    // NOTE(review): the three assertions below duplicate the three above verbatim.
    assertNotNull(config.getMetaModelStore().getMetaObject(Path.class.getName()).getProperty("int"));
    assertNotNull(config.getMetaModelStore().getMetaObject(Path.class.getName()).getProperty("int.hello"));
    assertNotNull(config.getMetaModelStore().getMetaObject(Path.class.getName()).getProperty("custom"));
    // Each property was written with a single consistent type...
    assertEquals(1, config.getMetaModelStore().getMetaObject(Path.class.getName()).getProperty("int").getTypes()
            .size());
    assertEquals(1, config.getMetaModelStore().getMetaObject(Path.class.getName()).getProperty("int.hello")
            .getTypes().size());
    assertEquals(1, config.getMetaModelStore().getMetaObject(Path.class.getName()).getProperty("custom")
            .getTypes().size());
    // ...and that type matches what the FileAttribute declared.
    assertTrue(config.getMetaModelStore().getMetaObject(Path.class.getName()).getProperty("int").getTypes()
            .contains(Integer.class));
    assertTrue(config.getMetaModelStore().getMetaObject(Path.class.getName()).getProperty("int.hello")
            .getTypes().contains(String.class));
    assertTrue(config.getMetaModelStore().getMetaObject(Path.class.getName()).getProperty("custom").getTypes()
            .contains(Date.class));
    final LuceneIndex index = config.getIndexManager().get(toKCluster(newOtherPath.getFileSystem()));
    final IndexSearcher searcher = index.nrtSearcher();
    {
        // "world" occurs only in the first file's "int.hello" value.
        final TopScoreDocCollector collector = TopScoreDocCollector.create(10, true);
        searcher.search(new TermQuery(new Term("int.hello", "world")), collector);
        final ScoreDoc[] hits = collector.topDocs().scoreDocs;
        assertEquals(1, hits.length);
    }
    {
        // "jhere" occurs in both files' "int.hello" values.
        final TopScoreDocCollector collector = TopScoreDocCollector.create(10, true);
        searcher.search(new TermQuery(new Term("int.hello", "jhere")), collector);
        final ScoreDoc[] hits = collector.topDocs().scoreDocs;
        assertEquals(2, hits.length);
    }
    {
        // Only the two attributed files are indexed (the "dummy" write is not).
        final TopScoreDocCollector collector = TopScoreDocCollector.create(10, true);
        searcher.search(new MatchAllDocsQuery(), collector);
        final ScoreDoc[] hits = collector.topDocs().scoreDocs;
        assertEquals(2, hits.length);
    }
    index.nrtRelease(searcher);
}
From source file: org.watermint.sourcecolon.org.opensolaris.opengrok.search.SearchEngine.java
License: Open Source License
/** * @param paging whether to use paging (if yes, first X pages will load faster) * @param root which db to search/*from w w w . jav a2 s . com*/ * @throws IOException */ private void searchSingleDatabase(File root, boolean paging) throws IOException { IndexReader ireader = IndexReader.open(FSDirectory.open(root)); searcher = new IndexSearcher(ireader); collector = TopScoreDocCollector.create(hitsPerPage * cachePages, docsScoredInOrder); searcher.search(query, collector); totalHits = collector.getTotalHits(); if (!paging) { collector = TopScoreDocCollector.create(totalHits, docsScoredInOrder); searcher.search(query, collector); } hits = collector.topDocs().scoreDocs; for (ScoreDoc hit : hits) { int docId = hit.doc; Document d = searcher.doc(docId); docs.add(d); } }
From source file: org.watermint.sourcecolon.org.opensolaris.opengrok.search.SearchEngine.java
License: Open Source License
/** * @param paging whether to use paging (if yes, first X pages will load faster) * @param root list of projects to search * @throws IOException/* ww w. ja v a 2 s . c o m*/ */ private void searchMultiDatabase(List<Project> root, boolean paging) throws IOException { IndexReader[] searchables = new IndexReader[root.size()]; File droot = new File(RuntimeEnvironment.getInstance().getDataRootFile(), "index"); int ii = 0; for (Project project : root) { searchables[ii++] = (IndexReader.open(FSDirectory.open(new File(droot, project.getPath())))); } searcher = new IndexSearcher(new MultiReader(searchables)); collector = TopScoreDocCollector.create(hitsPerPage * cachePages, docsScoredInOrder); searcher.search(query, collector); totalHits = collector.getTotalHits(); if (!paging) { collector = TopScoreDocCollector.create(totalHits, docsScoredInOrder); searcher.search(query, collector); } hits = collector.topDocs().scoreDocs; for (ScoreDoc hit : hits) { int docId = hit.doc; Document d = searcher.doc(docId); docs.add(d); } }
From source file: org.watermint.sourcecolon.org.opensolaris.opengrok.search.SearchEngine.java
License: Open Source License
/**
 * Gets results; if no search was started before, no results are returned.
 * This method will re-query if {@code end} is beyond what the first query from
 * {@code search()} collected — a performance hit applies if you want results in
 * later pages than the number of cachePages. {@code end} has to be bigger than
 * {@code start}!
 *
 * @param start start of the hit list
 * @param end   end of the hit list
 * @param ret   list of results from start to end, or empty if no search was started
 */
public void results(int start, int end, List<Hit> ret) {
    //return if no start search() was done
    if (hits == null || (end < start)) {
        ret.clear();
        return;
    }
    ret.clear();
    //TODO check if below fits for if end=old hits.length, or it should include it
    // NOTE(review): '&' here is the non-short-circuit boolean AND — both operands
    // are always evaluated; behavior is the same as '&&' for these operands.
    if (end > hits.length & !allCollected) {
        //do the requery, we want more than 5 pages
        collector = TopScoreDocCollector.create(totalHits, docsScoredInOrder);
        try {
            searcher.search(query, collector);
        } catch (Exception e) { // this exception should never be hit, since search() will hit this before
            log.log(Level.WARNING, SEARCH_EXCEPTION_MSG, e);
        }
        hits = collector.topDocs().scoreDocs;
        Document d = null;
        for (int i = start; i < hits.length; i++) {
            int docId = hits[i].doc;
            try {
                d = searcher.doc(docId);
            } catch (Exception e) {
                log.log(Level.SEVERE, SEARCH_EXCEPTION_MSG, e);
            }
            docs.add(d);
        }
        allCollected = true;
    }
    //TODO generation of ret(results) could be cashed and consumers of engine would just print them in whatever
    // form they need, this way we could get rid of docs
    // the only problem is that count of docs is usually smaller than number of results
    for (int ii = start; ii < end; ++ii) {
        // Alternate row styling flag for the rendered hit list.
        boolean alt = (ii % 2 == 0);
        boolean hasContext = false;
        try {
            Document doc = docs.get(ii);
            String filename = doc.get("path");
            Genre genre = Genre.get(doc.get("t"));
            Definitions tags = null;
            Fieldable tagsField = doc.getFieldable("tags");
            if (tagsField != null) {
                tags = Definitions.deserialize(tagsField.getBinaryValue());
            }
            int nhits = docs.size();
            if (sourceContext != null) {
                try {
                    if (Genre.PLAIN == genre && (source != null)) {
                        // Plain text: build context directly from the source file.
                        hasContext = sourceContext.getContext(
                                IOUtils.readerWithCharsetDetect(source + filename), null, null, null, filename,
                                tags, nhits > 100, ret);
                    } else if (Genre.XREFABLE == genre && data != null && summarizer != null) {
                        // Xref-able: read the (possibly gzip-compressed) xref file and summarize it.
                        int l = 0;
                        Reader r = null;
                        if (RuntimeEnvironment.getInstance().isCompressXref()) {
                            r = new TagFilter(new BufferedReader(new InputStreamReader(new GZIPInputStream(
                                    new FileInputStream(data + "/xref" + filename + ".gz")))));
                        } else {
                            r = new TagFilter(new BufferedReader(new FileReader(data + "/xref" + filename)));
                        }
                        try {
                            l = r.read(content);
                        } finally {
                            IOUtils.close(r);
                        }
                        //TODO FIX below fragmenter according to either summarizer or context (to get line
                        // numbers, might be hard, since xref writers will need to be fixed too, they generate
                        // just one line of html code now :( )
                        Summary sum = summarizer.getSummary(new String(content, 0, l));
                        Fragment fragments[] = sum.getFragments();
                        for (Fragment fragment : fragments) {
                            String match = fragment.toString();
                            if (match.length() > 0) {
                                // Ellipsis fragments mark context but are not added as hits.
                                if (!fragment.isEllipsis()) {
                                    Hit hit = new Hit(filename, fragment.toString(), "", true, alt);
                                    ret.add(hit);
                                }
                                hasContext = true;
                            }
                        }
                    } else {
                        log.warning("Unknown genre: " + genre + " for " + filename);
                        hasContext |= sourceContext.getContext(null, null, null, null, filename, tags, false,
                                ret);
                    }
                } catch (FileNotFoundException exp) {
                    // Missing xref/source: fall back to context without a reader.
                    log.warning("Couldn't read summary from " + filename + " (" + exp.getMessage() + ")");
                    hasContext |= sourceContext.getContext(null, null, null, null, filename, tags, false, ret);
                }
            }
            if (!hasContext) {
                // No snippet could be produced; still list the file as a hit.
                ret.add(new Hit(filename, "...", "", false, alt));
            }
        } catch (IOException | ClassNotFoundException e) {
            log.log(Level.WARNING, SEARCH_EXCEPTION_MSG, e);
        }
    }
}
From source file: org.wso2.carbon.analytics.dataservice.indexing.AnalyticsDataIndexer.java
License: Open Source License
private List<SearchResultEntry> search(int tenantId, String tableName, String language, String query, int start, int count, String shardId) throws AnalyticsIndexException { List<SearchResultEntry> result = new ArrayList<SearchResultEntry>(); String shardedTableId = this.generateShardedTableId(tenantId, tableName, shardId); IndexReader reader = null;//from w w w . j a v a 2s . c om try { reader = DirectoryReader.open(this.lookupIndexDir(shardedTableId)); IndexSearcher searcher = new IndexSearcher(reader); Map<String, IndexType> indices = this.lookupIndices(tenantId, tableName); Query indexQuery = new AnalyticsQueryParser(DEFAULT_ANALYZER, indices).parse(query); TopScoreDocCollector collector = TopScoreDocCollector.create(count, true); searcher.search(indexQuery, collector); ScoreDoc[] hits = collector.topDocs(start).scoreDocs; Document indexDoc; for (ScoreDoc doc : hits) { indexDoc = searcher.doc(doc.doc); result.add(new SearchResultEntry(indexDoc.get(INDEX_ID_INTERNAL_FIELD), doc.score)); } return result; } catch (Exception e) { throw new AnalyticsIndexException( "Error in index search, shard table id: '" + shardedTableId + "': " + e.getMessage(), e); } finally { if (reader != null) { try { reader.close(); } catch (IOException e) { log.error("Error in closing the reader: " + e.getMessage(), e); ; } } } }