List of usage examples for org.apache.lucene.search.IndexSearcher#getIndexReader
public IndexReader getIndexReader()
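Before the examples from real projects below, here is a minimal, self-contained sketch of the method's typical use. It is a hedged illustration, not taken from any of the projects on this page: it assumes a Lucene 5.x-or-later API (FSDirectory.open(Path), IndexSearcher constructed from an IndexReader), and the index path is purely illustrative.

import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class GetIndexReaderExample {
    public static void main(String[] args) throws Exception {
        // Illustrative index location; any existing Lucene index directory works.
        Directory directory = FSDirectory.open(Paths.get("/tmp/example-index"));
        DirectoryReader reader = DirectoryReader.open(directory);
        IndexSearcher searcher = new IndexSearcher(reader);
        try {
            // getIndexReader() returns the reader this searcher was constructed with,
            // giving access to reader-level statistics and stored documents.
            IndexReader sameReader = searcher.getIndexReader();
            System.out.println("numDocs = " + sameReader.numDocs());
            System.out.println("maxDoc  = " + sameReader.maxDoc());
        } finally {
            // The searcher does not own the reader; the caller must close it.
            reader.close();
            directory.close();
        }
    }
}

Note that several of the older examples below pass a Directory straight to the IndexSearcher constructor; that constructor existed in Lucene 3.x and was removed in later versions.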
From source file:net.sourceforge.docfetcher.model.search.HighlightServiceTest.java
License:Open Source License
@SuppressWarnings("unchecked") @Test//from w ww . j ava 2 s .c o m public void testPhraseHighlighter() throws Exception { // Create index Directory directory = new RAMDirectory(); Analyzer analyzer = new StandardAnalyzer(IndexRegistry.LUCENE_VERSION, Collections.EMPTY_SET); IndexWriterAdapter writer = new IndexWriterAdapter(directory); Document doc = new Document(); doc.add(new Field("content", "some text", Store.NO, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); writer.add(doc); Closeables.closeQuietly(writer); // flush unwritten documents into index // Perform phrase search QueryParser queryParser = new QueryParser(IndexRegistry.LUCENE_VERSION, "content", analyzer); Query query = queryParser.parse("\"text\""); FastVectorHighlighter highlighter = new FastVectorHighlighter(true, true, null, null); FieldQuery fieldQuery = highlighter.getFieldQuery(query); IndexSearcher searcher = null; try { searcher = new IndexSearcher(directory); TopDocs docs = searcher.search(query, 10); assertEquals(1, docs.scoreDocs.length); int docId = docs.scoreDocs[0].doc; // Get phrase highlighting offsets FieldTermStack fieldTermStack = new FieldTermStack(searcher.getIndexReader(), docId, "content", fieldQuery); FieldPhraseList fieldPhraseList = new FieldPhraseList(fieldTermStack, fieldQuery); java.lang.reflect.Field field = fieldPhraseList.getClass().getDeclaredField("phraseList"); field.setAccessible(true); LinkedList<WeightedPhraseInfo> list = (LinkedList<WeightedPhraseInfo>) field.get(fieldPhraseList); assertEquals(5, list.get(0).getStartOffset()); assertEquals(9, list.get(0).getEndOffset()); } finally { Closeables.closeQuietly(searcher); } }
From source file:net.sourceforge.vaticanfetcher.model.search.HighlightServiceTest.java
License:Open Source License
@SuppressWarnings("unchecked") @Test// ww w . j av a 2s . c om public void testPhraseHighlighter() throws Exception { // Create index Directory directory = new RAMDirectory(); Analyzer analyzer = new StandardAnalyzer(IndexRegistry.LUCENE_VERSION, Collections.EMPTY_SET); IndexWriterAdapter writer = new IndexWriterAdapter(directory); Document doc = new Document(); doc.add(new Field("content", "some text", Store.NO, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); writer.add(doc); Closeables.closeQuietly(writer); // flush unwritten documents into index // Perform phrase search QueryParser queryParser = new QueryParser(IndexRegistry.LUCENE_VERSION, "content", analyzer); Query query = queryParser.parse("\"text\""); FastVectorHighlighter highlighter = new FastVectorHighlighter(true, true, null, null); FieldQuery fieldQuery = highlighter.getFieldQuery(query); IndexSearcher searcher = new IndexSearcher(directory); TopDocs docs = searcher.search(query, 10); assertEquals(1, docs.scoreDocs.length); int docId = docs.scoreDocs[0].doc; // Get phrase highlighting offsets FieldTermStack fieldTermStack = new FieldTermStack(searcher.getIndexReader(), docId, "content", fieldQuery); FieldPhraseList fieldPhraseList = new FieldPhraseList(fieldTermStack, fieldQuery); java.lang.reflect.Field field = fieldPhraseList.getClass().getDeclaredField("phraseList"); field.setAccessible(true); LinkedList<WeightedPhraseInfo> list = (LinkedList<WeightedPhraseInfo>) field.get(fieldPhraseList); assertEquals(5, list.get(0).getStartOffset()); assertEquals(9, list.get(0).getEndOffset()); }
From source file:net.stargraph.core.impl.lucene.LuceneSearcher.java
License:Open Source License
@Override
public long countDocuments() {
    IndexSearcher idxSearcher = getLuceneSearcher();
    if (idxSearcher != null) {
        return idxSearcher.getIndexReader().numDocs();
    }
    throw new StarGraphException("Index not found for " + kbId);
}
From source file:org.apache.carbondata.datamap.lucene.LuceneFineGrainDataMap.java
License:Apache License
/**
 * Prune the datamap with the filter expression. Returns the list of
 * blocklets where these filters can exist.
 */
@Override
public List<FineGrainBlocklet> prune(FilterResolverIntf filterExp, SegmentProperties segmentProperties,
        List<PartitionSpec> partitions) throws IOException {
    // convert the filter expression into a lucene query
    List<String> fields = new ArrayList<String>();
    // only for test: query all data
    String strQuery = getQueryString(filterExp.getFilterExpression());
    int maxDocs;
    try {
        maxDocs = getMaxDoc(filterExp.getFilterExpression());
    } catch (NumberFormatException e) {
        maxDocs = Integer.MAX_VALUE;
    }
    if (null == strQuery) {
        return null;
    }
    String[] sFields = new String[fields.size()];
    fields.toArray(sFields);

    // get the analyzer
    if (analyzer == null) {
        analyzer = new StandardAnalyzer();
    }

    // use MultiFieldQueryParser to parse the query
    QueryParser queryParser = new MultiFieldQueryParser(sFields, analyzer);
    queryParser.setAllowLeadingWildcard(true);
    Query query;
    try {
        query = queryParser.parse(strQuery);
    } catch (ParseException e) {
        String errorMessage = String.format("failed to filter block with query %s, detail is %s",
                strQuery, e.getMessage());
        LOGGER.error(errorMessage);
        return null;
    }

    // temporary data, delete duplicated data
    // Map<BlockId, Map<BlockletId, Map<PageId, Set<RowId>>>>
    Map<String, Map<Integer, List<Short>>> mapBlocks = new HashMap<>();
    long luceneSearchStartTime = System.currentTimeMillis();
    for (Map.Entry<String, IndexSearcher> searcherEntry : indexSearcherMap.entrySet()) {
        IndexSearcher indexSearcher = searcherEntry.getValue();
        // take the min of the total documents available in the reader and the limit, if set by the user
        maxDocs = Math.min(maxDocs, indexSearcher.getIndexReader().maxDoc());
        // execute the index search
        TopDocs result = null;
        // the number of documents to be queried in one search; always the minimum of
        // the search result and maxDocs
        int numberOfDocumentsToBeQueried = 0;
        // counter maintaining the total number of documents finished querying
        int documentHitCounter = 0;
        try {
            numberOfDocumentsToBeQueried = Math.min(maxDocs, SEARCH_LIMIT);
            result = indexSearcher.search(query, numberOfDocumentsToBeQueried);
            documentHitCounter += numberOfDocumentsToBeQueried;
        } catch (IOException e) {
            String errorMessage = String.format("failed to search lucene data, detail is %s", e.getMessage());
            LOGGER.error(errorMessage);
            throw new IOException(errorMessage);
        }

        ByteBuffer intBuffer = ByteBuffer.allocate(4);
        // last scoreDoc in a result, to be used in the searchAfter API
        ScoreDoc lastScoreDoc = null;
        while (true) {
            for (ScoreDoc scoreDoc : result.scoreDocs) {
                // get a document
                Document doc = indexSearcher.doc(scoreDoc.doc);
                // get all fields
                List<IndexableField> fieldsInDoc = doc.getFields();
                if (writeCacheSize > 0) {
                    // fill rowids into the map; each value is combined from multiple rows
                    fillMapForCombineRows(intBuffer, mapBlocks, fieldsInDoc, searcherEntry.getKey());
                } else {
                    // fill rowids into the map
                    fillMap(intBuffer, mapBlocks, fieldsInDoc, searcherEntry.getKey());
                }
                lastScoreDoc = scoreDoc;
            }
            // result holds the total number of hits, so we always need to query the
            // left-over documents
            int remainingHits = result.totalHits - documentHitCounter;
            // break the loop if the count reaches maxDocs or the remaining hits become <= 0
            if (remainingHits <= 0 || documentHitCounter >= maxDocs) {
                break;
            }
            numberOfDocumentsToBeQueried = Math.min(remainingHits, SEARCH_LIMIT);
            result = indexSearcher.searchAfter(lastScoreDoc, query, numberOfDocumentsToBeQueried);
            documentHitCounter += numberOfDocumentsToBeQueried;
        }
    }
    LOGGER.info("Time taken for lucene search: " + (System.currentTimeMillis() - luceneSearchStartTime) + " ms");

    // result blocklets
    List<FineGrainBlocklet> blocklets = new ArrayList<>();
    // transform all blocks into result-type blocklets
    // Map<BlockId, Map<BlockletId, Map<PageId, Set<RowId>>>>
    for (Map.Entry<String, Map<Integer, List<Short>>> mapBlocklet : mapBlocks.entrySet()) {
        String blockletId = mapBlocklet.getKey();
        Map<Integer, List<Short>> mapPageIds = mapBlocklet.getValue();
        List<FineGrainBlocklet.Page> pages = new ArrayList<FineGrainBlocklet.Page>();
        // for pages in this blocklet: Map<PageId, Set<RowId>>
        for (Map.Entry<Integer, List<Short>> mapPageId : mapPageIds.entrySet()) {
            // construct the rowid array
            int[] rowIds = new int[mapPageId.getValue().size()];
            int i = 0;
            // for rowids in this page: Set<RowId>
            for (Short rowid : mapPageId.getValue()) {
                rowIds[i++] = rowid;
            }
            // construct one page
            FineGrainBlocklet.Page page = new FineGrainBlocklet.Page();
            page.setPageId(mapPageId.getKey());
            page.setRowId(rowIds);
            // add this page to the page list
            pages.add(page);
        }
        // add a FineGrainBlocklet
        blocklets.add(new FineGrainBlocklet(filePath, blockletId, pages));
    }
    return blocklets;
}
From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndex.java
License:Apache License
@Override
public Cursor query(final IndexPlan plan, NodeState rootState) {
    final Filter filter = plan.getFilter();
    FullTextExpression ft = filter.getFullTextConstraint();
    final Set<String> relPaths = getRelativePaths(ft);
    if (relPaths.size() > 1) {
        return new MultiLuceneIndex(filter, rootState, relPaths).query();
    }
    final String parent = relPaths.size() == 0 ? "" : relPaths.iterator().next();
    // we only restrict non-full-text conditions if there is
    // no relative property in the full-text constraint
    final boolean nonFullTextConstraints = parent.isEmpty();
    final int parentDepth = getDepth(parent);
    QueryEngineSettings settings = filter.getQueryEngineSettings();
    Iterator<LuceneResultRow> itr = new AbstractIterator<LuceneResultRow>() {
        private final Deque<LuceneResultRow> queue = Queues.newArrayDeque();
        private final Set<String> seenPaths = Sets.newHashSet();
        private ScoreDoc lastDoc;
        private int nextBatchSize = LUCENE_QUERY_BATCH_SIZE;
        private boolean noDocs = false;
        private long lastSearchIndexerVersion;

        @Override
        protected LuceneResultRow computeNext() {
            while (!queue.isEmpty() || loadDocs()) {
                return queue.remove();
            }
            return endOfData();
        }

        private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, String excerpt)
                throws IOException {
            IndexReader reader = searcher.getIndexReader();
            PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
            reader.document(doc.doc, visitor);
            String path = visitor.getPath();
            if (path != null) {
                if ("".equals(path)) {
                    path = "/";
                }
                if (!parent.isEmpty()) {
                    // TODO OAK-828 this breaks node aggregation
                    // get the base path
                    // ensure the path ends with the given
                    // relative path
                    // if (!path.endsWith("/" + parent)) {
                    //     continue;
                    // }
                    path = getAncestorPath(path, parentDepth);
                    // avoid duplicate entries
                    if (seenPaths.contains(path)) {
                        return null;
                    }
                    seenPaths.add(path);
                }
                return new LuceneResultRow(path, doc.score, excerpt);
            }
            return null;
        }

        /**
         * Loads the lucene documents in batches
         * @return true if any document is loaded
         */
        private boolean loadDocs() {
            if (noDocs) {
                return false;
            }
            ScoreDoc lastDocToRecord = null;
            IndexNode indexNode = tracker.acquireIndexNode((String) plan.getAttribute(ATTR_INDEX_PATH));
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(filter, searcher.getIndexReader(),
                        nonFullTextConstraints, indexNode.getDefinition());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    TopDocs docs;
                    long time = System.currentTimeMillis();
                    checkForIndexVersionChange(searcher);
                    while (true) {
                        if (lastDoc != null) {
                            LOG.debug("loading the next {} entries for query {}", nextBatchSize, query);
                            docs = searcher.searchAfter(lastDoc, query, nextBatchSize);
                        } else {
                            LOG.debug("loading the first {} entries for query {}", nextBatchSize, query);
                            docs = searcher.search(query, nextBatchSize);
                        }
                        time = System.currentTimeMillis() - time;
                        LOG.debug("... took {} ms", time);
                        nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000);

                        PropertyRestriction restriction = filter.getPropertyRestriction(QueryImpl.REP_EXCERPT);
                        boolean addExcerpt = restriction != null && restriction.isNotNullRestriction();
                        Analyzer analyzer = indexNode.getDefinition().getAnalyzer();
                        if (addExcerpt) {
                            // setup highlighter
                            QueryScorer scorer = new QueryScorer(query);
                            scorer.setExpandMultiTermQuery(true);
                            highlighter.setFragmentScorer(scorer);
                        }
                        for (ScoreDoc doc : docs.scoreDocs) {
                            String excerpt = null;
                            if (addExcerpt) {
                                excerpt = getExcerpt(analyzer, searcher, doc);
                            }
                            LuceneResultRow row = convertToRow(doc, searcher, excerpt);
                            if (row != null) {
                                queue.add(row);
                            }
                            lastDocToRecord = doc;
                        }
                        if (queue.isEmpty() && docs.scoreDocs.length > 0) {
                            lastDoc = lastDocToRecord;
                        } else {
                            break;
                        }
                    }
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SpellcheckHelper.SpellcheckQuery) {
                    SpellcheckHelper.SpellcheckQuery spellcheckQuery =
                            (SpellcheckHelper.SpellcheckQuery) luceneRequestFacade.getLuceneRequest();
                    noDocs = true;
                    SuggestWord[] suggestWords = SpellcheckHelper.getSpellcheck(spellcheckQuery);

                    // ACL filter spellchecks
                    Collection<String> suggestedWords = new ArrayList<String>(suggestWords.length);
                    QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.SUGGEST,
                            indexNode.getDefinition().getAnalyzer());
                    for (SuggestWord suggestion : suggestWords) {
                        Query query = qp.createPhraseQuery(FieldNames.SUGGEST, suggestion.string);
                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                if (filter.isAccessible(retrievedDoc.get(FieldNames.PATH))) {
                                    suggestedWords.add(suggestion.string);
                                    break;
                                }
                            }
                        }
                    }
                    queue.add(new LuceneResultRow(suggestedWords));
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SuggestHelper.SuggestQuery) {
                    SuggestHelper.SuggestQuery suggestQuery =
                            (SuggestHelper.SuggestQuery) luceneRequestFacade.getLuceneRequest();
                    noDocs = true;
                    List<Lookup.LookupResult> lookupResults =
                            SuggestHelper.getSuggestions(indexNode.getLookup(), suggestQuery);

                    // ACL filter suggestions
                    Collection<String> suggestedWords = new ArrayList<String>(lookupResults.size());
                    QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.FULLTEXT,
                            indexNode.getDefinition().getAnalyzer());
                    for (Lookup.LookupResult suggestion : lookupResults) {
                        Query query = qp.createPhraseQuery(FieldNames.FULLTEXT, suggestion.key.toString());
                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                if (filter.isAccessible(retrievedDoc.get(FieldNames.PATH))) {
                                    suggestedWords.add("{term=" + suggestion.key + ",weight=" + suggestion.value + "}");
                                    break;
                                }
                            }
                        }
                    }
                    queue.add(new LuceneResultRow(suggestedWords));
                }
            } catch (IOException e) {
                LOG.warn("query via {} failed.", LuceneIndex.this, e);
            } finally {
                indexNode.release();
            }
            if (lastDocToRecord != null) {
                this.lastDoc = lastDocToRecord;
            }
            return !queue.isEmpty();
        }

        private void checkForIndexVersionChange(IndexSearcher searcher) {
            long currentVersion = LucenePropertyIndex.getVersion(searcher);
            if (currentVersion != lastSearchIndexerVersion && lastDoc != null) {
                lastDoc = null;
                LOG.debug("Change in index version detected {} => {}. Query would be performed without "
                        + "offset", currentVersion, lastSearchIndexerVersion);
            }
            this.lastSearchIndexerVersion = currentVersion;
        }
    };
    SizeEstimator sizeEstimator = new SizeEstimator() {
        @Override
        public long getSize() {
            IndexNode indexNode = tracker.acquireIndexNode((String) plan.getAttribute(ATTR_INDEX_PATH));
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(filter, searcher.getIndexReader(),
                        nonFullTextConstraints, indexNode.getDefinition());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    TotalHitCountCollector collector = new TotalHitCountCollector();
                    searcher.search(query, collector);
                    int totalHits = collector.getTotalHits();
                    LOG.debug("Estimated size for query {} is {}", query, totalHits);
                    return totalHits;
                }
                LOG.debug("Estimated size: not a Query: {}", luceneRequestFacade.getLuceneRequest());
            } catch (IOException e) {
                LOG.warn("query via {} failed.", LuceneIndex.this, e);
            } finally {
                indexNode.release();
            }
            return -1;
        }
    };
    return new LucenePathCursor(itr, settings, sizeEstimator);
}
From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndex.java
License:Apache License
private String getExcerpt(Analyzer analyzer, IndexSearcher searcher, ScoreDoc doc) throws IOException {
    StringBuilder excerpt = new StringBuilder();
    for (IndexableField field : searcher.getIndexReader().document(doc.doc).getFields()) {
        String name = field.name();
        // only full text or analyzed fields
        if (name.startsWith(FieldNames.FULLTEXT) || name.startsWith(FieldNames.ANALYZED_FIELD_PREFIX)) {
            String text = field.stringValue();
            TokenStream tokenStream = analyzer.tokenStream(name, text);
            try {
                TextFragment[] textFragments = highlighter.getBestTextFragments(tokenStream, text, true, 2);
                if (textFragments != null && textFragments.length > 0) {
                    for (TextFragment fragment : textFragments) {
                        if (excerpt.length() > 0) {
                            excerpt.append("...");
                        }
                        excerpt.append(fragment.toString());
                    }
                    break;
                }
            } catch (InvalidTokenOffsetsException e) {
                LOG.error("higlighting failed", e);
            }
        }
    }
    return excerpt.toString();
}
From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LucenePropertyIndex.java
License:Apache License
@Override
public Cursor query(final IndexPlan plan, NodeState rootState) {
    final Filter filter = plan.getFilter();
    final Sort sort = getSort(plan);
    final PlanResult pr = getPlanResult(plan);
    QueryEngineSettings settings = filter.getQueryEngineSettings();
    Iterator<LuceneResultRow> itr = new AbstractIterator<LuceneResultRow>() {
        private final Deque<LuceneResultRow> queue = Queues.newArrayDeque();
        private final Set<String> seenPaths = Sets.newHashSet();
        private ScoreDoc lastDoc;
        private int nextBatchSize = LUCENE_QUERY_BATCH_SIZE;
        private boolean noDocs = false;
        private long lastSearchIndexerVersion;

        @Override
        protected LuceneResultRow computeNext() {
            while (!queue.isEmpty() || loadDocs()) {
                return queue.remove();
            }
            return endOfData();
        }

        private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, String excerpt,
                Facets facets, String explanation) throws IOException {
            IndexReader reader = searcher.getIndexReader();
            // TODO Look into usage of field cache for retrieving the path
            // instead of reading via reader if no of docs in index are limited
            PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
            reader.document(doc.doc, visitor);
            String path = visitor.getPath();
            if (path != null) {
                if ("".equals(path)) {
                    path = "/";
                }
                if (pr.isPathTransformed()) {
                    String originalPath = path;
                    path = pr.transformPath(path);
                    if (path == null) {
                        LOG.trace("Ignoring path {} : Transformation returned null", originalPath);
                        return null;
                    }
                    // avoid duplicate entries
                    if (seenPaths.contains(path)) {
                        LOG.trace("Ignoring path {} : Duplicate post transformation", originalPath);
                        return null;
                    }
                    seenPaths.add(path);
                }
                LOG.trace("Matched path {}", path);
                return new LuceneResultRow(path, doc.score, excerpt, facets, explanation);
            }
            return null;
        }

        /**
         * Loads the lucene documents in batches
         * @return true if any document is loaded
         */
        private boolean loadDocs() {
            if (noDocs) {
                return false;
            }
            ScoreDoc lastDocToRecord = null;
            final IndexNode indexNode = acquireIndexNode(plan);
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory,
                        searcher.getIndexReader());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    CustomScoreQuery customScoreQuery = getCustomScoreQuery(plan, query);
                    if (customScoreQuery != null) {
                        query = customScoreQuery;
                    }
                    checkForIndexVersionChange(searcher);
                    TopDocs docs;
                    long start = PERF_LOGGER.start();
                    while (true) {
                        if (lastDoc != null) {
                            LOG.debug("loading the next {} entries for query {}", nextBatchSize, query);
                            if (sort == null) {
                                docs = searcher.searchAfter(lastDoc, query, nextBatchSize);
                            } else {
                                docs = searcher.searchAfter(lastDoc, query, nextBatchSize, sort);
                            }
                        } else {
                            LOG.debug("loading the first {} entries for query {}", nextBatchSize, query);
                            if (sort == null) {
                                docs = searcher.search(query, nextBatchSize);
                            } else {
                                docs = searcher.search(query, nextBatchSize, sort);
                            }
                        }
                        PERF_LOGGER.end(start, -1, "{} ...", docs.scoreDocs.length);
                        nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000);

                        long f = PERF_LOGGER.start();
                        Facets facets = FacetHelper.getFacets(searcher, query, docs, plan,
                                indexNode.getDefinition().isSecureFacets());
                        PERF_LOGGER.end(f, -1, "facets retrieved");

                        PropertyRestriction restriction = filter.getPropertyRestriction(QueryImpl.REP_EXCERPT);
                        boolean addExcerpt = restriction != null && restriction.isNotNullRestriction();

                        restriction = filter.getPropertyRestriction(QueryImpl.OAK_SCORE_EXPLANATION);
                        boolean addExplain = restriction != null && restriction.isNotNullRestriction();

                        Analyzer analyzer = indexNode.getDefinition().getAnalyzer();

                        FieldInfos mergedFieldInfos = null;
                        if (addExcerpt) {
                            // setup highlighter
                            QueryScorer scorer = new QueryScorer(query);
                            scorer.setExpandMultiTermQuery(true);
                            highlighter.setFragmentScorer(scorer);
                            mergedFieldInfos = MultiFields.getMergedFieldInfos(searcher.getIndexReader());
                        }

                        for (ScoreDoc doc : docs.scoreDocs) {
                            String excerpt = null;
                            if (addExcerpt) {
                                excerpt = getExcerpt(query, analyzer, searcher, doc, mergedFieldInfos);
                            }
                            String explanation = null;
                            if (addExplain) {
                                explanation = searcher.explain(query, doc.doc).toString();
                            }
                            LuceneResultRow row = convertToRow(doc, searcher, excerpt, facets, explanation);
                            if (row != null) {
                                queue.add(row);
                            }
                            lastDocToRecord = doc;
                        }

                        if (queue.isEmpty() && docs.scoreDocs.length > 0) {
                            // queue is still empty but more results can be fetched
                            // from Lucene so still continue
                            lastDoc = lastDocToRecord;
                        } else {
                            break;
                        }
                    }
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SpellcheckHelper.SpellcheckQuery) {
                    String aclCheckField = indexNode.getDefinition().isFullTextEnabled()
                            ? FieldNames.FULLTEXT : FieldNames.SPELLCHECK;
                    noDocs = true;
                    SpellcheckHelper.SpellcheckQuery spellcheckQuery =
                            (SpellcheckHelper.SpellcheckQuery) luceneRequestFacade.getLuceneRequest();
                    SuggestWord[] suggestWords = SpellcheckHelper.getSpellcheck(spellcheckQuery);

                    // ACL filter spellchecks
                    QueryParser qp = new QueryParser(Version.LUCENE_47, aclCheckField,
                            indexNode.getDefinition().getAnalyzer());
                    for (SuggestWord suggestion : suggestWords) {
                        Query query = qp.createPhraseQuery(aclCheckField,
                                QueryParserBase.escape(suggestion.string));
                        query = addDescendantClauseIfRequired(query, plan);
                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                String prefix = filter.getPath();
                                if (prefix.length() == 1) {
                                    prefix = "";
                                }
                                if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) {
                                    queue.add(new LuceneResultRow(suggestion.string));
                                    break;
                                }
                            }
                        }
                    }
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SuggestHelper.SuggestQuery) {
                    SuggestHelper.SuggestQuery suggestQuery =
                            (SuggestHelper.SuggestQuery) luceneRequestFacade.getLuceneRequest();
                    noDocs = true;
                    List<Lookup.LookupResult> lookupResults =
                            SuggestHelper.getSuggestions(indexNode.getLookup(), suggestQuery);
                    QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.SUGGEST,
                            indexNode.getDefinition().isSuggestAnalyzed()
                                    ? indexNode.getDefinition().getAnalyzer()
                                    : SuggestHelper.getAnalyzer());

                    // ACL filter suggestions
                    for (Lookup.LookupResult suggestion : lookupResults) {
                        Query query = qp.parse("\"" + QueryParserBase.escape(suggestion.key.toString()) + "\"");
                        query = addDescendantClauseIfRequired(query, plan);
                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                String prefix = filter.getPath();
                                if (prefix.length() == 1) {
                                    prefix = "";
                                }
                                if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) {
                                    queue.add(new LuceneResultRow(suggestion.key.toString(), suggestion.value));
                                    break;
                                }
                            }
                        }
                    }
                }
            } catch (Exception e) {
                LOG.warn("query via {} failed.", LucenePropertyIndex.this, e);
            } finally {
                indexNode.release();
            }
            if (lastDocToRecord != null) {
                this.lastDoc = lastDocToRecord;
            }
            return !queue.isEmpty();
        }

        private void checkForIndexVersionChange(IndexSearcher searcher) {
            long currentVersion = getVersion(searcher);
            if (currentVersion != lastSearchIndexerVersion && lastDoc != null) {
                lastDoc = null;
                LOG.debug("Change in index version detected {} => {}. Query would be performed without "
                        + "offset", currentVersion, lastSearchIndexerVersion);
            }
            this.lastSearchIndexerVersion = currentVersion;
        }
    };
    SizeEstimator sizeEstimator = new SizeEstimator() {
        @Override
        public long getSize() {
            IndexNode indexNode = acquireIndexNode(plan);
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory,
                        searcher.getIndexReader());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    TotalHitCountCollector collector = new TotalHitCountCollector();
                    searcher.search(query, collector);
                    int totalHits = collector.getTotalHits();
                    LOG.debug("Estimated size for query {} is {}", query, totalHits);
                    return totalHits;
                }
                LOG.debug("estimate size: not a Query: {}", luceneRequestFacade.getLuceneRequest());
            } catch (IOException e) {
                LOG.warn("query via {} failed.", LucenePropertyIndex.this, e);
            } finally {
                indexNode.release();
            }
            return -1;
        }
    };
    return new LucenePathCursor(itr, plan, settings, sizeEstimator);
}
From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LucenePropertyIndex.java
License:Apache License
private String getExcerpt(Query query, Analyzer analyzer, IndexSearcher searcher, ScoreDoc doc,
        FieldInfos fieldInfos) throws IOException {
    StringBuilder excerpt = new StringBuilder();
    int docID = doc.doc;
    List<String> names = new LinkedList<String>();
    for (IndexableField field : searcher.getIndexReader().document(docID).getFields()) {
        String name = field.name();
        // the postings highlighter can be used on analyzed fields with docs, freqs,
        // positions and offsets stored
        if (name.startsWith(ANALYZED_FIELD_PREFIX) && fieldInfos.hasProx() && fieldInfos.hasOffsets()) {
            names.add(name);
        }
    }
    if (names.size() > 0) {
        int[] maxPassages = new int[names.size()];
        for (int i = 0; i < maxPassages.length; i++) {
            maxPassages[i] = 1;
        }
        try {
            Map<String, String[]> stringMap = postingsHighlighter.highlightFields(
                    names.toArray(new String[names.size()]), query, searcher, new int[] { docID }, maxPassages);
            for (Map.Entry<String, String[]> entry : stringMap.entrySet()) {
                String value = Arrays.toString(entry.getValue());
                if (value.contains("<b>")) {
                    if (excerpt.length() > 0) {
                        excerpt.append("...");
                    }
                    excerpt.append(value);
                }
            }
        } catch (Exception e) {
            LOG.error("postings highlighting failed", e);
        }
    }
    // fallback if no excerpt could be retrieved using the postings highlighter
    if (excerpt.length() == 0) {
        for (IndexableField field : searcher.getIndexReader().document(doc.doc).getFields()) {
            String name = field.name();
            // only full text or analyzed fields
            if (name.startsWith(FieldNames.FULLTEXT) || name.startsWith(FieldNames.ANALYZED_FIELD_PREFIX)) {
                String text = field.stringValue();
                TokenStream tokenStream = analyzer.tokenStream(name, text);
                try {
                    TextFragment[] textFragments = highlighter.getBestTextFragments(tokenStream, text, true, 1);
                    if (textFragments != null && textFragments.length > 0) {
                        for (TextFragment fragment : textFragments) {
                            if (excerpt.length() > 0) {
                                excerpt.append("...");
                            }
                            excerpt.append(fragment.toString());
                        }
                        break;
                    }
                } catch (InvalidTokenOffsetsException e) {
                    LOG.error("higlighting failed", e);
                }
            }
        }
    }
    return excerpt.toString();
}
From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LucenePropertyIndex.java
License:Apache License
static long getVersion(IndexSearcher indexSearcher) {
    IndexReader reader = indexSearcher.getIndexReader();
    if (reader instanceof DirectoryReader) {
        return ((DirectoryReader) reader).getVersion();
    }
    return -1;
}
From source file:org.apache.jackrabbit.oak.plugins.index.lucene.util.FacetHelper.java
License:Apache License
public static Facets getFacets(IndexSearcher searcher, Query query, TopDocs docs, QueryIndex.IndexPlan plan,
        boolean secure) throws IOException {
    Facets facets = null;
    @SuppressWarnings("unchecked")
    List<String> facetFields = (List<String>) plan.getAttribute(ATTR_FACET_FIELDS);
    if (facetFields != null && facetFields.size() > 0) {
        Map<String, Facets> facetsMap = new HashMap<String, Facets>();
        for (String facetField : facetFields) {
            FacetsCollector facetsCollector = new FacetsCollector();
            try {
                DefaultSortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(
                        searcher.getIndexReader(), FieldNames.createFacetFieldName(facetField));
                FacetsCollector.search(searcher, query, 10, facetsCollector);
                facetsMap.put(facetField,
                        secure ? new FilteredSortedSetDocValuesFacetCounts(state, facetsCollector,
                                plan.getFilter(), docs)
                               : new SortedSetDocValuesFacetCounts(state, facetsCollector));
            } catch (IllegalArgumentException iae) {
                LOGGER.warn("facets for {} not yet indexed", facetField);
            }
        }
        if (facetsMap.size() > 0) {
            facets = new MultiFacets(facetsMap);
        }
    }
    return facets;
}