Example usage for org.apache.lucene.search IndexSearcher getIndexReader

List of usage examples for org.apache.lucene.search IndexSearcher getIndexReader

Introduction

On this page you can find example usages of the getIndexReader method of org.apache.lucene.search.IndexSearcher.

Prototype

public IndexReader getIndexReader() 

Source Link

Document

Returns the IndexReader that this IndexSearcher searches.

Usage

From source file:net.sourceforge.docfetcher.model.search.HighlightServiceTest.java

License:Open Source License

/**
 * Checks the character offsets reported by the fast-vector-highlighter
 * helper classes for a quoted single-term query.
 * <p>
 * A one-document in-memory index holding the text "some text" is built,
 * the quoted query "text" is executed against it, and the first entry of
 * the (private) phrase list is expected to span offsets 5 to 9.
 */
@SuppressWarnings("unchecked")
@Test
public void testPhraseHighlighter() throws Exception {
    // Build the index: one document with a single analyzed "content" field
    Directory ramDir = new RAMDirectory();
    Analyzer stdAnalyzer = new StandardAnalyzer(IndexRegistry.LUCENE_VERSION, Collections.EMPTY_SET);
    IndexWriterAdapter indexWriter = new IndexWriterAdapter(ramDir);
    Document document = new Document();
    Field contentField = new Field("content", "some text", Store.NO, Index.ANALYZED,
            TermVector.WITH_POSITIONS_OFFSETS);
    document.add(contentField);
    indexWriter.add(document);
    // closing the writer flushes unwritten documents into the index
    Closeables.closeQuietly(indexWriter);

    // Prepare the phrase query and the highlighter's view of it
    QueryParser parser = new QueryParser(IndexRegistry.LUCENE_VERSION, "content", stdAnalyzer);
    Query phraseQuery = parser.parse("\"text\"");
    FastVectorHighlighter fvh = new FastVectorHighlighter(true, true, null, null);
    FieldQuery fieldQuery = fvh.getFieldQuery(phraseQuery);

    IndexSearcher indexSearcher = new IndexSearcher(ramDir);
    try {
        TopDocs hits = indexSearcher.search(phraseQuery, 10);
        assertEquals(1, hits.scoreDocs.length);
        int hitDocId = hits.scoreDocs[0].doc;

        // Collect the phrase highlighting offsets for the single hit
        FieldTermStack termStack = new FieldTermStack(indexSearcher.getIndexReader(), hitDocId, "content",
                fieldQuery);
        FieldPhraseList phrases = new FieldPhraseList(termStack, fieldQuery);

        // "phraseList" is read reflectively via getDeclaredField/setAccessible
        java.lang.reflect.Field phraseListField = phrases.getClass().getDeclaredField("phraseList");
        phraseListField.setAccessible(true);
        LinkedList<WeightedPhraseInfo> phraseInfos = (LinkedList<WeightedPhraseInfo>) phraseListField
                .get(phrases);

        WeightedPhraseInfo firstPhrase = phraseInfos.get(0);
        assertEquals(5, firstPhrase.getStartOffset());
        assertEquals(9, firstPhrase.getEndOffset());
    } finally {
        Closeables.closeQuietly(indexSearcher);
    }
}

From source file:net.sourceforge.vaticanfetcher.model.search.HighlightServiceTest.java

License:Open Source License

/**
 * Checks the character offsets reported by the fast-vector-highlighter
 * helper classes for a quoted single-term query.
 * <p>
 * A one-document in-memory index holding the text "some text" is built,
 * the quoted query "text" is executed against it, and the first entry of
 * the (private) phrase list is expected to span offsets 5 to 9.
 */
@SuppressWarnings("unchecked")
@Test
public void testPhraseHighlighter() throws Exception {
    // Create index
    Directory directory = new RAMDirectory();
    Analyzer analyzer = new StandardAnalyzer(IndexRegistry.LUCENE_VERSION, Collections.EMPTY_SET);
    IndexWriterAdapter writer = new IndexWriterAdapter(directory);
    Document doc = new Document();
    doc.add(new Field("content", "some text", Store.NO, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
    writer.add(doc);
    Closeables.closeQuietly(writer); // flush unwritten documents into index

    // Perform phrase search
    QueryParser queryParser = new QueryParser(IndexRegistry.LUCENE_VERSION, "content", analyzer);
    Query query = queryParser.parse("\"text\"");
    FastVectorHighlighter highlighter = new FastVectorHighlighter(true, true, null, null);
    FieldQuery fieldQuery = highlighter.getFieldQuery(query);
    IndexSearcher searcher = null;
    try {
        searcher = new IndexSearcher(directory);
        TopDocs docs = searcher.search(query, 10);
        assertEquals(1, docs.scoreDocs.length);
        int docId = docs.scoreDocs[0].doc;

        // Get phrase highlighting offsets
        FieldTermStack fieldTermStack = new FieldTermStack(searcher.getIndexReader(), docId, "content",
                fieldQuery);
        FieldPhraseList fieldPhraseList = new FieldPhraseList(fieldTermStack, fieldQuery);

        // "phraseList" is read reflectively via getDeclaredField/setAccessible
        java.lang.reflect.Field field = fieldPhraseList.getClass().getDeclaredField("phraseList");
        field.setAccessible(true);
        LinkedList<WeightedPhraseInfo> list = (LinkedList<WeightedPhraseInfo>) field.get(fieldPhraseList);
        assertEquals(5, list.get(0).getStartOffset());
        assertEquals(9, list.get(0).getEndOffset());
    } finally {
        // release the searcher even when an assertion above fails
        Closeables.closeQuietly(searcher);
    }
}

From source file:net.stargraph.core.impl.lucene.LuceneSearcher.java

License:Open Source License

/**
 * Returns the document count reported by the index reader of the current
 * Lucene searcher.
 *
 * @return the value of {@code numDocs()} on the searcher's index reader
 * @throws StarGraphException if no Lucene searcher is available
 */
@Override
public long countDocuments() {
    final IndexSearcher searcher = getLuceneSearcher();
    if (searcher == null) {
        throw new StarGraphException("Index not found for " + kbId);
    }
    return searcher.getIndexReader().numDocs();
}

From source file:org.apache.carbondata.datamap.lucene.LuceneFineGrainDataMap.java

License:Apache License

/**
 * Prune the datamap with filter expression. It returns the list of
 * blocklets where these filters can exist.
 * <p>
 * The filter expression is converted into a Lucene query string, parsed,
 * and executed against every searcher in {@code indexSearcherMap}. Hits
 * are fetched page by page (at most {@code SEARCH_LIMIT} per request) and
 * collected into a blockletId -> pageId -> rowIds map, which is finally
 * converted into {@link FineGrainBlocklet} instances.
 *
 * @param filterExp         filter whose expression is turned into the Lucene query
 * @param segmentProperties not used by this method
 * @param partitions        not used by this method
 * @return the matching blocklets, or {@code null} when no query string can
 *         be derived from the filter or the query string fails to parse
 * @throws IOException if a Lucene search or document lookup fails
 */
@Override
public List<FineGrainBlocklet> prune(FilterResolverIntf filterExp, SegmentProperties segmentProperties,
        List<PartitionSpec> partitions) throws IOException {

    // convert filter expr into lucene list query
    // (the list is never populated here, so the parser gets an empty field array)
    List<String> fields = new ArrayList<String>();

    // only for test , query all data
    String strQuery = getQueryString(filterExp.getFilterExpression());
    int maxDocs;
    try {
        maxDocs = getMaxDoc(filterExp.getFilterExpression());
    } catch (NumberFormatException e) {
        // no usable limit in the expression: fall back to "unlimited"
        maxDocs = Integer.MAX_VALUE;
    }

    if (null == strQuery) {
        return null;
    }

    String[] sFields = new String[fields.size()];
    fields.toArray(sFields);

    // get analyzer
    if (analyzer == null) {
        analyzer = new StandardAnalyzer();
    }

    // use MultiFieldQueryParser to parser query
    QueryParser queryParser = new MultiFieldQueryParser(sFields, analyzer);
    queryParser.setAllowLeadingWildcard(true);
    Query query;
    try {
        query = queryParser.parse(strQuery);
    } catch (ParseException e) {
        String errorMessage = String.format("failed to filter block with query %s, detail is %s", strQuery,
                e.getMessage());
        LOGGER.error(errorMessage);
        return null;
    }
    // temporary data, delete duplicated data
    // Map<BlockletId, Map<PageId, List<RowId>>>
    Map<String, Map<Integer, List<Short>>> mapBlocks = new HashMap<>();

    long luceneSearchStartTime = System.currentTimeMillis();
    for (Map.Entry<String, IndexSearcher> searcherEntry : indexSearcherMap.entrySet()) {
        IndexSearcher indexSearcher = searcherEntry.getValue();
        // take the min of total documents available in the reader and limit if set by the user
        // NOTE(review): maxDocs is shared across searchers, so a small reader visited early
        // also caps every later searcher - confirm whether a per-searcher limit was intended
        maxDocs = Math.min(maxDocs, indexSearcher.getIndexReader().maxDoc());
        // execute index search
        TopDocs result = null;
        // the number of documents to be queried in one search. It will always be minimum of
        // search result and maxDocs
        int numberOfDocumentsToBeQueried = 0;
        // counter for maintaining the total number of documents finished querying
        int documentHitCounter = 0;
        try {
            numberOfDocumentsToBeQueried = Math.min(maxDocs, SEARCH_LIMIT);
            result = indexSearcher.search(query, numberOfDocumentsToBeQueried);
            documentHitCounter += numberOfDocumentsToBeQueried;
        } catch (IOException e) {
            String errorMessage = String.format("failed to search lucene data, detail is %s", e.getMessage());
            LOGGER.error(errorMessage);
            // keep the original exception as the cause so its stack trace is not lost
            throw new IOException(errorMessage, e);
        }

        ByteBuffer intBuffer = ByteBuffer.allocate(4);
        // last scoreDoc in a result to be used in searchAfter API
        ScoreDoc lastScoreDoc = null;
        while (true) {
            for (ScoreDoc scoreDoc : result.scoreDocs) {
                // get a document
                Document doc = indexSearcher.doc(scoreDoc.doc);
                // get all fields
                List<IndexableField> fieldsInDoc = doc.getFields();
                if (writeCacheSize > 0) {
                    // It fills rowids to the map, its value is combined with multiple rows.
                    fillMapForCombineRows(intBuffer, mapBlocks, fieldsInDoc, searcherEntry.getKey());
                } else {
                    // Fill rowids to the map
                    fillMap(intBuffer, mapBlocks, fieldsInDoc, searcherEntry.getKey());
                }
                lastScoreDoc = scoreDoc;
            }
            // result will have the total number of hits therefore we always need to query on the
            // left over documents
            int remainingHits = result.totalHits - documentHitCounter;
            // break the loop if count reaches maxDocs to be searched or remaining hits become <=0
            if (remainingHits <= 0 || documentHitCounter >= maxDocs) {
                break;
            }
            // NOTE(review): the page size is not capped by (maxDocs - documentHitCounter), so the
            // last page may fetch past maxDocs - confirm whether that is acceptable
            numberOfDocumentsToBeQueried = Math.min(remainingHits, SEARCH_LIMIT);
            result = indexSearcher.searchAfter(lastScoreDoc, query, numberOfDocumentsToBeQueried);
            documentHitCounter += numberOfDocumentsToBeQueried;
        }
    }
    LOGGER.info(
            "Time taken for lucene search: " + (System.currentTimeMillis() - luceneSearchStartTime) + " ms");

    // result blocklets
    List<FineGrainBlocklet> blocklets = new ArrayList<>();

    // transform all blocks into result type blocklets
    // Map<BlockletId, Map<PageId, List<RowId>>>
    for (Map.Entry<String, Map<Integer, List<Short>>> mapBlocklet : mapBlocks.entrySet()) {
        String blockletId = mapBlocklet.getKey();
        Map<Integer, List<Short>> mapPageIds = mapBlocklet.getValue();
        List<FineGrainBlocklet.Page> pages = new ArrayList<FineGrainBlocklet.Page>();

        // for pages in this blocklet Map<PageId, List<RowId>>
        for (Map.Entry<Integer, List<Short>> mapPageId : mapPageIds.entrySet()) {
            // construct array rowid
            int[] rowIds = new int[mapPageId.getValue().size()];
            int i = 0;
            // for rowids in this page List<RowId>
            for (Short rowid : mapPageId.getValue()) {
                rowIds[i++] = rowid;
            }
            // construct one page
            FineGrainBlocklet.Page page = new FineGrainBlocklet.Page();
            page.setPageId(mapPageId.getKey());
            page.setRowId(rowIds);

            // add this page into list pages
            pages.add(page);
        }

        // add a FineGrainBlocklet
        blocklets.add(new FineGrainBlocklet(filePath, blockletId, pages));
    }

    return blocklets;
}

From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndex.java

License:Apache License

/**
 * Executes the given index plan against the Lucene index and returns a
 * cursor over the result rows.
 * <p>
 * When the full-text constraint references more than one relative path the
 * query is delegated to {@code MultiLuceneIndex}. Otherwise rows are loaded
 * lazily in batches by an iterator that acquires the index node for each
 * batch and releases it afterwards. Spellcheck and suggest requests produce
 * a single row holding the suggestions whose documents pass
 * {@code filter.isAccessible}.
 *
 * @param plan      the plan providing the filter and the index path attribute
 * @param rootState passed through to {@code MultiLuceneIndex} when delegating
 * @return a cursor backed by the lazy iterator and a size estimator
 */
@Override
public Cursor query(final IndexPlan plan, NodeState rootState) {
    final Filter filter = plan.getFilter();
    FullTextExpression ft = filter.getFullTextConstraint();
    final Set<String> relPaths = getRelativePaths(ft);
    if (relPaths.size() > 1) {
        return new MultiLuceneIndex(filter, rootState, relPaths).query();
    }

    final String parent = relPaths.isEmpty() ? "" : relPaths.iterator().next();
    // we only restrict non-full-text conditions if there is
    // no relative property in the full-text constraint
    final boolean nonFullTextConstraints = parent.isEmpty();
    final int parentDepth = getDepth(parent);
    QueryEngineSettings settings = filter.getQueryEngineSettings();
    Iterator<LuceneResultRow> itr = new AbstractIterator<LuceneResultRow>() {
        // rows already converted but not yet handed out
        private final Deque<LuceneResultRow> queue = Queues.newArrayDeque();
        // ancestor paths already returned; only used when a relative path is set
        private final Set<String> seenPaths = Sets.newHashSet();
        // last document of the previous batch; start point for searchAfter
        private ScoreDoc lastDoc;
        private int nextBatchSize = LUCENE_QUERY_BATCH_SIZE;
        // set once a spellcheck/suggest request has been answered
        private boolean noDocs = false;
        private long lastSearchIndexerVersion;

        @Override
        protected LuceneResultRow computeNext() {
            if (!queue.isEmpty() || loadDocs()) {
                return queue.remove();
            }
            return endOfData();
        }

        /**
         * Converts a Lucene hit into a result row.
         *
         * @return the row, or null if the document has no path or its
         *         ancestor path was already returned
         */
        private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, String excerpt)
                throws IOException {
            IndexReader reader = searcher.getIndexReader();
            PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
            reader.document(doc.doc, visitor);
            String path = visitor.getPath();
            if (path != null) {
                if ("".equals(path)) {
                    path = "/";
                }
                if (!parent.isEmpty()) {
                    // TODO OAK-828 this breaks node aggregation
                    // get the base path
                    // ensure the path ends with the given
                    // relative path
                    // if (!path.endsWith("/" + parent)) {
                    // continue;
                    // }
                    path = getAncestorPath(path, parentDepth);
                    // avoid duplicate entries: add() returns false for a path seen before
                    if (!seenPaths.add(path)) {
                        return null;
                    }
                }

                return new LuceneResultRow(path, doc.score, excerpt);
            }
            return null;
        }

        /**
         * Loads the lucene documents in batches
         * @return true if any document is loaded
         */
        private boolean loadDocs() {

            if (noDocs) {
                return false;
            }

            ScoreDoc lastDocToRecord = null;

            IndexNode indexNode = tracker.acquireIndexNode((String) plan.getAttribute(ATTR_INDEX_PATH));
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(filter, searcher.getIndexReader(),
                        nonFullTextConstraints, indexNode.getDefinition());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    TopDocs docs;
                    checkForIndexVersionChange(searcher);
                    while (true) {
                        // restart the clock for every batch so that each logged
                        // duration covers exactly one search call
                        long start = System.currentTimeMillis();
                        if (lastDoc != null) {
                            LOG.debug("loading the next {} entries for query {}", nextBatchSize, query);
                            docs = searcher.searchAfter(lastDoc, query, nextBatchSize);
                        } else {
                            LOG.debug("loading the first {} entries for query {}", nextBatchSize, query);
                            docs = searcher.search(query, nextBatchSize);
                        }
                        LOG.debug("... took {} ms", System.currentTimeMillis() - start);
                        // double the batch size for the next round, capped at 100000
                        nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000);

                        PropertyRestriction restriction = filter.getPropertyRestriction(QueryImpl.REP_EXCERPT);
                        boolean addExcerpt = restriction != null && restriction.isNotNullRestriction();

                        Analyzer analyzer = indexNode.getDefinition().getAnalyzer();

                        if (addExcerpt) {
                            // setup highlighter
                            QueryScorer scorer = new QueryScorer(query);
                            scorer.setExpandMultiTermQuery(true);
                            highlighter.setFragmentScorer(scorer);
                        }

                        for (ScoreDoc doc : docs.scoreDocs) {
                            String excerpt = null;
                            if (addExcerpt) {
                                excerpt = getExcerpt(analyzer, searcher, doc);
                            }

                            LuceneResultRow row = convertToRow(doc, searcher, excerpt);
                            if (row != null) {
                                queue.add(row);
                            }
                            lastDocToRecord = doc;
                        }

                        if (queue.isEmpty() && docs.scoreDocs.length > 0) {
                            // every hit of this batch was dropped but more may follow:
                            // continue with the next batch
                            lastDoc = lastDocToRecord;
                        } else {
                            break;
                        }
                    }
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SpellcheckHelper.SpellcheckQuery) {
                    SpellcheckHelper.SpellcheckQuery spellcheckQuery = (SpellcheckHelper.SpellcheckQuery) luceneRequestFacade
                            .getLuceneRequest();
                    noDocs = true;
                    SuggestWord[] suggestWords = SpellcheckHelper.getSpellcheck(spellcheckQuery);

                    // ACL filter spellchecks
                    Collection<String> suggestedWords = new ArrayList<String>(suggestWords.length);
                    QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.SUGGEST,
                            indexNode.getDefinition().getAnalyzer());
                    for (SuggestWord suggestion : suggestWords) {
                        Query query = qp.createPhraseQuery(FieldNames.SUGGEST, suggestion.string);
                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                // one accessible document is enough to keep the suggestion
                                if (filter.isAccessible(retrievedDoc.get(FieldNames.PATH))) {
                                    suggestedWords.add(suggestion.string);
                                    break;
                                }
                            }
                        }
                    }

                    queue.add(new LuceneResultRow(suggestedWords));
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SuggestHelper.SuggestQuery) {
                    SuggestHelper.SuggestQuery suggestQuery = (SuggestHelper.SuggestQuery) luceneRequestFacade
                            .getLuceneRequest();
                    noDocs = true;
                    List<Lookup.LookupResult> lookupResults = SuggestHelper
                            .getSuggestions(indexNode.getLookup(), suggestQuery);

                    // ACL filter suggestions
                    Collection<String> suggestedWords = new ArrayList<String>(lookupResults.size());
                    QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.FULLTEXT,
                            indexNode.getDefinition().getAnalyzer());
                    for (Lookup.LookupResult suggestion : lookupResults) {
                        Query query = qp.createPhraseQuery(FieldNames.FULLTEXT, suggestion.key.toString());
                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                // one accessible document is enough to keep the suggestion
                                if (filter.isAccessible(retrievedDoc.get(FieldNames.PATH))) {
                                    suggestedWords.add(
                                            "{term=" + suggestion.key + ",weight=" + suggestion.value + "}");
                                    break;
                                }
                            }
                        }
                    }

                    queue.add(new LuceneResultRow(suggestedWords));
                }
            } catch (IOException e) {
                // best effort: the failure is logged and whatever is queued is returned
                LOG.warn("query via {} failed.", LuceneIndex.this, e);
            } finally {
                indexNode.release();
            }

            if (lastDocToRecord != null) {
                this.lastDoc = lastDocToRecord;
            }

            return !queue.isEmpty();
        }

        /**
         * Drops the searchAfter offset when the index version differs from
         * the one seen by the previous search, then records the new version.
         */
        private void checkForIndexVersionChange(IndexSearcher searcher) {
            long currentVersion = LucenePropertyIndex.getVersion(searcher);
            if (currentVersion != lastSearchIndexerVersion && lastDoc != null) {
                lastDoc = null;
                // logged as previous => current
                LOG.debug("Change in index version detected {} => {}. Query would be performed without "
                        + "offset", lastSearchIndexerVersion, currentVersion);
            }
            this.lastSearchIndexerVersion = currentVersion;
        }
    };
    SizeEstimator sizeEstimator = new SizeEstimator() {
        /**
         * @return the total hit count for a plain Lucene query, or -1 when
         *         the request is not a Query or the search fails
         */
        @Override
        public long getSize() {
            IndexNode indexNode = tracker.acquireIndexNode((String) plan.getAttribute(ATTR_INDEX_PATH));
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(filter, searcher.getIndexReader(),
                        nonFullTextConstraints, indexNode.getDefinition());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    TotalHitCountCollector collector = new TotalHitCountCollector();
                    searcher.search(query, collector);
                    int totalHits = collector.getTotalHits();
                    LOG.debug("Estimated size for query {} is {}", query, totalHits);
                    return totalHits;
                }
                LOG.debug("Estimated size: not a Query: {}", luceneRequestFacade.getLuceneRequest());
            } catch (IOException e) {
                LOG.warn("query via {} failed.", LuceneIndex.this, e);
            } finally {
                indexNode.release();
            }
            return -1;
        }
    };
    return new LucenePathCursor(itr, settings, sizeEstimator);
}

From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndex.java

License:Apache License

/**
 * Builds an excerpt for the given hit from the first eligible stored field
 * that yields highlighted fragments.
 * <p>
 * Only fields whose name starts with {@code FieldNames.FULLTEXT} or
 * {@code FieldNames.ANALYZED_FIELD_PREFIX} are considered. Fragments of the
 * chosen field are joined with "..."; a field whose highlighting fails is
 * logged and skipped.
 *
 * @param analyzer analyzer used to tokenize the field text
 * @param searcher searcher whose reader supplies the stored document
 * @param doc      the hit to build the excerpt for
 * @return the excerpt, or an empty string if no field produced fragments
 * @throws IOException if reading the document or tokenizing fails
 */
private String getExcerpt(Analyzer analyzer, IndexSearcher searcher, ScoreDoc doc) throws IOException {
    StringBuilder result = new StringBuilder();

    for (IndexableField storedField : searcher.getIndexReader().document(doc.doc).getFields()) {
        String fieldName = storedField.name();
        // only full text or analyzed fields
        boolean eligible = fieldName.startsWith(FieldNames.FULLTEXT)
                || fieldName.startsWith(FieldNames.ANALYZED_FIELD_PREFIX);
        if (!eligible) {
            continue;
        }

        String fieldText = storedField.stringValue();
        TokenStream tokens = analyzer.tokenStream(fieldName, fieldText);
        TextFragment[] fragments;
        try {
            fragments = highlighter.getBestTextFragments(tokens, fieldText, true, 2);
        } catch (InvalidTokenOffsetsException e) {
            LOG.error("higlighting failed", e);
            continue;
        }
        if (fragments == null || fragments.length == 0) {
            continue;
        }

        for (TextFragment fragment : fragments) {
            if (result.length() > 0) {
                result.append("...");
            }
            result.append(fragment.toString());
        }
        // the first field that produced fragments wins
        break;
    }
    return result.toString();
}

From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LucenePropertyIndex.java

License:Apache License

@Override
public Cursor query(final IndexPlan plan, NodeState rootState) {
    final Filter filter = plan.getFilter();
    final Sort sort = getSort(plan);
    final PlanResult pr = getPlanResult(plan);
    QueryEngineSettings settings = filter.getQueryEngineSettings();
    Iterator<LuceneResultRow> itr = new AbstractIterator<LuceneResultRow>() {
        private final Deque<LuceneResultRow> queue = Queues.newArrayDeque();
        private final Set<String> seenPaths = Sets.newHashSet();
        private ScoreDoc lastDoc;
        private int nextBatchSize = LUCENE_QUERY_BATCH_SIZE;
        private boolean noDocs = false;
        private long lastSearchIndexerVersion;

        @Override/*from  ww w .j  a v  a  2  s  .co  m*/
        protected LuceneResultRow computeNext() {
            while (!queue.isEmpty() || loadDocs()) {
                return queue.remove();
            }
            return endOfData();
        }

        private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, String excerpt,
                Facets facets, String explanation) throws IOException {
            IndexReader reader = searcher.getIndexReader();
            //TODO Look into usage of field cache for retrieving the path
            //instead of reading via reader if no of docs in index are limited
            PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
            reader.document(doc.doc, visitor);
            String path = visitor.getPath();
            if (path != null) {
                if ("".equals(path)) {
                    path = "/";
                }
                if (pr.isPathTransformed()) {
                    String originalPath = path;
                    path = pr.transformPath(path);

                    if (path == null) {
                        LOG.trace("Ignoring path {} : Transformation returned null", originalPath);
                        return null;
                    }

                    // avoid duplicate entries
                    if (seenPaths.contains(path)) {
                        LOG.trace("Ignoring path {} : Duplicate post transformation", originalPath);
                        return null;
                    }
                    seenPaths.add(path);
                }

                LOG.trace("Matched path {}", path);
                return new LuceneResultRow(path, doc.score, excerpt, facets, explanation);
            }
            return null;
        }

        /**
         * Loads the lucene documents in batches
         * @return true if any document is loaded
         */
        private boolean loadDocs() {

            if (noDocs) {
                return false;
            }

            ScoreDoc lastDocToRecord = null;

            final IndexNode indexNode = acquireIndexNode(plan);
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory,
                        searcher.getIndexReader());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();

                    CustomScoreQuery customScoreQuery = getCustomScoreQuery(plan, query);

                    if (customScoreQuery != null) {
                        query = customScoreQuery;
                    }

                    checkForIndexVersionChange(searcher);

                    TopDocs docs;
                    long start = PERF_LOGGER.start();
                    while (true) {
                        if (lastDoc != null) {
                            LOG.debug("loading the next {} entries for query {}", nextBatchSize, query);
                            if (sort == null) {
                                docs = searcher.searchAfter(lastDoc, query, nextBatchSize);
                            } else {
                                docs = searcher.searchAfter(lastDoc, query, nextBatchSize, sort);
                            }
                        } else {
                            LOG.debug("loading the first {} entries for query {}", nextBatchSize, query);
                            if (sort == null) {
                                docs = searcher.search(query, nextBatchSize);
                            } else {
                                docs = searcher.search(query, nextBatchSize, sort);
                            }
                        }
                        PERF_LOGGER.end(start, -1, "{} ...", docs.scoreDocs.length);
                        nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000);

                        long f = PERF_LOGGER.start();
                        Facets facets = FacetHelper.getFacets(searcher, query, docs, plan,
                                indexNode.getDefinition().isSecureFacets());
                        PERF_LOGGER.end(f, -1, "facets retrieved");

                        PropertyRestriction restriction = filter.getPropertyRestriction(QueryImpl.REP_EXCERPT);
                        boolean addExcerpt = restriction != null && restriction.isNotNullRestriction();

                        restriction = filter.getPropertyRestriction(QueryImpl.OAK_SCORE_EXPLANATION);
                        boolean addExplain = restriction != null && restriction.isNotNullRestriction();

                        Analyzer analyzer = indexNode.getDefinition().getAnalyzer();

                        FieldInfos mergedFieldInfos = null;
                        if (addExcerpt) {
                            // setup highlighter
                            QueryScorer scorer = new QueryScorer(query);
                            scorer.setExpandMultiTermQuery(true);
                            highlighter.setFragmentScorer(scorer);
                            mergedFieldInfos = MultiFields.getMergedFieldInfos(searcher.getIndexReader());
                        }

                        for (ScoreDoc doc : docs.scoreDocs) {
                            String excerpt = null;
                            if (addExcerpt) {
                                excerpt = getExcerpt(query, analyzer, searcher, doc, mergedFieldInfos);
                            }

                            String explanation = null;
                            if (addExplain) {
                                explanation = searcher.explain(query, doc.doc).toString();
                            }

                            LuceneResultRow row = convertToRow(doc, searcher, excerpt, facets, explanation);
                            if (row != null) {
                                queue.add(row);
                            }
                            lastDocToRecord = doc;
                        }

                        if (queue.isEmpty() && docs.scoreDocs.length > 0) {
                            //queue is still empty but more results can be fetched
                            //from Lucene so still continue
                            lastDoc = lastDocToRecord;
                        } else {
                            break;
                        }
                    }
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SpellcheckHelper.SpellcheckQuery) {
                    String aclCheckField = indexNode.getDefinition().isFullTextEnabled() ? FieldNames.FULLTEXT
                            : FieldNames.SPELLCHECK;
                    noDocs = true;
                    SpellcheckHelper.SpellcheckQuery spellcheckQuery = (SpellcheckHelper.SpellcheckQuery) luceneRequestFacade
                            .getLuceneRequest();
                    SuggestWord[] suggestWords = SpellcheckHelper.getSpellcheck(spellcheckQuery);

                    // ACL filter spellchecks
                    QueryParser qp = new QueryParser(Version.LUCENE_47, aclCheckField,
                            indexNode.getDefinition().getAnalyzer());
                    for (SuggestWord suggestion : suggestWords) {
                        Query query = qp.createPhraseQuery(aclCheckField,
                                QueryParserBase.escape(suggestion.string));

                        query = addDescendantClauseIfRequired(query, plan);

                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                String prefix = filter.getPath();
                                if (prefix.length() == 1) {
                                    prefix = "";
                                }
                                if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) {
                                    queue.add(new LuceneResultRow(suggestion.string));
                                    break;
                                }
                            }
                        }
                    }

                } else if (luceneRequestFacade.getLuceneRequest() instanceof SuggestHelper.SuggestQuery) {
                    SuggestHelper.SuggestQuery suggestQuery = (SuggestHelper.SuggestQuery) luceneRequestFacade
                            .getLuceneRequest();
                    noDocs = true;

                    List<Lookup.LookupResult> lookupResults = SuggestHelper
                            .getSuggestions(indexNode.getLookup(), suggestQuery);

                    QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.SUGGEST,
                            indexNode.getDefinition().isSuggestAnalyzed()
                                    ? indexNode.getDefinition().getAnalyzer()
                                    : SuggestHelper.getAnalyzer());

                    // ACL filter suggestions
                    for (Lookup.LookupResult suggestion : lookupResults) {
                        Query query = qp.parse("\"" + QueryParserBase.escape(suggestion.key.toString()) + "\"");

                        query = addDescendantClauseIfRequired(query, plan);

                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                String prefix = filter.getPath();
                                if (prefix.length() == 1) {
                                    prefix = "";
                                }
                                if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) {
                                    queue.add(new LuceneResultRow(suggestion.key.toString(), suggestion.value));
                                    break;
                                }
                            }
                        }
                    }
                }
            } catch (Exception e) {
                LOG.warn("query via {} failed.", LucenePropertyIndex.this, e);
            } finally {
                indexNode.release();
            }

            if (lastDocToRecord != null) {
                this.lastDoc = lastDocToRecord;
            }

            return !queue.isEmpty();
        }

        /**
         * Detects an index version change between two consecutive searches and, if
         * one happened while a paging offset ({@code lastDoc}) was being tracked,
         * drops that offset so the next query is performed without it.
         * The version observed by this call is always remembered for the next check.
         *
         * @param searcher searcher whose index version is compared with the version
         *                 recorded by the previous invocation
         */
        private void checkForIndexVersionChange(IndexSearcher searcher) {
            long currentVersion = getVersion(searcher);
            if (currentVersion != lastSearchIndexerVersion && lastDoc != null) {
                lastDoc = null;
                // Log as "previous => current" so the arrow reflects the direction of the change.
                LOG.debug("Change in index version detected {} => {}. Query would be performed without "
                        + "offset", lastSearchIndexerVersion, currentVersion);
            }
            this.lastSearchIndexerVersion = currentVersion;
        }
    };
    SizeEstimator sizeEstimator = new SizeEstimator() {
        /**
         * Estimates the number of hits for the current plan by counting all matches
         * of the Lucene query.
         *
         * @return the total hit count, or {@code -1} when the request is not a
         *         {@code Query} or the search failed with an {@code IOException}
         */
        @Override
        public long getSize() {
            IndexNode node = acquireIndexNode(plan);
            checkState(node != null);
            try {
                IndexSearcher indexSearcher = node.getSearcher();
                LuceneRequestFacade facade = getLuceneRequest(plan, augmentorFactory,
                        indexSearcher.getIndexReader());

                // Only plain queries can be counted; anything else yields "unknown".
                if (!(facade.getLuceneRequest() instanceof Query)) {
                    LOG.debug("estimate size: not a Query: {}", facade.getLuceneRequest());
                    return -1;
                }

                Query query = (Query) facade.getLuceneRequest();
                TotalHitCountCollector hitCounter = new TotalHitCountCollector();
                indexSearcher.search(query, hitCounter);
                int totalHits = hitCounter.getTotalHits();
                LOG.debug("Estimated size for query {} is {}", query, totalHits);
                return totalHits;
            } catch (IOException e) {
                LOG.warn("query via {} failed.", LucenePropertyIndex.this, e);
                return -1;
            } finally {
                // The node was acquired above and must be released on every path.
                node.release();
            }
        }
    };
    return new LucenePathCursor(itr, plan, settings, sizeEstimator);
}

From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LucenePropertyIndex.java

License:Apache License

/**
 * Builds a highlighted excerpt for a single search hit.
 * <p>
 * The postings highlighter is tried first on every stored field whose name starts
 * with {@code ANALYZED_FIELD_PREFIX}, provided the index reports positions and
 * offsets. Highlighted values (those containing {@code <b>}) are joined with
 * {@code "..."}. If that yields nothing, the regular highlighter is run over the
 * full text / analyzed fields and the fragments of the first field that produces
 * any are used.
 *
 * @param query      query whose terms are highlighted by the postings highlighter
 * @param analyzer   analyzer used to re-tokenize stored text in the fallback path
 * @param searcher   searcher giving access to the index reader
 * @param doc        the hit to build the excerpt for
 * @param fieldInfos field infos consulted for positions/offsets availability
 * @return the excerpt, or an empty string when nothing could be highlighted
 * @throws IOException if the stored document or the token stream cannot be read
 */
private String getExcerpt(Query query, Analyzer analyzer, IndexSearcher searcher, ScoreDoc doc,
        FieldInfos fieldInfos) throws IOException {
    StringBuilder excerpt = new StringBuilder();
    int docID = doc.doc;
    // Load the stored fields once; both highlighting strategies inspect the same document.
    List<IndexableField> storedFields = searcher.getIndexReader().document(docID).getFields();
    List<String> names = new LinkedList<String>();

    for (IndexableField field : storedFields) {
        String name = field.name();
        // postings highlighter can be used on analyzed fields with docs, freqs, positions and offsets stored.
        if (name.startsWith(ANALYZED_FIELD_PREFIX) && fieldInfos.hasProx() && fieldInfos.hasOffsets()) {
            names.add(name);
        }
    }

    if (!names.isEmpty()) {
        // one passage per field
        int[] maxPassages = new int[names.size()];
        Arrays.fill(maxPassages, 1);
        try {
            Map<String, String[]> stringMap = postingsHighlighter.highlightFields(
                    names.toArray(new String[names.size()]), query, searcher, new int[] { docID }, maxPassages);
            for (Map.Entry<String, String[]> entry : stringMap.entrySet()) {
                String value = Arrays.toString(entry.getValue());
                // only values that actually contain a highlighted term are kept
                if (value.contains("<b>")) {
                    if (excerpt.length() > 0) {
                        excerpt.append("...");
                    }
                    excerpt.append(value);
                }
            }
        } catch (Exception e) {
            // best effort: fall through to the fallback highlighter below
            LOG.error("postings highlighting failed", e);
        }
    }

    // fallback if no excerpt could be retrieved using postings highlighter
    if (excerpt.length() == 0) {
        for (IndexableField field : storedFields) {
            String name = field.name();
            // only full text or analyzed fields
            if (!name.startsWith(FieldNames.FULLTEXT) && !name.startsWith(FieldNames.ANALYZED_FIELD_PREFIX)) {
                continue;
            }
            String text = field.stringValue();
            if (text == null) {
                // no string value to tokenize; passing null on would fail instead of being skipped
                continue;
            }
            TokenStream tokenStream = analyzer.tokenStream(name, text);

            try {
                TextFragment[] textFragments = highlighter.getBestTextFragments(tokenStream, text, true, 1);
                if (textFragments != null && textFragments.length > 0) {
                    for (TextFragment fragment : textFragments) {
                        if (excerpt.length() > 0) {
                            excerpt.append("...");
                        }
                        excerpt.append(fragment.toString());
                    }
                    // the first field that yields fragments wins
                    break;
                }
            } catch (InvalidTokenOffsetsException e) {
                LOG.error("higlighting failed", e);
            }
        }
    }
    return excerpt.toString();
}

From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LucenePropertyIndex.java

License:Apache License

/**
 * Returns the version of the index behind the given searcher.
 *
 * @param indexSearcher searcher whose reader is inspected
 * @return the reader's version when it is a {@code DirectoryReader}, otherwise {@code -1}
 */
static long getVersion(IndexSearcher indexSearcher) {
    IndexReader underlying = indexSearcher.getIndexReader();
    return underlying instanceof DirectoryReader
            ? ((DirectoryReader) underlying).getVersion()
            : -1;
}

From source file:org.apache.jackrabbit.oak.plugins.index.lucene.util.FacetHelper.java

License:Apache License

/**
 * Collects facet counts for every facet field requested by the plan
 * (attribute {@code ATTR_FACET_FIELDS}).
 * <p>
 * For each field the query is executed with a fresh {@code FacetsCollector}.
 * A field whose reader state cannot be created (signalled by an
 * {@code IllegalArgumentException}) is logged and skipped.
 *
 * @param searcher searcher used to run the query and to read facet doc values
 * @param query    query whose matches are counted
 * @param docs     top docs handed to the filtered counts when {@code secure} is set
 * @param plan     index plan providing the facet field list and the filter
 * @param secure   whether to use the filtered facet counts variant
 * @return the combined facets, or {@code null} when no facet field was requested
 *         or none of them could be counted
 * @throws IOException if searching or reading the index fails
 */
public static Facets getFacets(IndexSearcher searcher, Query query, TopDocs docs, QueryIndex.IndexPlan plan,
        boolean secure) throws IOException {
    @SuppressWarnings("unchecked")
    List<String> requestedFields = (List<String>) plan.getAttribute(ATTR_FACET_FIELDS);
    if (requestedFields == null || requestedFields.isEmpty()) {
        return null;
    }

    Map<String, Facets> countsByField = new HashMap<String, Facets>();
    for (String fieldName : requestedFields) {
        FacetsCollector collector = new FacetsCollector();
        try {
            DefaultSortedSetDocValuesReaderState readerState = new DefaultSortedSetDocValuesReaderState(
                    searcher.getIndexReader(), FieldNames.createFacetFieldName(fieldName));
            FacetsCollector.search(searcher, query, 10, collector);

            Facets counts;
            if (secure) {
                counts = new FilteredSortedSetDocValuesFacetCounts(readerState, collector,
                        plan.getFilter(), docs);
            } else {
                counts = new SortedSetDocValuesFacetCounts(readerState, collector);
            }
            countsByField.put(fieldName, counts);
        } catch (IllegalArgumentException iae) {
            LOGGER.warn("facets for {} not yet indexed", fieldName);
        }
    }

    return countsByField.isEmpty() ? null : new MultiFacets(countsByField);
}