Example usage for org.apache.lucene.search IndexSearcher getIndexReader

List of usage examples for org.apache.lucene.search IndexSearcher getIndexReader

Introduction

On this page you can find example usages of org.apache.lucene.search IndexSearcher getIndexReader.

Prototype

public IndexReader getIndexReader() 

Source Link

Document

Return the IndexReader this searches.

Usage

From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java

License:Open Source License

/**
 * Requests deletion of all documents for {@code link} when {@code doc} carries an expiration
 * time that is at or before {@code now}.
 *
 * @param link     self link the document belongs to
 * @param searcher searcher whose reader is used to reload the document with the expanded field set
 * @param docId    Lucene document id of {@code doc}
 * @param doc      already loaded document; only its expiration field is consulted
 * @param now      reference time the expiration value is compared against
 * @return {@code true} if the document had expired and deletion was requested, {@code false} otherwise
 * @throws Throwable if reloading the document or issuing the deletion fails
 */
private boolean checkAndDeleteExpiratedDocuments(String link, IndexSearcher searcher, Integer docId,
        Document doc, long now) throws Throwable {
    IndexableField expirationField = doc.getField(ServiceDocument.FIELD_NAME_EXPIRATION_TIME_MICROS);
    if (expirationField == null) {
        // no expiration field on this document: nothing to expire
        return false;
    }

    long expirationMicros = expirationField.numericValue().longValue();
    if (expirationMicros > now) {
        return false;
    }

    adjustStat(STAT_NAME_DOCUMENT_EXPIRATION_COUNT, 1);

    // reload the document with all fields, including binary state
    Document fullDoc = searcher.getIndexReader().document(docId, this.fieldsToLoadWithExpand);

    // a state that cannot be deserialized does not prevent the deletion; null is passed on instead
    ServiceDocument state;
    try {
        state = getStateFromLuceneDocument(fullDoc, link);
    } catch (Throwable e) {
        logWarning("Error deserializing state for %s: %s", link, e.getMessage());
        state = null;
    }

    deleteAllDocumentsForSelfLink(Operation.createDelete(null), link, state);
    return true;
}

From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java

License:Open Source License

/**
 * Closes all searchers pending close and asks the writer to delete unused files, once either the
 * number of pending searchers or the number of files in the index directory reaches its threshold.
 *
 * <p>The method is net-neutral with respect to {@code writerAvailable} permits on every path: it
 * gives back one permit, takes all of them while it works, and ends up holding one again.
 * NOTE(review): this assumes the calling thread holds exactly one permit on entry, as implied by
 * the initial {@code release()} - confirm against the callers.
 *
 * @return {@code true} if the index file count reached
 *         {@code INDEX_FILE_COUNT_THRESHOLD_FOR_WRITER_REFRESH}; {@code false} otherwise, and
 *         always {@code false} when the host is stopping
 */
private boolean applyIndexSearcherAndFileLimit() {
    File directory = new File(new File(getHost().getStorageSandbox()), this.indexDirectory);
    String[] list = directory.list();
    int count = list == null ? 0 : list.length;

    boolean reOpenWriter = count >= INDEX_FILE_COUNT_THRESHOLD_FOR_WRITER_REFRESH;

    int searcherCount = this.searchersPendingClose.size();
    if (searcherCount < INDEX_SEARCHER_COUNT_THRESHOLD && !reOpenWriter) {
        return reOpenWriter;
    }

    // Checked before touching the semaphore: returning from inside the try block below would run
    // the finally clause and release permits that were never acquired.
    if (getHost().isStopping()) {
        return false;
    }

    // We always close index searchers before re-opening the index writer, otherwise we risk
    // losing pending commits on writer re-open. Notice this code executes if we either have
    // too many index files on disk, thus we need to re-open the writer to consolidate, or
    // when we have too many pending searchers
    final int acquireReleaseCount = QUERY_THREAD_COUNT + UPDATE_THREAD_COUNT;
    boolean acquiredAll = false;
    this.writerAvailable.release();
    try {
        this.writerAvailable.acquire(acquireReleaseCount);
        acquiredAll = true;
        this.searcher = null;

        logInfo("Closing %d pending searchers, index file count: %d", searcherCount, count);

        for (IndexSearcher s : this.searchersPendingClose) {
            try {
                s.getIndexReader().close();
            } catch (Throwable e) {
                // best effort: one reader failing to close must not stop the others
                logWarning("Failure closing index reader: %s", e.toString());
            }
        }
        this.searchersPendingClose.clear();

        IndexWriter w = this.writer;
        if (w != null) {
            try {
                w.deleteUnusedFiles();
            } catch (Throwable e) {
                // best effort: unused files are simply left in place
                logWarning("Failure deleting unused index files: %s", e.toString());
            }
        }

    } catch (InterruptedException e1) {
        logSevere(e1);
        // keep the interrupt visible to the caller
        Thread.currentThread().interrupt();
    } finally {
        if (acquiredAll) {
            // release all but one, so we stay owning one reference to the semaphore
            this.writerAvailable.release(acquireReleaseCount - 1);
        } else {
            // the bulk acquire never happened: take back the single permit released above
            this.writerAvailable.acquireUninterruptibly();
        }
    }

    return reOpenWriter;
}

From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java

License:Open Source License

/**
 * Enforces the memory limit on the link access time map and closes the searchers of paginated
 * queries whose expiration time has passed.
 *
 * <p>When the estimated size of {@code linkAccessTimes} exceeds the configured limit the map is
 * cleared and the current searcher is queued for closing, so that a new one is created later.
 * Does nothing when the host is stopping.
 */
private void applyMemoryLimit() {
    if (getHost().isStopping()) {
        return;
    }

    // All size math is done in long: with int arithmetic the estimate overflows to a negative
    // number for very large maps, and the limit would then never trigger.
    long memThresholdBytes = this.linkAccessMemoryLimitMB * 1024L * 1024L;
    final long bytesPerLinkEstimate = 256;
    int count = 0;
    synchronized (this.searchSync) {
        if (this.linkAccessTimes.isEmpty()) {
            return;
        }
        if (memThresholdBytes < this.linkAccessTimes.size() * bytesPerLinkEstimate) {
            count = this.linkAccessTimes.size();
            this.linkAccessTimes.clear();
            // force searcher update next time updateSearcher is called
            if (this.searcher != null) {
                this.searchersPendingClose.add(this.searcher);
            }
            this.searcher = null;
        }
    }

    if (count > 0) {
        logInfo("Cleared %d link access times", count);
    }

    // close any paginated query searchers that have expired
    long now = Utils.getNowMicrosUtc();
    Map<Long, List<IndexSearcher>> entriesToClose = new HashMap<>();
    synchronized (this.searchSync) {
        Iterator<Entry<Long, List<IndexSearcher>>> itr = this.searchersForPaginatedQueries.entrySet()
                .iterator();
        while (itr.hasNext()) {
            Entry<Long, List<IndexSearcher>> entry = itr.next();
            if (entry.getKey() > now) {
                // all entries beyond this one, are in the future, since we use a sorted tree map
                break;
            }
            entriesToClose.put(entry.getKey(), entry.getValue());
            itr.remove();
        }
        setStat(STAT_NAME_ACTIVE_PAGINATED_QUERIES, this.searchersForPaginatedQueries.size());
    }

    // the readers are closed outside the lock, after the entries were removed from the map
    for (Entry<Long, List<IndexSearcher>> entry : entriesToClose.entrySet()) {
        List<IndexSearcher> searchers = entry.getValue();
        for (IndexSearcher s : searchers) {
            try {
                logFine("Closing paginated query searcher, expired at %d", entry.getKey());
                s.getIndexReader().close();
            } catch (Throwable e) {
                // best effort: keep closing the remaining searchers
                logWarning("Failure closing paginated query searcher: %s", e.toString());
            }
        }
    }

}

From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java

License:Open Source License

/**
 * Searches for documents whose expiration time lies between 1 and the current time and hands the
 * latest version of each affected link to {@code checkAndDeleteExpiratedDocuments}.
 *
 * <p>At most {@code EXPIRED_DOCUMENT_SEARCH_THRESHOLD} hits are processed per call; when more
 * documents matched, another maintenance run is triggered immediately.
 *
 * @param w index writer passed on to {@code updateSearcher} when no searcher is available yet
 * @throws Throwable if searching, loading a document or the deletion fails
 */
private void applyDocumentExpirationPolicy(IndexWriter w) throws Throwable {
    // if we miss a document update, we will catch it, and refresh the searcher on the
    // next update or maintenance
    IndexSearcher activeSearcher;
    if (this.searcher != null) {
        activeSearcher = this.searcher;
    } else {
        activeSearcher = updateSearcher(null, Integer.MAX_VALUE, w);
    }
    if (activeSearcher == null) {
        return;
    }

    long expirationUpperBound = Utils.getNowMicrosUtc();
    Query expiredRangeQuery = LongPoint.newRangeQuery(ServiceDocument.FIELD_NAME_EXPIRATION_TIME_MICROS,
            1L, expirationUpperBound);

    TopDocs hits = activeSearcher.search(expiredRangeQuery, EXPIRED_DOCUMENT_SEARCH_THRESHOLD);
    if (hits.totalHits == 0) {
        return;
    }

    // The expiration query returns every version of a link; the set makes sure each link is
    // handled only once
    Set<String> handledLinks = new HashSet<>();
    long now = Utils.getNowMicrosUtc();
    for (ScoreDoc hit : hits.scoreDocs) {
        Document candidate = activeSearcher.getIndexReader().document(hit.doc, this.fieldsToLoadNoExpand);
        String selfLink = candidate.get(ServiceDocument.FIELD_NAME_SELF_LINK);
        long expiredVersion = candidate.getField(ServiceDocument.FIELD_NAME_VERSION).numericValue()
                .longValue();
        long newestVersion = this.getLatestVersion(activeSearcher, selfLink);

        // only the newest version of a link is considered, and each link only once
        boolean isNewestVersion = expiredVersion >= newestVersion;
        if (isNewestVersion && handledLinks.add(selfLink)) {
            checkAndDeleteExpiratedDocuments(selfLink, activeSearcher, hit.doc, candidate, now);
        }
    }

    if (hits.totalHits <= EXPIRED_DOCUMENT_SEARCH_THRESHOLD) {
        return;
    }

    // More documents are waiting to be expired: trigger maintenance right away.
    adjustStat(STAT_NAME_DOCUMENT_EXPIRATION_FORCED_MAINTENANCE_COUNT, 1);
    ServiceMaintenanceRequest maintenanceBody = ServiceMaintenanceRequest.create();
    Operation maintenancePost = Operation.createPost(UriUtils.buildUri(getHost(), getSelfLink()));
    maintenancePost = maintenancePost.setReferer(getHost().getUri()).setBody(maintenanceBody);
    // the post operation can be cached
    handleMaintenance(maintenancePost);
}

From source file:com.wrmsr.search.dsl.SearchServiceImpl.java

License:Apache License

/**
 * Commits and closes the index writer, then replaces the current searcher with one opened on
 * the committed index. The swap happens under the searcher write lock.
 *
 * <p>NOTE(review): the writer is closed here but {@code this.indexWriter} stays present, so a
 * second call would pass the state check with a closed writer - confirm whether the writer is
 * re-created elsewhere.
 *
 * @throws IOException if closing the old reader, committing, or opening the new reader fails
 */
@Override
public synchronized void commit() throws IOException {
    checkState(this.indexWriter.isPresent());

    Lock lock = indexSearcherLock.writeLock();
    // Acquire before entering the try block: if lock() itself failed, the finally clause must
    // not call unlock() on a lock this thread does not hold.
    lock.lock();
    try {
        if (this.indexSearcher.isPresent()) {
            IndexSearcher staleSearcher = this.indexSearcher.get();
            staleSearcher.getIndexReader().close();
            this.indexSearcher = Optional.empty();
        }

        IndexWriter indexWriter = this.indexWriter.get();
        indexWriter.commit();
        indexWriter.close();

        IndexReader indexReader = IndexReader.open(directory);
        IndexSearcher freshSearcher = new IndexSearcher(indexReader);
        this.indexSearcher = Optional.of(freshSearcher);
    } finally {
        lock.unlock();
    }
}

From source file:com.xiaomi.linden.core.LindenUtil.java

License:Apache License

/**
 * Get fields by doc id./*from w  w w .  j a  v a  2 s.  co m*/
 *
 * @param indexSearcher The IndexSearcher
 * @param docId         Doc ID.
 * @param id            Id field value
 * @param sourceFields  Specify the fields, if null get all fields values.
 * @param config        the lindenConfig for search
 * @return JSON String which contains field values.
 * @throws IOException
 */

public static String getSource(IndexSearcher indexSearcher, int docId, String id, List<String> sourceFields,
        LindenConfig config) throws IOException {
    List<AtomicReaderContext> leaves = indexSearcher.getIndexReader().leaves();
    int idx = ReaderUtil.subIndex(docId, leaves);
    AtomicReaderContext atomicReaderContext = leaves.get(idx);
    AtomicReader reader = atomicReaderContext.reader();
    int locDocId = docId - atomicReaderContext.docBase;
    JSONObject src = new JSONObject();
    String idFieldName = config.getSchema().getId();
    if (id != null) {
        src.put(idFieldName, id);
    } else {
        src.put(idFieldName,
                FieldCache.DEFAULT.getTerms(reader, idFieldName, false).get(locDocId).utf8ToString());
    }

    List<LindenFieldSchema> fields = new ArrayList<>();
    if (sourceFields != null && !sourceFields.isEmpty()) {
        for (String sourceField : sourceFields) {
            if (sourceField.equals(idFieldName)) {
                continue;
            }
            LindenFieldSchema fieldSchema = config.getFieldSchema(sourceField);
            fields.add(fieldSchema);
        }
    } else {
        fields.addAll(config.getSchema().getFields());
    }

    Map<String, LindenFieldSchema> storedFields = new HashMap<>();
    for (LindenFieldSchema fieldSchema : fields) {
        String name = fieldSchema.getName();
        boolean fieldCache = false;
        if (fieldSchema.isMulti()) {
            /**
             * multi-field has multiple values, each value is indexed to the document according to field type
             * multi-field source value is in JSONArray format, something like "["MI4","MI Note","RedMI3"]"
             * multi-field source value is stored in BinaryDocValues
             */
            String blob = FieldCache.DEFAULT.getTerms(reader, name, false).get(locDocId).utf8ToString();
            if (StringUtils.isNotEmpty(blob)) {
                src.put(name, JSON.parseArray(blob));
            }
        } else if (fieldSchema.isDocValues()) {
            fieldCache = true;
        } else if (fieldSchema.isIndexed() && fieldSchema.isStored()) {
            // field cache doesn't support tokenized string field
            if (config.isEnableSourceFieldCache() && !possibleTokenizedString(fieldSchema)) {
                fieldCache = true;
            } else {
                storedFields.put(name, fieldSchema);
            }
        } else if (fieldSchema.isIndexed()) {
            if (!possibleTokenizedString(fieldSchema)) {
                fieldCache = true;
            }
        } else if (fieldSchema.isStored()) {
            storedFields.put(name, fieldSchema);
        }

        if (fieldCache) {
            Object val;
            switch (fieldSchema.getType()) {
            case STRING:
            case FACET:
                val = FieldCache.DEFAULT.getTerms(reader, name, false).get(locDocId).utf8ToString();
                String v = (String) val;
                fieldCache = !v.isEmpty() || actualContain(reader, name, locDocId);
                break;
            case INTEGER:
                val = FieldCache.DEFAULT.getInts(reader, name, false).get(locDocId);
                fieldCache = ((int) val) != 0 || actualContain(reader, name, locDocId);
                break;
            case LONG:
                val = FieldCache.DEFAULT.getLongs(reader, name, false).get(locDocId);
                fieldCache = ((long) val != 0) || actualContain(reader, name, locDocId);
                break;
            case FLOAT:
                val = FieldCache.DEFAULT.getFloats(reader, name, false).get(locDocId);
                fieldCache = ((float) val != 0) || actualContain(reader, name, locDocId);
                break;
            case DOUBLE:
                val = FieldCache.DEFAULT.getDoubles(reader, name, false).get(locDocId);
                fieldCache = ((double) val != 0) || actualContain(reader, name, locDocId);
                break;
            default:
                throw new IllegalStateException("Unsupported linden type");
            }
            if (fieldCache) {
                src.put(name, val);
            }
        }
    }

    if (!storedFields.isEmpty())

    {
        Document doc = indexSearcher.doc(docId, storedFields.keySet());
        for (IndexableField field : doc.getFields()) {
            String name = field.name();
            LindenFieldSchema schema = storedFields.get(name);
            Object obj = src.get(name);
            Object val = parseLindenValue(field.stringValue(), storedFields.get(name).getType());
            if (obj == null) {
                if (schema.isMulti()) {
                    JSONArray array = new JSONArray();
                    array.add(val);
                    src.put(name, array);
                } else {
                    src.put(name, val);
                }
            } else if (obj instanceof JSONArray) {
                ((JSONArray) obj).add(val);
            } else {
                JSONArray array = new JSONArray();
                array.add(obj);
                array.add(val);
                src.put(name, array);
            }
        }
    }
    return src.toJSONString();
}

From source file:com.xiaomi.linden.core.search.LindenResultParser.java

License:Apache License

/**
 * Creates a parser for the results of one search request.
 *
 * @param config           Linden configuration
 * @param request          the search request the results belong to
 * @param indexSearcher    searcher the results were produced with; its reader's leaves are kept
 * @param snippetGenerator generator for result snippets
 * @param query            the executed query
 * @param filter           the filter applied to the query
 * @param sort             the sort applied to the results; also used to locate the score field position
 */
public LindenResultParser(LindenConfig config, LindenSearchRequest request, IndexSearcher indexSearcher,
        LindenSnippetGenerator snippetGenerator, Query query, Filter filter, Sort sort) {
    // request context
    this.config = config;
    this.request = request;
    this.snippetGenerator = snippetGenerator;

    // what was searched
    this.query = query;
    this.filter = filter;
    this.sort = sort;
    this.indexSearcher = indexSearcher;

    // values derived from the arguments
    this.sortScoreFieldPos = getSortScoreFieldPos(sort);
    this.leaves = indexSearcher.getIndexReader().leaves();
}

From source file:de.blizzy.documentr.search.PageFinder.java

License:Open Source License

/**
 * Searches for {@code searchText}, restricted to the documents visible to {@code authentication},
 * and loads the hits of one result page.
 *
 * <p>Query parsing and the computation of the visible doc ids run concurrently on the task
 * executor, as does the loading of the individual hits. Each wait is bounded by
 * {@code DocumentrConstants.INTERACTIVE_TIMEOUT} seconds.
 *
 * @param searchText     the text to search for
 * @param page           number of the result page to load; the first page is 1
 * @param authentication the user the results must be visible to
 * @param searcher       searcher to run the query with
 * @return the hits of the requested page together with the total hit count
 * @throws ParseException   if the search text cannot be parsed into a query
 * @throws IOException      if searching or loading a hit fails
 * @throws TimeoutException if a task does not finish within the timeout
 */
private SearchResult findPages(String searchText, int page, Authentication authentication,
        IndexSearcher searcher) throws ParseException, IOException, TimeoutException {

    Future<Query> queryFuture = taskExecutor.submit(new ParseQueryTask(searchText, analyzer));
    ListenableFuture<Bits> visibleDocIdsFuture = taskExecutor.submit(
            new GetVisibleDocIdsTask(searcher, authentication, userStore, permissionEvaluator, taskExecutor));

    Query query;
    TopDocs docs;
    try {
        query = queryFuture.get(DocumentrConstants.INTERACTIVE_TIMEOUT, TimeUnit.SECONDS);
        Bits visibleDocIds = visibleDocIdsFuture.get(DocumentrConstants.INTERACTIVE_TIMEOUT, TimeUnit.SECONDS);
        docs = searcher.search(query, new PagePermissionFilter(visibleDocIds), HITS_PER_PAGE * page);
    } catch (InterruptedException e) {
        // restore the interrupt status so code further up the stack can still observe it
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    } catch (ExecutionException e) {
        Throwable cause = e.getCause();
        if (cause instanceof ParseException) {
            throw (ParseException) cause;
        } else {
            throw Util.toRuntimeException(cause);
        }
    } finally {
        // no-op for tasks that completed; stops tasks that are still pending after a failure
        queryFuture.cancel(false);
        visibleDocIdsFuture.cancel(false);
    }

    // the search fetched all hits up to the end of the requested page; load only that page
    int start = HITS_PER_PAGE * (page - 1);
    int end = Math.min(HITS_PER_PAGE * page, docs.scoreDocs.length);
    IndexReader reader = searcher.getIndexReader();
    List<ListenableFuture<SearchHit>> hitFutures = Lists.newArrayList();
    for (int i = start; i < end; i++) {
        ListenableFuture<SearchHit> hitFuture = taskExecutor
                .submit(new GetSearchHitTask(query, reader, docs.scoreDocs[i].doc, analyzer));
        hitFutures.add(hitFuture);
    }

    try {
        ListenableFuture<List<SearchHit>> allHitsFuture = Futures.allAsList(hitFutures);
        List<SearchHit> hits = allHitsFuture.get(DocumentrConstants.INTERACTIVE_TIMEOUT, TimeUnit.SECONDS);
        return new SearchResult(hits, docs.totalHits, HITS_PER_PAGE);
    } catch (InterruptedException e) {
        // restore the interrupt status so code further up the stack can still observe it
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    } catch (ExecutionException e) {
        Throwable cause = e.getCause();
        if (cause instanceof IOException) {
            throw (IOException) cause;
        } else {
            throw Util.toRuntimeException(cause);
        }
    } finally {
        for (ListenableFuture<SearchHit> hitFuture : hitFutures) {
            hitFuture.cancel(false);
        }
    }
}

From source file:de.blizzy.documentr.search.PageFinder.java

License:Open Source License

/**
 * Computes a "did you mean" suggestion for {@code searchText} by replacing each analyzed word
 * with the spell checker's top suggestion, where one exists.
 *
 * <p>A suggestion is only returned when at least one word was replaced and searching for the
 * suggested text yields hits.
 *
 * @param searchText     the text the user searched for
 * @param authentication the user the suggestion's search results must be visible to
 * @param searcher       searcher providing the index for spell checking and the test search
 * @return the suggestion, with plain and HTML renditions and its hit count, or {@code null}
 *         if there is none
 * @throws IOException      if analyzing the text or reading the index fails
 * @throws ParseException   if the suggested text cannot be parsed into a query
 * @throws TimeoutException if the test search does not finish in time
 */
private SearchTextSuggestion getSearchTextSuggestion(String searchText, Authentication authentication,
        IndexSearcher searcher) throws IOException, ParseException, TimeoutException {

    List<WordPosition> words = Lists.newArrayList();

    TokenStream tokenStream = null;
    try {
        tokenStream = analyzer.tokenStream(PageIndex.ALL_TEXT_SUGGESTIONS, new StringReader(searchText));
        tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.addAttribute(OffsetAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            CharTermAttribute charTerm = tokenStream.getAttribute(CharTermAttribute.class);
            String text = charTerm.toString();
            if (StringUtils.isNotBlank(text)) {
                OffsetAttribute offset = tokenStream.getAttribute(OffsetAttribute.class);
                WordPosition word = new WordPosition(text, offset.startOffset(), offset.endOffset());
                words.add(word);
            }
        }
        tokenStream.end();
    } finally {
        Util.closeQuietly(tokenStream);
    }

    // replace from the last word to the first, so that the offsets of the words still to be
    // processed stay valid while the builders change length
    Collections.reverse(words);

    StringBuilder suggestedSearchText = new StringBuilder(searchText);
    StringBuilder suggestedSearchTextHtml = new StringBuilder(searchText);
    boolean foundSuggestions = false;
    // time-stamped markers delimit the replaced words until the final HTML is produced
    String now = String.valueOf(System.currentTimeMillis());
    String startMarker = "__SUGGESTION-" + now + "__"; //$NON-NLS-1$ //$NON-NLS-2$
    String endMarker = "__/SUGGESTION-" + now + "__"; //$NON-NLS-1$ //$NON-NLS-2$
    DirectSpellChecker spellChecker = new DirectSpellChecker();
    IndexReader reader = searcher.getIndexReader();
    for (WordPosition word : words) {
        Term term = new Term(PageIndex.ALL_TEXT_SUGGESTIONS, word.getWord());
        SuggestWord[] suggestions = spellChecker.suggestSimilar(term, 1, reader,
                SuggestMode.SUGGEST_MORE_POPULAR);
        if (suggestions.length > 0) {
            String suggestedWord = suggestions[0].string;
            int start = word.getStart();
            int end = word.getEnd();
            suggestedSearchText.replace(start, end, suggestedWord);
            // The word is inserted unescaped: the complete text is HTML-escaped exactly once
            // below. Escaping here as well would double-escape the word (e.g. "&uuml;" would
            // become "&amp;uuml;").
            suggestedSearchTextHtml.replace(start, end, startMarker + suggestedWord + endMarker);

            foundSuggestions = true;
        }
    }

    if (foundSuggestions) {
        String suggestion = suggestedSearchText.toString();
        SearchResult suggestionResult = findPages(suggestion, 1, authentication, searcher);
        int suggestionTotalHits = suggestionResult.getTotalHits();
        if (suggestionTotalHits > 0) {
            // escape the whole text, then turn the markers into emphasis tags
            String html = StringEscapeUtils.escapeHtml4(suggestedSearchTextHtml.toString())
                    .replaceAll(startMarker + "(.*?)" + endMarker, "<strong><em>$1</em></strong>"); //$NON-NLS-1$ //$NON-NLS-2$
            return new SearchTextSuggestion(suggestion, html, suggestionTotalHits);
        }
    }

    return null;
}

From source file:de.blizzy.documentr.search.PageIndex.java

License:Open Source License

/**
 * Searches for {@code searchText}, restricted to the documents visible to {@code authentication},
 * and loads the hits of one result page.
 *
 * <p>The query is parsed on the task executor while the visible doc ids are computed on the
 * calling thread; the individual hits are then loaded on the task executor. Waits on the
 * executor are bounded by {@code INTERACTIVE_TIMEOUT} seconds.
 *
 * @param searchText     the text to search for
 * @param page           number of the result page to load; the first page is 1
 * @param authentication the user the results must be visible to
 * @param searcher       searcher to run the query with
 * @return the hits of the requested page together with the total hit count
 * @throws ParseException   if the search text cannot be parsed into a query
 * @throws IOException      if searching or loading a hit fails
 * @throws TimeoutException if a task does not finish within the timeout
 */
private SearchResult findPages(String searchText, int page, Authentication authentication,
        IndexSearcher searcher) throws ParseException, IOException, TimeoutException {

    Future<Query> queryFuture = taskExecutor.submit(new ParseQueryTask(searchText, analyzer));
    Bits visibleDocIds = getVisibleDocIds(searcher, authentication);

    Query query;
    try {
        query = queryFuture.get(INTERACTIVE_TIMEOUT, TimeUnit.SECONDS);
    } catch (InterruptedException e) {
        // restore the interrupt status so code further up the stack can still observe it
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    } catch (ExecutionException e) {
        Throwable cause = e.getCause();
        if (cause instanceof ParseException) {
            throw (ParseException) cause;
        } else {
            throw Util.toRuntimeException(cause);
        }
    } finally {
        // no-op when parsing completed; stops a still pending task after a failure
        queryFuture.cancel(false);
    }
    TopDocs docs = searcher.search(query, new PagePermissionFilter(visibleDocIds), HITS_PER_PAGE * page);

    // the search fetched all hits up to the end of the requested page; load only that page
    int start = HITS_PER_PAGE * (page - 1);
    int end = Math.min(HITS_PER_PAGE * page, docs.scoreDocs.length);
    IndexReader reader = searcher.getIndexReader();
    List<ListenableFuture<SearchHit>> hitFutures = Lists.newArrayList();
    for (int i = start; i < end; i++) {
        ListenableFuture<SearchHit> hitFuture = taskExecutor
                .submit(new GetSearchHitTask(query, reader, docs.scoreDocs[i].doc, analyzer));
        hitFutures.add(hitFuture);
    }

    try {
        ListenableFuture<List<SearchHit>> allHitsFuture = Futures.allAsList(hitFutures);
        List<SearchHit> hits = allHitsFuture.get(INTERACTIVE_TIMEOUT, TimeUnit.SECONDS);
        return new SearchResult(hits, docs.totalHits, HITS_PER_PAGE);
    } catch (InterruptedException e) {
        // restore the interrupt status so code further up the stack can still observe it
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    } catch (ExecutionException e) {
        Throwable cause = e.getCause();
        if (cause instanceof IOException) {
            throw (IOException) cause;
        } else {
            throw Util.toRuntimeException(cause);
        }
    } finally {
        for (ListenableFuture<SearchHit> hitFuture : hitFutures) {
            hitFuture.cancel(false);
        }
    }
}