List of usage examples for org.apache.lucene.search IndexSearcher getIndexReader
public IndexReader getIndexReader()
From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java
License:Open Source License
private boolean checkAndDeleteExpiratedDocuments(String link, IndexSearcher searcher, Integer docId, Document doc, long now) throws Throwable { long expiration = 0; boolean hasExpired = false; IndexableField expirationValue = doc.getField(ServiceDocument.FIELD_NAME_EXPIRATION_TIME_MICROS); if (expirationValue != null) { expiration = expirationValue.numericValue().longValue(); hasExpired = expiration <= now; }//from w w w . ja v a 2s . com if (!hasExpired) { return false; } adjustStat(STAT_NAME_DOCUMENT_EXPIRATION_COUNT, 1); // update document with one that has all fields, including binary state doc = searcher.getIndexReader().document(docId, this.fieldsToLoadWithExpand); ServiceDocument s = null; try { s = getStateFromLuceneDocument(doc, link); } catch (Throwable e) { logWarning("Error deserializing state for %s: %s", link, e.getMessage()); } deleteAllDocumentsForSelfLink(Operation.createDelete(null), link, s); return true; }
From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java
License:Open Source License
private boolean applyIndexSearcherAndFileLimit() { File directory = new File(new File(getHost().getStorageSandbox()), this.indexDirectory); String[] list = directory.list(); int count = list == null ? 0 : list.length; boolean reOpenWriter = count >= INDEX_FILE_COUNT_THRESHOLD_FOR_WRITER_REFRESH; int searcherCount = this.searchersPendingClose.size(); if (searcherCount < INDEX_SEARCHER_COUNT_THRESHOLD && !reOpenWriter) { return reOpenWriter; }/*from w w w.j a v a 2s.c o m*/ // We always close index searchers before re-opening the index writer, otherwise we risk // loosing pending commits on writer re-open. Notice this code executes if we either have // too many index files on disk, thus we need to re-open the writer to consolidate, or // when we have too many pending searchers final int acquireReleaseCount = QUERY_THREAD_COUNT + UPDATE_THREAD_COUNT; try { if (getHost().isStopping()) { return false; } this.writerAvailable.release(); this.writerAvailable.acquire(acquireReleaseCount); this.searcher = null; logInfo("Closing %d pending searchers, index file count: %d", searcherCount, count); for (IndexSearcher s : this.searchersPendingClose) { try { s.getIndexReader().close(); } catch (Throwable e) { } } this.searchersPendingClose.clear(); IndexWriter w = this.writer; if (w != null) { try { w.deleteUnusedFiles(); } catch (Throwable e) { } } } catch (InterruptedException e1) { logSevere(e1); } finally { // release all but one, so we stay owning one reference to the semaphore this.writerAvailable.release(acquireReleaseCount - 1); } return reOpenWriter; }
From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java
License:Open Source License
private void applyMemoryLimit() { if (getHost().isStopping()) { return;//w ww. j a v a2 s.c om } long memThresholdBytes = this.linkAccessMemoryLimitMB * 1024 * 1024; final int bytesPerLinkEstimate = 256; int count = 0; synchronized (this.searchSync) { if (this.linkAccessTimes.isEmpty()) { return; } if (memThresholdBytes < this.linkAccessTimes.size() * bytesPerLinkEstimate) { count = this.linkAccessTimes.size(); this.linkAccessTimes.clear(); // force searcher update next time updateSearcher is called if (this.searcher != null) { this.searchersPendingClose.add(this.searcher); } this.searcher = null; } } if (count > 0) { logInfo("Cleared %d link access times", count); } // close any paginated query searchers that have expired long now = Utils.getNowMicrosUtc(); Map<Long, List<IndexSearcher>> entriesToClose = new HashMap<>(); synchronized (this.searchSync) { Iterator<Entry<Long, List<IndexSearcher>>> itr = this.searchersForPaginatedQueries.entrySet() .iterator(); while (itr.hasNext()) { Entry<Long, List<IndexSearcher>> entry = itr.next(); if (entry.getKey() > now) { // all entries beyond this one, are in the future, since we use a sorted tree map break; } entriesToClose.put(entry.getKey(), entry.getValue()); itr.remove(); } setStat(STAT_NAME_ACTIVE_PAGINATED_QUERIES, this.searchersForPaginatedQueries.size()); } for (Entry<Long, List<IndexSearcher>> entry : entriesToClose.entrySet()) { List<IndexSearcher> searchers = entry.getValue(); for (IndexSearcher s : searchers) { try { logFine("Closing paginated query searcher, expired at %d", entry.getKey()); s.getIndexReader().close(); } catch (Throwable e) { } } } }
From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java
License:Open Source License
private void applyDocumentExpirationPolicy(IndexWriter w) throws Throwable { // if we miss a document update, we will catch it, and refresh the searcher on the // next update or maintenance IndexSearcher s = this.searcher != null ? this.searcher : updateSearcher(null, Integer.MAX_VALUE, w); if (s == null) { return;/* w ww. j a v a2s . c o m*/ } long expirationUpperBound = Utils.getNowMicrosUtc(); Query versionQuery = LongPoint.newRangeQuery(ServiceDocument.FIELD_NAME_EXPIRATION_TIME_MICROS, 1L, expirationUpperBound); TopDocs results = s.search(versionQuery, EXPIRED_DOCUMENT_SEARCH_THRESHOLD); if (results.totalHits == 0) { return; } // The expiration query will return all versions for a link. Use a set so we only delete once per link Set<String> links = new HashSet<>(); long now = Utils.getNowMicrosUtc(); for (ScoreDoc sd : results.scoreDocs) { Document d = s.getIndexReader().document(sd.doc, this.fieldsToLoadNoExpand); String link = d.get(ServiceDocument.FIELD_NAME_SELF_LINK); IndexableField versionField = d.getField(ServiceDocument.FIELD_NAME_VERSION); long versionExpired = versionField.numericValue().longValue(); long latestVersion = this.getLatestVersion(s, link); if (versionExpired < latestVersion) { continue; } if (!links.add(link)) { continue; } checkAndDeleteExpiratedDocuments(link, s, sd.doc, d, now); } // More documents to be expired trigger maintenance right away. if (results.totalHits > EXPIRED_DOCUMENT_SEARCH_THRESHOLD) { adjustStat(STAT_NAME_DOCUMENT_EXPIRATION_FORCED_MAINTENANCE_COUNT, 1); ServiceMaintenanceRequest body = ServiceMaintenanceRequest.create(); Operation servicePost = Operation.createPost(UriUtils.buildUri(getHost(), getSelfLink())) .setReferer(getHost().getUri()).setBody(body); // servicePost can be cached handleMaintenance(servicePost); } }
From source file:com.wrmsr.search.dsl.SearchServiceImpl.java
License:Apache License
@Override public synchronized void commit() throws IOException { checkState(this.indexWriter.isPresent()); Lock lock = indexSearcherLock.writeLock(); try {// w w w.j av a2s. c om lock.lock(); if (this.indexSearcher.isPresent()) { IndexSearcher indexSearcher = this.indexSearcher.get(); indexSearcher.getIndexReader().close(); this.indexSearcher = Optional.empty(); } IndexWriter indexWriter = this.indexWriter.get(); indexWriter.commit(); indexWriter.close(); IndexReader indexReader = IndexReader.open(directory); IndexSearcher indexSearcher = new IndexSearcher(indexReader); this.indexSearcher = Optional.of(indexSearcher); } finally { lock.unlock(); } }
From source file:com.xiaomi.linden.core.LindenUtil.java
License:Apache License
/** * Get fields by doc id./*from w w w . j a v a 2 s. co m*/ * * @param indexSearcher The IndexSearcher * @param docId Doc ID. * @param id Id field value * @param sourceFields Specify the fields, if null get all fields values. * @param config the lindenConfig for search * @return JSON String which contains field values. * @throws IOException */ public static String getSource(IndexSearcher indexSearcher, int docId, String id, List<String> sourceFields, LindenConfig config) throws IOException { List<AtomicReaderContext> leaves = indexSearcher.getIndexReader().leaves(); int idx = ReaderUtil.subIndex(docId, leaves); AtomicReaderContext atomicReaderContext = leaves.get(idx); AtomicReader reader = atomicReaderContext.reader(); int locDocId = docId - atomicReaderContext.docBase; JSONObject src = new JSONObject(); String idFieldName = config.getSchema().getId(); if (id != null) { src.put(idFieldName, id); } else { src.put(idFieldName, FieldCache.DEFAULT.getTerms(reader, idFieldName, false).get(locDocId).utf8ToString()); } List<LindenFieldSchema> fields = new ArrayList<>(); if (sourceFields != null && !sourceFields.isEmpty()) { for (String sourceField : sourceFields) { if (sourceField.equals(idFieldName)) { continue; } LindenFieldSchema fieldSchema = config.getFieldSchema(sourceField); fields.add(fieldSchema); } } else { fields.addAll(config.getSchema().getFields()); } Map<String, LindenFieldSchema> storedFields = new HashMap<>(); for (LindenFieldSchema fieldSchema : fields) { String name = fieldSchema.getName(); boolean fieldCache = false; if (fieldSchema.isMulti()) { /** * multi-field has multiple values, each value is indexed to the document according to field type * multi-field source value is in JSONArray format, something like "["MI4","MI Note","RedMI3"]" * multi-field source value is stored in BinaryDocValues */ String blob = FieldCache.DEFAULT.getTerms(reader, name, false).get(locDocId).utf8ToString(); if (StringUtils.isNotEmpty(blob)) { src.put(name, 
JSON.parseArray(blob)); } } else if (fieldSchema.isDocValues()) { fieldCache = true; } else if (fieldSchema.isIndexed() && fieldSchema.isStored()) { // field cache doesn't support tokenized string field if (config.isEnableSourceFieldCache() && !possibleTokenizedString(fieldSchema)) { fieldCache = true; } else { storedFields.put(name, fieldSchema); } } else if (fieldSchema.isIndexed()) { if (!possibleTokenizedString(fieldSchema)) { fieldCache = true; } } else if (fieldSchema.isStored()) { storedFields.put(name, fieldSchema); } if (fieldCache) { Object val; switch (fieldSchema.getType()) { case STRING: case FACET: val = FieldCache.DEFAULT.getTerms(reader, name, false).get(locDocId).utf8ToString(); String v = (String) val; fieldCache = !v.isEmpty() || actualContain(reader, name, locDocId); break; case INTEGER: val = FieldCache.DEFAULT.getInts(reader, name, false).get(locDocId); fieldCache = ((int) val) != 0 || actualContain(reader, name, locDocId); break; case LONG: val = FieldCache.DEFAULT.getLongs(reader, name, false).get(locDocId); fieldCache = ((long) val != 0) || actualContain(reader, name, locDocId); break; case FLOAT: val = FieldCache.DEFAULT.getFloats(reader, name, false).get(locDocId); fieldCache = ((float) val != 0) || actualContain(reader, name, locDocId); break; case DOUBLE: val = FieldCache.DEFAULT.getDoubles(reader, name, false).get(locDocId); fieldCache = ((double) val != 0) || actualContain(reader, name, locDocId); break; default: throw new IllegalStateException("Unsupported linden type"); } if (fieldCache) { src.put(name, val); } } } if (!storedFields.isEmpty()) { Document doc = indexSearcher.doc(docId, storedFields.keySet()); for (IndexableField field : doc.getFields()) { String name = field.name(); LindenFieldSchema schema = storedFields.get(name); Object obj = src.get(name); Object val = parseLindenValue(field.stringValue(), storedFields.get(name).getType()); if (obj == null) { if (schema.isMulti()) { JSONArray array = new JSONArray(); 
array.add(val); src.put(name, array); } else { src.put(name, val); } } else if (obj instanceof JSONArray) { ((JSONArray) obj).add(val); } else { JSONArray array = new JSONArray(); array.add(obj); array.add(val); src.put(name, array); } } } return src.toJSONString(); }
From source file:com.xiaomi.linden.core.search.LindenResultParser.java
License:Apache License
/**
 * Creates a result parser for one search request.
 *
 * Besides storing the arguments, the constructor derives the sort score field position
 * from {@code sort} and captures the leaf contexts of the searcher's index reader.
 *
 * @param config the Linden configuration
 * @param request the search request being answered
 * @param indexSearcher searcher the results come from
 * @param snippetGenerator generator stored for later use by this parser
 * @param query the executed query
 * @param filter the applied filter
 * @param sort the applied sort
 */
public LindenResultParser(LindenConfig config, LindenSearchRequest request, IndexSearcher indexSearcher,
        LindenSnippetGenerator snippetGenerator, Query query, Filter filter, Sort sort) {
    // plain collaborators, stored as given
    this.config = config;
    this.request = request;
    this.indexSearcher = indexSearcher;
    this.snippetGenerator = snippetGenerator;
    this.query = query;
    this.filter = filter;
    this.sort = sort;

    // values derived from the arguments
    this.sortScoreFieldPos = getSortScoreFieldPos(sort);
    this.leaves = indexSearcher.getIndexReader().leaves();
}
From source file:de.blizzy.documentr.search.PageFinder.java
License:Open Source License
private SearchResult findPages(String searchText, int page, Authentication authentication, IndexSearcher searcher) throws ParseException, IOException, TimeoutException { Future<Query> queryFuture = taskExecutor.submit(new ParseQueryTask(searchText, analyzer)); ListenableFuture<Bits> visibleDocIdsFuture = taskExecutor.submit( new GetVisibleDocIdsTask(searcher, authentication, userStore, permissionEvaluator, taskExecutor)); Query query;// w w w . j a v a2s. c o m TopDocs docs; try { query = queryFuture.get(DocumentrConstants.INTERACTIVE_TIMEOUT, TimeUnit.SECONDS); Bits visibleDocIds = visibleDocIdsFuture.get(DocumentrConstants.INTERACTIVE_TIMEOUT, TimeUnit.SECONDS); docs = searcher.search(query, new PagePermissionFilter(visibleDocIds), HITS_PER_PAGE * page); } catch (InterruptedException e) { throw new RuntimeException(e); } catch (ExecutionException e) { Throwable cause = e.getCause(); if (cause instanceof ParseException) { throw (ParseException) cause; } else { throw Util.toRuntimeException(cause); } } finally { queryFuture.cancel(false); visibleDocIdsFuture.cancel(false); } int start = HITS_PER_PAGE * (page - 1); int end = Math.min(HITS_PER_PAGE * page, docs.scoreDocs.length); IndexReader reader = searcher.getIndexReader(); List<ListenableFuture<SearchHit>> hitFutures = Lists.newArrayList(); for (int i = start; i < end; i++) { ListenableFuture<SearchHit> hitFuture = taskExecutor .submit(new GetSearchHitTask(query, reader, docs.scoreDocs[i].doc, analyzer)); hitFutures.add(hitFuture); } try { ListenableFuture<List<SearchHit>> allHitsFuture = Futures.allAsList(hitFutures); List<SearchHit> hits = allHitsFuture.get(DocumentrConstants.INTERACTIVE_TIMEOUT, TimeUnit.SECONDS); return new SearchResult(hits, docs.totalHits, HITS_PER_PAGE); } catch (InterruptedException e) { throw new RuntimeException(e); } catch (ExecutionException e) { Throwable cause = e.getCause(); if (cause instanceof IOException) { throw (IOException) cause; } else { throw 
Util.toRuntimeException(cause); } } finally { for (ListenableFuture<SearchHit> hitFuture : hitFutures) { hitFuture.cancel(false); } } }
From source file:de.blizzy.documentr.search.PageFinder.java
License:Open Source License
private SearchTextSuggestion getSearchTextSuggestion(String searchText, Authentication authentication, IndexSearcher searcher) throws IOException, ParseException, TimeoutException { List<WordPosition> words = Lists.newArrayList(); TokenStream tokenStream = null;/*from w w w. j ava 2 s . com*/ try { tokenStream = analyzer.tokenStream(PageIndex.ALL_TEXT_SUGGESTIONS, new StringReader(searchText)); tokenStream.addAttribute(CharTermAttribute.class); tokenStream.addAttribute(OffsetAttribute.class); tokenStream.reset(); while (tokenStream.incrementToken()) { CharTermAttribute charTerm = tokenStream.getAttribute(CharTermAttribute.class); String text = charTerm.toString(); if (StringUtils.isNotBlank(text)) { OffsetAttribute offset = tokenStream.getAttribute(OffsetAttribute.class); WordPosition word = new WordPosition(text, offset.startOffset(), offset.endOffset()); words.add(word); } } tokenStream.end(); } finally { Util.closeQuietly(tokenStream); } Collections.reverse(words); StringBuilder suggestedSearchText = new StringBuilder(searchText); StringBuilder suggestedSearchTextHtml = new StringBuilder(searchText); boolean foundSuggestions = false; String now = String.valueOf(System.currentTimeMillis()); String startMarker = "__SUGGESTION-" + now + "__"; //$NON-NLS-1$ //$NON-NLS-2$ String endMarker = "__/SUGGESTION-" + now + "__"; //$NON-NLS-1$ //$NON-NLS-2$ DirectSpellChecker spellChecker = new DirectSpellChecker(); IndexReader reader = searcher.getIndexReader(); for (WordPosition word : words) { Term term = new Term(PageIndex.ALL_TEXT_SUGGESTIONS, word.getWord()); SuggestWord[] suggestions = spellChecker.suggestSimilar(term, 1, reader, SuggestMode.SUGGEST_MORE_POPULAR); if (suggestions.length > 0) { String suggestedWord = suggestions[0].string; int start = word.getStart(); int end = word.getEnd(); suggestedSearchText.replace(start, end, suggestedWord); suggestedSearchTextHtml.replace(start, end, startMarker + StringEscapeUtils.escapeHtml4(suggestedWord) + endMarker); 
foundSuggestions = true; } } if (foundSuggestions) { String suggestion = suggestedSearchText.toString(); SearchResult suggestionResult = findPages(suggestion, 1, authentication, searcher); int suggestionTotalHits = suggestionResult.getTotalHits(); if (suggestionTotalHits > 0) { String html = StringEscapeUtils.escapeHtml4(suggestedSearchTextHtml.toString()) .replaceAll(startMarker + "(.*?)" + endMarker, "<strong><em>$1</em></strong>"); //$NON-NLS-1$ //$NON-NLS-2$ return new SearchTextSuggestion(suggestedSearchText.toString(), html, suggestionTotalHits); } } return null; }
From source file:de.blizzy.documentr.search.PageIndex.java
License:Open Source License
/**
 * Runs a page search and returns the hits of one result page.
 *
 * Query parsing is submitted to {@code taskExecutor} while the visible document ids are
 * computed on the calling thread; the individual hits of the requested page are then built
 * as separate tasks and awaited with {@code INTERACTIVE_TIMEOUT} seconds.
 *
 * @param searchText the text to search for
 * @param page 1-based page number; hits {@code HITS_PER_PAGE * (page - 1)} up to
 *        {@code HITS_PER_PAGE * page} are returned
 * @param authentication authentication used to determine the visible documents
 * @param searcher the searcher to run the query on
 * @return the hits of the requested page together with the total hit count
 * @throws ParseException if parsing the search text failed
 * @throws IOException if searching or building a hit failed with an I/O error
 * @throws TimeoutException if a task did not finish within the interactive timeout
 */
private SearchResult findPages(String searchText, int page, Authentication authentication,
        IndexSearcher searcher) throws ParseException, IOException, TimeoutException {

    Future<Query> queryFuture = taskExecutor.submit(new ParseQueryTask(searchText, analyzer));
    Bits visibleDocIds = getVisibleDocIds(searcher, authentication);

    Query query;
    try {
        query = queryFuture.get(INTERACTIVE_TIMEOUT, TimeUnit.SECONDS);
    } catch (InterruptedException e) {
        // restore the interrupt status before converting to an unchecked exception
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    } catch (ExecutionException e) {
        Throwable cause = e.getCause();
        if (cause instanceof ParseException) {
            throw (ParseException) cause;
        } else {
            throw Util.toRuntimeException(cause);
        }
    } finally {
        // no-op for a completed future; cancels the parse task if it is still outstanding
        queryFuture.cancel(false);
    }

    TopDocs docs = searcher.search(query, new PagePermissionFilter(visibleDocIds), HITS_PER_PAGE * page);
    int start = HITS_PER_PAGE * (page - 1);
    int end = Math.min(HITS_PER_PAGE * page, docs.scoreDocs.length);
    IndexReader reader = searcher.getIndexReader();
    List<ListenableFuture<SearchHit>> hitFutures = Lists.newArrayList();
    for (int i = start; i < end; i++) {
        ListenableFuture<SearchHit> hitFuture = taskExecutor
                .submit(new GetSearchHitTask(query, reader, docs.scoreDocs[i].doc, analyzer));
        hitFutures.add(hitFuture);
    }

    try {
        ListenableFuture<List<SearchHit>> allHitsFuture = Futures.allAsList(hitFutures);
        List<SearchHit> hits = allHitsFuture.get(INTERACTIVE_TIMEOUT, TimeUnit.SECONDS);
        return new SearchResult(hits, docs.totalHits, HITS_PER_PAGE);
    } catch (InterruptedException e) {
        // restore the interrupt status before converting to an unchecked exception
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    } catch (ExecutionException e) {
        Throwable cause = e.getCause();
        if (cause instanceof IOException) {
            throw (IOException) cause;
        } else {
            throw Util.toRuntimeException(cause);
        }
    } finally {
        for (ListenableFuture<SearchHit> hitFuture : hitFutures) {
            hitFuture.cancel(false);
        }
    }
}