List of usage examples for org.apache.lucene.search.IndexSearcher#doc
public Document doc(int docID, Set<String> fieldsToLoad) throws IOException
Sugar for .getIndexReader().document(docID, fieldsToLoad)
From source file:com.b2international.snowowl.snomed.api.impl.ClassificationRunIndex.java
License:Apache License
/**
 * Marks every classification run whose status is COMPLETED, RUNNING, SAVING_IN_PROGRESS or
 * SCHEDULED as {@link ClassificationStatus#STALE}.
 *
 * <p>The matching documents are counted first, then loaded in index order. Each stored run is
 * deserialized from its source field, switched to STALE and upserted without committing;
 * {@code commit()} is called once after all runs were processed. Nothing is written when no
 * document matches.
 *
 * @throws IOException if searching the index, loading a document or reading a run fails
 */
public void invalidateClassificationRuns() throws IOException {
    final Query activeStatuses = Fields.newQuery()
            .field(FIELD_STATUS, ClassificationStatus.COMPLETED.name())
            .field(FIELD_STATUS, ClassificationStatus.RUNNING.name())
            .field(FIELD_STATUS, ClassificationStatus.SAVING_IN_PROGRESS.name())
            .field(FIELD_STATUS, ClassificationStatus.SCHEDULED.name())
            .matchAny();
    final Query runsToInvalidate = Fields.newQuery()
            .field(FIELD_CLASS, ClassificationRun.class.getSimpleName())
            .and(activeStatuses)
            .matchAll();

    IndexSearcher searcher = null;
    try {
        searcher = manager.acquire();

        // First pass only counts the hits so the second pass can ask for exactly that many.
        final TotalHitCountCollector hitCounter = new TotalHitCountCollector();
        searcher.search(runsToInvalidate, hitCounter);
        final int hitCount = hitCounter.getTotalHits();
        final int limit = Ints.min(searcher.getIndexReader().maxDoc(), hitCount);

        if (limit >= 1) {
            final TopDocs hits = searcher.search(runsToInvalidate, limit, Sort.INDEXORDER, false, false);
            final ObjectReader runReader = objectMapper.reader(ClassificationRun.class);
            final Set<String> fieldsToLoad = ImmutableSet.of(FIELD_BRANCH_PATH, FIELD_SOURCE);

            for (final ScoreDoc hit : hits.scoreDocs) {
                final Document stored = searcher.doc(hit.doc, fieldsToLoad);
                final String branchPath = stored.get(FIELD_BRANCH_PATH);
                final ClassificationRun run = runReader.readValue(stored.get(FIELD_SOURCE));
                run.setStatus(ClassificationStatus.STALE);
                upsertClassificationRunNoCommit(branchPath, run);
            }

            commit();
        }
    } finally {
        // Always hand the searcher back to the manager, even on failure.
        if (searcher != null) {
            manager.release(searcher);
        }
    }
}
From source file:com.b2international.snowowl.snomed.api.impl.ClassificationRunIndex.java
License:Apache License
private <T> List<T> search(final Query query, final Class<? extends T> sourceClass, Sort sort, final int offset, final int limit) throws IOException { IndexSearcher searcher = null; try {// w w w . j a v a 2 s. co m searcher = manager.acquire(); final TotalHitCountCollector collector = new TotalHitCountCollector(); searcher.search(query, collector); final int totalHits = collector.getTotalHits(); final int saturatedSum = Ints.saturatedCast((long) offset + limit); final int docsToRetrieve = Ints.min(saturatedSum, searcher.getIndexReader().maxDoc(), totalHits); final ImmutableList.Builder<T> resultBuilder = ImmutableList.builder(); if (docsToRetrieve < 1) { return resultBuilder.build(); } final TopDocs docs = searcher.search(query, docsToRetrieve, sort, false, false); final ScoreDoc[] scoreDocs = docs.scoreDocs; final ObjectReader reader = objectMapper.reader(sourceClass); for (int i = offset; i < docsToRetrieve && i < scoreDocs.length; i++) { final Document sourceDocument = searcher.doc(scoreDocs[i].doc, ImmutableSet.of(FIELD_SOURCE)); final String source = sourceDocument.get(FIELD_SOURCE); final T deserializedSource = reader.readValue(source); resultBuilder.add(deserializedSource); } return resultBuilder.build(); } finally { if (null != searcher) { manager.release(searcher); } } }
From source file:com.ibm.jaql.lang.expr.index.ProbeLuceneFn.java
License:Apache License
@Override public JsonIterator iter(Context context) throws Exception { JsonRecord fd = (JsonRecord) exprs[0].eval(context); if (fd == null) { return JsonIterator.NULL; }/*from w ww.ja v a 2s .c o m*/ JsonString loc = (JsonString) fd.get(new JsonString("location")); if (loc == null) { return JsonIterator.NULL; } JsonString jquery = (JsonString) exprs[1].eval(context); if (jquery == null) { return JsonIterator.NULL; } HashSet<String> fields = null; JsonIterator iter = exprs[2].iter(context); for (JsonValue sv : iter) { JsonString s = (JsonString) sv; if (s != null) { if (fields == null) { fields = new HashSet<String>(); } fields.add(s.toString()); } } final FieldSelector fieldSelector = (fields == null) ? null : new SetBasedFieldSelector(fields, new HashSet<String>()); final IndexSearcher searcher = new IndexSearcher(loc.toString()); Analyzer analyzer = new StandardAnalyzer(); QueryParser qp = new QueryParser("key", analyzer); Query query = qp.parse(jquery.toString()); query = searcher.rewrite(query); final Scorer scorer = query.weight(searcher).scorer(searcher.getIndexReader()); final BufferedJsonRecord rec = new BufferedJsonRecord(); final JsonString jdoc = new JsonString("doc"); final MutableJsonLong jdocid = new MutableJsonLong(); return new JsonIterator(rec) { @Override public boolean moveNext() throws Exception { if (!scorer.next()) { return false; } rec.clear(); int i = scorer.doc(); jdocid.set(i); rec.add(jdoc, jdocid); if (fieldSelector != null) { Document doc = searcher.doc(i, fieldSelector); for (Object x : doc.getFields()) { Field f = (Field) x; String name = f.name(); byte[] val = f.binaryValue(); ByteArrayInputStream bais = new ByteArrayInputStream(val); // TODO: reuse DataInputStream in = new DataInputStream(bais); // TODO: reuse JsonValue ival = serializer.read(in, null); rec.add(new JsonString(name), ival); } } return true; // currentValue == rec } }; }
From source file:com.stratio.cassandra.index.LuceneIndex.java
License:Apache License
/** * Finds the top {@code count} hits for {@code query}, applying {@code clusteringKeyFilter} if non-null, and sorting * the hits by the criteria in {@code sortFields}. * * @param query The {@link Query} to search for. * @param sort The {@link Sort} to be applied. * @param after The starting {@link SearchResult}. * @param count Return only the top {@code count} results. * @param fieldsToLoad The name of the fields to be loaded. * @return The found documents, sorted according to the supplied {@link Sort} instance. *///from w w w . j a v a 2 s . c o m public List<SearchResult> search(Query query, Sort sort, SearchResult after, Integer count, Set<String> fieldsToLoad, boolean usesRelevance) { Log.debug("Searching by query %s", query); try { IndexSearcher searcher = searcherManager.acquire(); try { // Search ScoreDoc start = after == null ? null : after.getScoreDoc(); TopDocs topDocs = topDocs(searcher, query, sort, start, count, usesRelevance); ScoreDoc[] scoreDocs = topDocs.scoreDocs; // Collect the documents from query result List<SearchResult> searchResults = new ArrayList<>(scoreDocs.length); for (ScoreDoc scoreDoc : scoreDocs) { Document document = searcher.doc(scoreDoc.doc, fieldsToLoad); SearchResult searchResult = rowMapper.searchResult(document, scoreDoc); searchResults.add(searchResult); } return searchResults; } finally { searcherManager.release(searcher); } } catch (IOException e) { Log.error(e, "Error while searching by query %s", query); throw new RuntimeException(e); } }
From source file:com.stratio.cassandra.lucene.index.RAMIndex.java
License:Apache License
/**
 * Finds the top {@code count} hits for {@code query} and sorting the hits by {@code sort}.
 *
 * <p>Pending writes are committed first, then a fresh reader is opened on the directory for this
 * single search. The reader is closed when the method exits, whether normally or exceptionally.
 *
 * @param query the {@link Query} to search for
 * @param sort the {@link Sort} to be applied
 * @param count the max number of results to be collected
 * @param fields the names of the fields to be loaded
 * @return the found documents
 * @throws IndexException if committing, opening the reader, searching or closing fails
 */
public List<Document> search(Query query, Sort sort, Integer count, Set<String> fields) {
    try {
        indexWriter.commit();
        // try-with-resources: the original only closed the reader on the success path, leaking
        // it whenever rewrite/search/doc threw.
        try (IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Sort rewrittenSort = sort.rewrite(searcher);
            TopDocs topDocs = searcher.search(query, count, rewrittenSort);
            List<Document> documents = new LinkedList<>();
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                documents.add(searcher.doc(scoreDoc.doc, fields));
            }
            return documents;
        }
    } catch (IOException e) {
        throw new IndexException(logger, e, "Error while searching");
    }
}
From source file:com.stratio.cassandra.lucene.service.LuceneIndex.java
License:Apache License
/** * Finds the top {@code count} hits for {@code query}, applying {@code clusteringKeyFilter} if non-null, and sorting * the hits by the criteria in {@code sortFields}. * * @param searcher The {@link IndexSearcher} to be used. * @param query The {@link Query} to search for. * @param sort The {@link Sort} to be applied. * @param after The starting {@link SearchResult}. * @param count Return only the top {@code count} results. * @param fieldsToLoad The name of the fields to be loaded. * @return The found documents, sorted according to the supplied {@link Sort} instance. * @throws IOException If Lucene throws IO errors. */// w ww . j a v a 2s . c o m public LinkedHashMap<Document, ScoreDoc> search(IndexSearcher searcher, Query query, Sort sort, ScoreDoc after, Integer count, Set<String> fieldsToLoad) throws IOException { Log.debug("%s search by query %s", logName, query); TopDocs topDocs; if (sort == null) { topDocs = searcher.searchAfter(after, query, count); } else { topDocs = searcher.searchAfter(after, query, count, sort); } ScoreDoc[] scoreDocs = topDocs.scoreDocs; // Collect the documents from query result LinkedHashMap<Document, ScoreDoc> searchResults = new LinkedHashMap<>(); for (ScoreDoc scoreDoc : scoreDocs) { Document document = searcher.doc(scoreDoc.doc, fieldsToLoad); searchResults.put(document, scoreDoc); } return searchResults; }
From source file:com.tekstosense.stemmer.index.Indexer.java
License:Open Source License
/**
 * Searches the index at {@code INDEX_PATH} for the phrase "Lucene in Action" in the
 * {@code title} field and logs title, ISBN and score explanation of up to ten hits with a
 * positive score.
 *
 * <p>The directory and the reader are closed when the method exits, whether normally or
 * exceptionally.
 *
 * @throws IOException
 *             Signals that an I/O exception has occurred.
 * @throws QueryNodeException
 *             the query node exception
 * @throws ParseException
 *             the parse exception
 */
private static void searcher() throws IOException, QueryNodeException, ParseException {
    Path indexDirectoryPath = new File(INDEX_PATH).toPath();

    // Both resources were never closed before; resources are released in reverse order, so the
    // reader is closed before the directory it reads from.
    try (FSDirectory indexDirectory = new SimpleFSDirectory(indexDirectoryPath);
            DirectoryReader ireader = DirectoryReader.open(indexDirectory)) {
        IndexSearcher isearcher = new IndexSearcher(ireader);

        QueryParser parser = new QueryParser("title", new StandardAnalyzer());
        Query query = parser.parse("\"Lucene in Action\"");

        TopScoreDocCollector collector = TopScoreDocCollector.create(10);
        isearcher.search(query, new PositiveScoresOnlyCollector(collector));
        TopDocs topDocs = collector.topDocs();

        // Only these stored fields are loaded for each hit.
        Set<String> fields = new HashSet<String>();
        fields.add("title");
        fields.add("isbn");

        for (ScoreDoc result : topDocs.scoreDocs) {
            Document doc = isearcher.doc(result.doc, fields);
            if (LOGGER.isInfoEnabled()) {
                LOGGER.info("--- Title :  " + doc.getField("title").stringValue() + " ---");
                LOGGER.info("--- ISBN : " + doc.getField("isbn").stringValue() + " ---");
                LOGGER.info(isearcher.explain(query, result.doc));
            }
        }
    }
}
From source file:com.vmware.xenon.services.common.LuceneDocumentIndexBackupService.java
License:Open Source License
/**
 * Rolls the index behind {@code newWriter} back to the given point in time.
 *
 * <p>Step 1: for every self link that has a document updated after the boundary,
 * {@code updateCurrentAttributeForSelfLink} is invoked once. Step 2: all documents updated after
 * the boundary are deleted through the writer.
 *
 * <p>The near-real-time reader opened on the writer is closed before the delete is issued.
 *
 * @param timeSnapshotBoundaryMicros the restore point, in microseconds; documents with a later
 *                                   update time are removed
 * @param newWriter                  the writer of the index being restored
 * @throws IOException if searching, updating or deleting fails
 */
private void performTimeSnapshotRecovery(Long timeSnapshotBoundaryMicros, IndexWriter newWriter)
        throws IOException {

    // Matches every document updated strictly after the restore point. Used both for paging
    // through the affected self links and for the final delete.
    Query updateTimeQuery = LongPoint.newRangeQuery(ServiceDocument.FIELD_NAME_UPDATE_TIME_MICROS,
            timeSnapshotBoundaryMicros + 1, Long.MAX_VALUE);

    // For documents with metadata indexing enabled, the version which was current at
    // the restore time may have subsequently been marked as not current. Update the
    // current field for any such documents.
    // The reader was previously never closed; try-with-resources releases it on every path.
    try (DirectoryReader reader = DirectoryReader.open(newWriter, true, true)) {
        IndexSearcher searcher = new IndexSearcher(reader);

        Sort selfLinkSort = new Sort(new SortField(
                LuceneIndexDocumentHelper.createSortFieldPropertyName(ServiceDocument.FIELD_NAME_SELF_LINK),
                SortField.Type.STRING));

        final int pageSize = 10000;

        // Self links seen on the previous page. Because results are sorted by self link, a run
        // of versions of the same link can straddle a page boundary; remembering the previous
        // page avoids processing such a link twice.
        Set<String> prevPageLinks = new HashSet<>();
        ScoreDoc after = null;
        while (true) {
            TopDocs results = searcher.searchAfter(after, updateTimeQuery, pageSize, selfLinkSort,
                    false, false);
            if (results == null || results.scoreDocs == null || results.scoreDocs.length == 0) {
                break;
            }

            Set<String> pageLinks = new HashSet<>();
            DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
            for (ScoreDoc sd : results.scoreDocs) {
                visitor.reset(ServiceDocument.FIELD_NAME_SELF_LINK);
                searcher.doc(sd.doc, visitor);

                if (prevPageLinks.contains(visitor.documentSelfLink)) {
                    // Already handled on the previous page; carry it forward in case the run
                    // continues onto the next page as well.
                    pageLinks.add(visitor.documentSelfLink);
                    continue;
                }

                if (!pageLinks.add(visitor.documentSelfLink)) {
                    // Already handled earlier on this page.
                    continue;
                }

                updateCurrentAttributeForSelfLink(searcher, timeSnapshotBoundaryMicros,
                        visitor.documentSelfLink, newWriter);
            }

            if (results.scoreDocs.length < pageSize) {
                // Short page: nothing left to fetch.
                break;
            }

            after = results.scoreDocs[results.scoreDocs.length - 1];
            prevPageLinks = pageLinks;
        }
    }

    // Now that metadata indexing attributes have been updated appropriately, delete any
    // documents which were created after the restore point.
    newWriter.deleteDocuments(updateTimeQuery);
}
From source file:com.vmware.xenon.services.common.LuceneDocumentIndexBackupService.java
License:Open Source License
private void updateCurrentAttributeForSelfLink(IndexSearcher searcher, long timeSnapshotBoundaryMicros, String selfLink, IndexWriter newWriter) throws IOException { Query selfLinkClause = new TermQuery(new Term(ServiceDocument.FIELD_NAME_SELF_LINK, selfLink)); Query updateTimeClause = LongPoint.newRangeQuery(ServiceDocument.FIELD_NAME_UPDATE_TIME_MICROS, 0, timeSnapshotBoundaryMicros); Query booleanQuery = new BooleanQuery.Builder().add(selfLinkClause, Occur.MUST) .add(updateTimeClause, Occur.MUST).build(); Sort versionSort = new Sort( new SortedNumericSortField(ServiceDocument.FIELD_NAME_VERSION, SortField.Type.LONG, true)); TopDocs results = searcher.search(booleanQuery, 1, versionSort, false, false); if (results == null || results.scoreDocs == null || results.scoreDocs.length == 0) { return;// w w w.j av a 2 s. c om } DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(); visitor.reset(LuceneIndexDocumentHelper.FIELD_NAME_INDEXING_ID); searcher.doc(results.scoreDocs[0].doc, visitor); if (visitor.documentIndexingId == null) { return; } Term indexingIdTerm = new Term(LuceneIndexDocumentHelper.FIELD_NAME_INDEXING_ID, visitor.documentIndexingId); newWriter.updateNumericDocValue(indexingIdTerm, LuceneIndexDocumentHelper.FIELD_NAME_INDEXING_METADATA_VALUE_TOMBSTONE_TIME, LuceneIndexDocumentHelper.ACTIVE_DOCUMENT_TOMBSTONE_TIME); }
From source file:com.xiaomi.linden.core.LindenUtil.java
License:Apache License
/** * Get fields by doc id./*from w ww . j ava2 s .com*/ * * @param indexSearcher The IndexSearcher * @param docId Doc ID. * @param id Id field value * @param sourceFields Specify the fields, if null get all fields values. * @param config the lindenConfig for search * @return JSON String which contains field values. * @throws IOException */ public static String getSource(IndexSearcher indexSearcher, int docId, String id, List<String> sourceFields, LindenConfig config) throws IOException { List<AtomicReaderContext> leaves = indexSearcher.getIndexReader().leaves(); int idx = ReaderUtil.subIndex(docId, leaves); AtomicReaderContext atomicReaderContext = leaves.get(idx); AtomicReader reader = atomicReaderContext.reader(); int locDocId = docId - atomicReaderContext.docBase; JSONObject src = new JSONObject(); String idFieldName = config.getSchema().getId(); if (id != null) { src.put(idFieldName, id); } else { src.put(idFieldName, FieldCache.DEFAULT.getTerms(reader, idFieldName, false).get(locDocId).utf8ToString()); } List<LindenFieldSchema> fields = new ArrayList<>(); if (sourceFields != null && !sourceFields.isEmpty()) { for (String sourceField : sourceFields) { if (sourceField.equals(idFieldName)) { continue; } LindenFieldSchema fieldSchema = config.getFieldSchema(sourceField); fields.add(fieldSchema); } } else { fields.addAll(config.getSchema().getFields()); } Map<String, LindenFieldSchema> storedFields = new HashMap<>(); for (LindenFieldSchema fieldSchema : fields) { String name = fieldSchema.getName(); boolean fieldCache = false; if (fieldSchema.isMulti()) { /** * multi-field has multiple values, each value is indexed to the document according to field type * multi-field source value is in JSONArray format, something like "["MI4","MI Note","RedMI3"]" * multi-field source value is stored in BinaryDocValues */ String blob = FieldCache.DEFAULT.getTerms(reader, name, false).get(locDocId).utf8ToString(); if (StringUtils.isNotEmpty(blob)) { src.put(name, 
JSON.parseArray(blob)); } } else if (fieldSchema.isDocValues()) { fieldCache = true; } else if (fieldSchema.isIndexed() && fieldSchema.isStored()) { // field cache doesn't support tokenized string field if (config.isEnableSourceFieldCache() && !possibleTokenizedString(fieldSchema)) { fieldCache = true; } else { storedFields.put(name, fieldSchema); } } else if (fieldSchema.isIndexed()) { if (!possibleTokenizedString(fieldSchema)) { fieldCache = true; } } else if (fieldSchema.isStored()) { storedFields.put(name, fieldSchema); } if (fieldCache) { Object val; switch (fieldSchema.getType()) { case STRING: case FACET: val = FieldCache.DEFAULT.getTerms(reader, name, false).get(locDocId).utf8ToString(); String v = (String) val; fieldCache = !v.isEmpty() || actualContain(reader, name, locDocId); break; case INTEGER: val = FieldCache.DEFAULT.getInts(reader, name, false).get(locDocId); fieldCache = ((int) val) != 0 || actualContain(reader, name, locDocId); break; case LONG: val = FieldCache.DEFAULT.getLongs(reader, name, false).get(locDocId); fieldCache = ((long) val != 0) || actualContain(reader, name, locDocId); break; case FLOAT: val = FieldCache.DEFAULT.getFloats(reader, name, false).get(locDocId); fieldCache = ((float) val != 0) || actualContain(reader, name, locDocId); break; case DOUBLE: val = FieldCache.DEFAULT.getDoubles(reader, name, false).get(locDocId); fieldCache = ((double) val != 0) || actualContain(reader, name, locDocId); break; default: throw new IllegalStateException("Unsupported linden type"); } if (fieldCache) { src.put(name, val); } } } if (!storedFields.isEmpty()) { Document doc = indexSearcher.doc(docId, storedFields.keySet()); for (IndexableField field : doc.getFields()) { String name = field.name(); LindenFieldSchema schema = storedFields.get(name); Object obj = src.get(name); Object val = parseLindenValue(field.stringValue(), storedFields.get(name).getType()); if (obj == null) { if (schema.isMulti()) { JSONArray array = new JSONArray(); 
array.add(val); src.put(name, array); } else { src.put(name, val); } } else if (obj instanceof JSONArray) { ((JSONArray) obj).add(val); } else { JSONArray array = new JSONArray(); array.add(obj); array.add(val); src.put(name, array); } } } return src.toJSONString(); }