Example usage for org.apache.lucene.search IndexSearcher doc

List of usage examples for org.apache.lucene.search.IndexSearcher#doc

Introduction

This page collects example usages of the org.apache.lucene.search.IndexSearcher#doc method.

Prototype

public Document doc(int docID, Set<String> fieldsToLoad) throws IOException 

Source Link

Document

Sugar for .getIndexReader().document(docID, fieldsToLoad)

Usage

From source file:com.b2international.snowowl.snomed.api.impl.ClassificationRunIndex.java

License:Apache License

/**
 * Flags stored classification runs as {@link ClassificationStatus#STALE}.
 * <p>
 * The runs affected are the documents whose class field equals the simple name of
 * {@link ClassificationRun} and whose status field is one of {@code COMPLETED}, {@code RUNNING},
 * {@code SAVING_IN_PROGRESS} or {@code SCHEDULED}. Every matching run is deserialized from its stored
 * source field, switched to {@code STALE} and written back without committing; a single commit is issued
 * after all runs were processed. Nothing is written or committed when no run matches.
 *
 * @throws IOException if searching, reading or updating the index fails
 */
public void invalidateClassificationRuns() throws IOException {

    final Query activeStatusQuery = Fields.newQuery()
            .field(FIELD_STATUS, ClassificationStatus.COMPLETED.name())
            .field(FIELD_STATUS, ClassificationStatus.RUNNING.name())
            .field(FIELD_STATUS, ClassificationStatus.SAVING_IN_PROGRESS.name())
            .field(FIELD_STATUS, ClassificationStatus.SCHEDULED.name())
            .matchAny();

    final Query runQuery = Fields.newQuery()
            .field(FIELD_CLASS, ClassificationRun.class.getSimpleName())
            .and(activeStatusQuery)
            .matchAll();

    IndexSearcher searcher = null;

    try {
        searcher = manager.acquire();

        // First pass only counts the hits so that the second pass can request exactly that many.
        final TotalHitCountCollector hitCounter = new TotalHitCountCollector();
        searcher.search(runQuery, hitCounter);
        final int totalHits = hitCounter.getTotalHits();

        final int hitLimit = Ints.min(searcher.getIndexReader().maxDoc(), totalHits);
        if (hitLimit < 1) {
            return;
        }

        final TopDocs matchingRuns = searcher.search(runQuery, hitLimit, Sort.INDEXORDER, false, false);
        final ObjectReader runReader = objectMapper.reader(ClassificationRun.class);

        for (final ScoreDoc hit : matchingRuns.scoreDocs) {
            final Document storedRun = searcher.doc(hit.doc, ImmutableSet.of(FIELD_BRANCH_PATH, FIELD_SOURCE));
            final String branchPath = storedRun.get(FIELD_BRANCH_PATH);
            final ClassificationRun run = runReader.readValue(storedRun.get(FIELD_SOURCE));

            run.setStatus(ClassificationStatus.STALE);
            upsertClassificationRunNoCommit(branchPath, run);
        }

        commit();

    } finally {
        // The searcher must go back to the manager on every exit path, including the early return.
        if (searcher != null) {
            manager.release(searcher);
        }
    }
}

From source file:com.b2international.snowowl.snomed.api.impl.ClassificationRunIndex.java

License:Apache License

/**
 * Runs the query and returns one page of results, deserialized from the stored source field.
 * <p>
 * The hits are counted first; then at most {@code offset + limit} hits (capped by the reader's
 * {@code maxDoc()} and the total hit count) are fetched in the requested order, and the entries from
 * index {@code offset} onwards are converted to {@code sourceClass} instances.
 *
 * @param query       the query to execute
 * @param sourceClass the type the stored source field is deserialized into
 * @param sort        the sort order passed to the searcher
 * @param offset      index of the first hit to return
 * @param limit       maximum number of hits to return
 * @return an immutable list with the deserialized page; empty if there is nothing to fetch
 * @throws IOException if searching or reading the index fails
 */
private <T> List<T> search(final Query query, final Class<? extends T> sourceClass, Sort sort, final int offset,
        final int limit) throws IOException {
    IndexSearcher searcher = null;

    try {
        searcher = manager.acquire();

        final TotalHitCountCollector hitCounter = new TotalHitCountCollector();
        searcher.search(query, hitCounter);
        final int totalHits = hitCounter.getTotalHits();

        // offset + limit is computed as a long and saturated so that large values cannot overflow.
        final int pageEnd = Ints.saturatedCast((long) offset + limit);
        final int fetchCount = Ints.min(pageEnd, searcher.getIndexReader().maxDoc(), totalHits);

        if (fetchCount < 1) {
            return ImmutableList.<T>of();
        }

        final ScoreDoc[] hits = searcher.search(query, fetchCount, sort, false, false).scoreDocs;
        final ObjectReader sourceReader = objectMapper.reader(sourceClass);
        final ImmutableList.Builder<T> page = ImmutableList.builder();

        final int end = Math.min(fetchCount, hits.length);
        for (int hitIndex = offset; hitIndex < end; hitIndex++) {
            final Document stored = searcher.doc(hits[hitIndex].doc, ImmutableSet.of(FIELD_SOURCE));
            final T item = sourceReader.readValue(stored.get(FIELD_SOURCE));
            page.add(item);
        }

        return page.build();

    } finally {
        if (searcher != null) {
            manager.release(searcher);
        }
    }
}

From source file:com.ibm.jaql.lang.expr.index.ProbeLuceneFn.java

License:Apache License

/**
 * Probes a Lucene index and lazily iterates over the matching documents.
 * <p>
 * {@code exprs[0]} must evaluate to a record holding a {@code "location"} string (the index location),
 * {@code exprs[1]} to the query string (parsed against the default field {@code "key"}), and
 * {@code exprs[2]} to the names of the stored fields to load. Each produced record contains the Lucene
 * document id under {@code "doc"} and, if any field names were given, one entry per loaded field whose
 * binary value is decoded with {@code serializer}.
 * <p>
 * The searcher opened for the probe is closed as soon as the scorer is exhausted, or immediately if
 * preparing the query fails. A consumer that abandons the iterator early still leaves it open.
 *
 * @param context the evaluation context
 * @return an iterator over the matching documents, or {@link JsonIterator#NULL} if the descriptor, its
 *         location or the query evaluates to {@code null}
 * @throws Exception if evaluating the arguments, parsing the query or accessing the index fails
 */
@Override
public JsonIterator iter(Context context) throws Exception {
    JsonRecord fd = (JsonRecord) exprs[0].eval(context);
    if (fd == null) {
        return JsonIterator.NULL;
    }
    JsonString loc = (JsonString) fd.get(new JsonString("location"));
    if (loc == null) {
        return JsonIterator.NULL;
    }
    JsonString jquery = (JsonString) exprs[1].eval(context);
    if (jquery == null) {
        return JsonIterator.NULL;
    }

    // Collect the requested field names; a null set means "do not load any stored fields".
    HashSet<String> fields = null;
    JsonIterator iter = exprs[2].iter(context);
    for (JsonValue sv : iter) {
        JsonString s = (JsonString) sv;
        if (s != null) {
            if (fields == null) {
                fields = new HashSet<String>();
            }
            fields.add(s.toString());
        }
    }
    final FieldSelector fieldSelector = (fields == null) ? null
            : new SetBasedFieldSelector(fields, new HashSet<String>());

    final IndexSearcher searcher = new IndexSearcher(loc.toString());
    final Scorer scorer;
    try {
        Analyzer analyzer = new StandardAnalyzer();
        QueryParser qp = new QueryParser("key", analyzer);
        Query query = qp.parse(jquery.toString());

        query = searcher.rewrite(query);
        scorer = query.weight(searcher).scorer(searcher.getIndexReader());
    } catch (Exception e) {
        // Do not leak the freshly opened index when the query cannot be prepared.
        searcher.close();
        throw e;
    }
    final BufferedJsonRecord rec = new BufferedJsonRecord();
    final JsonString jdoc = new JsonString("doc");
    final MutableJsonLong jdocid = new MutableJsonLong();

    return new JsonIterator(rec) {
        // Set once the scorer ran dry; the searcher is closed at that point and must not be touched again.
        private boolean exhausted = false;

        @Override
        public boolean moveNext() throws Exception {
            if (exhausted) {
                return false;
            }
            if (!scorer.next()) {
                exhausted = true;
                searcher.close();
                return false;
            }
            rec.clear();
            int i = scorer.doc();
            jdocid.set(i);
            rec.add(jdoc, jdocid);
            if (fieldSelector != null) {
                Document doc = searcher.doc(i, fieldSelector);
                for (Object x : doc.getFields()) {
                    Field f = (Field) x;
                    String name = f.name();
                    byte[] val = f.binaryValue();
                    ByteArrayInputStream bais = new ByteArrayInputStream(val); // TODO: reuse
                    DataInputStream in = new DataInputStream(bais); // TODO: reuse
                    JsonValue ival = serializer.read(in, null);
                    rec.add(new JsonString(name), ival);
                }
            }
            return true; // currentValue == rec
        }
    };
}

From source file:com.stratio.cassandra.index.LuceneIndex.java

License:Apache License

/**
 * Finds the top {@code count} hits for {@code query}, sorted by {@code sort}, and maps every hit to a
 * {@link SearchResult}.
 *
 * @param query         The {@link Query} to search for.
 * @param sort          The {@link Sort} to be applied.
 * @param after         The {@link SearchResult} to start after, or {@code null} to start from the first hit.
 * @param count         Return only the top {@code count} results.
 * @param fieldsToLoad  The names of the fields to be loaded.
 * @param usesRelevance Forwarded unchanged to the {@code topDocs} helper together with the other
 *                      search arguments.
 * @return The found documents, in the order returned by the searcher.
 * @throws RuntimeException wrapping any {@link IOException} raised while searching.
 */
public List<SearchResult> search(Query query, Sort sort, SearchResult after, Integer count,
        Set<String> fieldsToLoad, boolean usesRelevance) {
    Log.debug("Searching by query %s", query);
    try {
        IndexSearcher searcher = searcherManager.acquire();
        try {
            // Run the search, resuming after the previous page's last hit when one was supplied
            ScoreDoc startAfter = (after != null) ? after.getScoreDoc() : null;
            ScoreDoc[] hits = topDocs(searcher, query, sort, startAfter, count, usesRelevance).scoreDocs;

            // Load the requested fields of every hit and map them to results
            List<SearchResult> results = new ArrayList<>(hits.length);
            for (int i = 0; i < hits.length; i++) {
                ScoreDoc hit = hits[i];
                Document loaded = searcher.doc(hit.doc, fieldsToLoad);
                results.add(rowMapper.searchResult(loaded, hit));
            }
            return results;
        } finally {
            searcherManager.release(searcher);
        }
    } catch (IOException e) {
        Log.error(e, "Error while searching by query %s", query);
        throw new RuntimeException(e);
    }
}

From source file:com.stratio.cassandra.lucene.index.RAMIndex.java

License:Apache License

/**
 * Finds the top {@code count} hits for {@code query} and sorting the hits by {@code sort}.
 * <p>
 * Pending writes are committed first, then a fresh reader is opened for this single search. The reader is
 * always closed before returning, including when the search or a document load fails.
 *
 * @param query the {@link Query} to search for
 * @param sort the {@link Sort} to be applied
 * @param count the max number of results to be collected
 * @param fields the names of the fields to be loaded
 * @return the found documents
 * @throws IndexException if committing, searching or closing the reader raises an {@link IOException}
 */
public List<Document> search(Query query, Sort sort, Integer count, Set<String> fields) {
    try {
        indexWriter.commit();
        // try-with-resources guarantees the reader is released even if search() or doc() throws
        try (IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Sort rewrittenSort = sort.rewrite(searcher);
            TopDocs topDocs = searcher.search(query, count, rewrittenSort);
            List<Document> documents = new LinkedList<>();
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                documents.add(searcher.doc(scoreDoc.doc, fields));
            }
            return documents;
        }
    } catch (IOException e) {
        throw new IndexException(logger, e, "Error while searching");
    }
}

From source file:com.stratio.cassandra.lucene.service.LuceneIndex.java

License:Apache License

/**
 * Finds the top {@code count} hits for {@code query} that come after {@code after}, sorted by
 * {@code sort} when one is given, and loads the requested fields of every hit.
 *
 * @param searcher     The {@link IndexSearcher} to be used.
 * @param query        The {@link Query} to search for.
 * @param sort         The {@link Sort} to be applied, or {@code null} to use the searcher's default order.
 * @param after        The {@link ScoreDoc} to start after.
 * @param count        Return only the top {@code count} results.
 * @param fieldsToLoad The name of the fields to be loaded.
 * @return The found documents mapped to their {@link ScoreDoc}, in the order returned by the searcher.
 * @throws IOException If Lucene throws IO errors.
 */
public LinkedHashMap<Document, ScoreDoc> search(IndexSearcher searcher, Query query, Sort sort, ScoreDoc after,
        Integer count, Set<String> fieldsToLoad) throws IOException {
    Log.debug("%s search by query %s", logName, query);

    TopDocs page = (sort == null)
            ? searcher.searchAfter(after, query, count)
            : searcher.searchAfter(after, query, count, sort);

    // Insertion order of the map preserves the order of the hits
    LinkedHashMap<Document, ScoreDoc> hitsByDocument = new LinkedHashMap<>();
    ScoreDoc[] hits = page.scoreDocs;
    for (int i = 0; i < hits.length; i++) {
        ScoreDoc hit = hits[i];
        hitsByDocument.put(searcher.doc(hit.doc, fieldsToLoad), hit);
    }

    return hitsByDocument;
}

From source file:com.tekstosense.stemmer.index.Indexer.java

License:Open Source License

/**
 * Runs a fixed demo phrase query ({@code "Lucene in Action"} on the {@code title} field) against the
 * index at {@code INDEX_PATH} and logs title, ISBN and score explanation of up to ten hits.
 * <p>
 * The directory and the reader are opened for this call only and are closed before returning, whether
 * or not the search succeeds.
 *
 * @throws IOException
 *             Signals that an I/O exception has occurred.
 * @throws QueryNodeException
 *             the query node exception
 * @throws ParseException
 *             the parse exception
 */
private static void searcher() throws IOException, QueryNodeException, ParseException {
    Path indexDirectoryPath = new File(INDEX_PATH).toPath();
    // Resources are closed in reverse order: reader first, then the directory it was opened on.
    try (FSDirectory indexDirectory = new SimpleFSDirectory(indexDirectoryPath);
            DirectoryReader ireader = DirectoryReader.open(indexDirectory)) {
        IndexSearcher isearcher = new IndexSearcher(ireader);
        QueryParser parser = new QueryParser("title", new StandardAnalyzer());
        Query query = parser.parse("\"Lucene in Action\"");

        TopScoreDocCollector collector = TopScoreDocCollector.create(10);
        isearcher.search(query, new PositiveScoresOnlyCollector(collector));
        TopDocs topDocs = collector.topDocs();
        Set<String> fields = new HashSet<String>();
        fields.add("title");
        fields.add("isbn");
        for (ScoreDoc result : topDocs.scoreDocs) {
            Document doc = isearcher.doc(result.doc, fields);

            if (LOGGER.isInfoEnabled()) {
                // Document.get yields null for a missing stored field instead of failing with an NPE.
                LOGGER.info("--- Title :  " + doc.get("title") + " ---");
                LOGGER.info("--- ISBN : " + doc.get("isbn") + " ---");
                LOGGER.info(isearcher.explain(query, result.doc));
            }
        }
    }
}

From source file:com.vmware.xenon.services.common.LuceneDocumentIndexBackupService.java

License:Open Source License

/**
 * Rolls the index behind {@code newWriter} back to the given point in time.
 * <p>
 * All documents with an update time after {@code timeSnapshotBoundaryMicros} are paged through in self
 * link order; for every distinct self link among them,
 * {@link #updateCurrentAttributeForSelfLink(IndexSearcher, long, String, IndexWriter)} is invoked once.
 * Afterwards every document updated after the boundary is deleted through the writer.
 * <p>
 * The reader opened on the writer for the scan is closed before the final delete is issued.
 *
 * @param timeSnapshotBoundaryMicros the restore point in microseconds; documents updated later are removed
 * @param newWriter the writer of the index being restored
 * @throws IOException if reading from or writing to the index fails
 */
private void performTimeSnapshotRecovery(Long timeSnapshotBoundaryMicros, IndexWriter newWriter)
        throws IOException {

    // Matches every document that was written after the restore point. Used both for the scan below
    // and for the final delete.
    Query updateTimeQuery = LongPoint.newRangeQuery(ServiceDocument.FIELD_NAME_UPDATE_TIME_MICROS,
            timeSnapshotBoundaryMicros + 1, Long.MAX_VALUE);

    // For documents with metadata indexing enabled, the version which was current at
    // the restore time may have subsequently been marked as not current. Update the
    // current field for any such documents.

    try (DirectoryReader reader = DirectoryReader.open(newWriter, true, true)) {
        IndexSearcher searcher = new IndexSearcher(reader);

        Sort selfLinkSort = new Sort(new SortField(
                LuceneIndexDocumentHelper.createSortFieldPropertyName(ServiceDocument.FIELD_NAME_SELF_LINK),
                SortField.Type.STRING));

        final int pageSize = 10000;

        // Self links seen on the previous page; a link whose versions span a page boundary
        // must not be processed a second time.
        Set<String> prevPageLinks = new HashSet<>();
        ScoreDoc after = null;
        while (true) {
            TopDocs results = searcher.searchAfter(after, updateTimeQuery, pageSize, selfLinkSort, false,
                    false);
            if (results == null || results.scoreDocs == null || results.scoreDocs.length == 0) {
                break;
            }

            Set<String> pageLinks = new HashSet<>();
            DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
            for (ScoreDoc sd : results.scoreDocs) {
                visitor.reset(ServiceDocument.FIELD_NAME_SELF_LINK);
                searcher.doc(sd.doc, visitor);
                if (prevPageLinks.contains(visitor.documentSelfLink)) {
                    // Already handled on the previous page; remember it for the next page as well.
                    pageLinks.add(visitor.documentSelfLink);
                    continue;
                }

                if (!pageLinks.add(visitor.documentSelfLink)) {
                    // Already handled earlier on this page.
                    continue;
                }

                updateCurrentAttributeForSelfLink(searcher, timeSnapshotBoundaryMicros,
                        visitor.documentSelfLink, newWriter);
            }

            if (results.scoreDocs.length < pageSize) {
                break;
            }

            after = results.scoreDocs[results.scoreDocs.length - 1];
            prevPageLinks = pageLinks;
        }
    }

    // Now that metadata indexing attributes have been updated appropriately, delete any
    // documents which were created after the restore point.
    newWriter.deleteDocuments(updateTimeQuery);
}

From source file:com.vmware.xenon.services.common.LuceneDocumentIndexBackupService.java

License:Open Source License

/**
 * Resets the tombstone-time doc value of the highest version of {@code selfLink} that was written at or
 * before the snapshot boundary to {@code ACTIVE_DOCUMENT_TOMBSTONE_TIME}.
 * <p>
 * Does nothing if no such version exists or if the matching document carries no indexing id.
 *
 * @param searcher the searcher used to look up the document
 * @param timeSnapshotBoundaryMicros the upper bound (inclusive) of the update time, in microseconds
 * @param selfLink the self link whose versions are inspected
 * @param newWriter the writer through which the doc value is updated
 * @throws IOException if reading from or writing to the index fails
 */
private void updateCurrentAttributeForSelfLink(IndexSearcher searcher, long timeSnapshotBoundaryMicros,
        String selfLink, IndexWriter newWriter) throws IOException {

    // Versions of this self link that existed at the snapshot boundary.
    BooleanQuery.Builder versionsAtBoundary = new BooleanQuery.Builder();
    versionsAtBoundary.add(
            new TermQuery(new Term(ServiceDocument.FIELD_NAME_SELF_LINK, selfLink)),
            Occur.MUST);
    versionsAtBoundary.add(
            LongPoint.newRangeQuery(ServiceDocument.FIELD_NAME_UPDATE_TIME_MICROS, 0,
                    timeSnapshotBoundaryMicros),
            Occur.MUST);

    // Descending by version, so the single requested hit is the highest version.
    SortField byVersionDescending = new SortedNumericSortField(ServiceDocument.FIELD_NAME_VERSION,
            SortField.Type.LONG, true);

    TopDocs latest = searcher.search(versionsAtBoundary.build(), 1, new Sort(byVersionDescending), false,
            false);
    boolean nothingFound = latest == null || latest.scoreDocs == null || latest.scoreDocs.length == 0;
    if (nothingFound) {
        return;
    }

    DocumentStoredFieldVisitor idVisitor = new DocumentStoredFieldVisitor();
    idVisitor.reset(LuceneIndexDocumentHelper.FIELD_NAME_INDEXING_ID);
    searcher.doc(latest.scoreDocs[0].doc, idVisitor);

    String indexingId = idVisitor.documentIndexingId;
    if (indexingId == null) {
        return;
    }

    newWriter.updateNumericDocValue(
            new Term(LuceneIndexDocumentHelper.FIELD_NAME_INDEXING_ID, indexingId),
            LuceneIndexDocumentHelper.FIELD_NAME_INDEXING_METADATA_VALUE_TOMBSTONE_TIME,
            LuceneIndexDocumentHelper.ACTIVE_DOCUMENT_TOMBSTONE_TIME);
}

From source file:com.xiaomi.linden.core.LindenUtil.java

License:Apache License

/**
 * Get fields by doc id.
 * <p>
 * Builds a JSON object for one document. The id field is always included. Every other requested field is
 * read either from the field cache of the segment that contains the document or from the document's
 * stored fields, depending on the field's schema flags and on
 * {@code config.isEnableSourceFieldCache()}.
 *
 * @param indexSearcher The IndexSearcher
 * @param docId         Doc ID (relative to the top-level reader; it is translated to a segment-local id).
 * @param id            Id field value; if null the id is read from the field cache.
 * @param sourceFields  Specify the fields, if null or empty get all fields values.
 * @param config        the lindenConfig for search
 * @return JSON String which contains field values.
 * @throws IOException
 */

public static String getSource(IndexSearcher indexSearcher, int docId, String id, List<String> sourceFields,
        LindenConfig config) throws IOException {
    // Locate the segment (leaf reader) holding docId and convert docId to a segment-local id.
    List<AtomicReaderContext> leaves = indexSearcher.getIndexReader().leaves();
    int idx = ReaderUtil.subIndex(docId, leaves);
    AtomicReaderContext atomicReaderContext = leaves.get(idx);
    AtomicReader reader = atomicReaderContext.reader();
    int locDocId = docId - atomicReaderContext.docBase;
    JSONObject src = new JSONObject();
    String idFieldName = config.getSchema().getId();
    if (id != null) {
        src.put(idFieldName, id);
    } else {
        src.put(idFieldName,
                FieldCache.DEFAULT.getTerms(reader, idFieldName, false).get(locDocId).utf8ToString());
    }

    // Resolve the schemas of the fields to emit; the id field was already handled above.
    // NOTE(review): the result of config.getFieldSchema(sourceField) is added unchecked and dereferenced
    // below - confirm it cannot be null for an unknown field name.
    List<LindenFieldSchema> fields = new ArrayList<>();
    if (sourceFields != null && !sourceFields.isEmpty()) {
        for (String sourceField : sourceFields) {
            if (sourceField.equals(idFieldName)) {
                continue;
            }
            LindenFieldSchema fieldSchema = config.getFieldSchema(sourceField);
            fields.add(fieldSchema);
        }
    } else {
        fields.addAll(config.getSchema().getFields());
    }

    // Fields that have to be loaded from the stored document, keyed by field name.
    Map<String, LindenFieldSchema> storedFields = new HashMap<>();
    for (LindenFieldSchema fieldSchema : fields) {
        String name = fieldSchema.getName();
        // Becomes true when the value of this field should be read from the field cache.
        boolean fieldCache = false;
        if (fieldSchema.isMulti()) {
            /**
             * multi-field has multiple values, each value is indexed to the document according to field type
             * multi-field source value is in JSONArray format, something like "["MI4","MI Note","RedMI3"]"
             * multi-field source value is stored in BinaryDocValues
             */
            String blob = FieldCache.DEFAULT.getTerms(reader, name, false).get(locDocId).utf8ToString();
            if (StringUtils.isNotEmpty(blob)) {
                src.put(name, JSON.parseArray(blob));
            }
        } else if (fieldSchema.isDocValues()) {
            fieldCache = true;
        } else if (fieldSchema.isIndexed() && fieldSchema.isStored()) {
            // field cache doesn't support tokenized string field
            if (config.isEnableSourceFieldCache() && !possibleTokenizedString(fieldSchema)) {
                fieldCache = true;
            } else {
                storedFields.put(name, fieldSchema);
            }
        } else if (fieldSchema.isIndexed()) {
            // Indexed but not stored: only the field cache can provide a value, and only for fields
            // that are not possibly tokenized strings; otherwise the field is left out.
            if (!possibleTokenizedString(fieldSchema)) {
                fieldCache = true;
            }
        } else if (fieldSchema.isStored()) {
            storedFields.put(name, fieldSchema);
        }

        if (fieldCache) {
            // Read the value by type. From here on fieldCache is reused as "emit this value": an
            // empty/zero value is only emitted if actualContain(...) reports true for the document
            // (presumably meaning the document really has a value for the field - verify against
            // actualContain).
            Object val;
            switch (fieldSchema.getType()) {
            case STRING:
            case FACET:
                val = FieldCache.DEFAULT.getTerms(reader, name, false).get(locDocId).utf8ToString();
                String v = (String) val;
                fieldCache = !v.isEmpty() || actualContain(reader, name, locDocId);
                break;
            case INTEGER:
                val = FieldCache.DEFAULT.getInts(reader, name, false).get(locDocId);
                fieldCache = ((int) val) != 0 || actualContain(reader, name, locDocId);
                break;
            case LONG:
                val = FieldCache.DEFAULT.getLongs(reader, name, false).get(locDocId);
                fieldCache = ((long) val != 0) || actualContain(reader, name, locDocId);
                break;
            case FLOAT:
                val = FieldCache.DEFAULT.getFloats(reader, name, false).get(locDocId);
                fieldCache = ((float) val != 0) || actualContain(reader, name, locDocId);
                break;
            case DOUBLE:
                val = FieldCache.DEFAULT.getDoubles(reader, name, false).get(locDocId);
                fieldCache = ((double) val != 0) || actualContain(reader, name, locDocId);
                break;
            default:
                throw new IllegalStateException("Unsupported linden type");
            }
            if (fieldCache) {
                src.put(name, val);
            }
        }
    }

    // Load the remaining fields from the stored document in a single call, restricted to their names.
    if (!storedFields.isEmpty())

    {
        Document doc = indexSearcher.doc(docId, storedFields.keySet());
        for (IndexableField field : doc.getFields()) {
            String name = field.name();
            LindenFieldSchema schema = storedFields.get(name);
            Object obj = src.get(name);
            Object val = parseLindenValue(field.stringValue(), storedFields.get(name).getType());
            if (obj == null) {
                // First value seen for this field.
                if (schema.isMulti()) {
                    JSONArray array = new JSONArray();
                    array.add(val);
                    src.put(name, array);
                } else {
                    src.put(name, val);
                }
            } else if (obj instanceof JSONArray) {
                // Field already holds an array: append.
                ((JSONArray) obj).add(val);
            } else {
                // Second value for a field that held a single value: promote it to an array.
                JSONArray array = new JSONArray();
                array.add(obj);
                array.add(val);
                src.put(name, array);
            }
        }
    }
    return src.toJSONString();
}