Example usage for org.apache.lucene.index ReaderUtil subIndex

List of usage examples for org.apache.lucene.index ReaderUtil subIndex

Introduction

On this page you can find example usages of org.apache.lucene.index.ReaderUtil.subIndex.

Prototype

public static int subIndex(int n, List<LeafReaderContext> leaves) 

Source Link

Document

Returns the index of the searcher/reader for document n in the list used to construct this searcher/reader, i.e. the position in leaves of the leaf whose document range contains n.

Usage

From source file:com.joliciel.jochre.search.highlight.LuceneQueryHighlighter.java

License:Open Source License

/**
 * Collects, for each requested document, the occurrences of the terms held in {@code terms}
 * within the given fields.
 *
 * <p>The documents are first grouped by the index leaf that contains them, so that each leaf's
 * postings are walked once per field and term. Every occurrence found in a requested document
 * becomes a {@link HighlightTerm} carrying its character offsets and the image/page index
 * looked up in the document's {@link CoordinateStorage}. Only terms whose weight (as computed
 * by {@code weigh}) is strictly positive are kept.
 *
 * <p>Fields that were indexed without positions yield no postings enum; such fields are
 * skipped instead of failing with a {@link NullPointerException}.
 *
 * @param docIds the index-wide document ids to highlight
 * @param fields the names of the fields to search for terms
 * @return a map from each requested document id to its (possibly empty) set of highlight terms
 * @throws RuntimeException wrapping any {@link IOException} raised while reading the index
 */
public Map<Integer, Set<HighlightTerm>> highlight(Set<Integer> docIds, Set<String> fields) {
    try {
        Map<Integer, Set<HighlightTerm>> termMap = new HashMap<Integer, Set<HighlightTerm>>();
        Map<Integer, Document> idToDocMap = new HashMap<Integer, Document>();
        Map<Integer, CoordinateStorage> idToCoordinateStorageMap = new HashMap<Integer, CoordinateStorage>();

        // Group the requested documents by the leaf that holds them, and pre-load what the
        // per-occurrence loop below needs for each document.
        Map<Integer, Set<Integer>> myLeaves = new HashMap<Integer, Set<Integer>>();
        for (int docId : docIds) {
            Document luceneDoc = indexSearcher.doc(docId);
            idToDocMap.put(docId, luceneDoc);
            JochreIndexDocument jochreDoc = searchService.getJochreIndexDocument(indexSearcher, docId);
            idToCoordinateStorageMap.put(docId, jochreDoc.getCoordinateStorage());
            termMap.put(docId, new TreeSet<HighlightTerm>());
            int leaf = ReaderUtil.subIndex(docId, leaves);
            Set<Integer> docsPerLeaf = myLeaves.get(leaf);
            if (docsPerLeaf == null) {
                docsPerLeaf = new HashSet<Integer>();
                myLeaves.put(leaf, docsPerLeaf);
            }
            docsPerLeaf.add(docId);
        }

        for (Map.Entry<Integer, Set<Integer>> leafEntry : myLeaves.entrySet()) {
            int leaf = leafEntry.getKey();
            if (LOG.isTraceEnabled()) {
                LOG.trace("Searching leaf " + leaf);
            }
            Set<Integer> docsPerLeaf = leafEntry.getValue();
            AtomicReaderContext subContext = leaves.get(leaf);
            AtomicReader atomicReader = subContext.reader();

            int fieldCounter = 0;
            for (String field : fields) {
                fieldCounter++;
                if (LOG.isTraceEnabled()) {
                    LOG.trace("Field " + fieldCounter + ": " + field);
                }

                Terms atomicReaderTerms = atomicReader.terms(field);
                if (atomicReaderTerms == null) {
                    continue; // nothing to do
                }
                TermsEnum termsEnum = atomicReaderTerms.iterator(TermsEnum.EMPTY);

                int termCounter = 0;
                for (BytesRef term : terms) {
                    termCounter++;
                    if (LOG.isTraceEnabled()) {
                        LOG.trace("Searching for term " + termCounter + ": " + term.utf8ToString()
                                + " in field " + field);
                    }

                    if (!termsEnum.seekExact(term)) {
                        continue; // term not found
                    }

                    DocsAndPositionsEnum docPosEnum = termsEnum.docsAndPositions(null, null,
                            DocsAndPositionsEnum.FLAG_OFFSETS);
                    if (docPosEnum == null) {
                        // No positions were indexed for this field, so there are no
                        // occurrences to report for this term.
                        if (LOG.isTraceEnabled()) {
                            LOG.trace("No positions indexed for field " + field + ", skipping term");
                        }
                        continue;
                    }

                    int relativeDocId = docPosEnum.nextDoc();
                    while (relativeDocId != DocsAndPositionsEnum.NO_MORE_DOCS) {
                        // Postings use leaf-local ids; shift back to the index-wide id.
                        int docId = subContext.docBase + relativeDocId;
                        if (docsPerLeaf.contains(docId)) {
                            Document doc = idToDocMap.get(docId);
                            Set<HighlightTerm> highlightTerms = termMap.get(docId);
                            // Retrieve the term frequency in the current document
                            int freq = docPosEnum.freq();
                            if (LOG.isTraceEnabled()) {
                                String extId = doc.get("id");
                                String path = doc.get("path");
                                LOG.trace("Found " + freq + " matches for doc " + docId + ", extId: " + extId
                                        + ", path: " + path);
                            }

                            for (int i = 0; i < freq; i++) {
                                // nextPosition() must be called before the offsets are read
                                int position = docPosEnum.nextPosition();
                                int start = docPosEnum.startOffset();
                                int end = docPosEnum.endOffset();

                                if (LOG.isTraceEnabled()) {
                                    LOG.trace("Found match " + position + " at docId " + docId + ", field "
                                            + field + " start=" + start + ", end=" + end);
                                }

                                CoordinateStorage coordinateStorage = idToCoordinateStorageMap.get(docId);
                                int imageIndex = coordinateStorage.getImageIndex(start);
                                int pageIndex = coordinateStorage.getPageIndex(start);

                                HighlightTerm highlightTerm = new HighlightTerm(docId, field, start, end,
                                        imageIndex, pageIndex);
                                highlightTerm.setWeight(this.weigh(term));
                                if (highlightTerm.getWeight() > 0) {
                                    highlightTerms.add(highlightTerm);
                                }
                            }
                        }
                        relativeDocId = docPosEnum.nextDoc();
                    }
                } // next term
            } // next field
        } // next index leaf to search

        return termMap;
    } catch (IOException e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}

From source file:com.xiaomi.linden.core.LindenUtil.java

License:Apache License

/**
 * Reads the float value of a field for one document through the Lucene field cache.
 *
 * @param leaves    the leaf contexts of the index reader
 * @param docId     the index-wide document id
 * @param fieldName the field to read
 * @return the float value the field cache holds for the document
 * @throws IOException propagated from the field cache lookup
 */
public static Float getFieldFloatValue(List<AtomicReaderContext> leaves, int docId, String fieldName)
        throws IOException {
    int leafIndex = ReaderUtil.subIndex(docId, leaves);
    AtomicReaderContext leaf = leaves.get(leafIndex);
    FieldCache.Floats values = FieldCache.DEFAULT.getFloats(leaf.reader(), fieldName, false);
    // The cache is per leaf, so address the document by its leaf-local id.
    int localDocId = docId - leaf.docBase;
    return values.get(localDocId);
}

From source file:com.xiaomi.linden.core.LindenUtil.java

License:Apache License

/**
 * Reads the double value of a field for one document through the Lucene field cache.
 *
 * @param leaves    the leaf contexts of the index reader
 * @param docId     the index-wide document id
 * @param fieldName the field to read
 * @return the double value the field cache holds for the document
 * @throws IOException propagated from the field cache lookup
 */
public static Double getFieldDoubleValue(List<AtomicReaderContext> leaves, int docId, String fieldName)
        throws IOException {
    int leafIndex = ReaderUtil.subIndex(docId, leaves);
    AtomicReaderContext leaf = leaves.get(leafIndex);
    FieldCache.Doubles values = FieldCache.DEFAULT.getDoubles(leaf.reader(), fieldName, false);
    // The cache is per leaf, so address the document by its leaf-local id.
    int localDocId = docId - leaf.docBase;
    return values.get(localDocId);
}

From source file:com.xiaomi.linden.core.LindenUtil.java

License:Apache License

/**
 * Reads the string value of a field for one document through the Lucene field cache.
 *
 * @param leaves    the leaf contexts of the index reader
 * @param docId     the index-wide document id
 * @param fieldName the field to read
 * @return the field's term bytes for the document, decoded as UTF-8
 * @throws IOException propagated from the field cache lookup
 */
public static String getFieldStringValue(List<AtomicReaderContext> leaves, int docId, String fieldName)
        throws IOException {
    int leafIndex = ReaderUtil.subIndex(docId, leaves);
    AtomicReaderContext leaf = leaves.get(leafIndex);
    BinaryDocValues values = FieldCache.DEFAULT.getTerms(leaf.reader(), fieldName, false);
    // The cache is per leaf, so address the document by its leaf-local id.
    int localDocId = docId - leaf.docBase;
    return values.get(localDocId).utf8ToString();
}

From source file:com.xiaomi.linden.core.LindenUtil.java

License:Apache License

/**
 * Reads the long value of a field for one document through the Lucene field cache.
 *
 * @param leaves    the leaf contexts of the index reader
 * @param docId     the index-wide document id
 * @param fieldName the field to read
 * @return the long value the field cache holds for the document
 * @throws IOException propagated from the field cache lookup
 */
public static Long getFieldLongValue(List<AtomicReaderContext> leaves, int docId, String fieldName)
        throws IOException {
    int leafIndex = ReaderUtil.subIndex(docId, leaves);
    AtomicReaderContext leaf = leaves.get(leafIndex);
    FieldCache.Longs values = FieldCache.DEFAULT.getLongs(leaf.reader(), fieldName, false);
    // The cache is per leaf, so address the document by its leaf-local id.
    int localDocId = docId - leaf.docBase;
    return values.get(localDocId);
}

From source file:com.xiaomi.linden.core.LindenUtil.java

License:Apache License

/**
 * Reads the int value of a field for one document through the Lucene field cache.
 *
 * @param leaves    the leaf contexts of the index reader
 * @param docId     the index-wide document id
 * @param fieldName the field to read
 * @return the int value the field cache holds for the document
 * @throws IOException propagated from the field cache lookup
 */
public static Integer getFieldIntValue(List<AtomicReaderContext> leaves, int docId, String fieldName)
        throws IOException {
    int leafIndex = ReaderUtil.subIndex(docId, leaves);
    AtomicReaderContext leaf = leaves.get(leafIndex);
    FieldCache.Ints values = FieldCache.DEFAULT.getInts(leaf.reader(), fieldName, false);
    // The cache is per leaf, so address the document by its leaf-local id.
    int localDocId = docId - leaf.docBase;
    return values.get(localDocId);
}

From source file:com.xiaomi.linden.core.LindenUtil.java

License:Apache License

/**
 * Builds the JSON source of a document from its field values.
 *
 * <p>Depending on each field's schema, the value is read from the field cache or from the
 * document's stored fields. Multi-valued fields are read from the field cache as a JSON array
 * blob.
 *
 * @param indexSearcher The IndexSearcher
 * @param docId         Doc ID.
 * @param id            Id field value; when null, the id is read from the field cache
 * @param sourceFields  Specify the fields; if null or empty, get all field values.
 * @param config        the lindenConfig for search
 * @return JSON String which contains field values.
 * @throws IOException propagated from the underlying index reads
 */
public static String getSource(IndexSearcher indexSearcher, int docId, String id, List<String> sourceFields,
        LindenConfig config) throws IOException {
    // Locate the leaf holding the document and translate the id into that leaf's numbering.
    List<AtomicReaderContext> leaves = indexSearcher.getIndexReader().leaves();
    AtomicReaderContext leaf = leaves.get(ReaderUtil.subIndex(docId, leaves));
    AtomicReader reader = leaf.reader();
    int localDocId = docId - leaf.docBase;

    JSONObject source = new JSONObject();
    String idFieldName = config.getSchema().getId();
    String idValue = id;
    if (idValue == null) {
        idValue = FieldCache.DEFAULT.getTerms(reader, idFieldName, false).get(localDocId).utf8ToString();
    }
    source.put(idFieldName, idValue);

    // Resolve which field schemas to output; the id field has already been handled above.
    List<LindenFieldSchema> fields = new ArrayList<>();
    if (sourceFields == null || sourceFields.isEmpty()) {
        fields.addAll(config.getSchema().getFields());
    } else {
        for (String sourceField : sourceFields) {
            if (!sourceField.equals(idFieldName)) {
                fields.add(config.getFieldSchema(sourceField));
            }
        }
    }

    // Fields that must be loaded from the stored document are collected here and
    // fetched in a single call after the loop.
    Map<String, LindenFieldSchema> storedFields = new HashMap<>();
    for (LindenFieldSchema fieldSchema : fields) {
        String name = fieldSchema.getName();
        boolean readFromFieldCache = false;
        if (fieldSchema.isMulti()) {
            // A multi-field has multiple values, each indexed according to the field type.
            // Its source value is in JSONArray format, something like
            // "["MI4","MI Note","RedMI3"]", and is stored in BinaryDocValues.
            String blob = FieldCache.DEFAULT.getTerms(reader, name, false).get(localDocId).utf8ToString();
            if (StringUtils.isNotEmpty(blob)) {
                source.put(name, JSON.parseArray(blob));
            }
        } else if (fieldSchema.isDocValues()) {
            readFromFieldCache = true;
        } else if (fieldSchema.isIndexed()) {
            if (fieldSchema.isStored()) {
                // field cache doesn't support tokenized string field
                readFromFieldCache = config.isEnableSourceFieldCache() && !possibleTokenizedString(fieldSchema);
                if (!readFromFieldCache) {
                    storedFields.put(name, fieldSchema);
                }
            } else {
                readFromFieldCache = !possibleTokenizedString(fieldSchema);
            }
        } else if (fieldSchema.isStored()) {
            storedFields.put(name, fieldSchema);
        }

        if (!readFromFieldCache) {
            continue;
        }

        // An empty/zero cache value is only emitted when actualContain confirms it.
        Object value;
        boolean hasValue;
        switch (fieldSchema.getType()) {
        case STRING:
        case FACET: {
            String text = FieldCache.DEFAULT.getTerms(reader, name, false).get(localDocId).utf8ToString();
            value = text;
            hasValue = !text.isEmpty() || actualContain(reader, name, localDocId);
            break;
        }
        case INTEGER: {
            int number = FieldCache.DEFAULT.getInts(reader, name, false).get(localDocId);
            value = number;
            hasValue = number != 0 || actualContain(reader, name, localDocId);
            break;
        }
        case LONG: {
            long number = FieldCache.DEFAULT.getLongs(reader, name, false).get(localDocId);
            value = number;
            hasValue = number != 0 || actualContain(reader, name, localDocId);
            break;
        }
        case FLOAT: {
            float number = FieldCache.DEFAULT.getFloats(reader, name, false).get(localDocId);
            value = number;
            hasValue = number != 0 || actualContain(reader, name, localDocId);
            break;
        }
        case DOUBLE: {
            double number = FieldCache.DEFAULT.getDoubles(reader, name, false).get(localDocId);
            value = number;
            hasValue = number != 0 || actualContain(reader, name, localDocId);
            break;
        }
        default:
            throw new IllegalStateException("Unsupported linden type");
        }
        if (hasValue) {
            source.put(name, value);
        }
    }

    if (!storedFields.isEmpty()) {
        Document doc = indexSearcher.doc(docId, storedFields.keySet());
        for (IndexableField field : doc.getFields()) {
            String name = field.name();
            LindenFieldSchema schema = storedFields.get(name);
            Object parsed = parseLindenValue(field.stringValue(), schema.getType());
            Object existing = source.get(name);
            if (existing instanceof JSONArray) {
                // Further occurrence of an already multi-valued entry.
                ((JSONArray) existing).add(parsed);
            } else if (existing == null && !schema.isMulti()) {
                source.put(name, parsed);
            } else {
                // Either the first value of a multi field, or a second value for a name
                // that so far held a single value: both end up as an array.
                JSONArray values = new JSONArray();
                if (existing != null) {
                    values.add(existing);
                }
                values.add(parsed);
                source.put(name, values);
            }
        }
    }
    return source.toJSONString();
}

From source file:io.crate.execution.engine.collect.collectors.ScoreDocRowFunction.java

License:Apache License

/**
 * Positions all collector expressions on the document described by {@code input} and
 * returns the shared input row.
 *
 * @param input the score doc to apply; it is cast to {@link FieldDoc}
 * @return {@code inputRow}, or null when {@code input} is null
 * @throws RuntimeException wrapping any {@link IOException} raised while positioning an
 *                          expression
 */
@Nullable
@Override
public Row apply(@Nullable ScoreDoc input) {
    if (input == null) {
        return null;
    }
    FieldDoc fieldDoc = (FieldDoc) input;
    scorer.score(fieldDoc.score);
    for (OrderByCollectorExpression orderByExpression : orderByCollectorExpressions) {
        orderByExpression.setNextFieldDoc(fieldDoc);
    }

    // Resolve the leaf holding the document and the document's id within that leaf.
    List<LeafReaderContext> leaves = indexReader.leaves();
    LeafReaderContext leaf = leaves.get(ReaderUtil.subIndex(fieldDoc.doc, leaves));
    int leafDocId = fieldDoc.doc - leaf.docBase;
    try {
        for (LuceneCollectorExpression<?> expression : expressions) {
            expression.setNextReader(leaf);
            expression.setNextDocId(leafDocId);
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    return inputRow;
}

From source file:io.crate.execution.engine.fetch.FetchCollector.java

License:Apache License

/**
 * Builds a bucket holding one entry per requested document.
 *
 * <p>For every document id, the containing leaf is resolved, {@code setNextDocId} is called
 * with the leaf and the leaf-local id, and {@code row} is appended to the bucket.
 * NOTE(review): presumably {@code setNextDocId} repositions the expressions backing
 * {@code row} - confirm against the enclosing class.
 *
 * @param docIds the index-wide document ids to fetch
 * @return the bucket built from the collected rows
 * @throws IOException propagated from {@code setNextDocId}
 */
public StreamBucket collect(IntContainer docIds) throws IOException {
    StreamBucket.Builder bucketBuilder = new StreamBucket.Builder(streamers, ramAccountingContext);
    for (IntCursor docIdCursor : docIds) {
        int globalDocId = docIdCursor.value;
        LeafReaderContext leaf = readerContexts.get(ReaderUtil.subIndex(globalDocId, readerContexts));
        int leafDocId = globalDocId - leaf.docBase;
        setNextDocId(leaf, leafDocId);
        bucketBuilder.add(row);
    }
    return bucketBuilder.build();
}

From source file:io.crate.operation.collect.collectors.ScoreDocRowFunction.java

License:Apache License

/**
 * Positions all collector expressions on the document described by {@code input} and
 * returns the shared input row.
 *
 * @param input the score doc to apply; it is cast to {@link FieldDoc}
 * @return {@code inputRow}, or null when {@code input} is null
 */
@Nullable
@Override
public Row apply(@Nullable ScoreDoc input) {
    if (input == null) {
        return null;
    }
    FieldDoc fieldDoc = (FieldDoc) input;
    scorer.score(fieldDoc.score);
    for (OrderByCollectorExpression orderByExpression : orderByCollectorExpressions) {
        orderByExpression.setNextFieldDoc(fieldDoc);
    }

    // Resolve the leaf holding the document and the document's id within that leaf.
    List<AtomicReaderContext> leaves = indexReader.leaves();
    AtomicReaderContext leaf = leaves.get(ReaderUtil.subIndex(fieldDoc.doc, leaves));
    int leafDocId = fieldDoc.doc - leaf.docBase;
    for (LuceneCollectorExpression<?> expression : expressions) {
        expression.setNextReader(leaf);
        expression.setNextDocId(leafDocId);
    }
    return inputRow;
}