List of usage examples for org.apache.lucene.index ReaderUtil subIndex
public static int subIndex(int n, List<LeafReaderContext> leaves)
Returns the index of the leaf (sub-reader) containing document n in the list of leaves used to construct this searcher/reader. From source file:com.joliciel.jochre.search.highlight.LuceneQueryHighlighter.java
License:Open Source License
/**
 * Finds highlight terms for the given documents in the given fields.
 * <p>
 * Strategy: group the requested global doc ids by index leaf (segment) via
 * {@link ReaderUtil#subIndex}, then for each leaf walk the postings of every
 * query term in every requested field, keeping only hits whose global doc id
 * (docBase + segment-relative id) belongs to the requested set.
 *
 * @param docIds global Lucene doc ids to highlight
 * @param fields the index fields to scan for term matches
 * @return map from doc id to the (weighted, sorted) highlight terms found in it
 */
public Map<Integer, Set<HighlightTerm>> highlight(Set<Integer> docIds, Set<String> fields) {
    try {
        Map<Integer, Set<HighlightTerm>> termMap = new HashMap<Integer, Set<HighlightTerm>>();
        Map<Integer, Document> idToDocMap = new HashMap<Integer, Document>();
        Map<Integer, CoordinateStorage> idToCoordinateStorageMap = new HashMap<Integer, CoordinateStorage>();
        // leaf (segment) index -> set of requested global doc ids living in that leaf
        Map<Integer, Set<Integer>> myLeaves = new HashMap<Integer, Set<Integer>>();
        for (int docId : docIds) {
            Document luceneDoc = indexSearcher.doc(docId);
            idToDocMap.put(docId, luceneDoc);
            JochreIndexDocument jochreDoc = searchService.getJochreIndexDocument(indexSearcher, docId);
            idToCoordinateStorageMap.put(docId, jochreDoc.getCoordinateStorage());
            // TreeSet keeps highlight terms in their natural (sorted) order
            termMap.put(docId, new TreeSet<HighlightTerm>());
            int leaf = ReaderUtil.subIndex(docId, leaves);
            Set<Integer> docsPerLeaf = myLeaves.get(leaf);
            if (docsPerLeaf == null) {
                docsPerLeaf = new HashSet<Integer>();
                myLeaves.put(leaf, docsPerLeaf);
            }
            docsPerLeaf.add(docId);
        }
        for (int leaf : myLeaves.keySet()) {
            if (LOG.isTraceEnabled())
                LOG.trace("Searching leaf " + leaf);
            Set<Integer> docsPerLeaf = myLeaves.get(leaf);
            AtomicReaderContext subContext = leaves.get(leaf);
            AtomicReader atomicReader = subContext.reader();
            int fieldCounter = 0;
            for (String field : fields) {
                fieldCounter++;
                if (LOG.isTraceEnabled())
                    LOG.trace("Field " + fieldCounter + ": " + field);
                Terms atomicReaderTerms = atomicReader.terms(field);
                if (atomicReaderTerms == null) {
                    continue; // field has no terms in this segment: nothing to do
                }
                TermsEnum termsEnum = atomicReaderTerms.iterator(TermsEnum.EMPTY);
                int termCounter = 0;
                // 'terms' is the field of query terms collected elsewhere in this class
                for (BytesRef term : terms) {
                    termCounter++;
                    if (LOG.isTraceEnabled())
                        LOG.trace("Searching for term " + termCounter + ": " + term.utf8ToString()
                                + " in field " + field);
                    if (!termsEnum.seekExact(term)) {
                        continue; // term not found in this segment/field
                    }
                    // FLAG_OFFSETS: we need character offsets to map matches to coordinates
                    DocsAndPositionsEnum docPosEnum = termsEnum.docsAndPositions(null, null,
                            DocsAndPositionsEnum.FLAG_OFFSETS);
                    int relativeDocId = docPosEnum.nextDoc();
                    while (relativeDocId != DocsAndPositionsEnum.NO_MORE_DOCS) {
                        // convert segment-relative doc id back to a global one
                        int docId = subContext.docBase + relativeDocId;
                        if (docsPerLeaf.contains(docId)) {
                            Document doc = idToDocMap.get(docId);
                            Set<HighlightTerm> highlightTerms = termMap.get(docId);
                            // Retrieve the term frequency in the current document
                            int freq = docPosEnum.freq();
                            if (LOG.isTraceEnabled()) {
                                String extId = doc.get("id");
                                String path = doc.get("path");
                                LOG.trace("Found " + freq + " matches for doc " + docId + ", extId: "
                                        + extId + ", path: " + path);
                            }
                            for (int i = 0; i < freq; i++) {
                                int position = docPosEnum.nextPosition();
                                int start = docPosEnum.startOffset();
                                int end = docPosEnum.endOffset();
                                if (LOG.isTraceEnabled())
                                    LOG.trace("Found match " + position + " at docId " + docId + ", field "
                                            + field + " start=" + start + ", end=" + end);
                                // translate the character offset into image/page coordinates
                                CoordinateStorage coordinateStorage = idToCoordinateStorageMap.get(docId);
                                int imageIndex = coordinateStorage.getImageIndex(start);
                                int pageIndex = coordinateStorage.getPageIndex(start);
                                HighlightTerm highlightTerm = new HighlightTerm(docId, field, start, end,
                                        imageIndex, pageIndex);
                                highlightTerm.setWeight(this.weigh(term));
                                // drop zero-weight matches
                                if (highlightTerm.getWeight() > 0)
                                    highlightTerms.add(highlightTerm);
                            }
                        }
                        relativeDocId = docPosEnum.nextDoc();
                    }
                } // next term
            } // next field
        } // next index leaf to search
        return termMap;
    } catch (IOException e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}
From source file:com.xiaomi.linden.core.LindenUtil.java
License:Apache License
/**
 * Looks up the float value of {@code fieldName} for a global doc id using the field cache.
 *
 * @param leaves    the leaf contexts of the index reader
 * @param docId     global (top-level) document id
 * @param fieldName name of the numeric field to read
 * @return the field's float value for that document
 * @throws IOException if the field cache cannot be populated
 */
public static Float getFieldFloatValue(List<AtomicReaderContext> leaves, int docId, String fieldName)
        throws IOException {
    // locate the leaf containing this doc, then translate to a segment-local id
    int leafIndex = ReaderUtil.subIndex(docId, leaves);
    AtomicReaderContext leafContext = leaves.get(leafIndex);
    int localDocId = docId - leafContext.docBase;
    FieldCache.Floats values = FieldCache.DEFAULT.getFloats(leafContext.reader(), fieldName, false);
    return values.get(localDocId);
}
From source file:com.xiaomi.linden.core.LindenUtil.java
License:Apache License
/**
 * Looks up the double value of {@code fieldName} for a global doc id using the field cache.
 *
 * @param leaves    the leaf contexts of the index reader
 * @param docId     global (top-level) document id
 * @param fieldName name of the numeric field to read
 * @return the field's double value for that document
 * @throws IOException if the field cache cannot be populated
 */
public static Double getFieldDoubleValue(List<AtomicReaderContext> leaves, int docId, String fieldName)
        throws IOException {
    // locate the leaf containing this doc, then translate to a segment-local id
    int leafIndex = ReaderUtil.subIndex(docId, leaves);
    AtomicReaderContext leafContext = leaves.get(leafIndex);
    int localDocId = docId - leafContext.docBase;
    FieldCache.Doubles values = FieldCache.DEFAULT.getDoubles(leafContext.reader(), fieldName, false);
    return values.get(localDocId);
}
From source file:com.xiaomi.linden.core.LindenUtil.java
License:Apache License
/**
 * Looks up the string value of {@code fieldName} for a global doc id using the field cache.
 *
 * @param leaves    the leaf contexts of the index reader
 * @param docId     global (top-level) document id
 * @param fieldName name of the field to read
 * @return the field's value decoded as a UTF-8 string
 * @throws IOException if the field cache cannot be populated
 */
public static String getFieldStringValue(List<AtomicReaderContext> leaves, int docId, String fieldName)
        throws IOException {
    // locate the leaf containing this doc, then translate to a segment-local id
    int leafIndex = ReaderUtil.subIndex(docId, leaves);
    AtomicReaderContext leafContext = leaves.get(leafIndex);
    BinaryDocValues values = FieldCache.DEFAULT.getTerms(leafContext.reader(), fieldName, false);
    return values.get(docId - leafContext.docBase).utf8ToString();
}
From source file:com.xiaomi.linden.core.LindenUtil.java
License:Apache License
/**
 * Looks up the long value of {@code fieldName} for a global doc id using the field cache.
 *
 * @param leaves    the leaf contexts of the index reader
 * @param docId     global (top-level) document id
 * @param fieldName name of the numeric field to read
 * @return the field's long value for that document
 * @throws IOException if the field cache cannot be populated
 */
public static Long getFieldLongValue(List<AtomicReaderContext> leaves, int docId, String fieldName)
        throws IOException {
    // locate the leaf containing this doc, then translate to a segment-local id
    int leafIndex = ReaderUtil.subIndex(docId, leaves);
    AtomicReaderContext leafContext = leaves.get(leafIndex);
    int localDocId = docId - leafContext.docBase;
    FieldCache.Longs values = FieldCache.DEFAULT.getLongs(leafContext.reader(), fieldName, false);
    return values.get(localDocId);
}
From source file:com.xiaomi.linden.core.LindenUtil.java
License:Apache License
/**
 * Looks up the int value of {@code fieldName} for a global doc id using the field cache.
 *
 * @param leaves    the leaf contexts of the index reader
 * @param docId     global (top-level) document id
 * @param fieldName name of the numeric field to read
 * @return the field's int value for that document
 * @throws IOException if the field cache cannot be populated
 */
public static Integer getFieldIntValue(List<AtomicReaderContext> leaves, int docId, String fieldName)
        throws IOException {
    // locate the leaf containing this doc, then translate to a segment-local id
    int leafIndex = ReaderUtil.subIndex(docId, leaves);
    AtomicReaderContext leafContext = leaves.get(leafIndex);
    int localDocId = docId - leafContext.docBase;
    FieldCache.Ints values = FieldCache.DEFAULT.getInts(leafContext.reader(), fieldName, false);
    return values.get(localDocId);
}
From source file:com.xiaomi.linden.core.LindenUtil.java
License:Apache License
/**
 * Get fields by doc id.
 *
 * @param indexSearcher The IndexSearcher
 * @param docId Doc ID.
 * @param id Id field value
 * @param sourceFields Specify the fields, if null get all fields values.
 * @param config the lindenConfig for search
 * @return JSON String which contains field values.
 * @throws IOException
 */
public static String getSource(IndexSearcher indexSearcher, int docId, String id, List<String> sourceFields,
        LindenConfig config) throws IOException {
    // Resolve the leaf (segment) holding this doc and its segment-local id.
    List<AtomicReaderContext> leaves = indexSearcher.getIndexReader().leaves();
    int idx = ReaderUtil.subIndex(docId, leaves);
    AtomicReaderContext atomicReaderContext = leaves.get(idx);
    AtomicReader reader = atomicReaderContext.reader();
    int locDocId = docId - atomicReaderContext.docBase;
    JSONObject src = new JSONObject();
    String idFieldName = config.getSchema().getId();
    // Use the caller-provided id when available; otherwise read it from doc values.
    if (id != null) {
        src.put(idFieldName, id);
    } else {
        src.put(idFieldName,
                FieldCache.DEFAULT.getTerms(reader, idFieldName, false).get(locDocId).utf8ToString());
    }
    // Determine which field schemas to fetch: the explicit list, or the whole schema.
    List<LindenFieldSchema> fields = new ArrayList<>();
    if (sourceFields != null && !sourceFields.isEmpty()) {
        for (String sourceField : sourceFields) {
            if (sourceField.equals(idFieldName)) {
                continue; // id already added above
            }
            LindenFieldSchema fieldSchema = config.getFieldSchema(sourceField);
            fields.add(fieldSchema);
        }
    } else {
        fields.addAll(config.getSchema().getFields());
    }
    // Fields we could not serve from the field cache; fetched as stored fields below.
    Map<String, LindenFieldSchema> storedFields = new HashMap<>();
    for (LindenFieldSchema fieldSchema : fields) {
        String name = fieldSchema.getName();
        boolean fieldCache = false;
        if (fieldSchema.isMulti()) {
            /**
             * multi-field has multiple values, each value is indexed to the document according to field type
             * multi-field source value is in JSONArray format, something like "["MI4","MI Note","RedMI3"]"
             * multi-field source value is stored in BinaryDocValues
             */
            String blob = FieldCache.DEFAULT.getTerms(reader, name, false).get(locDocId).utf8ToString();
            if (StringUtils.isNotEmpty(blob)) {
                src.put(name, JSON.parseArray(blob));
            }
        } else if (fieldSchema.isDocValues()) {
            fieldCache = true;
        } else if (fieldSchema.isIndexed() && fieldSchema.isStored()) {
            // field cache doesn't support tokenized string field
            if (config.isEnableSourceFieldCache() && !possibleTokenizedString(fieldSchema)) {
                fieldCache = true;
            } else {
                storedFields.put(name, fieldSchema);
            }
        } else if (fieldSchema.isIndexed()) {
            if (!possibleTokenizedString(fieldSchema)) {
                fieldCache = true;
            }
        } else if (fieldSchema.isStored()) {
            storedFields.put(name, fieldSchema);
        }
        if (fieldCache) {
            Object val;
            // Read from the field cache. A default value (0 / empty string) is ambiguous:
            // it may mean "field absent", so re-check membership via actualContain before
            // trusting it; fieldCache is reused as "value really present" below.
            switch (fieldSchema.getType()) {
            case STRING:
            case FACET:
                val = FieldCache.DEFAULT.getTerms(reader, name, false).get(locDocId).utf8ToString();
                String v = (String) val;
                fieldCache = !v.isEmpty() || actualContain(reader, name, locDocId);
                break;
            case INTEGER:
                val = FieldCache.DEFAULT.getInts(reader, name, false).get(locDocId);
                fieldCache = ((int) val) != 0 || actualContain(reader, name, locDocId);
                break;
            case LONG:
                val = FieldCache.DEFAULT.getLongs(reader, name, false).get(locDocId);
                fieldCache = ((long) val != 0) || actualContain(reader, name, locDocId);
                break;
            case FLOAT:
                val = FieldCache.DEFAULT.getFloats(reader, name, false).get(locDocId);
                fieldCache = ((float) val != 0) || actualContain(reader, name, locDocId);
                break;
            case DOUBLE:
                val = FieldCache.DEFAULT.getDoubles(reader, name, false).get(locDocId);
                fieldCache = ((double) val != 0) || actualContain(reader, name, locDocId);
                break;
            default:
                throw new IllegalStateException("Unsupported linden type");
            }
            if (fieldCache) {
                src.put(name, val);
            }
        }
    }
    // Fetch the remaining fields from stored document fields in one pass.
    if (!storedFields.isEmpty()) {
        Document doc = indexSearcher.doc(docId, storedFields.keySet());
        for (IndexableField field : doc.getFields()) {
            String name = field.name();
            LindenFieldSchema schema = storedFields.get(name);
            Object obj = src.get(name);
            Object val = parseLindenValue(field.stringValue(), storedFields.get(name).getType());
            if (obj == null) {
                // first value: wrap in an array for multi-fields, else store directly
                if (schema.isMulti()) {
                    JSONArray array = new JSONArray();
                    array.add(val);
                    src.put(name, array);
                } else {
                    src.put(name, val);
                }
            } else if (obj instanceof JSONArray) {
                ((JSONArray) obj).add(val);
            } else {
                // second value for a field previously stored as a scalar: promote to array
                JSONArray array = new JSONArray();
                array.add(obj);
                array.add(val);
                src.put(name, array);
            }
        }
    }
    return src.toJSONString();
}
From source file:io.crate.execution.engine.collect.collectors.ScoreDocRowFunction.java
License:Apache License
@Nullable @Override//from ww w . j a va 2s . co m public Row apply(@Nullable ScoreDoc input) { if (input == null) { return null; } FieldDoc fieldDoc = (FieldDoc) input; scorer.score(fieldDoc.score); for (OrderByCollectorExpression orderByCollectorExpression : orderByCollectorExpressions) { orderByCollectorExpression.setNextFieldDoc(fieldDoc); } List<LeafReaderContext> leaves = indexReader.leaves(); int readerIndex = ReaderUtil.subIndex(fieldDoc.doc, leaves); LeafReaderContext subReaderContext = leaves.get(readerIndex); int subDoc = fieldDoc.doc - subReaderContext.docBase; for (LuceneCollectorExpression<?> expression : expressions) { try { expression.setNextReader(subReaderContext); expression.setNextDocId(subDoc); } catch (IOException e) { throw new RuntimeException(e); } } return inputRow; }
From source file:io.crate.execution.engine.fetch.FetchCollector.java
License:Apache License
public StreamBucket collect(IntContainer docIds) throws IOException { StreamBucket.Builder builder = new StreamBucket.Builder(streamers, ramAccountingContext); for (IntCursor cursor : docIds) { int docId = cursor.value; int readerIndex = ReaderUtil.subIndex(docId, readerContexts); LeafReaderContext subReaderContext = readerContexts.get(readerIndex); setNextDocId(subReaderContext, docId - subReaderContext.docBase); builder.add(row);//from w w w .j a va 2s. com } return builder.build(); }
From source file:io.crate.operation.collect.collectors.ScoreDocRowFunction.java
License:Apache License
@Nullable @Override//w ww . j a va2s. c om public Row apply(@Nullable ScoreDoc input) { if (input == null) { return null; } FieldDoc fieldDoc = (FieldDoc) input; scorer.score(fieldDoc.score); for (OrderByCollectorExpression orderByCollectorExpression : orderByCollectorExpressions) { orderByCollectorExpression.setNextFieldDoc(fieldDoc); } List<AtomicReaderContext> leaves = indexReader.leaves(); int readerIndex = ReaderUtil.subIndex(fieldDoc.doc, leaves); AtomicReaderContext subReaderContext = leaves.get(readerIndex); int subDoc = fieldDoc.doc - subReaderContext.docBase; for (LuceneCollectorExpression<?> expression : expressions) { expression.setNextReader(subReaderContext); expression.setNextDocId(subDoc); } return inputRow; }