Usage examples for org.apache.lucene.index.SegmentReader#directory()
Method signature: public Directory directory()
From source file:org.apache.blur.filter.FilterCache.java
License:Apache License
/**
 * Resolves the {@link Directory} that backs the given segment reader.
 *
 * @param reader segment reader to inspect
 * @return the directory the segment's files live in
 */
private Directory getDirectory(SegmentReader reader) {
    return reader.directory();
}
From source file:org.apache.blur.lucene.warmup.IndexWarmup.java
License:Apache License
/**
 * Locates the {@link Directory} holding the named segment anywhere inside the
 * given reader, descending into composite readers via their leaf contexts.
 *
 * @param reader      reader to search; atomic readers are delegated directly
 * @param segmentName name of the segment to find
 * @param context     label used only for log messages
 * @return the segment's directory, or {@code null} if the segment is not found
 *         or the index was closed while scanning
 */
private Directory getDirectory(IndexReader reader, String segmentName, String context) {
    if (reader instanceof AtomicReader) {
        // Atomic reader: no leaves to walk, use the single-reader overload.
        return getDirectory((AtomicReader) reader, segmentName, context);
    }
    for (IndexReaderContext leaf : reader.getContext().leaves()) {
        if (_isClosed.get()) {
            // Bail out early once the index has been closed under us.
            LOG.info("Context [{0}] index closed", context);
            return null;
        }
        AtomicReader leafReader = ((AtomicReaderContext) leaf).reader();
        if (!(leafReader instanceof SegmentReader)) {
            continue;
        }
        SegmentReader segmentReader = (SegmentReader) leafReader;
        if (segmentReader.getSegmentName().equals(segmentName)) {
            return segmentReader.directory();
        }
    }
    return null;
}
From source file:org.apache.blur.lucene.warmup.IndexWarmup.java
License:Apache License
private Directory getDirectory(AtomicReader atomicReader, String segmentName, String context) { if (atomicReader instanceof SegmentReader) { SegmentReader segmentReader = (SegmentReader) atomicReader; if (segmentReader.getSegmentName().equals(segmentName)) { return segmentReader.directory(); }//from w ww. j a v a 2 s. c om } return null; }
From source file:org.apache.blur.lucene.warmup.IndexWarmup.java
License:Apache License
/**
 * Samples the given reader's segment and records per-field trace results.
 * <p>
 * If a previously written sample file exists in the segment's directory it is
 * read back; otherwise every field of the segment is traced and the results
 * persisted so later warmups can skip the sampling pass.
 *
 * @param atomicReader reader to sample; only {@link SegmentReader} instances
 *                     are traced, anything else yields an empty result map
 * @param context      label used purely for log messages
 * @return map of segment name to its trace results; empty when the reader is
 *         not a SegmentReader or its directory is not traceable; {@code null}
 *         when the index was closed mid-sampling
 * @throws IOException on directory read/write failure
 */
public Map<String, List<IndexTracerResult>> sampleIndex(AtomicReader atomicReader, String context)
        throws IOException {
    Map<String, List<IndexTracerResult>> results = new HashMap<String, List<IndexTracerResult>>();
    if (atomicReader instanceof SegmentReader) {
        SegmentReader segmentReader = (SegmentReader) atomicReader;
        Directory directory = segmentReader.directory();
        // Tracing relies on the directory recording its own read access patterns.
        if (!(directory instanceof TraceableDirectory)) {
            LOG.info("Context [{1}] cannot warmup directory [{0}] needs to be a TraceableDirectory.", directory,
                    context);
            return results;
        }
        IndexTracer tracer = new IndexTracer((TraceableDirectory) directory, _maxSampleSize);
        String fileName = getSampleFileName(segmentReader.getSegmentName());
        List<IndexTracerResult> segmentTraces = new ArrayList<IndexTracerResult>();
        if (directory.fileExists(fileName)) {
            // Cached sample from a previous run: read it back instead of re-tracing.
            IndexInput input = directory.openInput(fileName, IOContext.READONCE);
            segmentTraces = read(input);
            input.close();
        } else {
            // NOTE(review): AtomicReader.fields() may return null for a segment with
            // no postings — TODO confirm whether that can occur for these segments.
            Fields fields = atomicReader.fields();
            for (String field : fields) {
                LOG.debug("Context [{1}] sampling field [{0}].", field, context);
                Terms terms = fields.terms(field);
                boolean hasOffsets = terms.hasOffsets();
                boolean hasPayloads = terms.hasPayloads();
                boolean hasPositions = terms.hasPositions();
                // The tracer must be armed for this field before the trace is run.
                tracer.initTrace(segmentReader, field, hasPositions, hasPayloads, hasOffsets);
                IndexTracerResult result = tracer.runTrace(terms);
                segmentTraces.add(result);
            }
            if (_isClosed.get()) {
                // Index closed while sampling: abandon without persisting partial traces.
                LOG.info("Context [{0}] index closed", context);
                return null;
            }
            // Persist the traces so the next warmup takes the fast path above.
            IndexOutput output = directory.createOutput(fileName, IOContext.DEFAULT);
            write(segmentTraces, output);
            output.close();
        }
        results.put(segmentReader.getSegmentName(), segmentTraces);
    }
    return results;
}
From source file:org.apache.solr.codecs.onsql.ONSQLStoredFieldsWriter.java
License:Apache License
@Override public int merge(MergeState mergeState) throws IOException { log.debug("merge has been called"); // check for our primary key completeness /*/*from w w w. j a va 2 s .com*/ * instead of copying stored fields we need to update mapping table and remove non-existing entries * several cases: doc may be deleted and non existing in new segment * doc may be deleted, yet similar doc might be found * thing is, these fields might be updated, so using linkup is not so productive * because in fact there can be two diffirent versions of documents present after the update * one, old version, deleted now, and one new version * hmmm, since we're using custom primary key, it means on the update entry with our fields will be overwritten, * also, worth considering, that merge procedure only creates new segment based on previous ones, * deletion of old segments is happening later, via Directory.deletefile API * have to check on this. * * first, since it's merge, we assume entries in kvstore already existing * so, first we search for our segment key: segID-docID->customPK, then copy it * all other fields will be left unchanged * * have to consider the failure case, when merge might be aborted in the middle of it * since merge first copies data to new segment, we are safe here, as in worst case we will lose just the links * for this new segment * */ int docCount = 0; int idx = 0; String new_segment_kvstore_key_part = Base62Converter .fromBase10(mergeState.segmentInfo.name.concat(STORED_FIELDS_EXTENSION).hashCode()); for (AtomicReader reader : mergeState.readers) { final SegmentReader seg_reader = mergeState.matchingSegmentReaders[idx++]; ONSQLStoredFieldsReader fields_reader = null; if (seg_reader != null) { final StoredFieldsReader fieldsReader = seg_reader.getFieldsReader(); // we can do the merge only if the matching reader is also a ONSQLStoredFieldsReader if (fieldsReader != null && fieldsReader instanceof ONSQLStoredFieldsReader) { fields_reader = 
(ONSQLStoredFieldsReader) fieldsReader; } else throw new IllegalStateException("incorrect fieldsreader class at merge procedure, is " + fieldsReader.getClass().getName() + ", only ONSQLStoredFieldsReader is accepted"); } String current_segment = seg_reader.getSegmentName().concat(STORED_FIELDS_EXTENSION); log.debug("current segment name = " + seg_reader.getSegmentName()); // we assume reader always uses the instance of FSDirectory, so that we can extract directory path String dir = ((FSDirectory) seg_reader.directory()).getDirectory().getAbsolutePath(); final int maxDoc = reader.maxDoc(); final Bits liveDocs = reader.getLiveDocs(); boolean canmerge = ONSQLKVstoreHandler.getInstance().getAllowWriting(this.tdir); for (int i = nextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = nextLiveDoc(i + 1, liveDocs, maxDoc)) { ++docCount; if (canmerge) { // retrieve link using our doc id Key doc_key = Key.createKey(Arrays.asList(Base62Converter.fromBase10(dir.hashCode()), Base62Converter.fromBase10(current_segment.hashCode()), Base62Converter.fromBase10(i))); Iterator<Key> kv_it = kvstore.multiGetKeysIterator(Direction.FORWARD, 1, doc_key, null, Depth.PARENT_AND_DESCENDANTS); if (!kv_it.hasNext()) throw new IllegalStateException( "unable to get doc segment key using key id=" + doc_key.toString()); Key entry_key = kv_it.next(); // create link to doc id for new segment Key link_key = Key.createKey(Arrays.asList(Base62Converter.fromBase10(dir.hashCode()), new_segment_kvstore_key_part, Base62Converter.fromBase10(numDocsWritten)), entry_key.getMinorPath()); log.debug("putting link key=" + link_key.toString()); kvstore.put(link_key, Value.EMPTY_VALUE); // next add backref Key backref_key = Key.createKey(entry_key.getMinorPath(), Arrays.asList("_1", Base62Converter.fromBase10(dir.hashCode()), new_segment_kvstore_key_part, Base62Converter.fromBase10(numDocsWritten))); kvstore.put(backref_key, Value.EMPTY_VALUE); log.debug("putting backref key=" + backref_key.toString()); 
//addDocument(doc, mergeState.fieldInfos); } else log.debug("merging is not allowed, skipping doc with internal id=" + i); ++numDocsWritten; mergeState.checkAbort.work(300); } } finish(mergeState.fieldInfos, docCount); return docCount; }
From source file:org.elasticsearch.index.shard.ShardUtils.java
License:Apache License
@Nullable private static ShardId extractShardId(SegmentReader reader) { if (reader != null) { assert reader.getRefCount() > 0 : "SegmentReader is already closed"; // reader.directory doesn't call ensureOpen for internal reasons. Store.StoreDirectory storeDir = DirectoryUtils.getStoreDirectory(reader.directory()); if (storeDir != null) { return storeDir.shardId(); }/*w ww.j a v a 2 s . c om*/ } return null; }
From source file:perf.DiskUsage.java
License:Apache License
/**
 * Computes per-field disk usage for a segment by mapping per-field codec file
 * suffixes back to field names and attributing each file's size accordingly.
 * Also counts, per field, how many documents carry a value for it.
 *
 * @param reader segment to analyze
 * @return per-field statistics ordered by {@code FieldStats}' natural order
 * @throws Exception on any directory or codec failure
 */
static Set<FieldStats> analyzeFields(SegmentReader reader) throws Exception {
    Map<String, FieldStats> stats = new HashMap<>();
    Map<String, String> dvSuffixes = new HashMap<>();
    Map<String, String> postingsSuffixes = new HashMap<>();
    for (FieldInfo field : reader.getFieldInfos()) {
        FieldStats fieldStats = new FieldStats(field.name);
        stats.put(field.name, fieldStats);
        // Per-field formats tag each field's files with a suffix; remember which
        // suffix belongs to which field so file sizes can be attributed below.
        Map<String, String> attributes = field.attributes();
        if (attributes != null) {
            String postingsSuffix = attributes.get(PerFieldPostingsFormat.PER_FIELD_SUFFIX_KEY);
            if (postingsSuffix != null) {
                postingsSuffixes.put(postingsSuffix, field.name);
            }
            String dvSuffix = attributes.get(PerFieldDocValuesFormat.PER_FIELD_SUFFIX_KEY);
            if (dvSuffix != null) {
                dvSuffixes.put(dvSuffix, field.name);
            }
        }
        // Count docs that actually have a value for this field.
        Bits docsWithField = reader.getDocsWithField(field.name);
        if (docsWithField != null) {
            int count = 0;
            for (int docID = 0; docID < reader.maxDoc(); docID++) {
                if (docsWithField.get(docID)) {
                    count++;
                }
            }
            fieldStats.docCountWithField = count;
        }
    }
    Directory directory = reader.directory();
    for (String file : directory.listAll()) {
        String suffix = parseSuffix(file);
        long bytes = directory.fileLength(file);
        if (suffix != null) {
            // Suffixed file: attribute its full size to the owning field's bucket.
            switch (IndexFileNames.getExtension(file)) {
            case "dvd":
            case "dvm":
                stats.get(dvSuffixes.get(suffix)).dvBytes += bytes;
                break;
            case "tim":
            case "tip":
                stats.get(postingsSuffixes.get(suffix)).termsBytes += bytes;
                break;
            case "doc":
                stats.get(postingsSuffixes.get(suffix)).postingsBytes += bytes;
                break;
            case "pos":
            case "pay":
                stats.get(postingsSuffixes.get(suffix)).proxBytes += bytes;
                break;
            default:
                throw new AssertionError("unexpected suffixed file: " + file);
            }
        } else {
            // not a per-field file, but we can hackishly do this for the points case.
            if ("dii".equals(IndexFileNames.getExtension(file))) {
                System.err.println(
                        "retrieving per-field point usage, if you see a scary corruption error, its probably just this tool!!!!");
                try (ChecksumIndexInput in = directory.openChecksumInput(file, IOContext.READONCE)) {
                    // fail hard if its not exactly the version we do this hack for.
                    CodecUtil.checkIndexHeader(in, "Lucene60PointsFormatMeta", 0, 0,
                            reader.getSegmentInfo().info.getId(), "");
                    int fieldCount = in.readVInt();
                    // strangely, bkd offsets are not in any guaranteed order
                    TreeMap<Long, String> offsetToField = new TreeMap<>();
                    for (int i = 0; i < fieldCount; i++) {
                        int field = in.readVInt();
                        long offset = in.readVLong();
                        offsetToField.put(offset, reader.getFieldInfos().fieldInfo(field).name);
                    }
                    // now we can traverse in order; each field's point bytes are the
                    // gap between consecutive offsets in the BKD data file.
                    long previousOffset = 0;
                    for (Map.Entry<Long, String> entry : offsetToField.entrySet()) {
                        long offset = entry.getKey();
                        String field = entry.getValue();
                        stats.get(field).pointsBytes += (offset - previousOffset);
                        previousOffset = offset;
                    }
                    CodecUtil.checkFooter(in);
                }
            }
        }
    }
    return new TreeSet<FieldStats>(stats.values());
}
From source file:perf.DiskUsage.java
License:Apache License
static void report(SegmentReader reader, Set<FieldStats> stats) throws Exception { long totalSize = 0; long storeSize = 0; long vectorSize = 0; long normsSize = 0; long dvsSize = 0; long postingsSize = 0; long pointsSize = 0; long termsSize = 0; long proxSize = 0; for (String file : reader.directory().listAll()) { long size = reader.directory().fileLength(file); totalSize += size;/*from w w w .j av a2s.c o m*/ String extension = IndexFileNames.getExtension(file); if (extension != null) { switch (extension) { case "fdt": case "fdx": storeSize += size; break; case "tvx": case "tvd": vectorSize += size; break; case "nvd": case "nvm": normsSize += size; break; case "dvd": case "dvm": dvsSize += size; break; case "tim": case "tip": termsSize += size; break; case "pos": case "pay": proxSize += size; break; case "doc": postingsSize += size; break; case "dii": case "dim": pointsSize += size; break; } } } DecimalFormat df = new DecimalFormat("#,##0"); System.out.printf("total disk: %15s\n", df.format(totalSize)); System.out.printf("num docs: %15s\n", df.format(reader.numDocs())); System.out.printf("stored fields: %15s\n", df.format(storeSize)); System.out.printf("term vectors: %15s\n", df.format(vectorSize)); System.out.printf("norms: %15s\n", df.format(normsSize)); System.out.printf("docvalues: %15s\n", df.format(dvsSize)); System.out.printf("postings: %15s\n", df.format(postingsSize)); System.out.printf("prox: %15s\n", df.format(proxSize)); System.out.printf("points: %15s\n", df.format(pointsSize)); System.out.printf("terms: %15s\n", df.format(termsSize)); System.out.println(); int maxFieldNameLength = 0; for (FieldStats field : stats) { maxFieldNameLength = Math.max(maxFieldNameLength, field.name.length()); } // Make sure we format to enough room for the max field length: String fieldNameFormat = "%" + maxFieldNameLength + "s"; System.out.printf(fieldNameFormat + " %15s %15s %15s %15s %15s %15s %15s %20s\n", "field", "total", "terms dict", "postings", "proximity", 
"points", "docvalues", "% with dv", "features"); System.out.printf(fieldNameFormat + " %15s %15s %15s %15s %15s %15s %15s %20s\n", "=====", "=====", "==========", "========", "=========", "=========", "=========", "========", "========"); for (FieldStats field : stats) { System.out.printf(fieldNameFormat + " %15s %15s %15s %15s %15s %15s %14.1f%% %20s\n", field.name, df.format(field.totalBytes()), df.format(field.termsBytes), df.format(field.postingsBytes), df.format(field.proxBytes), df.format(field.pointsBytes), df.format(field.dvBytes), (100.0 * field.docCountWithField) / reader.maxDoc(), features(reader.getFieldInfos().fieldInfo(field.name))); } }
From source file:perf.DiskUsage.java
License:Apache License
/**
 * Computes per-field disk usage for a segment by mapping per-field codec file
 * suffixes back to field names and attributing each file's size accordingly.
 * Doc counts per field are derived by iterating the field's doc-values
 * iterator (newer Lucene API; compare the Bits-based variant of this tool).
 *
 * @param reader segment to analyze
 * @return per-field statistics ordered by {@code FieldStats}' natural order
 * @throws Exception on any directory or codec failure
 */
static Set<FieldStats> analyzeFields(SegmentReader reader) throws Exception {
    Map<String, FieldStats> stats = new HashMap<>();
    Map<String, String> dvSuffixes = new HashMap<>();
    Map<String, String> postingsSuffixes = new HashMap<>();
    for (FieldInfo field : reader.getFieldInfos()) {
        FieldStats fieldStats = new FieldStats(field.name);
        stats.put(field.name, fieldStats);
        // Per-field formats tag each field's files with a suffix; remember which
        // suffix belongs to which field so file sizes can be attributed below.
        Map<String, String> attributes = field.attributes();
        if (attributes != null) {
            String postingsSuffix = attributes.get(PerFieldPostingsFormat.PER_FIELD_SUFFIX_KEY);
            if (postingsSuffix != null) {
                postingsSuffixes.put(postingsSuffix, field.name);
            }
            String dvSuffix = attributes.get(PerFieldDocValuesFormat.PER_FIELD_SUFFIX_KEY);
            if (dvSuffix != null) {
                dvSuffixes.put(dvSuffix, field.name);
            }
        }
        // Pick the doc-values iterator matching the field's type; iterating it
        // counts the docs that actually have a value for this field.
        DocIdSetIterator docsWithField;
        switch (field.getDocValuesType()) {
        case NUMERIC:
            docsWithField = reader.getNumericDocValues(field.name);
            break;
        case BINARY:
            docsWithField = reader.getBinaryDocValues(field.name);
            break;
        case SORTED:
            docsWithField = reader.getSortedDocValues(field.name);
            break;
        case SORTED_NUMERIC:
            docsWithField = reader.getSortedNumericDocValues(field.name);
            break;
        case SORTED_SET:
            docsWithField = reader.getSortedSetDocValues(field.name);
            break;
        case NONE:
            docsWithField = null;
            break;
        default:
            docsWithField = null;
            break;
        }
        if (docsWithField != null) {
            int count = 0;
            while (docsWithField.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                count++;
            }
            fieldStats.docCountWithField = count;
        }
    }
    Directory directory = reader.directory();
    for (String file : directory.listAll()) {
        String suffix = parseSuffix(file);
        long bytes = directory.fileLength(file);
        if (suffix != null) {
            // Suffixed file: attribute its full size to the owning field's bucket.
            switch (IndexFileNames.getExtension(file)) {
            case "dvd":
            case "dvm":
                stats.get(dvSuffixes.get(suffix)).dvBytes += bytes;
                break;
            case "tim":
            case "tip":
                stats.get(postingsSuffixes.get(suffix)).termsBytes += bytes;
                break;
            case "doc":
                stats.get(postingsSuffixes.get(suffix)).postingsBytes += bytes;
                break;
            case "pos":
            case "pay":
                stats.get(postingsSuffixes.get(suffix)).proxBytes += bytes;
                break;
            default:
                throw new AssertionError("unexpected suffixed file: " + file);
            }
        } else {
            // not a per-field file, but we can hackishly do this for the points case.
            if ("dii".equals(IndexFileNames.getExtension(file))) {
                System.err.println(
                        "retrieving per-field point usage, if you see a scary corruption error, its probably just this tool!!!!");
                try (ChecksumIndexInput in = directory.openChecksumInput(file, IOContext.READONCE)) {
                    // fail hard if its not exactly the version we do this hack for.
                    CodecUtil.checkIndexHeader(in, "Lucene60PointsFormatMeta", 0, 0,
                            reader.getSegmentInfo().info.getId(), "");
                    int fieldCount = in.readVInt();
                    // strangely, bkd offsets are not in any guaranteed order
                    TreeMap<Long, String> offsetToField = new TreeMap<>();
                    for (int i = 0; i < fieldCount; i++) {
                        int field = in.readVInt();
                        long offset = in.readVLong();
                        offsetToField.put(offset, reader.getFieldInfos().fieldInfo(field).name);
                    }
                    // now we can traverse in order; each field's point bytes are the
                    // gap between consecutive offsets in the BKD data file.
                    long previousOffset = 0;
                    for (Map.Entry<Long, String> entry : offsetToField.entrySet()) {
                        long offset = entry.getKey();
                        String field = entry.getValue();
                        stats.get(field).pointsBytes += (offset - previousOffset);
                        previousOffset = offset;
                    }
                    CodecUtil.checkFooter(in);
                }
            }
        }
    }
    return new TreeSet<FieldStats>(stats.values());
}