List of usage examples for org.apache.lucene.index IndexFileNames segmentFileName
public static String segmentFileName(String segmentName, String segmentSuffix, String ext)
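Before the per-project examples, a minimal sketch of the naming convention this method implements: the segment name, then "_" + segmentSuffix when the suffix is non-empty, then "." + ext. The segment name "_0", suffix "Lucene50_0", and extensions used here are illustrative values only, not taken from the source files below.

import org.apache.lucene.index.IndexFileNames;

public class SegmentFileNameSketch {
  public static void main(String[] args) {
    // Empty suffix: segment name + "." + extension
    System.out.println(IndexFileNames.segmentFileName("_0", "", "fdt"));           // _0.fdt
    // Non-empty suffix is joined to the segment name with '_'
    System.out.println(IndexFileNames.segmentFileName("_0", "Lucene50_0", "doc")); // _0_Lucene50_0.doc
  }
}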
From source file: com.lucure.core.codec.CompressingStoredFieldsReader.java
License: Apache License
/** Sole constructor. */
public CompressingStoredFieldsReader(Directory d, SegmentInfo si, String segmentSuffix, FieldInfos fn,
    IOContext context, String formatName, CompressionMode compressionMode) throws IOException {
  this.compressionMode = compressionMode;
  final String segment = si.name;
  boolean success = false;
  fieldInfos = fn;
  numDocs = si.getDocCount();
  ChecksumIndexInput indexStream = null;
  try {
    final String indexStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION);
    final String fieldsStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION);
    // Load the index into memory
    indexStream = d.openChecksumInput(indexStreamFN, context);
    final String codecNameIdx = formatName + CODEC_SFX_IDX;
    version = CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT);
    assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
    indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);

    long maxPointer = -1;
    if (version >= VERSION_CHECKSUM) {
      maxPointer = indexStream.readVLong();
      CodecUtil.checkFooter(indexStream);
    } else {
      CodecUtil.checkEOF(indexStream);
    }
    indexStream.close();
    indexStream = null;

    // Open the data file and read metadata
    fieldsStream = d.openInput(fieldsStreamFN, context);
    if (version >= VERSION_CHECKSUM) {
      if (maxPointer + CodecUtil.footerLength() != fieldsStream.length()) {
        throw new CorruptIndexException("Invalid fieldsStream maxPointer (file truncated?): maxPointer="
            + maxPointer + ", length=" + fieldsStream.length());
      }
    } else {
      maxPointer = fieldsStream.length();
    }
    this.maxPointer = maxPointer;
    final String codecNameDat = formatName + CODEC_SFX_DAT;
    final int fieldsVersion = CodecUtil.checkHeader(fieldsStream, codecNameDat, VERSION_START, VERSION_CURRENT);
    if (version != fieldsVersion) {
      throw new CorruptIndexException("Version mismatch between stored fields index and data: "
          + version + " != " + fieldsVersion);
    }
    assert CodecUtil.headerLength(codecNameDat) == fieldsStream.getFilePointer();

    if (version >= VERSION_BIG_CHUNKS) {
      chunkSize = fieldsStream.readVInt();
    } else {
      chunkSize = -1;
    }
    packedIntsVersion = fieldsStream.readVInt();
    decompressor = compressionMode.newDecompressor();
    this.bytes = new BytesRef();

    if (version >= VERSION_CHECKSUM) {
      // NOTE: data file is too costly to verify checksum against all the bytes on open,
      // but for now we at least verify proper structure of the checksum footer: which looks
      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
      // such as file truncation.
      CodecUtil.retrieveChecksum(fieldsStream);
    }

    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(this, indexStream);
    }
  }
}
From source file: com.lucure.core.codec.CompressingStoredFieldsWriter.java
License: Apache License
/** Sole constructor. */
public CompressingStoredFieldsWriter(Directory directory, SegmentInfo si, String segmentSuffix,
    IOContext context, String formatName, CompressionMode compressionMode, int chunkSize) throws IOException {
  assert directory != null;
  this.directory = directory;
  this.segment = si.name;
  this.segmentSuffix = segmentSuffix;
  this.compressionMode = compressionMode;
  this.compressor = compressionMode.newCompressor();
  this.chunkSize = chunkSize;
  this.docBase = 0;
  this.bufferedDocs = new GrowableByteArrayDataOutput(chunkSize);
  this.numStoredFields = new int[16];
  this.endOffsets = new int[16];
  this.numBufferedDocs = 0;

  boolean success = false;
  IndexOutput indexStream = directory.createOutput(
      IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION), context);
  try {
    fieldsStream = directory.createOutput(
        IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION), context);

    final String codecNameIdx = formatName + CODEC_SFX_IDX;
    final String codecNameDat = formatName + CODEC_SFX_DAT;
    CodecUtil.writeHeader(indexStream, codecNameIdx, VERSION_CURRENT);
    CodecUtil.writeHeader(fieldsStream, codecNameDat, VERSION_CURRENT);
    assert CodecUtil.headerLength(codecNameDat) == fieldsStream.getFilePointer();
    assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();

    indexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
    indexStream = null;

    fieldsStream.writeVInt(chunkSize);
    fieldsStream.writeVInt(PackedInts.VERSION_CURRENT);

    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(indexStream);
      abort();
    }
  }
}
From source file: com.lucure.core.codec.CompressingStoredFieldsWriter.java
License: Apache License
@Override
public void abort() {
  IOUtils.closeWhileHandlingException(this);
  IOUtils.deleteFilesIgnoringExceptions(directory,
      IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION),
      IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION));
}
From source file: com.lucure.core.codec.LucurePostingsWriter.java
License: Apache License
/** Creates a postings writer with the specified PackedInts overhead ratio */
// TODO: does this ctor even make sense?
public LucurePostingsWriter(SegmentWriteState state, float acceptableOverheadRatio) throws IOException {
  super();
  docOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name,
      state.segmentSuffix, LucurePostingsFormat.DOC_EXTENSION), state.context);
  IndexOutput posOut = null;
  IndexOutput payOut = null;
  boolean success = false;
  try {
    CodecUtil.writeHeader(docOut, DOC_CODEC, VERSION_CURRENT);
    forUtil = new ForUtil(acceptableOverheadRatio, docOut);
    if (state.fieldInfos.hasProx()) {
      posDeltaBuffer = new int[MAX_DATA_SIZE];
      posOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name,
          state.segmentSuffix, LucurePostingsFormat.POS_EXTENSION), state.context);
      CodecUtil.writeHeader(posOut, POS_CODEC, VERSION_CURRENT);

      if (state.fieldInfos.hasPayloads()) {
        payloadBytes = new byte[128];
        payloadLengthBuffer = new int[MAX_DATA_SIZE];
      } else {
        payloadBytes = null;
        payloadLengthBuffer = null;
      }

      if (state.fieldInfos.hasOffsets()) {
        offsetStartDeltaBuffer = new int[MAX_DATA_SIZE];
        offsetLengthBuffer = new int[MAX_DATA_SIZE];
      } else {
        offsetStartDeltaBuffer = null;
        offsetLengthBuffer = null;
      }

      if (state.fieldInfos.hasPayloads() || state.fieldInfos.hasOffsets()) {
        payOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name,
            state.segmentSuffix, LucurePostingsFormat.PAY_EXTENSION), state.context);
        CodecUtil.writeHeader(payOut, PAY_CODEC, VERSION_CURRENT);
      }
    } else {
      posDeltaBuffer = null;
      payloadLengthBuffer = null;
      offsetStartDeltaBuffer = null;
      offsetLengthBuffer = null;
      payloadBytes = null;
    }
    this.payOut = payOut;
    this.posOut = posOut;
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(docOut, posOut, payOut);
    }
  }

  docDeltaBuffer = new int[MAX_DATA_SIZE];
  freqBuffer = new int[MAX_DATA_SIZE];

  // TODO: should we try skipping every 2/4 blocks...?
  skipWriter = new LucureSkipWriter(maxSkipLevels, BLOCK_SIZE, state.segmentInfo.getDocCount(),
      docOut, posOut, payOut);

  encoded = new byte[MAX_ENCODED_SIZE];
}
From source file: com.rocana.lucene.codec.v1.RocanaBlockTreeTermsReader.java
License: Apache License
/** Sole constructor. */
public RocanaBlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadState state) throws IOException {
  boolean success = false;
  IndexInput indexIn = null;

  this.postingsReader = postingsReader;
  this.segment = state.segmentInfo.name;

  String termsName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_EXTENSION);
  try {
    termsIn = state.directory.openInput(termsName, state.context);
    version = CodecUtil.checkIndexHeader(termsIn, TERMS_CODEC_NAME, VERSION_START, VERSION_CURRENT,
        state.segmentInfo.getId(), state.segmentSuffix);

    if (version < VERSION_AUTO_PREFIX_TERMS) {
      // Old (pre-5.2.0) index, no auto-prefix terms:
      this.anyAutoPrefixTerms = false;
    } else if (version == VERSION_AUTO_PREFIX_TERMS) {
      // 5.2.x index, might have auto-prefix terms:
      this.anyAutoPrefixTerms = true;
    } else {
      // 5.3.x index, we record up front if we may have written any auto-prefix terms:
      assert version >= VERSION_AUTO_PREFIX_TERMS_COND;
      byte b = termsIn.readByte();
      if (b == 0) {
        this.anyAutoPrefixTerms = false;
      } else if (b == 1) {
        this.anyAutoPrefixTerms = true;
      } else {
        throw new CorruptIndexException("invalid anyAutoPrefixTerms: expected 0 or 1 but got " + b, termsIn);
      }
    }

    String indexName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_INDEX_EXTENSION);
    indexIn = state.directory.openInput(indexName, state.context);
    CodecUtil.checkIndexHeader(indexIn, TERMS_INDEX_CODEC_NAME, version, version,
        state.segmentInfo.getId(), state.segmentSuffix);

    // IMPORTANT: comment out this one line to prevent checksumming the entire file.
    // This is the reason we have a custom Lucene codec and forked Lucene classes.
    //CodecUtil.checksumEntireFile(indexIn);

    // Have PostingsReader init itself
    postingsReader.init(termsIn, state);

    // NOTE: data file is too costly to verify checksum against all the bytes on open,
    // but for now we at least verify proper structure of the checksum footer: which looks
    // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
    // such as file truncation.
    CodecUtil.retrieveChecksum(termsIn);

    // Read per-field details
    seekDir(termsIn, dirOffset);
    seekDir(indexIn, indexDirOffset);

    final int numFields = termsIn.readVInt();
    if (numFields < 0) {
      throw new CorruptIndexException("invalid numFields: " + numFields, termsIn);
    }

    for (int i = 0; i < numFields; ++i) {
      final int field = termsIn.readVInt();
      final long numTerms = termsIn.readVLong();
      if (numTerms <= 0) {
        throw new CorruptIndexException("Illegal numTerms for field number: " + field, termsIn);
      }
      final int numBytes = termsIn.readVInt();
      if (numBytes < 0) {
        throw new CorruptIndexException("invalid rootCode for field number: " + field
            + ", numBytes=" + numBytes, termsIn);
      }
      final BytesRef rootCode = new BytesRef(new byte[numBytes]);
      termsIn.readBytes(rootCode.bytes, 0, numBytes);
      rootCode.length = numBytes;
      final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
      if (fieldInfo == null) {
        throw new CorruptIndexException("invalid field number: " + field, termsIn);
      }
      final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : termsIn.readVLong();
      final long sumDocFreq = termsIn.readVLong();
      final int docCount = termsIn.readVInt();
      final int longsSize = termsIn.readVInt();
      if (longsSize < 0) {
        throw new CorruptIndexException("invalid longsSize for field: " + fieldInfo.name
            + ", longsSize=" + longsSize, termsIn);
      }
      BytesRef minTerm = readBytesRef(termsIn);
      BytesRef maxTerm = readBytesRef(termsIn);
      if (docCount < 0 || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs
        throw new CorruptIndexException("invalid docCount: " + docCount
            + " maxDoc: " + state.segmentInfo.maxDoc(), termsIn);
      }
      if (sumDocFreq < docCount) { // #postings must be >= #docs with field
        throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, termsIn);
      }
      if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
        throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq
            + " sumDocFreq: " + sumDocFreq, termsIn);
      }
      final long indexStartFP = indexIn.readVLong();
      RocanaFieldReader previous = fields.put(fieldInfo.name,
          new RocanaFieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq,
              docCount, indexStartFP, longsSize, indexIn, minTerm, maxTerm));
      if (previous != null) {
        throw new CorruptIndexException("duplicate field: " + fieldInfo.name, termsIn);
      }
    }

    indexIn.close();
    success = true;
  } finally {
    if (!success) {
      // this.close() will close in:
      IOUtils.closeWhileHandlingException(indexIn, this);
    }
  }
}
From source file: com.sindicetech.siren.index.codecs.siren10.Siren10PostingsReader.java
License: Open Source License
public Siren10PostingsReader(final Directory dir, final SegmentInfo segmentInfo, final IOContext context,
    final String segmentSuffix, final Siren10BlockStreamFactory factory) throws IOException {
  boolean success = false;
  try {
    final String docFileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix,
        Siren10PostingsFormat.DOC_EXTENSION);
    docIn = factory.openDocsFreqInput(dir, docFileName, context);
    nodIn = factory.openNodInput(dir,
        IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Siren10PostingsFormat.NOD_EXTENSION),
        context);
    skipIn = dir.openInput(
        IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Siren10PostingsFormat.SKIP_EXTENSION),
        context);
    posIn = factory.openPosInput(dir,
        IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Siren10PostingsFormat.POS_EXTENSION),
        context);
    success = true;
  } finally {
    if (!success) {
      this.close();
    }
  }
}
From source file: com.sindicetech.siren.index.codecs.siren10.Siren10PostingsWriter.java
License: Open Source License
public Siren10PostingsWriter(final SegmentWriteState state, final int blockSkipInterval,
    final Siren10BlockStreamFactory factory) throws IOException {
  nodOut = null;
  nodIndex = null;
  posOut = null;
  posIndex = null;
  boolean success = false;
  try {
    this.blockSkipInterval = blockSkipInterval;
    this.blockSkipMinimum = blockSkipInterval; /* set to the same for now */

    final String docFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix,
        Siren10PostingsFormat.DOC_EXTENSION);
    docOut = factory.createDocsFreqOutput(state.directory, docFileName, state.context);
    docWriter = docOut.getBlockWriter();
    docIndex = docOut.index();
    this.maxBlockSize = docWriter.getMaxBlockSize();

    final String nodFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix,
        Siren10PostingsFormat.NOD_EXTENSION);
    nodOut = factory.createNodOutput(state.directory, nodFileName, state.context);
    nodWriter = nodOut.getBlockWriter();
    nodIndex = nodOut.index();

    final String posFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix,
        Siren10PostingsFormat.POS_EXTENSION);
    posOut = factory.createPosOutput(state.directory, posFileName, state.context);
    posWriter = posOut.getBlockWriter();
    posIndex = posOut.index();

    final String skipFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix,
        Siren10PostingsFormat.SKIP_EXTENSION);
    skipOut = state.directory.createOutput(skipFileName, state.context);

    totalNumDocs = state.segmentInfo.getDocCount();

    // Estimate number of blocks that will be written
    final int numBlocks = (int) Math.ceil(totalNumDocs / (double) docWriter.getMaxBlockSize());
    skipWriter = new Siren10SkipListWriter(blockSkipInterval, maxSkipLevels, numBlocks, docOut);
    docWriter.setNodeBlockIndex(nodIndex);
    docWriter.setPosBlockIndex(posIndex);
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(docOut, skipOut, nodOut, posOut);
    }
  }
}
From source file: com.vmware.xenon.services.common.Lucene60FieldInfosFormatWithCache.java
License: Open Source License
@Override
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext context)
    throws IOException {
  //////////////////////
  boolean checkInfosCache = true;
  //////////////////////
  final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, EXTENSION);
  try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) {
    Throwable priorE = null;
    FieldInfo[] infos = null;
    try {
      CodecUtil.checkIndexHeader(input, Lucene60FieldInfosFormatWithCache.CODEC_NAME,
          Lucene60FieldInfosFormatWithCache.FORMAT_START, Lucene60FieldInfosFormatWithCache.FORMAT_CURRENT,
          segmentInfo.getId(), segmentSuffix);

      final int size = input.readVInt(); // read in the size
      infos = new FieldInfo[size];

      // previous field's attribute map, we share when possible:
      Map<String, String> lastAttributes = Collections.emptyMap();

      for (int i = 0; i < size; i++) {
        String name = input.readString();
        final int fieldNumber = input.readVInt();
        if (fieldNumber < 0) {
          throw new CorruptIndexException(
              "invalid field number for field: " + name + ", fieldNumber=" + fieldNumber, input);
        }
        byte bits = input.readByte();
        boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
        boolean omitNorms = (bits & OMIT_NORMS) != 0;
        boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
        final IndexOptions indexOptions = getIndexOptions(input, input.readByte());

        // DV Types are packed in one byte
        final DocValuesType docValuesType = getDocValuesType(input, input.readByte());
        final long dvGen = input.readLong();
        Map<String, String> attributes = input.readMapOfStrings();

        // just use the last field's map if it's the same
        if (attributes.equals(lastAttributes)) {
          attributes = lastAttributes;
        }
        lastAttributes = attributes;
        int pointDimensionCount = input.readVInt();
        int pointNumBytes;
        if (pointDimensionCount != 0) {
          pointNumBytes = input.readVInt();
        } else {
          pointNumBytes = 0;
        }

        try {
          //////////////////////
          if (dvGen >= 0) {
            // skip fields with docValues, they don't cache well
            checkInfosCache = false;
            infos[i] = new FieldInfo(name, fieldNumber, storeTermVector, omitNorms, storePayloads,
                indexOptions, docValuesType, dvGen, attributes, pointDimensionCount, pointNumBytes);
          } else {
            infos[i] = this.cache.dedupFieldInfo(name, fieldNumber, storeTermVector, omitNorms,
                storePayloads, indexOptions, docValuesType, dvGen, attributes, pointDimensionCount,
                pointNumBytes);
          }
          //////////////////////
        } catch (IllegalStateException e) {
          throw new CorruptIndexException(
              "invalid fieldinfo for field: " + name + ", fieldNumber=" + fieldNumber, input, e);
        }
      }
    } catch (Throwable exception) {
      priorE = exception;
    } finally {
      CodecUtil.checkFooter(input, priorE);
    }
    //////////////////////
    if (checkInfosCache) {
      return this.cache.dedupFieldInfos(infos);
    } else {
      FieldInfos result = new FieldInfos(infos);
      this.cache.trimFieldInfos(result);
      return result;
    }
    //////////////////////
  }
}
From source file: com.vmware.xenon.services.common.Lucene60FieldInfosFormatWithCache.java
License: Open Source License
@Override
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos,
    IOContext context) throws IOException {
  final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, EXTENSION);
  try (IndexOutput output = directory.createOutput(fileName, context)) {
    CodecUtil.writeIndexHeader(output, Lucene60FieldInfosFormatWithCache.CODEC_NAME,
        Lucene60FieldInfosFormatWithCache.FORMAT_CURRENT, segmentInfo.getId(), segmentSuffix);
    output.writeVInt(infos.size());
    for (FieldInfo fi : infos) {
      fi.checkConsistency();

      output.writeString(fi.name);
      output.writeVInt(fi.number);

      byte bits = 0x0;
      if (fi.hasVectors()) {
        bits |= STORE_TERMVECTOR;
      }
      if (fi.omitsNorms()) {
        bits |= OMIT_NORMS;
      }
      if (fi.hasPayloads()) {
        bits |= STORE_PAYLOADS;
      }
      output.writeByte(bits);

      output.writeByte(indexOptionsByte(fi.getIndexOptions()));

      // pack the DV type and hasNorms in one byte
      output.writeByte(docValuesByte(fi.getDocValuesType()));
      output.writeLong(fi.getDocValuesGen());
      output.writeMapOfStrings(fi.attributes());
      int pointDimensionCount = fi.getPointDimensionCount();
      output.writeVInt(pointDimensionCount);
      if (pointDimensionCount != 0) {
        output.writeVInt(fi.getPointNumBytes());
      }
    }
    CodecUtil.writeFooter(output);
  }
}
From source file: it.agilelab.bigdata.spark.search.impl.BigChunksRAMDirectory.java
License: Apache License
public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException {
  ensureOpen();

  // Make the file first...
  BigChunksRAMFile file = newRAMFile();

  // ... then try to find a unique name for it:
  while (true) {
    String name = IndexFileNames.segmentFileName(prefix,
        suffix + "_" + Long.toString(nextTempFileCounter.getAndIncrement(), Character.MAX_RADIX), "tmp");
    if (fileMap.putIfAbsent(name, file) == null) {
      return new BigChunksRAMOutputStream(name, file, true);
    }
  }
}
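A small follow-up sketch of the temp-file names the loop above produces; the prefix "_0" and suffix "ram" are hypothetical values, and the literal counter values stand in for nextTempFileCounter. Long.toString(counter, Character.MAX_RADIX) renders the counter in base 36, keeping generated names short while still unique.

import org.apache.lucene.index.IndexFileNames;

public class TempNameSketch {
  public static void main(String[] args) {
    // Counter 0 -> "_0_ram_0.tmp"
    System.out.println(IndexFileNames.segmentFileName("_0", "ram_" + Long.toString(0, Character.MAX_RADIX), "tmp"));
    // Counter 35 -> "_0_ram_z.tmp" (35 in base 36 is "z")
    System.out.println(IndexFileNames.segmentFileName("_0", "ram_" + Long.toString(35, Character.MAX_RADIX), "tmp"));
  }
}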