List of usage examples for org.apache.hadoop.fs.FSDataOutputStream.getPos()
public long getPos()
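getPos() returns the current byte offset in the stream, which is where the next write will land. The recurring pattern in the examples below is to capture that offset before writing a record or segment and use it later as an index entry. A minimal sketch of the pattern (the local filesystem, path, and payload here are illustrative assumptions, not taken from the examples):

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetPosExample {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.getLocal(new Configuration());
        Path path = new Path("/tmp/getpos-example"); // illustrative path
        try (FSDataOutputStream out = fs.create(path, true)) {
            long segmentStart = out.getPos(); // offset before the record
            out.write("some record bytes".getBytes(StandardCharsets.UTF_8)); // illustrative payload
            long segmentLength = out.getPos() - segmentStart; // bytes this record occupies
            System.out.println("record at offset " + segmentStart + ", length " + segmentLength);
        }
    }
}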
From source file: org.apache.tajo.storage.thirdparty.parquet.ParquetFileWriter.java
License: Apache License
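In this example, getPos() marks where the footer starts; after the metadata is written, the difference between the current position and that mark gives the footer length that is written ahead of the trailing magic bytes.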
private static void serializeFooter(ParquetMetadata footer, FSDataOutputStream out) throws IOException {
    long footerIndex = out.getPos();
    parquet.format.FileMetaData parquetMetadata = new ParquetMetadataConverter()
            .toParquetMetadata(CURRENT_VERSION, footer);
    writeFileMetaData(parquetMetadata, out);
    if (DEBUG)
        LOG.debug(out.getPos() + ": footer length = " + (out.getPos() - footerIndex));
    BytesUtils.writeIntLittleEndian(out, (int) (out.getPos() - footerIndex));
    out.write(MAGIC);
}
From source file: org.apache.tajo.worker.TestFetcherWithTajoPullServer.java
License: Apache License
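In this test, stream.getPos() supplies the byte offset of each row as it is written, so the BSTIndexWriter can index every sorted key by its position in the data file.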
@Test
public void testGetRangeShuffle() throws IOException {
    Random rnd = new Random();
    QueryId queryId = QueryIdFactory.NULL_QUERY_ID;
    String sid = "1";
    String partId = "1";
    String taskId = "1";
    String attemptId = "0";
    Path queryBaseDir = PullServerUtil.getBaseOutputDir(queryId.toString(), sid);
    Path outDir = StorageUtil.concatPath(queryBaseDir, taskId + "_" + attemptId, "output");
    Path dataPath = StorageUtil.concatPath(outDir, "output");
    Path indexPath = StorageUtil.concatPath(outDir, "index");

    List<String> strings = new ArrayList<>(100);
    for (int i = 0; i < 100; i++) {
        strings.add("" + rnd.nextInt());
    }
    Collections.sort(strings);

    Path inputPath = new Path(INPUT_DIR, dataPath);
    FileSystem fs = FileSystem.getLocal(conf);
    if (fs.exists(outDir)) {
        fs.delete(outDir, true);
    }
    final FSDataOutputStream stream = fs.create(inputPath, true);
    BSTIndex index = new BSTIndex(conf);
    Schema schema = SchemaBuilder.builder().addAll(new Column[] { new Column("rnd", Type.TEXT) }).build();
    SortSpec[] sortSpecs = new SortSpec[] { new SortSpec(schema.getColumn(0)) };
    BSTIndexWriter writer = index.getIndexWriter(new Path(INPUT_DIR, indexPath), BSTIndex.TWO_LEVEL_INDEX,
            schema, new BaseTupleComparator(schema, sortSpecs), true);
    writer.init();

    for (String t : strings) {
        writer.write(new VTuple(new Datum[] { DatumFactory.createText(t) }), stream.getPos());
        stream.write(t.getBytes());
    }
    stream.flush();
    writer.flush();
    stream.close();
    writer.close();

    RangeParam rangeParam = new RangeParam(
            new TupleRange(sortSpecs, new VTuple(new Datum[] { DatumFactory.createText(strings.get(0)) }),
                    new VTuple(new Datum[] { DatumFactory.createText(strings.get(strings.size() - 1)) })),
            true, RowStoreUtil.createEncoder(schema));
    PullServerRequestURIBuilder builder = new PullServerRequestURIBuilder("127.0.0.1", pullserverPort,
            maxUrlLength);
    builder.setRequestType(PullServerConstants.CHUNK_REQUEST_PARAM_STRING).setQueryId(queryId.toString())
            .setEbId(sid).setPartId(partId).setShuffleType(PullServerConstants.RANGE_SHUFFLE_PARAM_STRING)
            .setTaskIds(Lists.newArrayList(Integer.parseInt(taskId)))
            .setAttemptIds(Lists.newArrayList(Integer.parseInt(attemptId)))
            .setStartKeyBase64(new String(Base64.encodeBase64(rangeParam.getStart())))
            .setEndKeyBase64(new String(Base64.encodeBase64(rangeParam.getEnd()))).setLastInclude(true);
    URI uri = builder.build(true).get(0);

    File data = new File(OUTPUT_DIR + "data");
    final AbstractFetcher fetcher = getFetcher(uri, data);
    FileChunk chunk = fetcher.get().get(0);
    assertNotNull(chunk);
    assertNotNull(chunk.getFile());

    FileStatus inStatus = fs.getFileStatus(inputPath);
    FileStatus outStatus = fs.getFileStatus(new Path(chunk.getFile().getAbsolutePath()));
    assertEquals(inStatus.getLen(), outStatus.getLen());
    assertEquals(FetcherState.FETCH_DATA_FINISHED, fetcher.getState());
}
From source file: org.apache.tez.engine.common.sort.impl.dflt.DefaultSorter.java
License: Apache License
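Here, out.getPos() captures the start offset of each partition's segment before an IFile.Writer is opened; the offset is stored in a TezIndexRecord in the spill index.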
protected void spill(int mstart, int mend) throws IOException, InterruptedException {
    //approximate the length of the output file to be the length of the
    //buffer + header lengths for the partitions
    final long size = (bufend >= bufstart ? bufend - bufstart : (bufvoid - bufend) + bufstart)
            + partitions * APPROX_HEADER_LENGTH;
    FSDataOutputStream out = null;
    try {
        // create spill file
        final TezSpillRecord spillRec = new TezSpillRecord(partitions);
        final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size);
        out = rfs.create(filename);

        int spindex = mstart;
        final InMemValBytes value = createInMemValBytes();
        for (int i = 0; i < partitions; ++i) {
            IFile.Writer writer = null;
            try {
                long segmentStart = out.getPos();
                writer = new Writer(job, out, keyClass, valClass, codec, spilledRecordsCounter);
                if (combineProcessor == null) {
                    // spill directly
                    DataInputBuffer key = new DataInputBuffer();
                    while (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
                        final int kvoff = offsetFor(spindex);
                        key.reset(kvbuffer, kvmeta.get(kvoff + KEYSTART),
                                (kvmeta.get(kvoff + VALSTART) - kvmeta.get(kvoff + KEYSTART)));
                        getVBytesForOffset(kvoff, value);
                        writer.append(key, value);
                        ++spindex;
                    }
                } else {
                    int spstart = spindex;
                    while (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
                        ++spindex;
                    }
                    // Note: we would like to avoid the combiner if we've fewer
                    // than some threshold of records for a partition
                    if (spstart != spindex) {
                        TezRawKeyValueIterator kvIter = new MRResultIterator(spstart, spindex);
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("Running combine processor");
                        }
                        runCombineProcessor(kvIter, writer);
                    }
                }

                // close the writer
                writer.close();

                // record offsets
                final TezIndexRecord rec = new TezIndexRecord(segmentStart, writer.getRawLength(),
                        writer.getCompressedLength());
                spillRec.putIndex(rec, i);
                writer = null;
            } finally {
                if (null != writer)
                    writer.close();
            }
        }

        if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
            // create spill index file
            Path indexFilename = mapOutputFile.getSpillIndexFileForWrite(numSpills,
                    partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
            spillRec.writeToFile(indexFilename, job);
        } else {
            indexCacheList.add(spillRec);
            totalIndexCacheMemory += spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
        }
        LOG.info("Finished spill " + numSpills);
        ++numSpills;
    } finally {
        if (out != null)
            out.close();
    }
}
From source file: org.apache.tez.engine.common.sort.impl.dflt.DefaultSorter.java
License: Apache License
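Here, getPos() is used twice: once to mark each segment's start offset, and once around the single record append to measure the bytes written for the map output byte counter.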
/**
 * Handles the degenerate case where serialization fails to fit in
 * the in-memory buffer, so we must spill the record from collect
 * directly to a spill file. Consider this "losing".
 */
private void spillSingleRecord(final Object key, final Object value, int partition) throws IOException {
    long size = kvbuffer.length + partitions * APPROX_HEADER_LENGTH;
    FSDataOutputStream out = null;
    try {
        // create spill file
        final TezSpillRecord spillRec = new TezSpillRecord(partitions);
        final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size);
        out = rfs.create(filename);

        // we don't run the combiner for a single record
        for (int i = 0; i < partitions; ++i) {
            IFile.Writer writer = null;
            try {
                long segmentStart = out.getPos();
                // Create a new codec, don't care!
                writer = new IFile.Writer(job, out, keyClass, valClass, codec, spilledRecordsCounter);
                if (i == partition) {
                    final long recordStart = out.getPos();
                    writer.append(key, value);
                    // Note that our map byte count will not be accurate with
                    // compression
                    mapOutputByteCounter.increment(out.getPos() - recordStart);
                }
                writer.close();

                // record offsets
                TezIndexRecord rec = new TezIndexRecord(segmentStart, writer.getRawLength(),
                        writer.getCompressedLength());
                spillRec.putIndex(rec, i);
                writer = null;
            } catch (IOException e) {
                if (null != writer)
                    writer.close();
                throw e;
            }
        }
        if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
            // create spill index file
            Path indexFilename = mapOutputFile.getSpillIndexFileForWrite(numSpills,
                    partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
            spillRec.writeToFile(indexFilename, job);
        } else {
            indexCacheList.add(spillRec);
            totalIndexCacheMemory += spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
        }
        ++numSpills;
    } finally {
        if (out != null)
            out.close();
    }
}
From source file: org.apache.tez.engine.common.sort.impl.dflt.DefaultSorter.java
License: Apache License
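Here, finalOut.getPos() records where each partition's segment begins in the final merged output file, both for empty placeholder segments and for real merged data.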
private void mergeParts() throws IOException, InterruptedException {
    // get the approximate size of the final output/index files
    long finalOutFileSize = 0;
    long finalIndexFileSize = 0;
    final Path[] filename = new Path[numSpills];
    final TezTaskAttemptID mapId = task.getTaskAttemptId();

    for (int i = 0; i < numSpills; i++) {
        filename[i] = mapOutputFile.getSpillFile(i);
        finalOutFileSize += rfs.getFileStatus(filename[i]).getLen();
    }
    if (numSpills == 1) { //the spill is the final output
        sameVolRename(filename[0], mapOutputFile.getOutputFileForWriteInVolume(filename[0]));
        if (indexCacheList.size() == 0) {
            sameVolRename(mapOutputFile.getSpillIndexFile(0),
                    mapOutputFile.getOutputIndexFileForWriteInVolume(filename[0]));
        } else {
            indexCacheList.get(0).writeToFile(mapOutputFile.getOutputIndexFileForWriteInVolume(filename[0]),
                    job);
        }
        sortPhase.complete();
        return;
    }

    // read in paged indices
    for (int i = indexCacheList.size(); i < numSpills; ++i) {
        Path indexFileName = mapOutputFile.getSpillIndexFile(i);
        indexCacheList.add(new TezSpillRecord(indexFileName, job));
    }

    //make correction in the length to include the sequence file header
    //lengths for each partition
    finalOutFileSize += partitions * APPROX_HEADER_LENGTH;
    finalIndexFileSize = partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    Path finalOutputFile = mapOutputFile.getOutputFileForWrite(finalOutFileSize);
    Path finalIndexFile = mapOutputFile.getOutputIndexFileForWrite(finalIndexFileSize);

    //The output stream for the final single output file
    FSDataOutputStream finalOut = rfs.create(finalOutputFile, true, 4096);

    if (numSpills == 0) {
        //create dummy files
        TezSpillRecord sr = new TezSpillRecord(partitions);
        try {
            for (int i = 0; i < partitions; i++) {
                long segmentStart = finalOut.getPos();
                Writer writer = new Writer(job, finalOut, keyClass, valClass, codec, null);
                writer.close();

                TezIndexRecord rec = new TezIndexRecord(segmentStart, writer.getRawLength(),
                        writer.getCompressedLength());
                sr.putIndex(rec, i);
            }
            sr.writeToFile(finalIndexFile, job);
        } finally {
            finalOut.close();
        }
        sortPhase.complete();
        return;
    } else {
        sortPhase.addPhases(partitions); // Divide sort phase into sub-phases
        TezMerger.considerFinalMergeForProgress();

        final TezSpillRecord spillRec = new TezSpillRecord(partitions);
        for (int parts = 0; parts < partitions; parts++) {
            //create the segments to be merged
            List<Segment> segmentList = new ArrayList<Segment>(numSpills);
            for (int i = 0; i < numSpills; i++) {
                TezIndexRecord indexRecord = indexCacheList.get(i).getIndex(parts);

                Segment s = new Segment(job, rfs, filename[i], indexRecord.getStartOffset(),
                        indexRecord.getPartLength(), codec, true);
                segmentList.add(i, s);

                if (LOG.isDebugEnabled()) {
                    LOG.debug("MapId=" + mapId + " Reducer=" + parts + "Spill =" + i + "("
                            + indexRecord.getStartOffset() + "," + indexRecord.getRawLength() + ", "
                            + indexRecord.getPartLength() + ")");
                }
            }

            int mergeFactor = job.getInt(TezJobConfig.TEZ_ENGINE_IO_SORT_FACTOR,
                    TezJobConfig.DEFAULT_TEZ_ENGINE_IO_SORT_FACTOR);
            // sort the segments only if there are intermediate merges
            boolean sortSegments = segmentList.size() > mergeFactor;
            //merge
            TezRawKeyValueIterator kvIter = TezMerger.merge(job, rfs, keyClass, valClass, codec, segmentList,
                    mergeFactor, new Path(mapId.toString()),
                    (RawComparator) ConfigUtils.getIntermediateOutputKeyComparator(job),
                    runningTaskContext.getTaskReporter(), sortSegments, null, spilledRecordsCounter,
                    sortPhase.phase());

            //write merged output to disk
            long segmentStart = finalOut.getPos();
            Writer writer = new Writer(job, finalOut, keyClass, valClass, codec, spilledRecordsCounter);
            if (combineProcessor == null || numSpills < minSpillsForCombine) {
                TezMerger.writeFile(kvIter, writer, runningTaskContext.getTaskReporter(), job);
            } else {
                runCombineProcessor(kvIter, writer);
            }
            writer.close();
            sortPhase.startNextPhase();

            // record offsets
            final TezIndexRecord rec = new TezIndexRecord(segmentStart, writer.getRawLength(),
                    writer.getCompressedLength());
            spillRec.putIndex(rec, parts);
        }
        spillRec.writeToFile(finalIndexFile, job);
        finalOut.close();
        for (int i = 0; i < numSpills; i++) {
            rfs.delete(filename[i], true);
        }
    }
}
From source file: org.apache.tez.engine.common.sort.impl.PipelinedSorter.java
License: Apache License
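Here, out.getPos() marks the start of each partition's merged output so that a TezIndexRecord with the segment offset and lengths can be written to the spill index.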
public void spill() throws IOException {
    // create spill file
    final long size = largeBuffer.capacity() + (partitions * APPROX_HEADER_LENGTH);
    final TezSpillRecord spillRec = new TezSpillRecord(partitions);
    final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size);
    FSDataOutputStream out = rfs.create(filename, true, 4096);
    try {
        merger.ready(); // wait for all the future results from sort threads
        LOG.info("Spilling to " + filename.toString());
        for (int i = 0; i < partitions; ++i) {
            TezRawKeyValueIterator kvIter = merger.filter(i);
            //write merged output to disk
            long segmentStart = out.getPos();
            Writer writer = new Writer(job, out, keyClass, valClass, codec, spilledRecordsCounter);
            writer.setRLE(merger.needsRLE());
            if (combineProcessor == null) {
                while (kvIter.next()) {
                    writer.append(kvIter.getKey(), kvIter.getValue());
                }
            } else {
                runCombineProcessor(kvIter, writer);
            }
            //close
            writer.close();

            // record offsets
            final TezIndexRecord rec = new TezIndexRecord(segmentStart, writer.getRawLength(),
                    writer.getCompressedLength());
            spillRec.putIndex(rec, i);
        }

        Path indexFilename = mapOutputFile.getSpillIndexFileForWrite(numSpills,
                partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
        // TODO: cache
        spillRec.writeToFile(indexFilename, job);
        ++numSpills;
    } catch (InterruptedException ie) {
        // TODO: the combiner has been interrupted
    } finally {
        out.close();
    }
}
From source file: org.apache.tez.engine.common.sort.impl.PipelinedSorter.java
License: Apache License
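During the final merge, finalOut.getPos() is read before each partition's writer is opened and stored in the TezIndexRecord that goes into the final index file.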
@Override
public void flush() throws IOException, InterruptedException {
    final TezTaskAttemptID mapId = task.getTaskAttemptId();
    Path finalOutputFile = mapOutputFile.getOutputFileForWrite(0); //TODO
    Path finalIndexFile = mapOutputFile.getOutputIndexFileForWrite(0); //TODO

    LOG.info("Starting flush of map output");
    span.end();
    merger.add(span.sort(sorter, comparator));
    spill();
    sortmaster.shutdown();

    largeBuffer = null;

    if (numSpills == 1) {
        // someday be able to pass this directly to shuffle
        // without writing to disk
        final Path filename = mapOutputFile.getSpillFile(0);
        Path indexFilename = mapOutputFile.getSpillIndexFile(0);
        sameVolRename(filename, finalOutputFile);
        sameVolRename(indexFilename, finalIndexFile);
        return;
    }

    //The output stream for the final single output file
    FSDataOutputStream finalOut = rfs.create(finalOutputFile, true, 4096);

    sortPhase.addPhases(partitions); // Divide sort phase into sub-phases
    TezMerger.considerFinalMergeForProgress();

    final TezSpillRecord spillRec = new TezSpillRecord(partitions);
    final ArrayList<TezSpillRecord> indexCacheList = new ArrayList<TezSpillRecord>();

    for (int i = 0; i < numSpills; i++) {
        // TODO: build this cache before
        Path indexFilename = mapOutputFile.getSpillIndexFile(i);
        TezSpillRecord spillIndex = new TezSpillRecord(indexFilename, job);
        indexCacheList.add(spillIndex);
    }

    for (int parts = 0; parts < partitions; parts++) {
        //create the segments to be merged
        List<Segment> segmentList = new ArrayList<Segment>(numSpills);
        for (int i = 0; i < numSpills; i++) {
            Path spillFilename = mapOutputFile.getSpillFile(i);
            TezIndexRecord indexRecord = indexCacheList.get(i).getIndex(parts);

            Segment s = new Segment(job, rfs, spillFilename, indexRecord.getStartOffset(),
                    indexRecord.getPartLength(), codec, true);
            segmentList.add(i, s);
        }

        int mergeFactor = job.getInt(TezJobConfig.TEZ_ENGINE_IO_SORT_FACTOR,
                TezJobConfig.DEFAULT_TEZ_ENGINE_IO_SORT_FACTOR);
        // sort the segments only if there are intermediate merges
        boolean sortSegments = segmentList.size() > mergeFactor;
        //merge
        @SuppressWarnings("unchecked")
        TezRawKeyValueIterator kvIter = TezMerger.merge(job, rfs, keyClass, valClass, codec, segmentList,
                mergeFactor, new Path(mapId.toString()),
                (RawComparator) ConfigUtils.getIntermediateOutputKeyComparator(job),
                runningTaskContext.getTaskReporter(), sortSegments, null, spilledRecordsCounter,
                sortPhase.phase());

        //write merged output to disk
        long segmentStart = finalOut.getPos();
        Writer writer = new Writer(job, finalOut, keyClass, valClass, codec, spilledRecordsCounter);
        writer.setRLE(merger.needsRLE());
        if (combineProcessor == null || numSpills < minSpillsForCombine) {
            TezMerger.writeFile(kvIter, writer, runningTaskContext.getTaskReporter(), job);
        } else {
            runCombineProcessor(kvIter, writer);
        }
        //close
        writer.close();
        sortPhase.startNextPhase();

        // record offsets
        final TezIndexRecord rec = new TezIndexRecord(segmentStart, writer.getRawLength(),
                writer.getCompressedLength());
        spillRec.putIndex(rec, parts);
    }

    spillRec.writeToFile(finalIndexFile, job);
    finalOut.close();
    for (int i = 0; i < numSpills; i++) {
        Path indexFilename = mapOutputFile.getSpillIndexFile(i);
        Path spillFilename = mapOutputFile.getSpillFile(i);
        rfs.delete(indexFilename, true);
        rfs.delete(spillFilename, true);
    }
}
From source file: org.apache.tez.mapreduce.processor.MapUtils.java
License: Apache License
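Here, out.getPos() captures the offset at which the serialized split begins; the offset is recorded in the JobSplit.SplitMetaInfo written to the split meta file.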
private static void writeSplitFiles(FileSystem fs, JobConf conf, InputSplit split) throws IOException {
    Path jobSplitFile = new Path(conf.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR,
            MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR_DEFAULT), MRJobConfig.JOB_SPLIT);
    LOG.info("Writing split to: " + jobSplitFile);
    FSDataOutputStream out = FileSystem.create(fs, jobSplitFile, new FsPermission(JOB_FILE_PERMISSION));

    long offset = out.getPos();
    Text.writeString(out, split.getClass().getName());
    split.write(out);
    out.close();

    String[] locations = split.getLocations();

    SplitMetaInfo info = null;
    info = new JobSplit.SplitMetaInfo(locations, offset, split.getLength());

    Path jobSplitMetaInfoFile = new Path(conf.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR),
            MRJobConfig.JOB_SPLIT_METAINFO);

    FSDataOutputStream outMeta = FileSystem.create(fs, jobSplitMetaInfoFile,
            new FsPermission(JOB_FILE_PERMISSION));
    outMeta.write(SplitMetaInfoReaderTez.META_SPLIT_FILE_HEADER);
    WritableUtils.writeVInt(outMeta, SplitMetaInfoReaderTez.META_SPLIT_VERSION);
    WritableUtils.writeVInt(outMeta, 1); // Only 1 split meta info being written
    info.write(outMeta);
    outMeta.close();
}
From source file: org.apache.tez.runtime.library.common.shuffle.orderedgrouped.TestMergeManager.java
License: Apache License
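In this test helper, outStream.getPos() is taken before each partition's IFile.Writer is created and becomes the start offset of the corresponding TezIndexRecord.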
private SrcFileInfo createFile(Configuration conf, FileSystem fs, Path path, int numPartitions,
        int numKeysPerPartition, int startKey) throws IOException {
    FSDataOutputStream outStream = fs.create(path);
    int currentKey = startKey;
    SrcFileInfo srcFileInfo = new SrcFileInfo();
    srcFileInfo.indexedRecords = new TezIndexRecord[numPartitions];
    srcFileInfo.path = path;
    for (int i = 0; i < numPartitions; i++) {
        long pos = outStream.getPos();
        IFile.Writer writer = new IFile.Writer(conf, outStream, IntWritable.class, IntWritable.class, null,
                null, null);
        for (int j = 0; j < numKeysPerPartition; j++) {
            writer.append(new IntWritable(currentKey), new IntWritable(currentKey));
            currentKey++;
        }
        writer.close();
        srcFileInfo.indexedRecords[i] = new TezIndexRecord(pos, writer.getRawLength(),
                writer.getCompressedLength());
    }
    outStream.close();
    return srcFileInfo;
}
From source file: org.apache.tez.runtime.library.common.sort.impl.dflt.DefaultSorter.java
License: Apache License
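This newer variant of DefaultSorter follows the same pattern: getPos() marks each partition's segmentStart, which is stored in the spill index together with the raw and compressed lengths.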
protected void spill(int mstart, int mend) throws IOException, InterruptedException {
    //approximate the length of the output file to be the length of the
    //buffer + header lengths for the partitions
    final long size = (bufend >= bufstart ? bufend - bufstart : (bufvoid - bufend) + bufstart)
            + partitions * APPROX_HEADER_LENGTH;
    FSDataOutputStream out = null;
    try {
        // create spill file
        final TezSpillRecord spillRec = new TezSpillRecord(partitions);
        final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size);
        spillFilePaths.put(numSpills, filename);
        out = rfs.create(filename);

        int spindex = mstart;
        final InMemValBytes value = createInMemValBytes();
        boolean rle = isRLENeeded();
        for (int i = 0; i < partitions; ++i) {
            IFile.Writer writer = null;
            try {
                long segmentStart = out.getPos();
                writer = new Writer(conf, out, keyClass, valClass, codec, spilledRecordsCounter, null, rle);
                if (combiner == null) {
                    // spill directly
                    DataInputBuffer key = new DataInputBuffer();
                    while (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
                        final int kvoff = offsetFor(spindex);
                        int keystart = kvmeta.get(kvoff + KEYSTART);
                        int valstart = kvmeta.get(kvoff + VALSTART);
                        key.reset(kvbuffer, keystart, valstart - keystart);
                        getVBytesForOffset(kvoff, value);
                        writer.append(key, value);
                        ++spindex;
                    }
                } else {
                    int spstart = spindex;
                    while (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
                        ++spindex;
                    }
                    // Note: we would like to avoid the combiner if we've fewer
                    // than some threshold of records for a partition
                    if (spstart != spindex) {
                        TezRawKeyValueIterator kvIter = new MRResultIterator(spstart, spindex);
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("Running combine processor");
                        }
                        runCombineProcessor(kvIter, writer);
                    }
                }

                // close the writer
                writer.close();
                if (numSpills > 0) {
                    additionalSpillBytesWritten.increment(writer.getCompressedLength());
                    numAdditionalSpills.increment(1);
                    // Reset the value will be set during the final merge.
                    outputBytesWithOverheadCounter.setValue(0);
                } else {
                    // Set this up for the first write only. Subsequent ones will be handled in the final merge.
                    outputBytesWithOverheadCounter.increment(writer.getRawLength());
                }

                // record offsets
                final TezIndexRecord rec = new TezIndexRecord(segmentStart, writer.getRawLength(),
                        writer.getCompressedLength());
                spillRec.putIndex(rec, i);
                writer = null;
            } finally {
                if (null != writer)
                    writer.close();
            }
        }

        if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
            // create spill index file
            Path indexFilename = mapOutputFile.getSpillIndexFileForWrite(numSpills,
                    partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
            spillFileIndexPaths.put(numSpills, indexFilename);
            spillRec.writeToFile(indexFilename, conf);
        } else {
            indexCacheList.add(spillRec);
            totalIndexCacheMemory += spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
        }
        LOG.info("Finished spill " + numSpills);
        ++numSpills;
    } finally {
        if (out != null)
            out.close();
    }
}