Example usage for org.apache.hadoop.mapreduce OutputFormat getRecordWriter

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce OutputFormat getRecordWriter.

Prototype

public abstract RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException;

Document

Get the RecordWriter for the given task.
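
Before the examples, here is a minimal sketch of the pattern they share: instantiate the job's configured OutputFormat (via ReflectionUtils, so setConf is honored for Configurable classes), ask it for the task's RecordWriter, write records, and close the writer with the same TaskAttemptContext. The class name RecordWriterSketch, the method writeOne, and the Text/NullWritable key-value types are illustrative assumptions, not taken from the examples below.

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.util.ReflectionUtils;

public class RecordWriterSketch {

    @SuppressWarnings("unchecked")
    public static void writeOne(TaskAttemptContext taskContext, Text key)
            throws IOException, InterruptedException, ClassNotFoundException {
        // Instantiate the OutputFormat declared in the job configuration.
        // ReflectionUtils.newInstance also calls setConf(...) if the class is Configurable.
        OutputFormat<Text, NullWritable> outputFormat = (OutputFormat<Text, NullWritable>) ReflectionUtils
                .newInstance(taskContext.getOutputFormatClass(), taskContext.getConfiguration());

        // Get the RecordWriter for this task attempt.
        RecordWriter<Text, NullWritable> writer = outputFormat.getRecordWriter(taskContext);
        try {
            writer.write(key, NullWritable.get());
        } finally {
            // Close with the same TaskAttemptContext that produced the writer.
            writer.close(taskContext);
        }
    }
}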

Usage

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java

License:Apache License

@SuppressWarnings("unchecked")
private synchronized RecordWriter getRecordWriter(String namedOutput) throws IOException, InterruptedException {
    // look for record-writer in the cache
    RecordWriter writer = recordWriters.get(namedOutput);

    // If not in cache, create a new one
    if (writer == null) {
        // get the record writer from context output format
        TaskAttemptContext taskContext = getContext(namedOutput);

        Class<? extends OutputFormat<?, ?>> outputFormatClass;
        try {
            outputFormatClass = taskContext.getOutputFormatClass();
        } catch (ClassNotFoundException e) {
            throw new IOException(e);
        }

        ClassLoader outputFormatClassLoader = outputFormatClass.getClassLoader();
        // This is needed in case the OutputFormat's classloader conflicts with the program classloader (for example,
        // TableOutputFormat).
        ClassLoader oldClassLoader = ClassLoaders.setContextClassLoader(outputFormatClassLoader);

        try {
            // We use ReflectionUtils to instantiate the OutputFormat, because it also calls setConf on the object, if it
            // is a org.apache.hadoop.conf.Configurable.
            OutputFormat<?, ?> outputFormat = ReflectionUtils.newInstance(outputFormatClass,
                    taskContext.getConfiguration());
            writer = new MeteredRecordWriter<>(outputFormat.getRecordWriter(taskContext), context);
        } finally {
            ClassLoaders.setContextClassLoader(oldClassLoader);
        }

        // add the record-writer to the cache
        recordWriters.put(namedOutput, writer);
    }
    return writer;
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputsMainOutputWrapper.java

License:Apache License

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    OutputFormat<K, V> rootOutputFormat = getRootOutputFormat(job);
    return rootOutputFormat.getRecordWriter(job);
}

From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java

License:Apache License

@SuppressWarnings({ "rawtypes", "unchecked" })
private void runMap(Job job, KeyValueSorter<?, ?> sorter)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = job.getConfiguration();
    InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
    List<InputSplit> splits = input.getSplits(job);
    int serial = 1;
    for (InputSplit split : splits) {
        TaskAttemptID id = newTaskAttemptId(newMapTaskId(job.getJobID(), serial++), 0);
        Mapper<?, ?, ?, ?> mapper = ReflectionUtils.newInstance(job.getMapperClass(), conf);
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("starting mapper: {0}@{1} ({2}bytes)", //$NON-NLS-1$
                    mapper.getClass().getName(), id, split.getLength()));
        }
        TaskAttemptContext context = newTaskAttemptContext(conf, id);
        // always obtain a new OutputFormat object, because OutputFormat.getOutputCommitter() may be cached
        OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf);
        OutputCommitter committer = output.getOutputCommitter(context);
        committer.setupTask(context);
        boolean succeed = false;
        try (RecordReader<?, ?> reader = input.createRecordReader(split, newTaskAttemptContext(conf, id))) {
            RecordWriter<?, ?> writer;
            if (sorter != null) {
                writer = new ShuffleWriter(sorter);
            } else {
                writer = output.getRecordWriter(newTaskAttemptContext(conf, id));
            }
            try {
                Mapper.Context c = newMapperContext(conf, id, reader, writer, committer, split);
                reader.initialize(split, c);
                mapper.run(c);
            } finally {
                writer.close(newTaskAttemptContext(conf, id));
            }
            doCommitTask(context, committer);
            succeed = true;
        } finally {
            if (succeed == false) {
                doAbortTask(context, committer);
            }
        }
    }
}

From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java

License:Apache License

@SuppressWarnings({ "unchecked", "rawtypes" })
private void runReduce(Job job, KeyValueSorter<?, ?> sorter)
        throws ClassNotFoundException, IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf);
    TaskAttemptID id = newTaskAttemptId(newReduceTaskId(job.getJobID(), 1), 0);
    Reducer<?, ?, ?, ?> reducer = ReflectionUtils.newInstance(job.getReducerClass(), conf);
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format("starting reducer: {0}@{1} ({2}records, {3}bytes)", //$NON-NLS-1$
                reducer.getClass().getName(), id, sorter.getRecordCount(), sorter.getSizeInBytes()));
    }
    TaskAttemptContext context = newTaskAttemptContext(conf, id);
    OutputCommitter committer = output.getOutputCommitter(context);
    committer.setupTask(context);
    boolean succeed = false;
    try {
        ShuffleReader reader = new ShuffleReader(sorter, new Progress());
        try {
            RecordWriter<?, ?> writer = output.getRecordWriter(newTaskAttemptContext(conf, id));
            try {
                Reducer.Context c = newReducerContext(conf, id, reader, sorter.getKeyClass(),
                        sorter.getValueClass(), writer, committer, (RawComparator) job.getGroupingComparator());
                reducer.run(c);
            } finally {
                writer.close(newTaskAttemptContext(conf, id));
            }
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                LOG.warn(MessageFormat.format("error occurred while reducer mapper input: {0} ({1})", id,
                        job.getJobName()), e);
            }
        }
        doCommitTask(context, committer);
        succeed = true;
    } finally {
        if (succeed == false) {
            doAbortTask(context, committer);
        }
    }
}

From source file:com.asakusafw.runtime.stage.output.StageOutputDriver.java

License:Apache License

private ResultOutput<?> buildNormalSink(String name,
        @SuppressWarnings("rawtypes") Class<? extends OutputFormat> formatClass, Class<?> keyClass,
        Class<?> valueClass, List<Counter> counters) throws IOException, InterruptedException {
    assert context != null;
    assert name != null;
    assert formatClass != null;
    assert keyClass != null;
    assert valueClass != null;
    assert counters != null;
    Job job = JobCompatibility.newJob(context.getConfiguration());
    job.setOutputFormatClass(formatClass);
    job.setOutputKeyClass(keyClass);
    job.setOutputValueClass(valueClass);
    TaskAttemptContext localContext = JobCompatibility.newTaskAttemptContext(job.getConfiguration(),
            context.getTaskAttemptID());
    if (FileOutputFormat.class.isAssignableFrom(formatClass)) {
        setOutputFilePrefix(localContext, name);
    }
    OutputFormat<?, ?> format = ReflectionUtils.newInstance(formatClass, localContext.getConfiguration());
    RecordWriter<?, ?> writer = format.getRecordWriter(localContext);
    return new ResultOutput<Writable>(localContext, writer);
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.PangoolMultipleOutputs.java

License:Apache License

public synchronized RecordWriter getRecordWriter(String baseFileName) throws IOException, InterruptedException {

    // Look for record-writer in the cache
    OutputContext context = outputContexts.get(baseFileName);

    // If not in cache, create a new one
    if (context == null) {

        context = new OutputContext();

        OutputFormat mainOutputFormat;

        try {
            mainOutputFormat = ((OutputFormat) ReflectionUtils.newInstance(this.context.getOutputFormatClass(),
                    this.context.getConfiguration()));
        } catch (ClassNotFoundException e1) {
            throw new RuntimeException(e1);
        }

        ProxyOutputCommitter baseOutputCommitter = ((ProxyOutputCommitter) mainOutputFormat
                .getOutputCommitter(this.context));

        // The trick is to create a new Job for each output
        Job job = new Job(this.context.getConfiguration());
        job.setOutputKeyClass(getNamedOutputKeyClass(this.context, baseFileName));
        job.setOutputValueClass(getNamedOutputValueClass(this.context, baseFileName));
        // Check possible specific context for the output
        setSpecificNamedOutputContext(this.context.getConfiguration(), job, baseFileName);
        TaskAttemptContext taskContext;
        try {
            taskContext = TaskAttemptContextFactory.get(job.getConfiguration(),
                    this.context.getTaskAttemptID());
        } catch (Exception e) {
            throw new IOException(e);
        }

        // First we change the output dir for the new OutputFormat that we will
        // create
        // We put it inside the main output work path -> in case the Job fails,
        // everything will be discarded
        taskContext.getConfiguration().set("mapred.output.dir",
                baseOutputCommitter.getBaseDir() + "/" + baseFileName);
        // This is for Hadoop 2.0 :
        taskContext.getConfiguration().set("mapreduce.output.fileoutputformat.outputdir",
                baseOutputCommitter.getBaseDir() + "/" + baseFileName);
        context.taskAttemptContext = taskContext;

        // Load the OutputFormat instance
        OutputFormat outputFormat = InstancesDistributor.loadInstance(
                context.taskAttemptContext.getConfiguration(), OutputFormat.class,
                getNamedOutputFormatInstanceFile(this.context, baseFileName), true);
        // We have to create a JobContext for meeting the contract of the
        // OutputFormat
        JobContext jobContext;
        try {
            jobContext = JobContextFactory.get(taskContext.getConfiguration(), taskContext.getJobID());
        } catch (Exception e) {
            throw new IOException(e);
        }

        context.jobContext = jobContext;
        // The contract of the OutputFormat is to check the output specs
        outputFormat.checkOutputSpecs(jobContext);
        // We get the output committer so we can call it later
        context.outputCommitter = outputFormat.getOutputCommitter(taskContext);
        // Save the RecordWriter to cache it
        context.recordWriter = outputFormat.getRecordWriter(taskContext);

        // if counters are enabled, wrap the writer with context
        // to increment counters
        if (countersEnabled) {
            context.recordWriter = new RecordWriterWithCounter(context.recordWriter, baseFileName,
                    this.context);
        }

        outputContexts.put(baseFileName, context);
    }
    return context.recordWriter;
}

From source file:com.marklogic.contentpump.LocalJobRunner.java

License:Apache License

/**
 * Run the job.  Get the input splits, create map tasks and submit them to
 * the thread pool if there is one; otherwise, run the tasks one by
 * one.
 * 
 * @param <INKEY>
 * @param <INVALUE>
 * @param <OUTKEY>
 * @param <OUTVALUE>
 * @throws Exception
 */
@SuppressWarnings("unchecked")
public <INKEY, INVALUE, OUTKEY, OUTVALUE, T extends org.apache.hadoop.mapreduce.InputSplit> void run()
        throws Exception {
    Configuration conf = job.getConfiguration();
    InputFormat<INKEY, INVALUE> inputFormat = (InputFormat<INKEY, INVALUE>) ReflectionUtils
            .newInstance(job.getInputFormatClass(), conf);
    List<InputSplit> splits = inputFormat.getSplits(job);
    T[] array = (T[]) splits.toArray(new org.apache.hadoop.mapreduce.InputSplit[splits.size()]);

    // sort the splits into order based on size, so that the biggest
    // goes first
    Arrays.sort(array, new SplitLengthComparator());
    OutputFormat<OUTKEY, OUTVALUE> outputFormat = (OutputFormat<OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(job.getOutputFormatClass(), conf);
    Class<? extends Mapper<?, ?, ?, ?>> mapperClass = job.getMapperClass();
    Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE> mapper = (Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(mapperClass, conf);
    try {
        outputFormat.checkOutputSpecs(job);
    } catch (Exception ex) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Error checking output specification: ", ex);
        } else {
            LOG.error("Error checking output specification: ");
            LOG.error(ex.getMessage());
        }
        return;
    }
    conf = job.getConfiguration();
    progress = new AtomicInteger[splits.size()];
    for (int i = 0; i < splits.size(); i++) {
        progress[i] = new AtomicInteger();
    }
    Monitor monitor = new Monitor();
    monitor.start();
    reporter = new ContentPumpReporter();
    List<Future<Object>> taskList = new ArrayList<Future<Object>>();
    for (int i = 0; i < array.length; i++) {
        InputSplit split = array[i];
        if (pool != null) {
            LocalMapTask<INKEY, INVALUE, OUTKEY, OUTVALUE> task = new LocalMapTask<INKEY, INVALUE, OUTKEY, OUTVALUE>(
                    inputFormat, outputFormat, conf, i, split, reporter, progress[i]);
            availableThreads = assignThreads(i, array.length);
            Class<? extends Mapper<?, ?, ?, ?>> runtimeMapperClass = job.getMapperClass();
            if (availableThreads > 1 && availableThreads != threadsPerSplit) {
                // possible runtime adjustment
                if (runtimeMapperClass != (Class) MultithreadedMapper.class) {
                    runtimeMapperClass = (Class<? extends Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>>) cmd
                            .getRuntimeMapperClass(job, mapperClass, threadsPerSplit, availableThreads);
                }
                if (runtimeMapperClass != mapperClass) {
                    task.setMapperClass(runtimeMapperClass);
                }
                if (runtimeMapperClass == (Class) MultithreadedMapper.class) {
                    task.setThreadCount(availableThreads);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Thread Count for Split#" + i + " : " + availableThreads);
                    }
                }
            }

            if (runtimeMapperClass == (Class) MultithreadedMapper.class) {
                synchronized (pool) {
                    taskList.add(pool.submit(task));
                    pool.wait();
                }
            } else {
                pool.submit(task);
            }
        } else { // single-threaded
            JobID jid = new JobID();
            TaskID taskId = new TaskID(jid.getJtIdentifier(), jid.getId(), TaskType.MAP, i);
            TaskAttemptID taskAttemptId = new TaskAttemptID(taskId, 0);
            TaskAttemptContext context = ReflectionUtil.createTaskAttemptContext(conf, taskAttemptId);
            RecordReader<INKEY, INVALUE> reader = inputFormat.createRecordReader(split, context);
            RecordWriter<OUTKEY, OUTVALUE> writer = outputFormat.getRecordWriter(context);
            OutputCommitter committer = outputFormat.getOutputCommitter(context);
            TrackingRecordReader trackingReader = new TrackingRecordReader(reader, progress[i]);

            Mapper.Context mapperContext = ReflectionUtil.createMapperContext(mapper, conf, taskAttemptId,
                    trackingReader, writer, committer, reporter, split);

            trackingReader.initialize(split, mapperContext);

            // no thread pool (only 1 thread specified)
            Class<? extends Mapper<?, ?, ?, ?>> mapClass = job.getMapperClass();
            mapperContext.getConfiguration().setClass(CONF_MAPREDUCE_JOB_MAP_CLASS, mapClass, Mapper.class);
            mapper = (Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils.newInstance(mapClass,
                    mapperContext.getConfiguration());
            mapper.run(mapperContext);
            trackingReader.close();
            writer.close(mapperContext);
            committer.commitTask(context);
        }
    }
    // wait till all tasks are done
    if (pool != null) {
        for (Future<Object> f : taskList) {
            f.get();
        }
        pool.shutdown();
        while (!pool.awaitTermination(1, TimeUnit.DAYS))
            ;
        jobComplete.set(true);
    }
    monitor.interrupt();
    monitor.join(1000);

    // report counters
    Iterator<CounterGroup> groupIt = reporter.counters.iterator();
    while (groupIt.hasNext()) {
        CounterGroup group = groupIt.next();
        LOG.info(group.getDisplayName() + ": ");
        Iterator<Counter> counterIt = group.iterator();
        while (counterIt.hasNext()) {
            Counter counter = counterIt.next();
            LOG.info(counter.getDisplayName() + ": " + counter.getValue());
        }
    }
    LOG.info("Total execution time: " + (System.currentTimeMillis() - startTime) / 1000 + " sec");
}

From source file:com.scaleoutsoftware.soss.hserver.hadoop.MapperWrapperMapreduce.java

License:Apache License

/**
 * Runs the mapper for a single split.
 *
 * @param mapOutputAccumulator mapOutputAccumulator to use
 * @param split    split to run on
 */

@Override
@SuppressWarnings("unchecked")
public void runSplit(MapOutputAccumulator<OUTKEY, OUTVALUE> mapOutputAccumulator, Object split, int splitIndex)
        throws IOException, ClassNotFoundException, InterruptedException {

    TaskAttemptID taskAttemptId = hadoopVersionSpecificCode.createTaskAttemptId(jobId, true, splitIndex);
    //Setup task ID info
    TaskAttemptContext taskContext = hadoopVersionSpecificCode.createTaskAttemptContext(configuration,
            taskAttemptId);

    InputFormat inputFormat = ReflectionUtils.newInstance(jobContext.getInputFormatClass(), configuration);

    //Create RecordReader
    org.apache.hadoop.mapreduce.RecordReader<INKEY, INVALUE> input = inputFormat
            .createRecordReader((InputSplit) split, taskContext);

    //Make a mapper
    org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE> mapper;
    try {
        mapper = (org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) mapperConstructor
                .newInstance();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    org.apache.hadoop.mapreduce.RecordWriter output;
    OutputCommitter committer = null;
    if (mapOnlyJob) {
        OutputFormat outputFormat = ReflectionUtils.newInstance(jobContext.getOutputFormatClass(),
                configuration);
        output = (org.apache.hadoop.mapreduce.RecordWriter<OUTKEY, OUTVALUE>) outputFormat
                .getRecordWriter(taskContext);
        committer = outputFormat.getOutputCommitter(taskContext);
        committer.setupTask(taskContext);
    } else {
        output = new MapOutputCollector<OUTKEY, OUTVALUE>(mapOutputAccumulator);
    }

    input.initialize((InputSplit) split, taskContext);

    org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>.Context mapperContext = hadoopVersionSpecificCode
            .getMapperContext(configuration, taskAttemptId, input, output);
    mapper.run(mapperContext);

    input.close();

    output.close(mapperContext);

    if (mapOnlyJob && committer != null) {
        committer.commitTask(taskContext);
    }
}

From source file:com.scaleoutsoftware.soss.hserver.hadoop.ReducerWrapperMapreduce.java

License:Apache License

public ReducerWrapperMapreduce(HServerInvocationParameters invocationParameters, int hadoopPartition, int appId,
        int region, boolean sort) throws IOException, ClassNotFoundException, InterruptedException {
    this.invocationParameters = invocationParameters;
    Configuration configuration = (Configuration) invocationParameters.getConfiguration();
    hadoopVersionSpecificCode = HadoopVersionSpecificCode.getInstance(invocationParameters.getHadoopVersion(),
            configuration);
    JobID jobID = (JobID) invocationParameters.getJobId();

    //Setup task ID info
    TaskAttemptID id = hadoopVersionSpecificCode.createTaskAttemptId(jobID, false, hadoopPartition);
    JobContext jobContext = hadoopVersionSpecificCode.createJobContext(new JobConf(configuration), jobID);
    taskContext = hadoopVersionSpecificCode.createTaskAttemptContext(configuration, id);

    reducer = (org.apache.hadoop.mapreduce.Reducer<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(jobContext.getReducerClass(), configuration);

    OutputFormat outputFormat = ReflectionUtils.newInstance(jobContext.getOutputFormatClass(), configuration);

    recordWriter = (org.apache.hadoop.mapreduce.RecordWriter<OUTKEY, OUTVALUE>) outputFormat
            .getRecordWriter(taskContext);

    committer = outputFormat.getOutputCommitter(taskContext);
    committer.setupTask(taskContext);

    Class<INKEY> keyClass = (Class<INKEY>) jobContext.getMapOutputKeyClass();
    WritableSerializerDeserializer<INKEY> firstKeySerializer = new WritableSerializerDeserializer<INKEY>(
            keyClass, null);
    WritableSerializerDeserializer<INKEY> secondKeySerializer = new WritableSerializerDeserializer<INKEY>(
            keyClass, null);
    Class<INVALUE> valueClass = (Class<INVALUE>) jobContext.getMapOutputValueClass();
    WritableSerializerDeserializer<INVALUE> valueSerializer = new WritableSerializerDeserializer<INVALUE>(
            valueClass, null);

    DataGridReaderParameters<INKEY, INVALUE> params = new DataGridReaderParameters<INKEY, INVALUE>(region,
            appId, HServerParameters.getSetting(REDUCE_USEMEMORYMAPPEDFILES, configuration) > 0,
            firstKeySerializer, valueSerializer, invocationParameters.getSerializationMode(),
            secondKeySerializer, keyClass, valueClass, sort,
            HServerParameters.getSetting(REDUCE_CHUNKSTOREADAHEAD, configuration),
            1024 * HServerParameters.getSetting(REDUCE_INPUTCHUNKSIZE_KB, configuration),
            HServerParameters.getSetting(REDUCE_CHUNKREADTIMEOUT, configuration));
    DataGridChunkedCollectionReader<INKEY, INVALUE> transport = DataGridChunkedCollectionReader
            .getGridReader(params);

    context = hadoopVersionSpecificCode.getReducerContext(configuration, id, committer, recordWriter, transport,
            null);

}

From source file:com.yahoo.glimmer.indexing.generator.IndexRecordWriterTest.java

License:Open Source License

@Test
public void test() throws Exception {
    context.checking(new Expectations() {
        {
            allowing(taskContext).getConfiguration();
            will(returnValue(conf));
            allowing(taskContext).getTaskAttemptID();
            will(returnValue(taskAttemptID));
        }
    });
    OutputFormat outputFormat = new IndexRecordWriter.OutputFormat();

    conf.setStrings("RdfFieldNames", "index0", "index1");
    conf.setEnum("IndexType", RDFDocumentFactory.IndexType.VERTICAL);

    RecordWriter<IntWritable, IndexRecordWriterValue> recordWriter = outputFormat.getRecordWriter(taskContext);

    IntWritable key = new IntWritable();
    IndexRecordWriterTermValue termValue = new IndexRecordWriterTermValue();
    IndexRecordWriterDocValue docValue = new IndexRecordWriterDocValue();
    IndexRecordWriterSizeValue sizeValue = new IndexRecordWriterSizeValue();

    // ALIGNMENT_INDEX
    key.set(DocumentMapper.ALIGNMENT_INDEX);
    termValue.setTerm("term1");
    termValue.setTermFrequency(1);
    // The alignment index doesn't have positions/counts.
    termValue.setOccurrenceCount(0);
    termValue.setSumOfMaxTermPositions(0);
    recordWriter.write(key, termValue);
    docValue.setDocument(0); // term1 occurs in index 0
    recordWriter.write(key, docValue);

    // Index 0
    key.set(0);
    termValue.setTermFrequency(3);
    termValue.setOccurrenceCount(6);
    termValue.setSumOfMaxTermPositions(15 + 12 + 18);
    recordWriter.write(key, termValue);
    docValue.setDocument(3);
    docValue.clearOccerrences();
    docValue.addOccurrence(11);
    docValue.addOccurrence(15);
    recordWriter.write(key, docValue);
    docValue.setDocument(4);
    docValue.clearOccerrences();
    docValue.addOccurrence(12);
    recordWriter.write(key, docValue);
    docValue.setDocument(7);
    docValue.clearOccerrences();
    docValue.addOccurrence(14);
    docValue.addOccurrence(17);
    docValue.addOccurrence(18);
    recordWriter.write(key, docValue);

    // ALIGNMENT_INDEX
    key.set(DocumentMapper.ALIGNMENT_INDEX);
    termValue.setTerm("term2");
    termValue.setTermFrequency(2);
    // The alignment index doesn't have positions/counts.
    termValue.setOccurrenceCount(0);
    termValue.setSumOfMaxTermPositions(0);
    recordWriter.write(key, termValue);
    docValue.clearOccerrences();
    docValue.setDocument(0); // term2 occurs in index 0 & 1
    recordWriter.write(key, docValue);
    docValue.setDocument(1); // term2 occurs in index 0 & 1
    recordWriter.write(key, docValue);

    // Index 0
    key.set(0);
    termValue.setTermFrequency(2);
    termValue.setOccurrenceCount(4);
    termValue.setSumOfMaxTermPositions(19 + 16);
    recordWriter.write(key, termValue);

    docValue.setDocument(1);
    docValue.clearOccerrences();
    docValue.addOccurrence(10);
    docValue.addOccurrence(19);
    recordWriter.write(key, docValue);
    docValue.setDocument(7);
    docValue.clearOccerrences();
    docValue.addOccurrence(13);
    docValue.addOccurrence(16);
    recordWriter.write(key, docValue);

    // Index 1
    key.set(1);
    termValue.setTermFrequency(1);
    termValue.setOccurrenceCount(1);
    termValue.setSumOfMaxTermPositions(14);
    recordWriter.write(key, termValue);
    docValue.setDocument(1);
    docValue.clearOccerrences();
    docValue.addOccurrence(14);
    recordWriter.write(key, docValue);

    // ALIGNMENT_INDEX 
    key.set(DocumentMapper.ALIGNMENT_INDEX);
    termValue.setTerm("term3");
    termValue.setTermFrequency(1);
    // The alignment index doesn't have positions/counts.
    termValue.setOccurrenceCount(0);
    termValue.setSumOfMaxTermPositions(0);
    recordWriter.write(key, termValue);
    docValue.setDocument(1); // term3 occurs in index 1
    recordWriter.write(key, docValue);
    docValue.clearOccerrences();

    // Index 1
    key.set(1);
    termValue.setTermFrequency(1);
    termValue.setOccurrenceCount(2);
    termValue.setSumOfMaxTermPositions(11);
    recordWriter.write(key, termValue);
    docValue.setDocument(3);
    docValue.clearOccerrences();
    docValue.addOccurrence(10);
    docValue.addOccurrence(11);
    recordWriter.write(key, docValue);

    // Doc Sizes.
    key.set(0);
    sizeValue.setDocument(0);
    sizeValue.setSize(3);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(3);
    sizeValue.setSize(1);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(4);
    sizeValue.setSize(10);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(6);
    sizeValue.setSize(2);
    recordWriter.write(key, sizeValue);

    key.set(1);
    sizeValue.setDocument(3);
    sizeValue.setSize(3);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(6);
    sizeValue.setSize(5);
    recordWriter.write(key, sizeValue);

    recordWriter.close(taskContext);

    // Check the written indexes..

    Path workPath = outputFormat.getDefaultWorkFile(taskContext, "");
    System.out.println("Default work file is " + workPath.toString());
    String dir = workPath.toUri().getPath();
    BitStreamIndex index0 = (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/index0", true, true);
    assertEquals(8, index0.numberOfDocuments);
    assertEquals(2, index0.numberOfTerms);
    assertTrue(index0.hasPositions);
    // term1
    checkOccurrences(index0.documents(0), 3, "(3:11,15) (4:12) (7:14,17,18)");
    // term2
    checkOccurrences(index0.documents(1), 2, "(1:10,19) (7:13,16)");
    assertEquals("[3, 0, 0, 1, 10, 0, 2, 0]", index0.sizes.toString());

    BitStreamIndex index1 = (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/index1", true, true);
    assertEquals(8, index1.numberOfDocuments);
    assertEquals(2, index1.numberOfTerms);
    assertTrue(index0.hasPositions);
    checkOccurrences(index1.documents(0), 1, "(1:14)");
    // term3
    checkOccurrences(index1.documents(1), 1, "(3:10,11)");

    BitStreamIndex indexAlignment = (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/alignment", true);
    assertEquals(8, indexAlignment.numberOfDocuments);
    assertEquals(3, indexAlignment.numberOfTerms);
    assertFalse(indexAlignment.hasPositions);
    // term1
    assertEquals(1, indexAlignment.documents(0).frequency());
    // term2
    assertEquals(2, indexAlignment.documents(1).frequency());
    // term3
    assertEquals(1, indexAlignment.documents(2).frequency());
    assertEquals("[0, 0, 0, 3, 0, 0, 5, 0]", index1.sizes.toString());
}