List of usage examples for org.apache.hadoop.mapreduce.OutputFormat#getRecordWriter
public abstract RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException;
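Before the project-specific examples, it helps to see the shape of the contract itself: the framework instantiates the OutputFormat, calls getRecordWriter(TaskAttemptContext) once per task attempt, feeds key/value pairs to the returned RecordWriter, and closes the writer when the task finishes. The following is a minimal sketch of an implementation; the class name PlainTextOutputFormat and the tab-separated formatting are illustrative assumptions, not code from any of the projects listed below.

import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Illustrative sketch: writes each key/value pair as a tab-separated line.
public class PlainTextOutputFormat<K, V> extends FileOutputFormat<K, V> {

    @Override
    public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
            throws IOException, InterruptedException {
        // FileOutputFormat supplies a per-task work file; the framework's
        // OutputCommitter later promotes it to the final output directory.
        Path file = getDefaultWorkFile(context, ".txt");
        FSDataOutputStream out = file.getFileSystem(context.getConfiguration()).create(file, false);
        return new RecordWriter<K, V>() {
            @Override
            public void write(K key, V value) throws IOException {
                // Relies on the key/value toString() representations.
                out.writeBytes(key + "\t" + value + "\n");
            }

            @Override
            public void close(TaskAttemptContext ctx) throws IOException {
                out.close();
            }
        };
    }
}

A job would select this format with job.setOutputFormatClass(PlainTextOutputFormat.class); note that most of the examples below sit on the other side of the contract, obtaining the writer reflectively from a job-configured OutputFormat rather than implementing one.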
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java
License:Apache License
@SuppressWarnings("unchecked") private synchronized RecordWriter getRecordWriter(String namedOutput) throws IOException, InterruptedException { // look for record-writer in the cache RecordWriter writer = recordWriters.get(namedOutput); // If not in cache, create a new one if (writer == null) { // get the record writer from context output format TaskAttemptContext taskContext = getContext(namedOutput); Class<? extends OutputFormat<?, ?>> outputFormatClass; try {//from w w w . ja v a 2 s .c om outputFormatClass = taskContext.getOutputFormatClass(); } catch (ClassNotFoundException e) { throw new IOException(e); } ClassLoader outputFormatClassLoader = outputFormatClass.getClassLoader(); // This is needed in case the OutputFormat's classloader conflicts with the program classloader (for example, // TableOutputFormat). ClassLoader oldClassLoader = ClassLoaders.setContextClassLoader(outputFormatClassLoader); try { // We use ReflectionUtils to instantiate the OutputFormat, because it also calls setConf on the object, if it // is a org.apache.hadoop.conf.Configurable. OutputFormat<?, ?> outputFormat = ReflectionUtils.newInstance(outputFormatClass, taskContext.getConfiguration()); writer = new MeteredRecordWriter<>(outputFormat.getRecordWriter(taskContext), context); } finally { ClassLoaders.setContextClassLoader(oldClassLoader); } // add the record-writer to the cache recordWriters.put(namedOutput, writer); } return writer; }
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputsMainOutputWrapper.java
License:Apache License
@Override public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException { OutputFormat<K, V> rootOutputFormat = getRootOutputFormat(job); return rootOutputFormat.getRecordWriter(job); }
From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java
License:Apache License
@SuppressWarnings({ "rawtypes", "unchecked" }) private void runMap(Job job, KeyValueSorter<?, ?> sorter) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = job.getConfiguration(); InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), conf); List<InputSplit> splits = input.getSplits(job); int serial = 1; for (InputSplit split : splits) { TaskAttemptID id = newTaskAttemptId(newMapTaskId(job.getJobID(), serial++), 0); Mapper<?, ?, ?, ?> mapper = ReflectionUtils.newInstance(job.getMapperClass(), conf); if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("starting mapper: {0}@{1} ({2}bytes)", //$NON-NLS-1$ mapper.getClass().getName(), id, split.getLength())); }//from ww w. j a va2 s. c om TaskAttemptContext context = newTaskAttemptContext(conf, id); // we always obtain a new OutputFormat object / OutputFormat.getOutputCommiter() may be cached OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf); OutputCommitter committer = output.getOutputCommitter(context); committer.setupTask(context); boolean succeed = false; try (RecordReader<?, ?> reader = input.createRecordReader(split, newTaskAttemptContext(conf, id))) { RecordWriter<?, ?> writer; if (sorter != null) { writer = new ShuffleWriter(sorter); } else { writer = output.getRecordWriter(newTaskAttemptContext(conf, id)); } try { Mapper.Context c = newMapperContext(conf, id, reader, writer, committer, split); reader.initialize(split, c); mapper.run(c); } finally { writer.close(newTaskAttemptContext(conf, id)); } doCommitTask(context, committer); succeed = true; } finally { if (succeed == false) { doAbortTask(context, committer); } } } }
From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java
License:Apache License
@SuppressWarnings({ "unchecked", "rawtypes" }) private void runReduce(Job job, KeyValueSorter<?, ?> sorter) throws ClassNotFoundException, IOException, InterruptedException { Configuration conf = job.getConfiguration(); OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf); TaskAttemptID id = newTaskAttemptId(newReduceTaskId(job.getJobID(), 1), 0); Reducer<?, ?, ?, ?> reducer = ReflectionUtils.newInstance(job.getReducerClass(), conf); if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("starting reducer: {0}@{1} ({2}records, {3}bytes)", //$NON-NLS-1$ reducer.getClass().getName(), id, sorter.getRecordCount(), sorter.getSizeInBytes())); }//from w w w . j av a 2 s . c o m TaskAttemptContext context = newTaskAttemptContext(conf, id); OutputCommitter committer = output.getOutputCommitter(context); committer.setupTask(context); boolean succeed = false; try { ShuffleReader reader = new ShuffleReader(sorter, new Progress()); try { RecordWriter<?, ?> writer = output.getRecordWriter(newTaskAttemptContext(conf, id)); try { Reducer.Context c = newReducerContext(conf, id, reader, sorter.getKeyClass(), sorter.getValueClass(), writer, committer, (RawComparator) job.getGroupingComparator()); reducer.run(c); } finally { writer.close(newTaskAttemptContext(conf, id)); } } finally { try { reader.close(); } catch (IOException e) { LOG.warn(MessageFormat.format("error occurred while reducer mapper input: {0} ({1})", id, job.getJobName()), e); } } doCommitTask(context, committer); succeed = true; } finally { if (succeed == false) { doAbortTask(context, committer); } } }
From source file:com.asakusafw.runtime.stage.output.StageOutputDriver.java
License:Apache License
private ResultOutput<?> buildNormalSink(String name, @SuppressWarnings("rawtypes") Class<? extends OutputFormat> formatClass, Class<?> keyClass, Class<?> valueClass, List<Counter> counters) throws IOException, InterruptedException { assert context != null; assert name != null; assert formatClass != null; assert keyClass != null; assert valueClass != null; assert counters != null; Job job = JobCompatibility.newJob(context.getConfiguration()); job.setOutputFormatClass(formatClass); job.setOutputKeyClass(keyClass);/*w w w .j a v a 2 s . c o m*/ job.setOutputValueClass(valueClass); TaskAttemptContext localContext = JobCompatibility.newTaskAttemptContext(job.getConfiguration(), context.getTaskAttemptID()); if (FileOutputFormat.class.isAssignableFrom(formatClass)) { setOutputFilePrefix(localContext, name); } OutputFormat<?, ?> format = ReflectionUtils.newInstance(formatClass, localContext.getConfiguration()); RecordWriter<?, ?> writer = format.getRecordWriter(localContext); return new ResultOutput<Writable>(localContext, writer); }
From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.PangoolMultipleOutputs.java
License:Apache License
public synchronized RecordWriter getRecordWriter(String baseFileName) throws IOException, InterruptedException {
    // Look for record-writer in the cache
    OutputContext context = outputContexts.get(baseFileName);

    // If not in cache, create a new one
    if (context == null) {
        context = new OutputContext();

        OutputFormat mainOutputFormat;
        try {
            mainOutputFormat = ((OutputFormat) ReflectionUtils.newInstance(this.context.getOutputFormatClass(),
                    this.context.getConfiguration()));
        } catch (ClassNotFoundException e1) {
            throw new RuntimeException(e1);
        }

        ProxyOutputCommitter baseOutputCommitter = ((ProxyOutputCommitter) mainOutputFormat
                .getOutputCommitter(this.context));

        // The trick is to create a new Job for each output
        Job job = new Job(this.context.getConfiguration());
        job.setOutputKeyClass(getNamedOutputKeyClass(this.context, baseFileName));
        job.setOutputValueClass(getNamedOutputValueClass(this.context, baseFileName));
        // Check possible specific context for the output
        setSpecificNamedOutputContext(this.context.getConfiguration(), job, baseFileName);
        TaskAttemptContext taskContext;
        try {
            taskContext = TaskAttemptContextFactory.get(job.getConfiguration(),
                    this.context.getTaskAttemptID());
        } catch (Exception e) {
            throw new IOException(e);
        }
        // First we change the output dir for the new OutputFormat that we will create.
        // We put it inside the main output work path -> in case the Job fails,
        // everything will be discarded
        taskContext.getConfiguration().set("mapred.output.dir",
                baseOutputCommitter.getBaseDir() + "/" + baseFileName);
        // This is for Hadoop 2.0:
        taskContext.getConfiguration().set("mapreduce.output.fileoutputformat.outputdir",
                baseOutputCommitter.getBaseDir() + "/" + baseFileName);
        context.taskAttemptContext = taskContext;

        // Load the OutputFormat instance
        OutputFormat outputFormat = InstancesDistributor.loadInstance(
                context.taskAttemptContext.getConfiguration(), OutputFormat.class,
                getNamedOutputFormatInstanceFile(this.context, baseFileName), true);
        // We have to create a JobContext for meeting the contract of the OutputFormat
        JobContext jobContext;
        try {
            jobContext = JobContextFactory.get(taskContext.getConfiguration(), taskContext.getJobID());
        } catch (Exception e) {
            throw new IOException(e);
        }
        context.jobContext = jobContext;
        // The contract of the OutputFormat is to check the output specs
        outputFormat.checkOutputSpecs(jobContext);
        // We get the output committer so we can call it later
        context.outputCommitter = outputFormat.getOutputCommitter(taskContext);
        // Save the RecordWriter to cache it
        context.recordWriter = outputFormat.getRecordWriter(taskContext);
        // if counters are enabled, wrap the writer with context to increment counters
        if (countersEnabled) {
            context.recordWriter = new RecordWriterWithCounter(context.recordWriter, baseFileName,
                    this.context);
        }
        outputContexts.put(baseFileName, context);
    }
    return context.recordWriter;
}
From source file:com.marklogic.contentpump.LocalJobRunner.java
License:Apache License
/**
 * Run the job. Get the input splits, create map tasks and submit them to
 * the thread pool if there is one; otherwise, run the tasks one by one.
 *
 * @param <INKEY>
 * @param <INVALUE>
 * @param <OUTKEY>
 * @param <OUTVALUE>
 * @throws Exception
 */
@SuppressWarnings("unchecked")
public <INKEY, INVALUE, OUTKEY, OUTVALUE, T extends org.apache.hadoop.mapreduce.InputSplit> void run()
        throws Exception {
    Configuration conf = job.getConfiguration();
    InputFormat<INKEY, INVALUE> inputFormat = (InputFormat<INKEY, INVALUE>) ReflectionUtils
            .newInstance(job.getInputFormatClass(), conf);
    List<InputSplit> splits = inputFormat.getSplits(job);
    T[] array = (T[]) splits.toArray(new org.apache.hadoop.mapreduce.InputSplit[splits.size()]);
    // sort the splits into order based on size, so that the biggest goes first
    Arrays.sort(array, new SplitLengthComparator());
    OutputFormat<OUTKEY, OUTVALUE> outputFormat = (OutputFormat<OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(job.getOutputFormatClass(), conf);
    Class<? extends Mapper<?, ?, ?, ?>> mapperClass = job.getMapperClass();
    Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE> mapper = (Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(mapperClass, conf);
    try {
        outputFormat.checkOutputSpecs(job);
    } catch (Exception ex) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Error checking output specification: ", ex);
        } else {
            LOG.error("Error checking output specification: ");
            LOG.error(ex.getMessage());
        }
        return;
    }
    conf = job.getConfiguration();
    progress = new AtomicInteger[splits.size()];
    for (int i = 0; i < splits.size(); i++) {
        progress[i] = new AtomicInteger();
    }
    Monitor monitor = new Monitor();
    monitor.start();
    reporter = new ContentPumpReporter();
    List<Future<Object>> taskList = new ArrayList<Future<Object>>();
    for (int i = 0; i < array.length; i++) {
        InputSplit split = array[i];
        if (pool != null) {
            LocalMapTask<INKEY, INVALUE, OUTKEY, OUTVALUE> task =
                    new LocalMapTask<INKEY, INVALUE, OUTKEY, OUTVALUE>(
                            inputFormat, outputFormat, conf, i, split, reporter, progress[i]);
            availableThreads = assignThreads(i, array.length);
            Class<? extends Mapper<?, ?, ?, ?>> runtimeMapperClass = job.getMapperClass();
            if (availableThreads > 1 && availableThreads != threadsPerSplit) {
                // possible runtime adjustment
                if (runtimeMapperClass != (Class) MultithreadedMapper.class) {
                    runtimeMapperClass = (Class<? extends Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>>) cmd
                            .getRuntimeMapperClass(job, mapperClass, threadsPerSplit, availableThreads);
                }
                if (runtimeMapperClass != mapperClass) {
                    task.setMapperClass(runtimeMapperClass);
                }
                if (runtimeMapperClass == (Class) MultithreadedMapper.class) {
                    task.setThreadCount(availableThreads);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Thread Count for Split#" + i + " : " + availableThreads);
                    }
                }
            }
            if (runtimeMapperClass == (Class) MultithreadedMapper.class) {
                synchronized (pool) {
                    taskList.add(pool.submit(task));
                    pool.wait();
                }
            } else {
                pool.submit(task);
            }
        } else { // single-threaded
            JobID jid = new JobID();
            TaskID taskId = new TaskID(jid.getJtIdentifier(), jid.getId(), TaskType.MAP, i);
            TaskAttemptID taskAttemptId = new TaskAttemptID(taskId, 0);
            TaskAttemptContext context = ReflectionUtil.createTaskAttemptContext(conf, taskAttemptId);
            RecordReader<INKEY, INVALUE> reader = inputFormat.createRecordReader(split, context);
            RecordWriter<OUTKEY, OUTVALUE> writer = outputFormat.getRecordWriter(context);
            OutputCommitter committer = outputFormat.getOutputCommitter(context);
            TrackingRecordReader trackingReader = new TrackingRecordReader(reader, progress[i]);
            Mapper.Context mapperContext = ReflectionUtil.createMapperContext(mapper, conf, taskAttemptId,
                    trackingReader, writer, committer, reporter, split);
            trackingReader.initialize(split, mapperContext);
            // no thread pool (only 1 thread specified)
            Class<? extends Mapper<?, ?, ?, ?>> mapClass = job.getMapperClass();
            mapperContext.getConfiguration().setClass(CONF_MAPREDUCE_JOB_MAP_CLASS, mapClass, Mapper.class);
            mapper = (Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils.newInstance(mapClass,
                    mapperContext.getConfiguration());
            mapper.run(mapperContext);
            trackingReader.close();
            writer.close(mapperContext);
            committer.commitTask(context);
        }
    }
    // wait till all tasks are done
    if (pool != null) {
        for (Future<Object> f : taskList) {
            f.get();
        }
        pool.shutdown();
        while (!pool.awaitTermination(1, TimeUnit.DAYS)) {
            // keep waiting for the pool to terminate
        }
        jobComplete.set(true);
    }
    monitor.interrupt();
    monitor.join(1000);
    // report counters
    Iterator<CounterGroup> groupIt = reporter.counters.iterator();
    while (groupIt.hasNext()) {
        CounterGroup group = groupIt.next();
        LOG.info(group.getDisplayName() + ": ");
        Iterator<Counter> counterIt = group.iterator();
        while (counterIt.hasNext()) {
            Counter counter = counterIt.next();
            LOG.info(counter.getDisplayName() + ": " + counter.getValue());
        }
    }
    LOG.info("Total execution time: " + (System.currentTimeMillis() - startTime) / 1000 + " sec");
}
From source file:com.scaleoutsoftware.soss.hserver.hadoop.MapperWrapperMapreduce.java
License:Apache License
/**
 * Runs mapper for the single split.
 *
 * @param mapOutputAccumulator mapOutputAccumulator to use
 * @param split split to run on
 */
@Override
@SuppressWarnings("unchecked")
public void runSplit(MapOutputAccumulator<OUTKEY, OUTVALUE> mapOutputAccumulator, Object split, int splitIndex)
        throws IOException, ClassNotFoundException, InterruptedException {
    // Setup task ID info
    TaskAttemptID taskAttemptId = hadoopVersionSpecificCode.createTaskAttemptId(jobId, true, splitIndex);
    TaskAttemptContext taskContext = hadoopVersionSpecificCode.createTaskAttemptContext(configuration,
            taskAttemptId);

    InputFormat inputFormat = ReflectionUtils.newInstance(jobContext.getInputFormatClass(), configuration);

    // Create RecordReader
    org.apache.hadoop.mapreduce.RecordReader<INKEY, INVALUE> input = inputFormat
            .createRecordReader((InputSplit) split, taskContext);

    // Make a mapper
    org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE> mapper;
    try {
        mapper = (org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) mapperConstructor
                .newInstance();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    org.apache.hadoop.mapreduce.RecordWriter output;
    OutputCommitter committer = null;
    if (mapOnlyJob) {
        OutputFormat outputFormat = ReflectionUtils.newInstance(jobContext.getOutputFormatClass(),
                configuration);
        output = (org.apache.hadoop.mapreduce.RecordWriter<OUTKEY, OUTVALUE>) outputFormat
                .getRecordWriter(taskContext);
        committer = outputFormat.getOutputCommitter(taskContext);
        committer.setupTask(taskContext);
    } else {
        output = new MapOutputCollector<OUTKEY, OUTVALUE>(mapOutputAccumulator);
    }

    input.initialize((InputSplit) split, taskContext);

    org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>.Context mapperContext =
            hadoopVersionSpecificCode.getMapperContext(configuration, taskAttemptId, input, output);
    mapper.run(mapperContext);

    input.close();
    output.close(mapperContext);

    if (mapOnlyJob && committer != null) {
        committer.commitTask(taskContext);
    }
}
From source file:com.scaleoutsoftware.soss.hserver.hadoop.ReducerWrapperMapreduce.java
License:Apache License
public ReducerWrapperMapreduce(HServerInvocationParameters invocationParameters, int hadoopPartition, int appId,
        int region, boolean sort) throws IOException, ClassNotFoundException, InterruptedException {
    this.invocationParameters = invocationParameters;
    Configuration configuration = (Configuration) invocationParameters.getConfiguration();
    hadoopVersionSpecificCode = HadoopVersionSpecificCode.getInstance(invocationParameters.getHadoopVersion(),
            configuration);
    JobID jobID = (JobID) invocationParameters.getJobId();

    // Setup task ID info
    TaskAttemptID id = hadoopVersionSpecificCode.createTaskAttemptId(jobID, false, hadoopPartition);
    JobContext jobContext = hadoopVersionSpecificCode.createJobContext(new JobConf(configuration), jobID);
    taskContext = hadoopVersionSpecificCode.createTaskAttemptContext(configuration, id);

    reducer = (org.apache.hadoop.mapreduce.Reducer<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(jobContext.getReducerClass(), configuration);

    OutputFormat outputFormat = ReflectionUtils.newInstance(jobContext.getOutputFormatClass(), configuration);

    recordWriter = (org.apache.hadoop.mapreduce.RecordWriter<OUTKEY, OUTVALUE>) outputFormat
            .getRecordWriter(taskContext);

    committer = outputFormat.getOutputCommitter(taskContext);
    committer.setupTask(taskContext);

    Class<INKEY> keyClass = (Class<INKEY>) jobContext.getMapOutputKeyClass();
    WritableSerializerDeserializer<INKEY> firstKeySerializer = new WritableSerializerDeserializer<INKEY>(
            keyClass, null);
    WritableSerializerDeserializer<INKEY> secondKeySerializer = new WritableSerializerDeserializer<INKEY>(
            keyClass, null);
    Class<INVALUE> valueClass = (Class<INVALUE>) jobContext.getMapOutputValueClass();
    WritableSerializerDeserializer<INVALUE> valueSerializer = new WritableSerializerDeserializer<INVALUE>(
            valueClass, null);

    DataGridReaderParameters<INKEY, INVALUE> params = new DataGridReaderParameters<INKEY, INVALUE>(region,
            appId, HServerParameters.getSetting(REDUCE_USEMEMORYMAPPEDFILES, configuration) > 0,
            firstKeySerializer, valueSerializer, invocationParameters.getSerializationMode(),
            secondKeySerializer, keyClass, valueClass, sort,
            HServerParameters.getSetting(REDUCE_CHUNKSTOREADAHEAD, configuration),
            1024 * HServerParameters.getSetting(REDUCE_INPUTCHUNKSIZE_KB, configuration),
            HServerParameters.getSetting(REDUCE_CHUNKREADTIMEOUT, configuration));
    DataGridChunkedCollectionReader<INKEY, INVALUE> transport = DataGridChunkedCollectionReader
            .getGridReader(params);

    context = hadoopVersionSpecificCode.getReducerContext(configuration, id, committer, recordWriter,
            transport, null);
}
From source file:com.yahoo.glimmer.indexing.generator.IndexRecordWriterTest.java
License:Open Source License
@Test
public void test() throws Exception {
    context.checking(new Expectations() {
        {
            allowing(taskContext).getConfiguration();
            will(returnValue(conf));
            allowing(taskContext).getTaskAttemptID();
            will(returnValue(taskAttemptID));
        }
    });
    OutputFormat outputFormat = new IndexRecordWriter.OutputFormat();

    conf.setStrings("RdfFieldNames", "index0", "index1");
    conf.setEnum("IndexType", RDFDocumentFactory.IndexType.VERTICAL);

    RecordWriter<IntWritable, IndexRecordWriterValue> recordWriter = outputFormat.getRecordWriter(taskContext);

    IntWritable key = new IntWritable();
    IndexRecordWriterTermValue termValue = new IndexRecordWriterTermValue();
    IndexRecordWriterDocValue docValue = new IndexRecordWriterDocValue();
    IndexRecordWriterSizeValue sizeValue = new IndexRecordWriterSizeValue();

    // ALIGNMENT_INDEX
    key.set(DocumentMapper.ALIGNMENT_INDEX);
    termValue.setTerm("term1");
    termValue.setTermFrequency(1);
    // The alignment index doesn't have positions/counts.
    termValue.setOccurrenceCount(0);
    termValue.setSumOfMaxTermPositions(0);
    recordWriter.write(key, termValue);
    docValue.setDocument(0); // term1 occurs in index 0
    recordWriter.write(key, docValue);

    // Index 0
    key.set(0);
    termValue.setTermFrequency(3);
    termValue.setOccurrenceCount(6);
    termValue.setSumOfMaxTermPositions(15 + 12 + 18);
    recordWriter.write(key, termValue);
    docValue.setDocument(3);
    docValue.clearOccerrences();
    docValue.addOccurrence(11);
    docValue.addOccurrence(15);
    recordWriter.write(key, docValue);
    docValue.setDocument(4);
    docValue.clearOccerrences();
    docValue.addOccurrence(12);
    recordWriter.write(key, docValue);
    docValue.setDocument(7);
    docValue.clearOccerrences();
    docValue.addOccurrence(14);
    docValue.addOccurrence(17);
    docValue.addOccurrence(18);
    recordWriter.write(key, docValue);

    // ALIGNMENT_INDEX
    key.set(DocumentMapper.ALIGNMENT_INDEX);
    termValue.setTerm("term2");
    termValue.setTermFrequency(2);
    // The alignment index doesn't have positions/counts.
    termValue.setOccurrenceCount(0);
    termValue.setSumOfMaxTermPositions(0);
    recordWriter.write(key, termValue);
    docValue.clearOccerrences();
    docValue.setDocument(0); // term2 occurs in index 0 & 1
    recordWriter.write(key, docValue);
    docValue.setDocument(1); // term2 occurs in index 0 & 1
    recordWriter.write(key, docValue);

    // Index 0
    key.set(0);
    termValue.setTermFrequency(2);
    termValue.setOccurrenceCount(4);
    termValue.setSumOfMaxTermPositions(19 + 16);
    recordWriter.write(key, termValue);
    docValue.setDocument(1);
    docValue.clearOccerrences();
    docValue.addOccurrence(10);
    docValue.addOccurrence(19);
    recordWriter.write(key, docValue);
    docValue.setDocument(7);
    docValue.clearOccerrences();
    docValue.addOccurrence(13);
    docValue.addOccurrence(16);
    recordWriter.write(key, docValue);

    // Index 1
    key.set(1);
    termValue.setTermFrequency(1);
    termValue.setOccurrenceCount(1);
    termValue.setSumOfMaxTermPositions(14);
    recordWriter.write(key, termValue);
    docValue.setDocument(1);
    docValue.clearOccerrences();
    docValue.addOccurrence(14);
    recordWriter.write(key, docValue);

    // ALIGNMENT_INDEX
    key.set(DocumentMapper.ALIGNMENT_INDEX);
    termValue.setTerm("term3");
    termValue.setTermFrequency(1);
    // The alignment index doesn't have positions/counts.
    termValue.setOccurrenceCount(0);
    termValue.setSumOfMaxTermPositions(0);
    recordWriter.write(key, termValue);
    docValue.setDocument(1); // term3 occurs in index 1
    recordWriter.write(key, docValue);
    docValue.clearOccerrences();

    // Index 1
    key.set(1);
    termValue.setTermFrequency(1);
    termValue.setOccurrenceCount(2);
    termValue.setSumOfMaxTermPositions(11);
    recordWriter.write(key, termValue);
    docValue.setDocument(3);
    docValue.clearOccerrences();
    docValue.addOccurrence(10);
    docValue.addOccurrence(11);
    recordWriter.write(key, docValue);

    // Doc Sizes.
    key.set(0);
    sizeValue.setDocument(0);
    sizeValue.setSize(3);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(3);
    sizeValue.setSize(1);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(4);
    sizeValue.setSize(10);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(6);
    sizeValue.setSize(2);
    recordWriter.write(key, sizeValue);

    key.set(1);
    sizeValue.setDocument(3);
    sizeValue.setSize(3);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(6);
    sizeValue.setSize(5);
    recordWriter.write(key, sizeValue);

    recordWriter.close(taskContext);

    // Check the written indexes..
    Path workPath = outputFormat.getDefaultWorkFile(taskContext, "");
    System.out.println("Default work file is " + workPath.toString());
    String dir = workPath.toUri().getPath();

    BitStreamIndex index0 = (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/index0", true, true);
    assertEquals(8, index0.numberOfDocuments);
    assertEquals(2, index0.numberOfTerms);
    assertTrue(index0.hasPositions);
    // term1
    checkOccurrences(index0.documents(0), 3, "(3:11,15) (4:12) (7:14,17,18)");
    // term2
    checkOccurrences(index0.documents(1), 2, "(1:10,19) (7:13,16)");
    assertEquals("[3, 0, 0, 1, 10, 0, 2, 0]", index0.sizes.toString());

    BitStreamIndex index1 = (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/index1", true, true);
    assertEquals(8, index1.numberOfDocuments);
    assertEquals(2, index1.numberOfTerms);
    assertTrue(index1.hasPositions);
    checkOccurrences(index1.documents(0), 1, "(1:14)");
    // term3
    checkOccurrences(index1.documents(1), 1, "(3:10,11)");

    BitStreamIndex indexAlignment = (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/alignment", true);
    assertEquals(8, indexAlignment.numberOfDocuments);
    assertEquals(3, indexAlignment.numberOfTerms);
    assertFalse(indexAlignment.hasPositions);
    // term1
    assertEquals(1, indexAlignment.documents(0).frequency());
    // term2
    assertEquals(2, indexAlignment.documents(1).frequency());
    // term3
    assertEquals(1, indexAlignment.documents(2).frequency());

    assertEquals("[0, 0, 0, 3, 0, 0, 5, 0]", index1.sizes.toString());
}