Example usage for the org.apache.hadoop.mapreduce.lib.output MultipleOutputs constructor

Introduction

On this page you can find example usage for the org.apache.hadoop.mapreduce.lib.output MultipleOutputs constructor.

Prototype

public MultipleOutputs(TaskInputOutputContext<?, ?, KEYOUT, VALUEOUT> context) 

Document

Creates and initializes multiple outputs support; it should be instantiated in the Mapper/Reducer setup method.
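
Taken together, a complete lifecycle looks like the minimal sketch below. It is illustrative rather than drawn from the examples that follow: the ExampleReducer class and the named output "filtered" are assumptions, and the named output must be registered on the Job with MultipleOutputs.addNamedOutput before the task runs.

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

public class ExampleReducer extends Reducer<Text, Text, NullWritable, Text> {
    private MultipleOutputs<NullWritable, Text> mos;

    @Override
    protected void setup(Context context) {
        // Instantiate in setup, as the documentation above recommends.
        mos = new MultipleOutputs<NullWritable, Text>(context);
    }

    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        for (Text value : values) {
            // "filtered" must have been registered on the Job beforehand:
            // MultipleOutputs.addNamedOutput(job, "filtered",
            //         TextOutputFormat.class, NullWritable.class, Text.class);
            mos.write("filtered", NullWritable.get(), value);
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // Closing flushes and closes every underlying record writer;
        // skipping this step typically produces empty output files.
        mos.close();
    }
}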

Usage

From source file:org.bgi.flexlab.gaea.tools.mapreduce.fastqqualitycontrol.FastqQualityControlReducer.java

License:Open Source License

@Override
protected void setup(Context context) throws IOException {
    mos = new MultipleOutputs<NullWritable, Text>(context);
    Configuration conf = context.getConfiguration();
    option = new FastqQualityControlOptions();
    option.getOptionsFromHadoopConf(conf);
    filter = new FastqQualityControlFilter(option);
}
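
This snippet shows only the setup half. A MultipleOutputs instance must also be closed when the task finishes, or its record writers are never flushed; a hypothetical counterpart for this reducer (not taken from the project source) would be:

@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
    // Hypothetical counterpart to the setup above: closing the
    // MultipleOutputs flushes and closes its record writers.
    mos.close();
}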

From source file:org.bgi.flexlab.gaea.tools.mapreduce.realigner.RecalibratorContextWriter.java

License:Open Source License

@SuppressWarnings({ "rawtypes", "unchecked" })
public RecalibratorContextWriter(Context ctx, boolean multiple) {
    if (multiple)
        mos = new MultipleOutputs<NullWritable, Text>(ctx);
    this.context = ctx;
    value = new SamRecordWritable();
}
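
Because mos is created only when multiple is true, a wrapper like this presumably routes each record either through the MultipleOutputs or through the plain context. A hedged sketch of such a dispatch method follows; the method signature and the named output "recalibrator" are assumptions, not the project's actual code:

public void write(NullWritable key, SamRecordWritable record)
        throws IOException, InterruptedException {
    // Hypothetical dispatch: use the named output when multiple
    // outputs were requested, otherwise fall back to the context.
    if (mos != null)
        mos.write("recalibrator", key, record);
    else
        context.write(key, record);
}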

From source file:org.bgi.flexlab.gaea.tools.mapreduce.vcf.sort.VCFSort.java

License:Open Source License

@Override
protected void setup(Context context) throws IOException {
    Configuration conf = context.getConfiguration();
    options = new VCFSortOptions();
    options.getOptionsFromHadoopConf(conf);
    multiOutputs = options.getMultiOutputs();
    mos = new MultipleOutputs<NullWritable, VariantContextWritable>(context);
}
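
Here getMultiOutputs() evidently supplies the mapping from records to named outputs. A hedged sketch of a matching reduce method, assuming the mapping is a Map<Integer, String> keyed by chromosome id and the input key is a LongWritable (both assumptions about the project, not its actual code):

@Override
protected void reduce(LongWritable key, Iterable<VariantContextWritable> values, Context context)
        throws IOException, InterruptedException {
    // Hypothetical: pick the named output registered for this key and
    // write every variant record to it.
    String namedOutput = multiOutputs.get((int) key.get());
    for (VariantContextWritable v : values)
        mos.write(namedOutput, NullWritable.get(), v);
}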

From source file:org.rdfhdt.mrbuilder.dictionary.DictionaryReducer.java

License:Open Source License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    this.output = new MultipleOutputs<Text, NullWritable>(context);
    super.setup(context);
}

From source file:pad.InitializationMapperClique.java

License:Apache License

/**
 * Setup method of this InitializationMapperClique class.
 * Sets up the multiple outputs variable, used to write the "real" result into the special folder.
 * @param context   context of this Job.
 * @throws IOException
 * @throws InterruptedException
 */
protected void setup(Context context) throws IOException, InterruptedException {
    this.mos = new MultipleOutputs<IntWritable, IntWritable>(context);
}
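
Writing the "real" result into a special folder is what the baseOutputPath form of write provides. A minimal sketch of a matching map method, assuming the folder name result (illustrative, not from the pad source):

protected void map(IntWritable key, IntWritable value, Context context)
        throws IOException, InterruptedException {
    // Hypothetical: place this record under result/ inside the job output
    // directory, so files appear as result/part-m-00000 and so on.
    this.mos.write(key, value, "result/part");
}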

From source file:weka.distributed.hadoop.RandomizedDataChunkHadoopReducer.java

License:Open Source License

@Override
public void setup(Context context) throws IOException {
    m_mos = new MultipleOutputs<Text, Text>(context);

    Configuration conf = context.getConfiguration();

    String taskOptsS = conf.get(NUM_DATA_CHUNKS);
    String randomizeMapOpts = conf.get(RandomizedDataChunkHadoopMapper.RANDOMIZED_DATA_CHUNK_MAP_TASK_OPTIONS);
    if (taskOptsS == null || DistributedJobConfig.isEmpty(taskOptsS)) {
        throw new IOException("Number of output files/data chunks not available!!");
    }

    try {
        if (!DistributedJobConfig.isEmpty(randomizeMapOpts)) {
            String[] taskOpts = Utils.splitOptions(randomizeMapOpts);

            // name of the training ARFF header file
            String arffHeaderFileName = Utils.getOption("arff-header", taskOpts);
            if (DistributedJobConfig.isEmpty(arffHeaderFileName)) {
                throw new IOException("Can't continue without the name of the ARFF header file!");
            }
            m_trainingHeaderWithSummary = WekaClassifierHadoopMapper.loadTrainingHeader(arffHeaderFileName);
            m_trainingHeader = CSVToARFFHeaderReduceTask.stripSummaryAtts(m_trainingHeaderWithSummary);

            try {
                m_numberOfDataChunks = Integer.parseInt(taskOptsS);
                // m_instanceBuffer = new ArrayList<String>(m_numberOfDataChunks);
            } catch (NumberFormatException e) {
                throw new Exception(e);
            }

            WekaClassifierHadoopMapper.setClassIndex(taskOpts, m_trainingHeader,
                    !Utils.getFlag("dont-default-class-to-last", taskOpts));

        } else {
            throw new Exception("Can't continue without the name of the ARFF header file!");
        }

        int numClasses = 1;
        if (m_trainingHeader.classIndex() >= 0 && m_trainingHeader.classAttribute().isNominal()) {
            numClasses = m_trainingHeader.classAttribute().numValues();

            // only need the instances buffer if the class is nominal
            m_classInstancesBuffer = new ArrayList<List<String>>();
            for (int i = 0; i < numClasses; i++) {
                m_classInstancesBuffer.add(new ArrayList<String>());
            }
        }
        m_countsPerClass = new int[numClasses];

    } catch (Exception ex) {
        throw new IOException(ex);
    }
}
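
The per-class buffers built here are presumably drained into the numbered data-chunk outputs later in the task. A hedged sketch of that step, assuming named outputs chunk0 through chunkN-1 and round-robin assignment (assumptions for illustration, not weka's actual code):

private void flushClassBuffer(List<String> instances) throws IOException, InterruptedException {
    // Hypothetical: spread one class's buffered instances evenly over
    // the m_numberOfDataChunks named outputs ("chunk0", "chunk1", ...).
    int chunk = 0;
    for (String instance : instances) {
        m_mos.write("chunk" + chunk, new Text(instance), new Text(""));
        chunk = (chunk + 1) % m_numberOfDataChunks;
    }
}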