List of usage examples for the org.apache.hadoop.mapreduce.lib.output.MultipleOutputs constructor
public MultipleOutputs(TaskInputOutputContext<?, ?, KEYOUT, VALUEOUT> context)
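All of the examples below call this constructor inside a Mapper or Reducer setup() method. For orientation, here is a minimal sketch of the full lifecycle around that call: construct in setup(), write to a named output in reduce(), and close in cleanup() so the underlying record writers are flushed. The named output "text" and the key/value types are illustrative assumptions, not taken from any example below.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

public class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

    private MultipleOutputs<Text, LongWritable> mos;

    @Override
    protected void setup(Context context) {
        // The constructor only records the context; record writers are
        // created lazily on the first write to each named output.
        mos = new MultipleOutputs<Text, LongWritable>(context);
    }

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long sum = 0;
        for (LongWritable v : values) {
            sum += v.get();
        }
        // "text" must have been registered on the Job with addNamedOutput()
        mos.write("text", key, new LongWritable(sum));
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // Closing flushes every record writer opened by this instance.
        mos.close();
    }
}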
From source file:com.tdunning.plume.local.lazy.MSCRReducer.java
License:Apache License
protected void setup(Reducer<PlumeObject, PlumeObject, NullWritable, NullWritable>.Context context)
        throws IOException, InterruptedException {
    this.mos = new MultipleOutputs(context);
    this.mscr = MapRedExecutor.readMSCR(context.getConfiguration());
}
From source file:com.wipro.ats.bdre.dq.DQMapper.java
License:Apache License
@Override
public void setup(org.apache.hadoop.mapreduce.Mapper.Context context)
        throws IOException, InterruptedException {
    LOGGER.info("START :: DQMapper.setup(Context context)");
    Configuration conf = context.getConfiguration();
    props = getProperties.getProperties(conf.get("dq.process.id"), "dq");
    knowledgeBase = buildKnowledgeBase();
    LOGGER.debug("The value of props is " + props.toString() + "\n package name is "
            + props.getProperty("rules.package"));
    mos = new MultipleOutputs<Text, NullWritable>(context);
    goodRecords = 0;
    badRecords = 0;
}
From source file:de.tudarmstadt.ukp.dkpro.bigdata.collocations.AssocReducer.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();
    this.ngramTotal = conf.getLong(NGRAM_TOTAL, -1);
    this.minValue = conf.getFloat(MIN_VALUE, DEFAULT_MIN_VALUE);
    String assocType = conf.get(ASSOC_METRIC, DEFAULT_ASSOC);
    if (assocType.equalsIgnoreCase("llr"))
        assocCalculator = new ConcreteLLCallback();
    else if (assocType.equalsIgnoreCase("dice"))
        assocCalculator = new DiceCallback();
    else if (assocType.equalsIgnoreCase("pmi"))
        assocCalculator = new PMICallback();
    else if (assocType.equalsIgnoreCase("chi"))
        assocCalculator = new ChiSquareCallback();
    this.emitUnigrams = conf.getBoolean(CollocDriver.EMIT_UNIGRAMS, CollocDriver.DEFAULT_EMIT_UNIGRAMS);
    log.info("NGram Total: {}, Min DICE value: {}, Emit Unigrams: {}",
            new Object[] { ngramTotal, minValue, emitUnigrams });
    if (ngramTotal == -1) {
        throw new IllegalStateException("No NGRAM_TOTAL available in job config");
    }
    mos = new MultipleOutputs<Text, DoubleWritable>(context);
}
From source file:de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.deduplication.DeDuplicationTextOutputReducer.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    multipleOutputs = new MultipleOutputs<NullWritable, List<DocumentInfo>>(context);
}
From source file:edu.nyu.vida.data_polygamy.feature_identification.IndexCreationReducer.java
License:BSD License
@Override
public void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    String[] datasetNames = conf.get("dataset-name", "").split(",");
    String[] datasetIds = conf.get("dataset-id", "").split(",");
    for (int i = 0; i < datasetNames.length; i++) {
        int dt = Integer.parseInt(datasetIds[i]);
        idToDataset.put(dt, datasetNames[i]);
        String regThresholds = conf.get("regular-" + datasetIds[i], "");
        if (!regThresholds.equals("")) {
            HashMap<Integer, String> attRegThresholds = new HashMap<Integer, String>();
            for (String keyVals : regThresholds.split(",")) {
                String[] keyVal = keyVals.split("-");
                attRegThresholds.put(Integer.parseInt(keyVal[0]), keyVal[1]);
            }
            idToRegThreshold.put(dt, attRegThresholds);
        }
        String rareThresholds = conf.get("rare-" + datasetIds[i], "");
        if (!rareThresholds.equals("")) {
            HashMap<Integer, String> attRareThresholds = new HashMap<Integer, String>();
            for (String keyVals : rareThresholds.split(",")) {
                String[] keyVal = keyVals.split("-");
                attRareThresholds.put(Integer.parseInt(keyVal[0]), keyVal[1]);
            }
            idToRareThreshold.put(dt, attRareThresholds);
        }
    }
    String[] useMergeTreeStr = conf.get("use-merge-tree", "").split(",");
    for (String dt : useMergeTreeStr) {
        useMergeTree.add(dt);
    }

    out = new MultipleOutputs<AttributeResolutionWritable, TopologyTimeSeriesWritable>(context);

    String bucket = conf.get("bucket", "");
    String[] spatialResolutionArray = utils.getSpatialResolutions();
    for (int j = 0; j < spatialResolutionArray.length; j++) {
        int spatialRes = utils.spatialResolution(spatialResolutionArray[j]);
        if ((spatialRes == FrameworkUtils.NBHD) || (spatialRes == FrameworkUtils.ZIP)) {
            if (bucket.equals(""))
                s3 = false;

            // reading nodes
            Path edgesPath = null;
            if (spatialRes == FrameworkUtils.NBHD)
                edgesPath = new Path(bucket + "neighborhood-graph");
            else
                edgesPath = new Path(bucket + "zipcode-graph");
            FileSystem fs = null;
            if (s3)
                fs = FileSystem.get(edgesPath.toUri(), conf);
            else
                fs = FileSystem.get(new Configuration());
            BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(edgesPath)));
            String[] s = Utilities.splitString(reader.readLine().trim());
            if (spatialRes == FrameworkUtils.NBHD)
                nvNbhd = Integer.parseInt(s[0].trim());
            else
                nvZip = Integer.parseInt(s[0].trim());
            int ne = Integer.parseInt(s[1].trim());
            for (int i = 0; i < ne; i++) {
                s = Utilities.splitString(reader.readLine().trim());
                int v1 = Integer.parseInt(s[0].trim());
                int v2 = Integer.parseInt(s[1].trim());
                if (v1 == v2) {
                    continue;
                }
                Integer[] arr = new Integer[2];
                arr[0] = v1;
                arr[1] = v2;
                if (spatialRes == FrameworkUtils.NBHD)
                    nbhdEdges.add(arr);
                else
                    zipEdges.add(arr);
            }
            reader.close();
        }
    }
}
From source file:edu.nyu.vida.data_polygamy.relationship_computation.CorrelationReducer.java
License:BSD License
@Override
public void setup(Context context) throws IOException, InterruptedException {
    out = new MultipleOutputs<Text, Text>(context);
    conf = context.getConfiguration();
    String[] datasetIdsStr = conf.get("dataset-keys", "").split(",");
    String[] datasetNames = conf.get("dataset-names", "").split(",");
    if (datasetIdsStr.length != datasetNames.length) {
        System.out.println("Something went wrong... Number of ids should match number of datasets");
        System.exit(-1);
    }
    for (int i = 0; i < datasetIdsStr.length; i++) {
        int datasetId = Integer.parseInt(datasetIdsStr[i]);
        String[] datasetAggHeader = conf.get("dataset-" + datasetIdsStr[i] + "-agg", "").split(",");
        HashMap<Integer, String> headerTemp = new HashMap<Integer, String>();
        for (int j = 0; j < datasetAggHeader.length; j++) {
            int attribute = Integer
                    .parseInt(datasetAggHeader[j].substring(0, datasetAggHeader[j].indexOf("-")));
            String name = datasetAggHeader[j].substring(datasetAggHeader[j].indexOf("-") + 1,
                    datasetAggHeader[j].length());
            headerTemp.put(attribute, name);
        }
        header.put(datasetId, headerTemp);
        datasets.put(datasetId, datasetNames[i]);
    }
    String scoreThresholdStr = conf.get("score-threshold", "");
    if (!scoreThresholdStr.isEmpty()) {
        hasScoreThreshold = true;
        scoreThreshold = Math.abs(Float.parseFloat(scoreThresholdStr));
    }
    String strengthThresholdStr = conf.get("strength-threshold", "");
    if (!strengthThresholdStr.isEmpty()) {
        hasStrengthThreshold = true;
        strengthThreshold = Math.abs(Float.parseFloat(strengthThresholdStr));
    }
    removeNotSignificant = Boolean.parseBoolean(conf.get("remove-not-significant"));
    completeRandomization = Boolean.parseBoolean(conf.get("complete-random"));
    randomizationStr = conf.get("complete-random-str", "");
    outputIds = conf.getBoolean("output-ids", false);

    // nbhd graph
    nbhdGraph.init(true, conf);

    // zipcode graph
    zipGraph.init(false, conf);

    // grid
    gridSize = 2048;
    originalGrid = new int[gridSize][gridSize];
    for (int j = 0; j < gridSize; j++) {
        for (int i = 0; i < gridSize; i++)
            originalGrid[i][j] = j * gridSize + i;
    }
}
From source file:edu.nyu.vida.data_polygamy.scalar_function_computation.AggregationReducer.java
License:BSD License
@Override
public void setup(Context context) throws IOException, InterruptedException {
    String[] datasetNames = context.getConfiguration().get("dataset-name", "").split(",");
    String[] datasetIds = context.getConfiguration().get("dataset-id", "").split(",");
    for (int i = 0; i < datasetNames.length; i++)
        idToDataset.put(Integer.parseInt(datasetIds[i]), datasetNames[i]);
    out = new MultipleOutputs<SpatioTemporalWritable, FloatArrayWritable>(context);
}
From source file:edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniquesReducer.java
License:BSD License
@Override
public void setup(Context context) throws IOException, InterruptedException {
    out = new MultipleOutputs<Text, Text>(context);
    conf = context.getConfiguration();
    String[] datasetIdsStr = conf.get("dataset-keys", "").split(",");
    String[] datasetNames = conf.get("dataset-names", "").split(",");
    if (datasetIdsStr.length != datasetNames.length) {
        System.out.println("Something went wrong... Number of ids should match number of datasets");
        System.exit(-1);
    }
    for (int i = 0; i < datasetIdsStr.length; i++) {
        int datasetId = Integer.parseInt(datasetIdsStr[i]);
        String[] datasetAggHeader = conf.get("dataset-" + datasetIdsStr[i] + "-agg", "").split(",");
        HashMap<Integer, String> headerTemp = new HashMap<Integer, String>();
        for (int j = 0; j < datasetAggHeader.length; j++) {
            int attribute = Integer
                    .parseInt(datasetAggHeader[j].substring(0, datasetAggHeader[j].indexOf("-")));
            String name = datasetAggHeader[j].substring(datasetAggHeader[j].indexOf("-") + 1,
                    datasetAggHeader[j].length());
            headerTemp.put(attribute, name);
        }
        header.put(datasetId, headerTemp);
        datasets.put(datasetId, datasetNames[i]);
    }

    // nbhd graph
    nbhdGraph.init(true, conf);

    // zipcode graph
    zipGraph.init(false, conf);

    // grid
    gridSize = 2048;
    originalGrid = new int[gridSize][gridSize];
    for (int j = 0; j < gridSize; j++) {
        for (int i = 0; i < gridSize; i++)
            originalGrid[i][j] = j * gridSize + i;
    }
}
From source file:fi.tkk.ics.hadoop.bam.cli.plugins.chipster.Summarize.java
License:Open Source License
@Override
public void setup(Reducer<LongWritable, Range, NullWritable, RangeCount>.Context ctx) {
    mos = new MultipleOutputs<NullWritable, RangeCount>(ctx);
    for (String s : ContextUtil.getConfiguration(ctx).getStrings(SUMMARY_LEVELS_PROP)) {
        int lvl = Integer.parseInt(s);
        summaryGroupsR.add(new SummaryGroup(lvl, Summarize.getSummaryName(s, true)));
        summaryGroupsF.add(new SummaryGroup(lvl, Summarize.getSummaryName(s, false)));
    }
}
From source file:fr.ens.biologie.genomique.eoulsan.modules.mapping.hadoop.ReadsFilterMapper.java
License:LGPL
@Override
protected void setup(final Context context) throws IOException, InterruptedException {
    EoulsanLogger.initConsoleHandler();
    getLogger().info("Start of setup()");

    // Get configuration object
    final Configuration conf = context.getConfiguration();

    // Initialize Eoulsan Settings
    if (!EoulsanRuntime.isRuntime()) {
        HadoopEoulsanRuntime.newEoulsanRuntime(conf);
    }

    // Set the FastqFormat
    final FastqFormat fastqFormat = FastqFormat.getFormatFromName(
            conf.get(FASTQ_FORMAT_KEY, "" + EoulsanRuntime.getSettings().getDefaultFastqFormat()));
    this.read1.setFastqFormat(fastqFormat);
    this.read2.setFastqFormat(fastqFormat);

    // Counter group
    this.counterGroup = conf.get(CommonHadoop.COUNTER_GROUP_KEY);
    if (this.counterGroup == null) {
        throw new IOException("No counter group defined");
    }

    getLogger().info("Fastq format: " + fastqFormat);

    // Set the filters
    try {
        final MultiReadFilterBuilder mrfb = new MultiReadFilterBuilder();

        // Add the parameters from the job configuration to the builder
        mrfb.addParameters(jobConfToParameters(conf, READ_FILTER_PARAMETER_KEY_PREFIX));

        this.filter = mrfb.getReadFilter(new HadoopReporterIncrementer(context), this.counterGroup);

        getLogger().info("Reads filters to apply: " + Joiner.on(", ").join(this.filter.getFilterNames()));
    } catch (EoulsanException e) {
        throw new IOException(e);
    }

    // Set the output writers
    this.out = new MultipleOutputs<>(context);
    this.outputFilename1 = createOutputPath(conf, OUTPUT_FILE1_KEY);
    this.outputFilename2 = createOutputPath(conf, OUTPUT_FILE2_KEY);

    getLogger().info("End of setup()");
}
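Every setup() above assumes the driver registered the named outputs before job submission; the constructor itself does not declare them. Below is a minimal sketch of that driver-side half. The job name, the named output "text", and the key/value types are illustrative assumptions chosen to match the lifecycle sketch at the top of this page, not taken from any of the projects listed here.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class Driver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "multiple-outputs-example");
        job.setJarByClass(Driver.class);

        // Register the named output that the task refers to in
        // mos.write("text", key, value); the name must match exactly.
        MultipleOutputs.addNamedOutput(job, "text", TextOutputFormat.class,
                Text.class, LongWritable.class);

        // Optional: avoid creating empty default part-r-* files when every
        // record goes to a named output.
        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    }
}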