Example usage for org.apache.hadoop.mapreduce.lib.output MultipleOutputs MultipleOutputs

Introduction

This page collects example usages of the MultipleOutputs(TaskInputOutputContext) constructor from org.apache.hadoop.mapreduce.lib.output.MultipleOutputs.

Prototype

public MultipleOutputs(TaskInputOutputContext<?, ?, KEYOUT, VALUEOUT> context) 

Document

Creates and initializes multiple outputs support; it should be instantiated in the Mapper/Reducer setup method.
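
Most of the usage snippets below show only the constructor call inside setup(). For context, here is a minimal sketch of the full lifecycle in a reducer, with hypothetical names (WordCountReducer and the named output "counts" are illustrative, not taken from any project listed here): create the instance in setup(), write through it in reduce(), and close it in cleanup() so the underlying record writers are flushed.

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

// Minimal sketch; class and output names are hypothetical.
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    private MultipleOutputs<Text, IntWritable> mos;

    @Override
    protected void setup(Context context) {
        // Instantiate once per task, as the Javadoc above recommends.
        mos = new MultipleOutputs<>(context);
    }

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        // Write to a named output registered on the job by the driver.
        mos.write("counts", key, new IntWritable(sum));
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // Closing flushes and finalizes all record writers opened by this instance.
        mos.close();
    }
}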

Usage

From source file:com.tdunning.plume.local.lazy.MSCRReducer.java

License:Apache License

protected void setup(Reducer<PlumeObject, PlumeObject, NullWritable, NullWritable>.Context context)
        throws IOException, InterruptedException {

    this.mos = new MultipleOutputs(context);
    this.mscr = MapRedExecutor.readMSCR(context.getConfiguration());
}
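
The snippet above only builds the task-side MultipleOutputs instance; every named output written through it must also be registered on the job by the driver. A hedged driver-side sketch, assuming a hypothetical driver class and the named output "counts" used in the lifecycle sketch earlier on this page:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

// Hypothetical driver fragment, not part of the MSCRReducer project above.
public class MultipleOutputsDriver {

    public static Job configureJob(Configuration conf) throws Exception {
        Job job = Job.getInstance(conf, "multiple-outputs-example");

        // Register the named output "counts" so that tasks can call
        // mos.write("counts", key, value).
        MultipleOutputs.addNamedOutput(job, "counts", TextOutputFormat.class,
                Text.class, IntWritable.class);

        // Optional: avoid creating empty part-* files for the default output.
        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

        return job;
    }
}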

From source file:com.wipro.ats.bdre.dq.DQMapper.java

License:Apache License

@Override
public void setup(org.apache.hadoop.mapreduce.Mapper.Context context) throws IOException, InterruptedException {
    LOGGER.info("START :: DQMapper.setup(Context context)");
    Configuration conf = context.getConfiguration();
    props = getProperties.getProperties(conf.get("dq.process.id"), "dq");
    knowledgeBase = buildKnowledgeBase();
    LOGGER.debug("The Value of props is" + props.toString() + "\n package name is"
            + props.getProperty("rules.package"));
    mos = new MultipleOutputs<Text, NullWritable>(context);
    goodRecords = 0;
    badRecords = 0;
}
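
The setup above prepares a MultipleOutputs<Text, NullWritable> together with good/bad record counters, but the routing done in map() is not part of this excerpt. Below is a hypothetical sketch of such routing, also showing the write overload that takes a base output path; the names "good" and "bad", the validity test, and the path "bad/part" are assumptions, not DQMapper's actual rule logic.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

// Hypothetical mapper; both named outputs would need addNamedOutput() in the driver.
public class RecordSplitMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    private MultipleOutputs<Text, NullWritable> mos;

    @Override
    protected void setup(Context context) {
        mos = new MultipleOutputs<>(context);
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        if (value.getLength() > 0) {
            // Goes to the named output "good" (files named good-m-*).
            mos.write("good", value, NullWritable.get());
        } else {
            // The overload with a base output path writes under bad/part-m-* instead.
            mos.write("bad", value, NullWritable.get(), "bad/part");
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        mos.close();
    }
}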

From source file:de.tudarmstadt.ukp.dkpro.bigdata.collocations.AssocReducer.java

License:Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();
    this.ngramTotal = conf.getLong(NGRAM_TOTAL, -1);
    this.minValue = conf.getFloat(MIN_VALUE, DEFAULT_MIN_VALUE);
    String assocType = conf.get(ASSOC_METRIC, DEFAULT_ASSOC);
    if (assocType.equalsIgnoreCase("llr"))
        assocCalculator = new ConcreteLLCallback();
    else if (assocType.equalsIgnoreCase("dice"))
        assocCalculator = new DiceCallback();
    else if (assocType.equalsIgnoreCase("pmi"))
        assocCalculator = new PMICallback();
    else if (assocType.equalsIgnoreCase("chi"))
        assocCalculator = new ChiSquareCallback();

    this.emitUnigrams = conf.getBoolean(CollocDriver.EMIT_UNIGRAMS, CollocDriver.DEFAULT_EMIT_UNIGRAMS);
    log.info("NGram Total: {}, Min DICE value: {}, Emit Unigrams: {}",
            new Object[] { ngramTotal, minValue, emitUnigrams });

    if (ngramTotal == -1) {
        throw new IllegalStateException("No NGRAM_TOTAL available in job config");
    }
    mos = new MultipleOutputs<Text, DoubleWritable>(context);
}

From source file:de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.deduplication.DeDuplicationTextOutputReducer.java

License:Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    multipleOutputs = new MultipleOutputs<NullWritable, List<DocumentInfo>>(context);
}

From source file:edu.nyu.vida.data_polygamy.feature_identification.IndexCreationReducer.java

License:BSD License

@Override
public void setup(Context context) throws IOException, InterruptedException {

    Configuration conf = context.getConfiguration();

    String[] datasetNames = conf.get("dataset-name", "").split(",");
    String[] datasetIds = conf.get("dataset-id", "").split(",");
    for (int i = 0; i < datasetNames.length; i++) {
        int dt = Integer.parseInt(datasetIds[i]);
        idToDataset.put(dt, datasetNames[i]);

        String regThresholds = conf.get("regular-" + datasetIds[i], "");
        if (!regThresholds.equals("")) {
            HashMap<Integer, String> attRegThresholds = new HashMap<Integer, String>();
            for (String keyVals : regThresholds.split(",")) {
                String[] keyVal = keyVals.split("-");
                attRegThresholds.put(Integer.parseInt(keyVal[0]), keyVal[1]);
            }
            idToRegThreshold.put(dt, attRegThresholds);
        }

        String rareThresholds = conf.get("rare-" + datasetIds[i], "");
        if (!rareThresholds.equals("")) {
            HashMap<Integer, String> attRareThresholds = new HashMap<Integer, String>();
            for (String keyVals : rareThresholds.split(",")) {
                String[] keyVal = keyVals.split("-");
                attRareThresholds.put(Integer.parseInt(keyVal[0]), keyVal[1]);
            }
            idToRareThreshold.put(dt, attRareThresholds);
        }
    }

    String[] useMergeTreeStr = conf.get("use-merge-tree", "").split(",");
    for (String dt : useMergeTreeStr) {
        useMergeTree.add(dt);
    }

    out = new MultipleOutputs<AttributeResolutionWritable, TopologyTimeSeriesWritable>(context);
    //out = new MultipleOutputs<Text,Text>(context);

    String bucket = conf.get("bucket", "");

    String[] spatialResolutionArray = utils.getSpatialResolutions();
    for (int j = 0; j < spatialResolutionArray.length; j++) {
        int spatialRes = utils.spatialResolution(spatialResolutionArray[j]);

        if ((spatialRes == FrameworkUtils.NBHD) || (spatialRes == FrameworkUtils.ZIP)) {

            if (bucket.equals(""))
                s3 = false;
            Path edgesPath = null;

            // reading nodes
            if (spatialRes == FrameworkUtils.NBHD)
                edgesPath = new Path(bucket + "neighborhood-graph");
            else
                edgesPath = new Path(bucket + "zipcode-graph");

            FileSystem fs = null;

            if (s3)
                fs = FileSystem.get(edgesPath.toUri(), conf);
            else
                fs = FileSystem.get(new Configuration());

            BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(edgesPath)));
            String[] s = Utilities.splitString(reader.readLine().trim());
            if (spatialRes == FrameworkUtils.NBHD)
                nvNbhd = Integer.parseInt(s[0].trim());
            else
                nvZip = Integer.parseInt(s[0].trim());

            int ne = Integer.parseInt(s[1].trim());
            for (int i = 0; i < ne; i++) {
                s = Utilities.splitString(reader.readLine().trim());
                int v1 = Integer.parseInt(s[0].trim());
                int v2 = Integer.parseInt(s[1].trim());
                if (v1 == v2) {
                    continue;
                }
                Integer[] arr = new Integer[2];
                arr[0] = v1;
                arr[1] = v2;
                if (spatialRes == FrameworkUtils.NBHD)
                    nbhdEdges.add(arr);
                else
                    zipEdges.add(arr);
            }
            reader.close();
        }
    }
}

From source file:edu.nyu.vida.data_polygamy.relationship_computation.CorrelationReducer.java

License:BSD License

@Override
public void setup(Context context) throws IOException, InterruptedException {

    out = new MultipleOutputs<Text, Text>(context);
    conf = context.getConfiguration();

    String[] datasetIdsStr = conf.get("dataset-keys", "").split(",");
    String[] datasetNames = conf.get("dataset-names", "").split(",");

    if (datasetIdsStr.length != datasetNames.length) {
        System.out.println("Something went wrong... Number of ids should match number of datasets");
        System.exit(-1);
    }

    for (int i = 0; i < datasetIdsStr.length; i++) {
        int datasetId = Integer.parseInt(datasetIdsStr[i]);
        String[] datasetAggHeader = conf.get("dataset-" + datasetIdsStr[i] + "-agg", "").split(",");

        HashMap<Integer, String> headerTemp = new HashMap<Integer, String>();
        for (int j = 0; j < datasetAggHeader.length; j++) {
            int attribute = Integer
                    .parseInt(datasetAggHeader[j].substring(0, datasetAggHeader[j].indexOf("-")));
            String name = datasetAggHeader[j].substring(datasetAggHeader[j].indexOf("-") + 1,
                    datasetAggHeader[j].length());
            headerTemp.put(attribute, name);
        }

        header.put(datasetId, headerTemp);
        datasets.put(datasetId, datasetNames[i]);
    }

    String scoreThresholdStr = conf.get("score-threshold", "");
    if (!scoreThresholdStr.isEmpty()) {
        hasScoreThreshold = true;
        scoreThreshold = Math.abs(Float.parseFloat(scoreThresholdStr));
    }

    String strengthThresholdStr = conf.get("strength-threshold", "");
    if (!strengthThresholdStr.isEmpty()) {
        hasStrengthThreshold = true;
        strengthThreshold = Math.abs(Float.parseFloat(strengthThresholdStr));
    }

    removeNotSignificant = Boolean.parseBoolean(conf.get("remove-not-significant"));
    completeRandomization = Boolean.parseBoolean(conf.get("complete-random"));
    randomizationStr = conf.get("complete-random-str", "");
    outputIds = conf.getBoolean("output-ids", false);

    // nbhd graph
    nbhdGraph.init(true, conf);

    // zipcode graph
    zipGraph.init(false, conf);

    // grid
    gridSize = 2048;
    //gridSize = Integer.parseInt(conf.get("spatial-resolution").replace("grid", ""));
    originalGrid = new int[gridSize][gridSize];
    for (int j = 0; j < gridSize; j++) {
        for (int i = 0; i < gridSize; i++)
            originalGrid[i][j] = j * gridSize + i;
    }
}

From source file:edu.nyu.vida.data_polygamy.scalar_function_computation.AggregationReducer.java

License:BSD License

@Override
public void setup(Context context) throws IOException, InterruptedException {
    String[] datasetNames = context.getConfiguration().get("dataset-name", "").split(",");
    String[] datasetIds = context.getConfiguration().get("dataset-id", "").split(",");
    for (int i = 0; i < datasetNames.length; i++)
        idToDataset.put(Integer.parseInt(datasetIds[i]), datasetNames[i]);
    out = new MultipleOutputs<SpatioTemporalWritable, FloatArrayWritable>(context);
    //out = new MultipleOutputs<Text,Text>(context);
}

From source file:edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniquesReducer.java

License:BSD License

@Override
public void setup(Context context) throws IOException, InterruptedException {

    out = new MultipleOutputs<Text, Text>(context);
    conf = context.getConfiguration();

    String[] datasetIdsStr = conf.get("dataset-keys", "").split(",");
    String[] datasetNames = conf.get("dataset-names", "").split(",");

    if (datasetIdsStr.length != datasetNames.length) {
        System.out.println("Something went wrong... Number of ids should match number of datasets");
        System.exit(-1);
    }

    for (int i = 0; i < datasetIdsStr.length; i++) {
        int datasetId = Integer.parseInt(datasetIdsStr[i]);
        String[] datasetAggHeader = conf.get("dataset-" + datasetIdsStr[i] + "-agg", "").split(",");

        HashMap<Integer, String> headerTemp = new HashMap<Integer, String>();
        for (int j = 0; j < datasetAggHeader.length; j++) {
            int attribute = Integer
                    .parseInt(datasetAggHeader[j].substring(0, datasetAggHeader[j].indexOf("-")));
            String name = datasetAggHeader[j].substring(datasetAggHeader[j].indexOf("-") + 1,
                    datasetAggHeader[j].length());
            headerTemp.put(attribute, name);
        }

        header.put(datasetId, headerTemp);
        datasets.put(datasetId, datasetNames[i]);
    }

    // nbhd graph
    nbhdGraph.init(true, conf);

    // zipcode graph
    zipGraph.init(false, conf);

    // grid
    gridSize = 2048;
    //gridSize = Integer.parseInt(conf.get("spatial-resolution").replace("grid", ""));
    originalGrid = new int[gridSize][gridSize];
    for (int j = 0; j < gridSize; j++) {
        for (int i = 0; i < gridSize; i++)
            originalGrid[i][j] = j * gridSize + i;
    }
}

From source file:fi.tkk.ics.hadoop.bam.cli.plugins.chipster.Summarize.java

License:Open Source License

@Override
public void setup(Reducer<LongWritable, Range, NullWritable, RangeCount>.Context ctx) {
    mos = new MultipleOutputs<NullWritable, RangeCount>(ctx);

    for (String s : ContextUtil.getConfiguration(ctx).getStrings(SUMMARY_LEVELS_PROP)) {
        int lvl = Integer.parseInt(s);
        summaryGroupsR.add(new SummaryGroup(lvl, Summarize.getSummaryName(s, true)));
        summaryGroupsF.add(new SummaryGroup(lvl, Summarize.getSummaryName(s, false)));
    }
}

From source file:fr.ens.biologie.genomique.eoulsan.modules.mapping.hadoop.ReadsFilterMapper.java

License:LGPL

@Override
protected void setup(final Context context) throws IOException, InterruptedException {

    EoulsanLogger.initConsoleHandler();
    getLogger().info("Start of setup()");

    // Get configuration object
    final Configuration conf = context.getConfiguration();

    // Initialize Eoulsan Settings
    if (!EoulsanRuntime.isRuntime()) {
        HadoopEoulsanRuntime.newEoulsanRuntime(conf);
    }

    // Set the FastqFormat
    final FastqFormat fastqFormat = FastqFormat.getFormatFromName(
            conf.get(FASTQ_FORMAT_KEY, "" + EoulsanRuntime.getSettings().getDefaultFastqFormat()));
    this.read1.setFastqFormat(fastqFormat);
    this.read2.setFastqFormat(fastqFormat);

    // Counter group
    this.counterGroup = conf.get(CommonHadoop.COUNTER_GROUP_KEY);
    if (this.counterGroup == null) {
        throw new IOException("No counter group defined");
    }

    getLogger().info("Fastq format: " + fastqFormat);

    // Set the filters
    try {
        final MultiReadFilterBuilder mrfb = new MultiReadFilterBuilder();

        // Add the parameters from the job configuration to the builder
        mrfb.addParameters(jobConfToParameters(conf, READ_FILTER_PARAMETER_KEY_PREFIX));

        this.filter = mrfb.getReadFilter(new HadoopReporterIncrementer(context), this.counterGroup);

        getLogger().info("Reads filters to apply: " + Joiner.on(", ").join(this.filter.getFilterNames()));

    } catch (EoulsanException e) {
        throw new IOException(e);
    }

    // Set the output writers
    this.out = new MultipleOutputs<>(context);
    this.outputFilename1 = createOutputPath(conf, OUTPUT_FILE1_KEY);
    this.outputFilename2 = createOutputPath(conf, OUTPUT_FILE2_KEY);

    getLogger().info("End of setup()");
}