List of usage examples for the org.apache.hadoop.mapreduce.lib.output.MultipleOutputs constructor
public MultipleOutputs(TaskInputOutputContext<?, ?, KEYOUT, VALUEOUT> context)
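All of the examples below call this constructor inside a Mapper or Reducer setup() method. For orientation, here is a minimal sketch of the full lifecycle around that call: construct in setup(), write to a named output in reduce(), and close in cleanup() so the underlying record writers are flushed. The named output "text" and the key/value types are illustrative assumptions, not taken from any example below.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

public class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

    private MultipleOutputs<Text, LongWritable> mos;

    @Override
    protected void setup(Context context) {
        // The constructor only records the context; record writers are
        // created lazily on the first write to each named output.
        mos = new MultipleOutputs<Text, LongWritable>(context);
    }

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long sum = 0;
        for (LongWritable v : values) {
            sum += v.get();
        }
        // "text" must have been registered on the Job with addNamedOutput()
        mos.write("text", key, new LongWritable(sum));
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // Closing flushes every record writer opened by this instance.
        mos.close();
    }
}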
From source file:com.tdunning.plume.local.lazy.MSCRReducer.java
License:Apache License
protected void setup(Reducer<PlumeObject, PlumeObject, NullWritable, NullWritable>.Context context)
        throws IOException, InterruptedException {
    this.mos = new MultipleOutputs(context);
    this.mscr = MapRedExecutor.readMSCR(context.getConfiguration());
}
From source file:com.wipro.ats.bdre.dq.DQMapper.java
License:Apache License
@Override
public void setup(org.apache.hadoop.mapreduce.Mapper.Context context)
        throws IOException, InterruptedException {
    LOGGER.info("START :: DQMapper.setup(Context context)");
    Configuration conf = context.getConfiguration();
    props = getProperties.getProperties(conf.get("dq.process.id"), "dq");
    knowledgeBase = buildKnowledgeBase();
    LOGGER.debug("The value of props is " + props.toString() + "\n package name is "
            + props.getProperty("rules.package"));
    mos = new MultipleOutputs<Text, NullWritable>(context);
    goodRecords = 0;
    badRecords = 0;
}
From source file:de.tudarmstadt.ukp.dkpro.bigdata.collocations.AssocReducer.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();
    this.ngramTotal = conf.getLong(NGRAM_TOTAL, -1);
    this.minValue = conf.getFloat(MIN_VALUE, DEFAULT_MIN_VALUE);
    String assocType = conf.get(ASSOC_METRIC, DEFAULT_ASSOC);
    if (assocType.equalsIgnoreCase("llr"))
        assocCalculator = new ConcreteLLCallback();
    else if (assocType.equalsIgnoreCase("dice"))
        assocCalculator = new DiceCallback();
    else if (assocType.equalsIgnoreCase("pmi"))
        assocCalculator = new PMICallback();
    else if (assocType.equalsIgnoreCase("chi"))
        assocCalculator = new ChiSquareCallback();
    this.emitUnigrams = conf.getBoolean(CollocDriver.EMIT_UNIGRAMS, CollocDriver.DEFAULT_EMIT_UNIGRAMS);
    log.info("NGram Total: {}, Min DICE value: {}, Emit Unigrams: {}",
            new Object[] { ngramTotal, minValue, emitUnigrams });
    if (ngramTotal == -1) {
        throw new IllegalStateException("No NGRAM_TOTAL available in job config");
    }
    mos = new MultipleOutputs<Text, DoubleWritable>(context);
}
From source file:de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.deduplication.DeDuplicationTextOutputReducer.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    multipleOutputs = new MultipleOutputs<NullWritable, List<DocumentInfo>>(context);
}
From source file:edu.nyu.vida.data_polygamy.feature_identification.IndexCreationReducer.java
License:BSD License
@Override
public void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    String[] datasetNames = conf.get("dataset-name", "").split(",");
    String[] datasetIds = conf.get("dataset-id", "").split(",");
    for (int i = 0; i < datasetNames.length; i++) {
        int dt = Integer.parseInt(datasetIds[i]);
        idToDataset.put(dt, datasetNames[i]);
        String regThresholds = conf.get("regular-" + datasetIds[i], "");
        if (!regThresholds.equals("")) {
            HashMap<Integer, String> attRegThresholds = new HashMap<Integer, String>();
            for (String keyVals : regThresholds.split(",")) {
                String[] keyVal = keyVals.split("-");
                attRegThresholds.put(Integer.parseInt(keyVal[0]), keyVal[1]);
            }
            idToRegThreshold.put(dt, attRegThresholds);
        }
        String rareThresholds = conf.get("rare-" + datasetIds[i], "");
        if (!rareThresholds.equals("")) {
            HashMap<Integer, String> attRareThresholds = new HashMap<Integer, String>();
            for (String keyVals : rareThresholds.split(",")) {
                String[] keyVal = keyVals.split("-");
                attRareThresholds.put(Integer.parseInt(keyVal[0]), keyVal[1]);
            }
            idToRareThreshold.put(dt, attRareThresholds);
        }
    }
    String[] useMergeTreeStr = conf.get("use-merge-tree", "").split(",");
    for (String dt : useMergeTreeStr) {
        useMergeTree.add(dt);
    }

    out = new MultipleOutputs<AttributeResolutionWritable, TopologyTimeSeriesWritable>(context);

    String bucket = conf.get("bucket", "");
    String[] spatialResolutionArray = utils.getSpatialResolutions();
    for (int j = 0; j < spatialResolutionArray.length; j++) {
        int spatialRes = utils.spatialResolution(spatialResolutionArray[j]);
        if ((spatialRes == FrameworkUtils.NBHD) || (spatialRes == FrameworkUtils.ZIP)) {
            if (bucket.equals(""))
                s3 = false;

            // reading nodes
            Path edgesPath = null;
            if (spatialRes == FrameworkUtils.NBHD)
                edgesPath = new Path(bucket + "neighborhood-graph");
            else
                edgesPath = new Path(bucket + "zipcode-graph");
            FileSystem fs = null;
            if (s3)
                fs = FileSystem.get(edgesPath.toUri(), conf);
            else
                fs = FileSystem.get(new Configuration());
            BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(edgesPath)));
            String[] s = Utilities.splitString(reader.readLine().trim());
            if (spatialRes == FrameworkUtils.NBHD)
                nvNbhd = Integer.parseInt(s[0].trim());
            else
                nvZip = Integer.parseInt(s[0].trim());
            int ne = Integer.parseInt(s[1].trim());
            for (int i = 0; i < ne; i++) {
                s = Utilities.splitString(reader.readLine().trim());
                int v1 = Integer.parseInt(s[0].trim());
                int v2 = Integer.parseInt(s[1].trim());
                if (v1 == v2) {
                    continue;
                }
                Integer[] arr = new Integer[2];
                arr[0] = v1;
                arr[1] = v2;
                if (spatialRes == FrameworkUtils.NBHD)
                    nbhdEdges.add(arr);
                else
                    zipEdges.add(arr);
            }
            reader.close();
        }
    }
}
From source file:edu.nyu.vida.data_polygamy.relationship_computation.CorrelationReducer.java
License:BSD License
@Override
public void setup(Context context) throws IOException, InterruptedException {
    out = new MultipleOutputs<Text, Text>(context);
    conf = context.getConfiguration();
    String[] datasetIdsStr = conf.get("dataset-keys", "").split(",");
    String[] datasetNames = conf.get("dataset-names", "").split(",");
    if (datasetIdsStr.length != datasetNames.length) {
        System.out.println("Something went wrong... Number of ids should match number of datasets");
        System.exit(-1);
    }
    for (int i = 0; i < datasetIdsStr.length; i++) {
        int datasetId = Integer.parseInt(datasetIdsStr[i]);
        String[] datasetAggHeader = conf.get("dataset-" + datasetIdsStr[i] + "-agg", "").split(",");
        HashMap<Integer, String> headerTemp = new HashMap<Integer, String>();
        for (int j = 0; j < datasetAggHeader.length; j++) {
            int attribute = Integer
                    .parseInt(datasetAggHeader[j].substring(0, datasetAggHeader[j].indexOf("-")));
            String name = datasetAggHeader[j].substring(datasetAggHeader[j].indexOf("-") + 1,
                    datasetAggHeader[j].length());
            headerTemp.put(attribute, name);
        }
        header.put(datasetId, headerTemp);
        datasets.put(datasetId, datasetNames[i]);
    }
    String scoreThresholdStr = conf.get("score-threshold", "");
    if (!scoreThresholdStr.isEmpty()) {
        hasScoreThreshold = true;
        scoreThreshold = Math.abs(Float.parseFloat(scoreThresholdStr));
    }
    String strengthThresholdStr = conf.get("strength-threshold", "");
    if (!strengthThresholdStr.isEmpty()) {
        hasStrengthThreshold = true;
        strengthThreshold = Math.abs(Float.parseFloat(strengthThresholdStr));
    }
    removeNotSignificant = Boolean.parseBoolean(conf.get("remove-not-significant"));
    completeRandomization = Boolean.parseBoolean(conf.get("complete-random"));
    randomizationStr = conf.get("complete-random-str", "");
    outputIds = conf.getBoolean("output-ids", false);

    // nbhd graph
    nbhdGraph.init(true, conf);

    // zipcode graph
    zipGraph.init(false, conf);

    // grid
    gridSize = 2048;
    originalGrid = new int[gridSize][gridSize];
    for (int j = 0; j < gridSize; j++) {
        for (int i = 0; i < gridSize; i++)
            originalGrid[i][j] = j * gridSize + i;
    }
}
From source file:edu.nyu.vida.data_polygamy.scalar_function_computation.AggregationReducer.java
License:BSD License
@Override
public void setup(Context context) throws IOException, InterruptedException {
    String[] datasetNames = context.getConfiguration().get("dataset-name", "").split(",");
    String[] datasetIds = context.getConfiguration().get("dataset-id", "").split(",");
    for (int i = 0; i < datasetNames.length; i++)
        idToDataset.put(Integer.parseInt(datasetIds[i]), datasetNames[i]);
    out = new MultipleOutputs<SpatioTemporalWritable, FloatArrayWritable>(context);
}
From source file:edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniquesReducer.java
License:BSD License
@Override
public void setup(Context context) throws IOException, InterruptedException {
    out = new MultipleOutputs<Text, Text>(context);
    conf = context.getConfiguration();
    String[] datasetIdsStr = conf.get("dataset-keys", "").split(",");
    String[] datasetNames = conf.get("dataset-names", "").split(",");
    if (datasetIdsStr.length != datasetNames.length) {
        System.out.println("Something went wrong... Number of ids should match number of datasets");
        System.exit(-1);
    }
    for (int i = 0; i < datasetIdsStr.length; i++) {
        int datasetId = Integer.parseInt(datasetIdsStr[i]);
        String[] datasetAggHeader = conf.get("dataset-" + datasetIdsStr[i] + "-agg", "").split(",");
        HashMap<Integer, String> headerTemp = new HashMap<Integer, String>();
        for (int j = 0; j < datasetAggHeader.length; j++) {
            int attribute = Integer
                    .parseInt(datasetAggHeader[j].substring(0, datasetAggHeader[j].indexOf("-")));
            String name = datasetAggHeader[j].substring(datasetAggHeader[j].indexOf("-") + 1,
                    datasetAggHeader[j].length());
            headerTemp.put(attribute, name);
        }
        header.put(datasetId, headerTemp);
        datasets.put(datasetId, datasetNames[i]);
    }

    // nbhd graph
    nbhdGraph.init(true, conf);

    // zipcode graph
    zipGraph.init(false, conf);

    // grid
    gridSize = 2048;
    originalGrid = new int[gridSize][gridSize];
    for (int j = 0; j < gridSize; j++) {
        for (int i = 0; i < gridSize; i++)
            originalGrid[i][j] = j * gridSize + i;
    }
}
From source file:fi.tkk.ics.hadoop.bam.cli.plugins.chipster.Summarize.java
License:Open Source License
@Override
public void setup(Reducer<LongWritable, Range, NullWritable, RangeCount>.Context ctx) {
    mos = new MultipleOutputs<NullWritable, RangeCount>(ctx);
    for (String s : ContextUtil.getConfiguration(ctx).getStrings(SUMMARY_LEVELS_PROP)) {
        int lvl = Integer.parseInt(s);
        summaryGroupsR.add(new SummaryGroup(lvl, Summarize.getSummaryName(s, true)));
        summaryGroupsF.add(new SummaryGroup(lvl, Summarize.getSummaryName(s, false)));
    }
}
From source file:fr.ens.biologie.genomique.eoulsan.modules.mapping.hadoop.ReadsFilterMapper.java
License:LGPL
@Override
protected void setup(final Context context) throws IOException, InterruptedException {
    EoulsanLogger.initConsoleHandler();
    getLogger().info("Start of setup()");

    // Get configuration object
    final Configuration conf = context.getConfiguration();

    // Initialize Eoulsan Settings
    if (!EoulsanRuntime.isRuntime()) {
        HadoopEoulsanRuntime.newEoulsanRuntime(conf);
    }

    // Set the FastqFormat
    final FastqFormat fastqFormat = FastqFormat.getFormatFromName(
            conf.get(FASTQ_FORMAT_KEY, "" + EoulsanRuntime.getSettings().getDefaultFastqFormat()));
    this.read1.setFastqFormat(fastqFormat);
    this.read2.setFastqFormat(fastqFormat);

    // Counter group
    this.counterGroup = conf.get(CommonHadoop.COUNTER_GROUP_KEY);
    if (this.counterGroup == null) {
        throw new IOException("No counter group defined");
    }

    getLogger().info("Fastq format: " + fastqFormat);

    // Set the filters
    try {
        final MultiReadFilterBuilder mrfb = new MultiReadFilterBuilder();

        // Add the parameters from the job configuration to the builder
        mrfb.addParameters(jobConfToParameters(conf, READ_FILTER_PARAMETER_KEY_PREFIX));

        this.filter = mrfb.getReadFilter(new HadoopReporterIncrementer(context), this.counterGroup);

        getLogger().info("Reads filters to apply: " + Joiner.on(", ").join(this.filter.getFilterNames()));
    } catch (EoulsanException e) {
        throw new IOException(e);
    }

    // Set the output writers
    this.out = new MultipleOutputs<>(context);
    this.outputFilename1 = createOutputPath(conf, OUTPUT_FILE1_KEY);
    this.outputFilename2 = createOutputPath(conf, OUTPUT_FILE2_KEY);

    getLogger().info("End of setup()");
}
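Every setup() above assumes the driver registered the named outputs before job submission; the constructor itself does not declare them. Below is a minimal sketch of that driver-side half. The job name, the named output "text", and the key/value types are illustrative assumptions chosen to match the lifecycle sketch at the top of this page, not taken from any of the projects listed here.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class Driver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "multiple-outputs-example");
        job.setJarByClass(Driver.class);

        // Register the named output that the task refers to in
        // mos.write("text", key, value); the name must match exactly.
        MultipleOutputs.addNamedOutput(job, "text", TextOutputFormat.class,
                Text.class, LongWritable.class);

        // Optional: avoid creating empty default part-r-* files when every
        // record goes to a named output.
        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    }
}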