List of usage examples for the org.apache.hadoop.mapred.lib.MultipleOutputs constructor
public MultipleOutputs(JobConf job)
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
/**
 * Builds the collector that bundles the per-result converters, the
 * per-result matrix characteristics, and a {@link MultipleOutputs} created
 * from the given job.
 *
 * <p>For every position {@code i} of the job's result indexes, the converter
 * is looked up by position and the matrix characteristics are looked up by
 * the result index byte stored at that position.
 *
 * <p>The previous version also filled a tag-to-positions map that was never
 * read or handed to the collector; that dead bookkeeping has been removed.
 *
 * @param job job configuration the result indexes, converters and matrix
 *            characteristics are read from
 * @return a collector wrapping the converters, characteristics and the
 *         {@code MultipleOutputs} instance
 */
public static CollectMultipleConvertedOutputs getMultipleConvertedOutputs(JobConf job) {
    byte[] resultIndexes = MRJobConfiguration.getResultIndexes(job);
    Converter[] outputConverters = new Converter[resultIndexes.length];
    MatrixCharacteristics[] stats = new MatrixCharacteristics[resultIndexes.length];

    for (int i = 0; i < resultIndexes.length; i++) {
        // Converters are keyed by position, characteristics by result index.
        outputConverters[i] = getOuputConverter(job, i);
        stats[i] = MRJobConfiguration.getMatrixCharacteristicsForOutput(job, resultIndexes[i]);
    }

    MultipleOutputs multipleOutputs = new MultipleOutputs(job);
    return new CollectMultipleConvertedOutputs(outputConverters, stats, multipleOutputs);
}
From source file:com.intel.hadoop.graphbuilder.idnormalize.mapreduce.SortDictReducer.java
License:Open Source License
/**
 * Configures this reducer from the job configuration: delegates to the
 * superclass, creates the {@link MultipleOutputs} for the job, and reads the
 * {@code "hashRawVid"} flag (defaulting to {@code true}).
 *
 * @param job the job configuration
 */
@Override
public void configure(JobConf job) {
    super.configure(job);
    mos = new MultipleOutputs(job);
    final boolean hashFlag = job.getBoolean("hashRawVid", true);
    hashRawVid = hashFlag;
}
From source file:kafka.etl.KafkaETLMapper.java
License:Apache License
@Override public void configure(JobConf conf) { try {//from w w w.ja v a 2 s . c om _props = KafkaETLUtils.getPropsFromJob(conf); String nodePath = KafkaETLCommons.getNodesPath(_props); Props nodesProps = KafkaETLUtils.readProps(nodePath); System.out.println(nodesProps); _nodes = new HashMap<Integer, URI>(); for (String key : nodesProps.stringPropertyNames()) { _nodes.put(Integer.parseInt(key), nodesProps.getUri(key)); } _bufferSize = KafkaETLCommons.getClientBufferSize(_props); _soTimeout = KafkaETLCommons.getClientTimeout(_props); System.out.println("bufferSize=" + _bufferSize); System.out.println("timeout=" + _soTimeout); _granularity = KafkaETLCommons.getGranularity(_props); _topic = KafkaETLCommons.getTopic(_props); System.out.println("topic=" + _topic); _mos = new MultipleOutputs(conf); _ignoreErrors = _props.getBoolean(KafkaETLCommons.IGNORE_ERRORS, false); } catch (Exception e) { throw new RuntimeException(e); } }
From source file:org.apache.mahout.clustering.lda.cvb.CVB0PriorMapper.java
License:Apache License
@Override public void configure(org.apache.hadoop.mapred.JobConf conf) { try {//from w w w . ja v a 2 s . c o m multipleOutputs = new MultipleOutputs(conf); CVBConfig c = new CVBConfig().read(conf); double eta = c.getEta(); double alpha = c.getAlpha(); long seed = c.getRandomSeed(); random = RandomUtils.getRandom(seed); numTopics = c.getNumTopics(); int numTerms = c.getNumTerms(); int numUpdateThreads = c.getNumUpdateThreads(); int numTrainThreads = c.getNumTrainThreads(); double modelWeight = c.getModelWeight(); testFraction = c.getTestFraction(); log.info("Initializing read model"); TopicModel readModel; Path[] modelPaths = CVB0Driver.getModelPaths(conf); if (modelPaths != null && modelPaths.length > 0) { readModel = new TopicModel(conf, eta, alpha, null, numUpdateThreads, modelWeight, modelPaths); } else { log.info("No model files found"); readModel = new TopicModel(numTopics, numTerms, eta, alpha, RandomUtils.getRandom(seed), null, numTrainThreads, modelWeight); } log.info("Initializing model trainer"); modelTrainer = new ModelTrainer(readModel, null, numTrainThreads, numTopics, numTerms); } catch (IOException e) { throw new RuntimeException(e); } }
From source file:org.apache.mahout.clustering.lda.cvb.PriorTrainingReducer.java
License:Apache License
@Override public void configure(JobConf conf) { try {//w w w . j a v a 2 s .co m log.info("Retrieving configuration"); multipleOutputs = new MultipleOutputs(conf); CVBConfig c = new CVBConfig().read(conf); double eta = c.getEta(); double alpha = c.getAlpha(); numTopics = c.getNumTopics(); numTerms = c.getNumTerms(); int numUpdateThreads = c.getNumUpdateThreads(); int numTrainThreads = c.getNumTrainThreads(); maxIters = c.getMaxItersPerDoc(); double modelWeight = c.getModelWeight(); onlyLabeledDocs = c.isUseOnlyLabeledDocs(); log.info("Initializing read model"); TopicModel readModel; Path[] modelPaths = CVB0Driver.getModelPaths(conf); if (modelPaths != null && modelPaths.length > 0) { readModel = new TopicModel(conf, eta, alpha, null, numUpdateThreads, modelWeight, modelPaths); } else { log.info("No model files found, starting with uniform p(term|topic) prior"); Matrix m = new DenseMatrix(numTopics, numTerms); m.assign(1.0 / numTerms); readModel = new TopicModel(m, new DenseVector(numTopics).assign(1.0), eta, alpha, null, numTrainThreads, modelWeight); } log.info("Initializing write model"); TopicModel writeModel = modelWeight == 1 ? new TopicModel(new DenseMatrix(numTopics, numTerms), new DenseVector(numTopics), eta, alpha, null, numUpdateThreads, 1.0) : readModel; log.info("Initializing model trainer"); modelTrainer = new ModelTrainer(readModel, writeModel, numTrainThreads, numTopics, numTerms); modelTrainer.start(); } catch (IOException e) { throw new RuntimeException(e); } }
From source file:org.apache.mahout.math.hadoop.stochasticsvd.qr.QRFirstStep.java
License:Apache License
protected void setup() { int r = Integer.parseInt(jobConf.get(PROP_AROWBLOCK_SIZE)); int k = Integer.parseInt(jobConf.get(PROP_K)); int p = Integer.parseInt(jobConf.get(PROP_P)); kp = k + p;//from w ww . j ava 2s .com yLookahead = Lists.newArrayListWithCapacity(kp); qSolver = new GivensThinSolver(r, kp); outputs = new MultipleOutputs(new JobConf(jobConf)); closeables.addFirst(new Closeable() { @Override public void close() throws IOException { outputs.close(); } }); }
From source file:org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.java
License:Apache License
/**
 * Creates the collector that combines one converter and one set of matrix
 * characteristics per result index with a {@link MultipleOutputs} built from
 * the given job.
 *
 * <p>The converter for position {@code i} is obtained by position; the matrix
 * characteristics are obtained by the result index byte at that position.
 *
 * <p>An earlier form of this method also maintained a map from result index
 * to positions, but nothing ever read it and it was not passed to the
 * collector, so it has been dropped.
 *
 * @param job job configuration the result indexes, converters and matrix
 *            characteristics are read from
 * @return a collector wrapping the converters, characteristics and the
 *         {@code MultipleOutputs} instance
 */
public static CollectMultipleConvertedOutputs getMultipleConvertedOutputs(JobConf job) {
    byte[] resultIndexes = MRJobConfiguration.getResultIndexes(job);
    Converter[] outputConverters = new Converter[resultIndexes.length];
    MatrixCharacteristics[] stats = new MatrixCharacteristics[resultIndexes.length];

    for (int i = 0; i < resultIndexes.length; i++) {
        byte output = resultIndexes[i];
        outputConverters[i] = getOuputConverter(job, i);
        stats[i] = MRJobConfiguration.getMatrixCharacteristicsForOutput(job, output);
    }

    MultipleOutputs multipleOutputs = new MultipleOutputs(job);
    return new CollectMultipleConvertedOutputs(outputConverters, stats, multipleOutputs);
}
From source file:pathmerge.linear.MergePathH1Reducer.java
License:Apache License
/**
 * Configures this reducer from the job configuration.
 *
 * <p>Creates the {@code MultipleOutputs} for the job, parses the
 * {@code "iMerge"} property as an integer, reads {@code "sizeKmer"}
 * (default 0), and allocates the reusable output value, k-mer factory and
 * output k-mer sized by that k-mer size.
 *
 * @param job the job configuration
 */
public void configure(JobConf job) {
    mos = new MultipleOutputs(job);

    // Settings read from the job.
    final String rawIMerge = job.get("iMerge");
    I_MERGE = Integer.parseInt(rawIMerge);
    KMER_SIZE = job.getInt("sizeKmer", 0);

    // Reusable objects, sized by the configured k-mer size where applicable.
    outputValue = new MergePathValueWritable();
    kmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
    outputKmer = new VKmerBytesWritable(KMER_SIZE);
}
From source file:pathmerge.log.MergePathH2Reducer.java
License:Apache License
/**
 * Configures this reducer from the job configuration.
 *
 * <p>Creates the {@code MultipleOutputs} for the job, parses the
 * {@code "iMerge"} property as an integer, reads {@code "sizeKmer"}
 * (default 0), and allocates the reusable output/temporary values, the k-mer
 * factory, and the output and temporary k-mers sized by that k-mer size.
 *
 * @param job the job configuration
 */
public void configure(JobConf job) {
    mos = new MultipleOutputs(job);

    // Settings read from the job.
    final String rawIMerge = job.get("iMerge");
    I_MERGE = Integer.parseInt(rawIMerge);
    KMER_SIZE = job.getInt("sizeKmer", 0);

    // Reusable value holders.
    outputValue = new MergePathValueWritable();
    tmpOutputValue = new MergePathValueWritable();

    // Reusable k-mer objects, all sized by the configured k-mer size.
    kmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
    outputKmer = new VKmerBytesWritable(KMER_SIZE);
    tmpKmer1 = new VKmerBytesWritable(KMER_SIZE);
    tmpKmer2 = new VKmerBytesWritable(KMER_SIZE);
}
From source file:pathmerge.utils.SNodeInitialReducer.java
License:Apache License
/**
 * Configures this reducer by creating the {@code MultipleOutputs} for the
 * given job.
 *
 * @param job the job configuration
 */
public void configure(JobConf job) {
    mos = new MultipleOutputs(job);
}