Example usage for the org.apache.hadoop.mapred.lib.MultipleOutputs constructor MultipleOutputs(JobConf)

Introduction

On this page you can find example usages of the org.apache.hadoop.mapred.lib.MultipleOutputs constructor MultipleOutputs(JobConf).

Prototype

public MultipleOutputs(JobConf job)

Source Link

Document

Creates and initializes multiple named outputs support. It should be instantiated in the Mapper/Reducer configure method.

Usage

From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java

License:Open Source License

/**
 * Builds the collector that writes this job's results through Hadoop named outputs.
 *
 * <p>For every position in the job's result-index array, the output converter is looked up
 * by that position and the matrix characteristics are looked up by the byte stored at that
 * position. Both arrays are then handed, together with a freshly created
 * {@link MultipleOutputs} for the job, to a new {@code CollectMultipleConvertedOutputs}.
 *
 * @param job job configuration from which result indexes, converters and matrix
 *            characteristics are read
 * @return a collector wrapping the per-output converters, characteristics and named outputs
 */
public static CollectMultipleConvertedOutputs getMultipleConvertedOutputs(JobConf job) {
    byte[] resultIndexes = MRJobConfiguration.getResultIndexes(job);
    Converter[] outputConverters = new Converter[resultIndexes.length];
    MatrixCharacteristics[] stats = new MatrixCharacteristics[resultIndexes.length];
    for (int i = 0; i < resultIndexes.length; i++) {
        // Converter lookup is positional; characteristics lookup uses the index byte itself.
        outputConverters[i] = getOuputConverter(job, i);
        stats[i] = MRJobConfiguration.getMatrixCharacteristicsForOutput(job, resultIndexes[i]);
    }

    MultipleOutputs multipleOutputs = new MultipleOutputs(job);
    return new CollectMultipleConvertedOutputs(outputConverters, stats, multipleOutputs);
}

From source file:com.intel.hadoop.graphbuilder.idnormalize.mapreduce.SortDictReducer.java

License:Open Source License

/**
 * Configures this reducer from the job: delegates to the superclass, creates the
 * {@link MultipleOutputs} helper, and reads the {@code hashRawVid} flag
 * (defaulting to {@code true} when the job does not set it).
 *
 * @param job the job configuration
 */
@Override
public void configure(JobConf job) {
    super.configure(job);

    final MultipleOutputs namedOutputs = new MultipleOutputs(job);
    this.mos = namedOutputs;

    final boolean hashFlag = job.getBoolean("hashRawVid", true);
    this.hashRawVid = hashFlag;
}

From source file:kafka.etl.KafkaETLMapper.java

License:Apache License

/**
 * Initialises this mapper from the job configuration.
 *
 * <p>Loads the ETL properties from the job, reads the node properties from the configured
 * nodes path into the id-to-URI map, then picks up the client buffer size, client timeout,
 * granularity, topic and ignore-errors flag, and creates the {@link MultipleOutputs} helper.
 * The node properties, buffer size, timeout and topic are echoed to standard output.
 *
 * @param conf the job configuration
 * @throws RuntimeException wrapping any exception raised while initialising
 */
@Override
public void configure(JobConf conf) {
    try {
        _props = KafkaETLUtils.getPropsFromJob(conf);

        // Node table: each property name is parsed as an integer id and mapped to its URI.
        final String nodesLocation = KafkaETLCommons.getNodesPath(_props);
        final Props nodeEntries = KafkaETLUtils.readProps(nodesLocation);
        System.out.println(nodeEntries);
        _nodes = new HashMap<Integer, URI>();
        for (String nodeName : nodeEntries.stringPropertyNames()) {
            final int nodeId = Integer.parseInt(nodeName);
            _nodes.put(nodeId, nodeEntries.getUri(nodeName));
        }

        // Client connection settings.
        _bufferSize = KafkaETLCommons.getClientBufferSize(_props);
        _soTimeout = KafkaETLCommons.getClientTimeout(_props);
        System.out.println("bufferSize=" + _bufferSize);
        System.out.println("timeout=" + _soTimeout);

        _granularity = KafkaETLCommons.getGranularity(_props);

        _topic = KafkaETLCommons.getTopic(_props);
        System.out.println("topic=" + _topic);

        _mos = new MultipleOutputs(conf);

        _ignoreErrors = _props.getBoolean(KafkaETLCommons.IGNORE_ERRORS, false);
    } catch (Exception cause) {
        // Any failure during setup is surfaced unchecked, with the original as its cause.
        throw new RuntimeException(cause);
    }
}

From source file:org.apache.mahout.clustering.lda.cvb.CVB0PriorMapper.java

License:Apache License

/**
 * Initialises this mapper from the job configuration.
 *
 * <p>Creates the {@link MultipleOutputs} helper, reads the CVB settings, seeds the random
 * generator, and builds the read model: from the model paths reported by
 * {@code CVB0Driver.getModelPaths} when there are any, otherwise from the configured
 * topic/term counts and a generator created from the same seed. Finally creates the
 * model trainer with no separate write model.
 *
 * @param conf the job configuration
 * @throws RuntimeException wrapping an {@link IOException} raised during initialisation
 */
@Override
public void configure(org.apache.hadoop.mapred.JobConf conf) {
    try {
        multipleOutputs = new MultipleOutputs(conf);

        CVBConfig settings = new CVBConfig().read(conf);
        double eta = settings.getEta();
        double alpha = settings.getAlpha();
        long seed = settings.getRandomSeed();
        random = RandomUtils.getRandom(seed);
        numTopics = settings.getNumTopics();
        int numTerms = settings.getNumTerms();
        int updateThreads = settings.getNumUpdateThreads();
        int trainThreads = settings.getNumTrainThreads();
        double modelWeight = settings.getModelWeight();
        testFraction = settings.getTestFraction();

        log.info("Initializing read model");
        Path[] priorPaths = CVB0Driver.getModelPaths(conf);
        boolean noPriorModel = priorPaths == null || priorPaths.length == 0;
        TopicModel initialModel;
        if (noPriorModel) {
            log.info("No model files found");
            initialModel = new TopicModel(numTopics, numTerms, eta, alpha,
                    RandomUtils.getRandom(seed), null, trainThreads, modelWeight);
        } else {
            initialModel = new TopicModel(conf, eta, alpha, null, updateThreads, modelWeight,
                    priorPaths);
        }

        log.info("Initializing model trainer");
        modelTrainer = new ModelTrainer(initialModel, null, trainThreads, numTopics, numTerms);
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}

From source file:org.apache.mahout.clustering.lda.cvb.PriorTrainingReducer.java

License:Apache License

/**
 * Initialises this reducer from the job configuration and starts the model trainer.
 *
 * <p>Creates the {@link MultipleOutputs} helper and reads the CVB settings. The read model
 * is loaded from the paths reported by {@code CVB0Driver.getModelPaths} when there are any;
 * otherwise it starts from a dense matrix with every cell set to {@code 1.0 / numTerms}
 * and a topic vector of ones. The write model is a separate, freshly allocated model when
 * the model weight equals 1, and the read model itself otherwise.
 *
 * @param conf the job configuration
 * @throws RuntimeException wrapping an {@link IOException} raised during initialisation
 */
@Override
public void configure(JobConf conf) {
    try {
        log.info("Retrieving configuration");
        multipleOutputs = new MultipleOutputs(conf);

        CVBConfig settings = new CVBConfig().read(conf);
        double eta = settings.getEta();
        double alpha = settings.getAlpha();
        numTopics = settings.getNumTopics();
        numTerms = settings.getNumTerms();
        int updateThreads = settings.getNumUpdateThreads();
        int trainThreads = settings.getNumTrainThreads();
        maxIters = settings.getMaxItersPerDoc();
        double modelWeight = settings.getModelWeight();
        onlyLabeledDocs = settings.isUseOnlyLabeledDocs();

        log.info("Initializing read model");
        Path[] priorPaths = CVB0Driver.getModelPaths(conf);
        boolean noPriorModel = priorPaths == null || priorPaths.length == 0;
        TopicModel readModel;
        if (noPriorModel) {
            log.info("No model files found, starting with uniform p(term|topic) prior");
            Matrix uniformPrior = new DenseMatrix(numTopics, numTerms);
            uniformPrior.assign(1.0 / numTerms);
            readModel = new TopicModel(uniformPrior, new DenseVector(numTopics).assign(1.0), eta,
                    alpha, null, trainThreads, modelWeight);
        } else {
            readModel = new TopicModel(conf, eta, alpha, null, updateThreads, modelWeight,
                    priorPaths);
        }

        log.info("Initializing write model");
        TopicModel writeModel;
        if (modelWeight == 1) {
            writeModel = new TopicModel(new DenseMatrix(numTopics, numTerms),
                    new DenseVector(numTopics), eta, alpha, null, updateThreads, 1.0);
        } else {
            // Any other weight: updates are written into the read model itself.
            writeModel = readModel;
        }

        log.info("Initializing model trainer");
        modelTrainer = new ModelTrainer(readModel, writeModel, trainThreads, numTopics, numTerms);
        modelTrainer.start();
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}

From source file:org.apache.mahout.math.hadoop.stochasticsvd.qr.QRFirstStep.java

License:Apache License

/**
 * Prepares the per-task state for this step.
 *
 * <p>Parses the {@code PROP_AROWBLOCK_SIZE}, {@code PROP_K} and {@code PROP_P} job
 * properties as integers, stores {@code k + p} in {@code kp}, allocates the look-ahead
 * list and the Givens solver, and opens a {@link MultipleOutputs} over a copy of the job
 * configuration. A closer for those outputs is pushed to the front of {@code closeables}.
 */
protected void setup() {
    final int aRowBlockSize = Integer.parseInt(jobConf.get(PROP_AROWBLOCK_SIZE));
    final int kValue = Integer.parseInt(jobConf.get(PROP_K));
    final int pValue = Integer.parseInt(jobConf.get(PROP_P));
    kp = kValue + pValue;

    yLookahead = Lists.newArrayListWithCapacity(kp);
    qSolver = new GivensThinSolver(aRowBlockSize, kp);

    // The named outputs get their own JobConf copy rather than the shared instance.
    final JobConf outputsConf = new JobConf(jobConf);
    outputs = new MultipleOutputs(outputsConf);

    final Closeable outputsCloser = new Closeable() {
        @Override
        public void close() throws IOException {
            outputs.close();
        }
    };
    closeables.addFirst(outputsCloser);
}

From source file:org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.java

License:Apache License

/**
 * Builds the collector that writes this job's results through Hadoop named outputs.
 *
 * <p>For every position in the job's result-index array, the output converter is looked up
 * by that position and the matrix characteristics are looked up by the byte stored at that
 * position. Both arrays are then handed, together with a freshly created
 * {@link MultipleOutputs} for the job, to a new {@code CollectMultipleConvertedOutputs}.
 *
 * @param job job configuration from which result indexes, converters and matrix
 *            characteristics are read
 * @return a collector wrapping the per-output converters, characteristics and named outputs
 */
public static CollectMultipleConvertedOutputs getMultipleConvertedOutputs(JobConf job) {
    byte[] resultIndexes = MRJobConfiguration.getResultIndexes(job);
    Converter[] outputConverters = new Converter[resultIndexes.length];
    MatrixCharacteristics[] stats = new MatrixCharacteristics[resultIndexes.length];
    for (int i = 0; i < resultIndexes.length; i++) {
        // Converter lookup is positional; characteristics lookup uses the index byte itself.
        outputConverters[i] = getOuputConverter(job, i);
        stats[i] = MRJobConfiguration.getMatrixCharacteristicsForOutput(job, resultIndexes[i]);
    }

    MultipleOutputs multipleOutputs = new MultipleOutputs(job);
    return new CollectMultipleConvertedOutputs(outputConverters, stats, multipleOutputs);
}

From source file:pathmerge.linear.MergePathH1Reducer.java

License:Apache License

/**
 * Initialises this reducer from the job configuration.
 *
 * <p>Creates the {@link MultipleOutputs} helper, reads the {@code iMerge} and
 * {@code sizeKmer} settings, and allocates the reusable output value, k-mer factory and
 * output k-mer sized by {@code KMER_SIZE}.
 *
 * @param job the job configuration
 */
public void configure(JobConf job) {
    mos = new MultipleOutputs(job);

    // "iMerge" is parsed with Integer.parseInt and has no fallback value here.
    final String iMergeText = job.get("iMerge");
    I_MERGE = Integer.parseInt(iMergeText);
    // "sizeKmer" falls back to 0 when the job does not define it.
    KMER_SIZE = job.getInt("sizeKmer", 0);

    outputValue = new MergePathValueWritable();
    kmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
    outputKmer = new VKmerBytesWritable(KMER_SIZE);
}

From source file:pathmerge.log.MergePathH2Reducer.java

License:Apache License

/**
 * Initialises this reducer from the job configuration.
 *
 * <p>Creates the {@link MultipleOutputs} helper, reads the {@code iMerge} and
 * {@code sizeKmer} settings, and allocates the reusable value and k-mer buffers
 * (output plus temporaries) sized by {@code KMER_SIZE}.
 *
 * @param job the job configuration
 */
public void configure(JobConf job) {
    mos = new MultipleOutputs(job);

    // "iMerge" is parsed with Integer.parseInt and has no fallback value here.
    final String iMergeText = job.get("iMerge");
    I_MERGE = Integer.parseInt(iMergeText);
    // "sizeKmer" falls back to 0 when the job does not define it.
    KMER_SIZE = job.getInt("sizeKmer", 0);

    // Reusable value holders.
    outputValue = new MergePathValueWritable();
    tmpOutputValue = new MergePathValueWritable();

    // Reusable k-mer factory and buffers.
    kmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
    outputKmer = new VKmerBytesWritable(KMER_SIZE);
    tmpKmer1 = new VKmerBytesWritable(KMER_SIZE);
    tmpKmer2 = new VKmerBytesWritable(KMER_SIZE);
}

From source file:pathmerge.utils.SNodeInitialReducer.java

License:Apache License

/**
 * Initialises this reducer by creating its {@link MultipleOutputs} helper for the job.
 *
 * @param job the job configuration
 */
public void configure(JobConf job) {
    final MultipleOutputs namedOutputs = new MultipleOutputs(job);
    mos = namedOutputs;
}