Example usage for org.apache.hadoop.io DoubleWritable get

List of usage examples for org.apache.hadoop.io DoubleWritable get

Introduction

On this page you can find example usage for org.apache.hadoop.io.DoubleWritable.get().

Prototype

public double get() 

Usage
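Before the examples from real projects, here is a minimal, self-contained sketch (not taken from any of the sources below) that round-trips a DoubleWritable through Hadoop's Writable serialization and unwraps the primitive value with get(). The class name DoubleWritableRoundTrip is invented for this illustration.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.hadoop.io.DoubleWritable;

public class DoubleWritableRoundTrip {
    public static void main(String[] args) throws IOException {
        // Wrap a primitive double.
        DoubleWritable original = new DoubleWritable(3.25);

        // Serialize it the way Hadoop does between map and reduce tasks.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // Deserialize into a fresh instance and unwrap the value with get().
        DoubleWritable copy = new DoubleWritable();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        System.out.println(copy.get()); // prints 3.25
    }
}

In all of the examples that follow, the same pattern recurs: a DoubleWritable arrives as a map or reduce value, or is read from a SequenceFile, and get() unwraps the primitive double so it can be used in ordinary arithmetic.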

From source file:org.apache.mahout.classifier.cbayes.CBayesThetaMapper.java

License:Apache License

/**
 * We need to calculate the idf of each feature in each label.
 *
 * @param key The label,feature pair (can either be the freq count or the term document count)
 */
@Override
public void map(Text key, DoubleWritable value, OutputCollector<Text, DoubleWritable> output, Reporter reporter)
        throws IOException {

    String labelFeaturePair = key.toString();

    if (labelFeaturePair.charAt(0) == ',') { // if it is from the Sigma_j folder (feature weight Sum)
        String feature = labelFeaturePair.substring(1);
        double alpha_i = 1.0;
        for (Map.Entry<String, Double> stringDoubleEntry : labelWeightSum.entrySet()) {
            double inverseDenominator = 1.0 / (sigma_jSigma_k - stringDoubleEntry.getValue() + vocabCount);
            DoubleWritable weight = new DoubleWritable((value.get() + alpha_i) * inverseDenominator);
            output.collect(new Text((stringDoubleEntry.getKey() + ',' + feature).trim()), weight); //output Sigma_j
        }
    } else {
        String label = labelFeaturePair.split(",")[0];
        double inverseDenominator = 1.0 / (sigma_jSigma_k - labelWeightSum.get(label) + vocabCount);
        DoubleWritable weight = new DoubleWritable(-value.get() * inverseDenominator);
        output.collect(key, weight);//output -D_ij       
    }
}

From source file:org.apache.mahout.classifier.cbayes.CBayesThetaNormalizerMapper.java

License:Apache License

/**
 * We need to calculate the idf of each feature in each label.
 *
 * @param key The label,feature pair (can either be the freq count or the term document count)
 */
@Override
public void map(Text key, DoubleWritable value, OutputCollector<Text, DoubleWritable> output, Reporter reporter)
        throws IOException {

    String labelFeaturePair = key.toString();
    if (labelFeaturePair.charAt(0) == ',') { // if it is from the Sigma_j folder

        double alpha_i = 1.0;
        for (Map.Entry<String, Double> stringDoubleEntry : labelWeightSum.entrySet()) {
            double weight = Math.log(
                    (value.get() + alpha_i) / (sigma_jSigma_k - stringDoubleEntry.getValue() + vocabCount));
            output.collect(new Text(('_' + stringDoubleEntry.getKey()).trim()), new DoubleWritable(weight)); //output Sigma_j

        }

    } else {
        String label = labelFeaturePair.split(",")[0];

        double D_ij = value.get();
        double denominator = 0.5 * ((sigma_jSigma_k / vocabCount) + (D_ij * this.labelWeightSum.size()));
        double weight = Math.log(1.0 - D_ij / denominator);
        output.collect(new Text(('_' + label).trim()), new DoubleWritable(weight));//output -D_ij

    }

}

From source file:org.apache.mahout.classifier.sequencelearning.baumwelchmapreduce.BaumWelchUtils.java

License:Apache License

protected static void WriteModelToDirectory(HmmModel model, Path modelPath, Configuration conf)
        throws IOException {

    int numHidden = model.getNrOfHiddenStates();
    int numObserved = model.getNrOfOutputStates();
    Matrix emissionMatrix = model.getEmissionMatrix();
    Matrix transitionMatrix = model.getTransitionMatrix();
    Vector initialProbability = model.getInitialProbabilities();

    MapWritable initialDistributionMap = new MapWritable();
    MapWritable transitionDistributionMap = new MapWritable();
    MapWritable emissionDistributionMap = new MapWritable();
    // delete the output directory
    HadoopUtil.delete(conf, modelPath);
    // create new file to store HMM
    FileSystem fs = FileSystem.get(modelPath.toUri(), conf);
    Path outFile = new Path(modelPath, "part-randomSeed");
    boolean newFile = fs.createNewFile(outFile);

    if (newFile) {
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outFile, Text.class,
                MapWritable.class);

        try {

            // construct one MapWritable<IntWritable, DoubleWritable> object
            // and two MapWritable<Text, MapWritable<IntWritable, DoubleWritable >> objects
            for (int i = 0; i < numHidden; i++) {
                IntWritable initialDistributionKey = new IntWritable(i);
                DoubleWritable initialDistributionValue = new DoubleWritable(initialProbability.get(i));
                log.info("BuildRandomModel Initial Distribution Map: State {} = {})",
                        initialDistributionKey.get(), initialDistributionValue.get());
                initialDistributionMap.put(initialDistributionKey, initialDistributionValue);

                Text transitionDistributionKey = new Text("TRANSIT_" + Integer.toString(i));
                MapWritable transitionDistributionValue = new MapWritable();
                for (int j = 0; j < numHidden; j++) {
                    IntWritable transitionDistributionInnerKey = new IntWritable(j);
                    DoubleWritable transitionDistributionInnerValue = new DoubleWritable(
                            transitionMatrix.get(i, j));
                    log.info("BuildRandomModel Transition Distribution Map Inner: ({}, {}) = ({}, {})",
                            new Object[] { i, j, transitionDistributionInnerKey.get(),
                                    transitionDistributionInnerValue.get() });
                    transitionDistributionValue.put(transitionDistributionInnerKey,
                            transitionDistributionInnerValue);
                }
                transitionDistributionMap.put(transitionDistributionKey, transitionDistributionValue);

                Text emissionDistributionKey = new Text("EMIT_" + Integer.toString(i));
                MapWritable emissionDistributionValue = new MapWritable();
                for (int j = 0; j < numObserved; j++) {
                    IntWritable emissionDistributionInnerKey = new IntWritable(j);
                    DoubleWritable emissionDistributionInnerValue = new DoubleWritable(
                            emissionMatrix.get(i, j));
                    log.info("BuildRandomModel Emission Distribution Map Inner: ({}, {}) = ({}, {})",
                            new Object[] { i, j, emissionDistributionInnerKey.get(),
                                    emissionDistributionInnerValue.get() });
                    emissionDistributionValue.put(emissionDistributionInnerKey, emissionDistributionInnerValue);
                }
                emissionDistributionMap.put(emissionDistributionKey, emissionDistributionValue);

            }

            writer.append(new Text("INITIAL"), initialDistributionMap);
            log.info("Wrote random Initial Distribution Map to {}", outFile);

            for (MapWritable.Entry<Writable, Writable> transitionEntry : transitionDistributionMap.entrySet()) {
                log.info("Writing Transition Distribution Map Key, Value = ({}, {})", transitionEntry.getKey(),
                        transitionEntry.getValue());
                writer.append(transitionEntry.getKey(), transitionEntry.getValue());
            }
            log.info("Wrote random Transition Distribution Map to {}", outFile);

            for (MapWritable.Entry<Writable, Writable> emissionEntry : emissionDistributionMap.entrySet()) {
                log.info("Writing Emission Distribution Map Key, Value = ({}, {})", emissionEntry.getKey(),
                        emissionEntry.getValue());
                writer.append(emissionEntry.getKey(), emissionEntry.getValue());
            }
            log.info("Wrote random Emission Distribution Map to {}", outFile);

        } finally {
            Closeables.closeQuietly(writer);
        }

    }

}

From source file:org.apache.mahout.clustering.lda.LDADriver.java

License:Apache License

static LDAState createState(Configuration job) throws IOException {
    String statePath = job.get(STATE_IN_KEY);
    int numTopics = Integer.parseInt(job.get(NUM_TOPICS_KEY));
    int numWords = Integer.parseInt(job.get(NUM_WORDS_KEY));
    double topicSmoothing = Double.parseDouble(job.get(TOPIC_SMOOTHING_KEY));

    Path dir = new Path(statePath);
    FileSystem fs = dir.getFileSystem(job);

    DenseMatrix pWgT = new DenseMatrix(numTopics, numWords);
    double[] logTotals = new double[numTopics];
    double ll = 0.0;

    IntPairWritable key = new IntPairWritable();
    DoubleWritable value = new DoubleWritable();
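    // Read back every (topic, word) -> double entry from the state partitions:
    // rows keyed with TOPIC_SUM_KEY hold the per-topic totals, and the
    // LOG_LIKELIHOOD_KEY row holds the model log-likelihood.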
    for (FileStatus status : fs.globStatus(new Path(dir, "part-*"))) {
        Path path = status.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job);
        while (reader.next(key, value)) {
            int topic = key.getFirst();
            int word = key.getSecond();
            if (word == TOPIC_SUM_KEY) {
                logTotals[topic] = value.get();
                if (Double.isInfinite(value.get())) {
                    throw new IllegalArgumentException();
                }
            } else if (topic == LOG_LIKELIHOOD_KEY) {
                ll = value.get();
            } else {
                if (!((topic >= 0) && (word >= 0))) {
                    throw new IllegalArgumentException(topic + " " + word);
                }
                if (pWgT.getQuick(topic, word) != 0.0) {
                    throw new IllegalArgumentException();
                }
                pWgT.setQuick(topic, word, value.get());
                if (Double.isInfinite(pWgT.getQuick(topic, word))) {
                    throw new IllegalArgumentException();
                }
            }
        }
        reader.close();
    }

    return new LDAState(numTopics, numWords, topicSmoothing, pWgT, logTotals, ll);
}

From source file:org.apache.mahout.clustering.lda.LDADriver.java

License:Apache License

private double findLL(Path statePath, Configuration job) throws IOException {
    FileSystem fs = statePath.getFileSystem(job);

    double ll = 0.0;

    IntPairWritable key = new IntPairWritable();
    DoubleWritable value = new DoubleWritable();
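    // Scan the state partitions until the stored log-likelihood entry is found.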
    for (FileStatus status : fs.globStatus(new Path(statePath, "part-*"))) {
        Path path = status.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job);
        while (reader.next(key, value)) {
            if (key.getFirst() == LOG_LIKELIHOOD_KEY) {
                ll = value.get();
                break;
            }
        }
        reader.close();
    }

    return ll;
}

From source file:org.apache.mahout.clustering.lda.LDAMapper.java

License:Apache License

@Override
protected void map(WritableComparable<?> key, VectorWritable wordCountsWritable, Context context)
        throws IOException, InterruptedException {
    Vector wordCounts = wordCountsWritable.get();
    LDAInference.InferredDocument doc;
    try {
        doc = infer.infer(wordCounts);
    } catch (ArrayIndexOutOfBoundsException e1) {
        throw new IllegalStateException("This is probably because the --numWords argument is set too small.  \n"
                + "\tIt needs to be >= than the number of words (terms actually) in the corpus and can be \n"
                + "\tlarger if some storage inefficiency can be tolerated.", e1);
    }

    double[] logTotals = new double[state.getNumTopics()];
    Arrays.fill(logTotals, Double.NEGATIVE_INFINITY);

    // Output sufficient statistics for each word. == pseudo-log counts.
    DoubleWritable v = new DoubleWritable();
    for (Iterator<Vector.Element> iter = wordCounts.iterateNonZero(); iter.hasNext();) {
        Vector.Element e = iter.next();
        int w = e.index();

        for (int k = 0; k < state.getNumTopics(); ++k) {
            v.set(doc.phi(k, w) + Math.log(e.get()));

            IntPairWritable kw = new IntPairWritable(k, w);

            // output (topic, word)'s logProb contribution
            context.write(kw, v);
            logTotals[k] = LDAUtil.logSum(logTotals[k], v.get());
        }
    }

    // Output the totals for the statistics. This is to make
    // normalizing a lot easier.
    for (int k = 0; k < state.getNumTopics(); ++k) {
        IntPairWritable kw = new IntPairWritable(k, LDADriver.TOPIC_SUM_KEY);
        v.set(logTotals[k]);
        assert !Double.isNaN(v.get());
        context.write(kw, v);
    }
    IntPairWritable llk = new IntPairWritable(LDADriver.LOG_LIKELIHOOD_KEY, LDADriver.LOG_LIKELIHOOD_KEY);
    // Output log-likelihoods.
    v.set(doc.getLogLikelihood());
    context.write(llk, v);
}

From source file:org.apache.mahout.clustering.lda.LDAReducer.java

License:Apache License

@Override
public void reduce(IntPairWritable topicWord, Iterable<DoubleWritable> values, Context context)
        throws java.io.IOException, InterruptedException {

    // sum likelihoods
    if (topicWord.getSecond() == LDADriver.LOG_LIKELIHOOD_KEY) {
        double accum = 0.0;
        for (DoubleWritable vw : values) {
            double v = vw.get();
            if (Double.isNaN(v)) {
                throw new IllegalArgumentException(topicWord.getFirst() + " " + topicWord.getSecond());
            }
            accum += v;
        }
        context.write(topicWord, new DoubleWritable(accum));
    } else { // log sum sufficient statistics.
        double accum = Double.NEGATIVE_INFINITY;
        for (DoubleWritable vw : values) {
            double v = vw.get();
            if (Double.isNaN(v)) {
                throw new IllegalArgumentException(topicWord.getFirst() + " " + topicWord.getSecond());
            }
            accum = LDAUtil.logSum(accum, v);
            if (Double.isNaN(accum)) {
                throw new IllegalArgumentException(topicWord.getFirst() + " " + topicWord.getSecond());
            }
        }
        context.write(topicWord, new DoubleWritable(accum));
    }

}

From source file:org.apache.mahout.clustering.lda.LDAWordTopicMapper.java

License:Apache License

@Override
protected void map(WritableComparable<?> key, VectorWritable wordCountsWritable, Context context)
        throws IOException, InterruptedException {
    Vector wordCounts = wordCountsWritable.get();
    LDAInference.InferredDocument doc;
    try {
        doc = infer.infer(wordCounts);
    } catch (ArrayIndexOutOfBoundsException e1) {
        throw new IllegalStateException("This is probably because the --numWords argument is set too small.  \n"
                + "\tIt needs to be >= than the number of words (terms actually) in the corpus and can be \n"
                + "\tlarger if some storage inefficiency can be tolerated.", e1);
    }

    double[] logTotals = new double[state.getNumTopics()];
    Arrays.fill(logTotals, Double.NEGATIVE_INFINITY);

    // Output sufficient statistics for each word. == pseudo-log counts.
    DoubleWritable v = new DoubleWritable();
    for (Iterator<Vector.Element> iter = wordCounts.iterateNonZero(); iter.hasNext();) {
        Vector.Element e = iter.next();
        int w = e.index();

        for (int k = 0; k < state.getNumTopics(); ++k) {
            v.set(doc.phi(k, w) + Math.log(e.get()));

            IntPairWritable kw = new IntPairWritable(k, w);

            // output (topic, word)'s logProb contribution
            context.write(kw, v);
            logTotals[k] = LDAUtil.logSum(logTotals[k], v.get());
        }
    }

    // Output the totals for the statistics. This is to make
    // normalizing a lot easier.
    for (int k = 0; k < state.getNumTopics(); ++k) {
        IntPairWritable kw = new IntPairWritable(k, LDADriver.TOPIC_SUM_KEY);
        v.set(logTotals[k]);
        assert !Double.isNaN(v.get());
        context.write(kw, v);
    }
    IntPairWritable llk = new IntPairWritable(LDADriver.LOG_LIKELIHOOD_KEY, LDADriver.LOG_LIKELIHOOD_KEY);
    // Output log-likelihoods.
    v.set(doc.getLogLikelihood());
    context.write(llk, v);
}

From source file:org.apache.mahout.ga.watchmaker.OutputUtils.java

License:Apache License

/**
 * Reads back the evaluations.
 * 
 * @param outpath
 *          output <code>Path</code>
 * @param evaluations
 *          List of evaluations
 */
public static void importEvaluations(FileSystem fs, Configuration conf, Path outpath, List<Double> evaluations)
        throws IOException {
    Sorter sorter = new Sorter(fs, LongWritable.class, DoubleWritable.class, conf);

    // merge and sort the outputs
    Path[] outfiles = listOutputFiles(fs, outpath);
    Path output = new Path(outpath, "output.sorted");
    sorter.merge(outfiles, output);

    // import the evaluations
    LongWritable key = new LongWritable();
    DoubleWritable value = new DoubleWritable();
    Reader reader = new Reader(fs, output, conf);
    try {
        while (reader.next(key, value)) {
            evaluations.add(value.get());
        }
    } finally {
        reader.close();
    }
}

From source file:org.apache.mahout.math.hadoop.stats.StandardDeviationCalculatorReducer.java

License:Apache License

@Override
protected void reduce(IntWritable key, Iterable<DoubleWritable> values, Context context)
        throws IOException, InterruptedException {
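    // Sum the partial values emitted for this key; get() unwraps each DoubleWritable.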
    double sum = 0.0;
    for (DoubleWritable value : values) {
        sum += value.get();
    }
    context.write(key, new DoubleWritable(sum));
}