List of usage examples for org.apache.hadoop.io DoubleWritable get
public double get()
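Before the project-level examples, a minimal, self-contained sketch of the accessor on its own (the class name is illustrative, not from any of the sources below):

import org.apache.hadoop.io.DoubleWritable;

public class DoubleWritableGetDemo {
    public static void main(String[] args) {
        DoubleWritable dw = new DoubleWritable(3.5);
        double d = dw.get();          // unwrap the boxed primitive
        System.out.println(d);        // prints 3.5

        dw.set(7.25);                 // the wrapper is mutable, so it can be reused
        System.out.println(dw.get()); // prints 7.25
    }
}

Mutability is why many of the examples below allocate a single DoubleWritable and repeatedly call set(...)/get() inside their loops instead of constructing a new wrapper per record.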
From source file: org.apache.mahout.classifier.cbayes.CBayesThetaMapper.java
License: Apache License

/**
 * We need to calculate the idf of each feature in each label.
 *
 * @param key the label,feature pair (can either be the freq count or the term document count)
 */
@Override
public void map(Text key, DoubleWritable value, OutputCollector<Text, DoubleWritable> output,
        Reporter reporter) throws IOException {
    String labelFeaturePair = key.toString();
    if (labelFeaturePair.charAt(0) == ',') { // if it is from the Sigma_j folder (feature weight sum)
        String feature = labelFeaturePair.substring(1);
        double alpha_i = 1.0;
        for (Map.Entry<String, Double> stringDoubleEntry : labelWeightSum.entrySet()) {
            double inverseDenominator = 1.0 / (sigma_jSigma_k - stringDoubleEntry.getValue() + vocabCount);
            DoubleWritable weight = new DoubleWritable((value.get() + alpha_i) * inverseDenominator);
            output.collect(new Text((stringDoubleEntry.getKey() + ',' + feature).trim()), weight); // output Sigma_j
        }
    } else {
        String label = labelFeaturePair.split(",")[0];
        double inverseDenominator = 1.0 / (sigma_jSigma_k - labelWeightSum.get(label) + vocabCount);
        DoubleWritable weight = new DoubleWritable(-value.get() * inverseDenominator);
        output.collect(key, weight); // output -D_ij
    }
}
From source file: org.apache.mahout.classifier.cbayes.CBayesThetaNormalizerMapper.java
License: Apache License

/**
 * We need to calculate the idf of each feature in each label.
 *
 * @param key the label,feature pair (can either be the freq count or the term document count)
 */
@Override
public void map(Text key, DoubleWritable value, OutputCollector<Text, DoubleWritable> output,
        Reporter reporter) throws IOException {
    String labelFeaturePair = key.toString();
    if (labelFeaturePair.charAt(0) == ',') { // if it is from the Sigma_j folder
        double alpha_i = 1.0;
        for (Map.Entry<String, Double> stringDoubleEntry : labelWeightSum.entrySet()) {
            double weight = Math.log(
                    (value.get() + alpha_i) / (sigma_jSigma_k - stringDoubleEntry.getValue() + vocabCount));
            output.collect(new Text(('_' + stringDoubleEntry.getKey()).trim()), new DoubleWritable(weight)); // output Sigma_j
        }
    } else {
        String label = labelFeaturePair.split(",")[0];
        double D_ij = value.get();
        double denominator = 0.5 * ((sigma_jSigma_k / vocabCount) + (D_ij * this.labelWeightSum.size()));
        double weight = Math.log(1.0 - D_ij / denominator);
        output.collect(new Text(('_' + label).trim()), new DoubleWritable(weight)); // output -D_ij
    }
}
From source file: org.apache.mahout.classifier.sequencelearning.baumwelchmapreduce.BaumWelchUtils.java
License: Apache License

protected static void WriteModelToDirectory(HmmModel model, Path modelPath, Configuration conf)
        throws IOException {
    int numHidden = model.getNrOfHiddenStates();
    int numObserved = model.getNrOfOutputStates();
    Matrix emissionMatrix = model.getEmissionMatrix();
    Matrix transitionMatrix = model.getTransitionMatrix();
    Vector initialProbability = model.getInitialProbabilities();

    MapWritable initialDistributionMap = new MapWritable();
    MapWritable transitionDistributionMap = new MapWritable();
    MapWritable emissionDistributionMap = new MapWritable();

    // delete the output directory
    HadoopUtil.delete(conf, modelPath);

    // create new file to store HMM
    FileSystem fs = FileSystem.get(modelPath.toUri(), conf);
    Path outFile = new Path(modelPath, "part-randomSeed");
    boolean newFile = fs.createNewFile(outFile);

    if (newFile) {
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outFile, Text.class, MapWritable.class);
        try {
            // construct one MapWritable<IntWritable, DoubleWritable> object
            // and two MapWritable<Text, MapWritable<IntWritable, DoubleWritable>> objects
            for (int i = 0; i < numHidden; i++) {
                IntWritable initialDistributionKey = new IntWritable(i);
                DoubleWritable initialDistributionValue = new DoubleWritable(initialProbability.get(i));
                log.info("BuildRandomModel Initial Distribution Map: State {} = {})",
                        initialDistributionKey.get(), initialDistributionValue.get());
                initialDistributionMap.put(initialDistributionKey, initialDistributionValue);

                Text transitionDistributionKey = new Text("TRANSIT_" + Integer.toString(i));
                MapWritable transitionDistributionValue = new MapWritable();
                for (int j = 0; j < numHidden; j++) {
                    IntWritable transitionDistributionInnerKey = new IntWritable(j);
                    DoubleWritable transitionDistributionInnerValue = new DoubleWritable(transitionMatrix.get(i, j));
                    log.info("BuildRandomModel Transition Distribution Map Inner: ({}, {}) = ({}, {})",
                            new Object[] { i, j, transitionDistributionInnerKey.get(),
                                    transitionDistributionInnerValue.get() });
                    transitionDistributionValue.put(transitionDistributionInnerKey, transitionDistributionInnerValue);
                }
                transitionDistributionMap.put(transitionDistributionKey, transitionDistributionValue);

                Text emissionDistributionKey = new Text("EMIT_" + Integer.toString(i));
                MapWritable emissionDistributionValue = new MapWritable();
                for (int j = 0; j < numObserved; j++) {
                    IntWritable emissionDistributionInnerKey = new IntWritable(j);
                    DoubleWritable emissionDistributionInnerValue = new DoubleWritable(emissionMatrix.get(i, j));
                    log.info("BuildRandomModel Emission Distribution Map Inner: ({}, {}) = ({}, {})",
                            new Object[] { i, j, emissionDistributionInnerKey.get(),
                                    emissionDistributionInnerValue.get() });
                    emissionDistributionValue.put(emissionDistributionInnerKey, emissionDistributionInnerValue);
                }
                emissionDistributionMap.put(emissionDistributionKey, emissionDistributionValue);
            }

            writer.append(new Text("INITIAL"), initialDistributionMap);
            log.info("Wrote random Initial Distribution Map to {}", outFile);

            for (MapWritable.Entry<Writable, Writable> transitionEntry : transitionDistributionMap.entrySet()) {
                log.info("Writing Transition Distribution Map Key, Value = ({}, {})",
                        transitionEntry.getKey(), transitionEntry.getValue());
                writer.append(transitionEntry.getKey(), transitionEntry.getValue());
            }
            log.info("Wrote random Transition Distribution Map to {}", outFile);

            for (MapWritable.Entry<Writable, Writable> emissionEntry : emissionDistributionMap.entrySet()) {
                log.info("Writing Emission Distribution Map Key, Value = ({}, {})",
                        emissionEntry.getKey(), emissionEntry.getValue());
                writer.append(emissionEntry.getKey(), emissionEntry.getValue());
            }
            log.info("Wrote random Emission Distribution Map to {}", outFile);
        } finally {
            Closeables.closeQuietly(writer);
        }
    }
}
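The natural counterpart, not shown in this source, is reading those records back and unwrapping the DoubleWritable entries. A minimal sketch (not from the source), assuming the same fs, outFile, and conf as above, that every record is a (Text, MapWritable) pair, and that the surrounding imports (java.util.Map and the Hadoop io classes) are in place:

SequenceFile.Reader reader = new SequenceFile.Reader(fs, outFile, conf);
try {
    Text key = new Text();
    MapWritable map = new MapWritable();
    while (reader.next(key, map)) {
        for (Map.Entry<Writable, Writable> e : map.entrySet()) {
            // The INITIAL row maps IntWritable -> DoubleWritable directly;
            // TRANSIT_i / EMIT_i rows do the same for row i of each matrix.
            double p = ((DoubleWritable) e.getValue()).get();
            log.info("{} {} = {}", new Object[] { key, e.getKey(), p });
        }
    }
} finally {
    reader.close();
}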
From source file: org.apache.mahout.clustering.lda.LDADriver.java
License: Apache License

static LDAState createState(Configuration job) throws IOException {
    String statePath = job.get(STATE_IN_KEY);
    int numTopics = Integer.parseInt(job.get(NUM_TOPICS_KEY));
    int numWords = Integer.parseInt(job.get(NUM_WORDS_KEY));
    double topicSmoothing = Double.parseDouble(job.get(TOPIC_SMOOTHING_KEY));

    Path dir = new Path(statePath);
    FileSystem fs = dir.getFileSystem(job);

    DenseMatrix pWgT = new DenseMatrix(numTopics, numWords);
    double[] logTotals = new double[numTopics];
    double ll = 0.0;

    IntPairWritable key = new IntPairWritable();
    DoubleWritable value = new DoubleWritable();
    for (FileStatus status : fs.globStatus(new Path(dir, "part-*"))) {
        Path path = status.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job);
        while (reader.next(key, value)) {
            int topic = key.getFirst();
            int word = key.getSecond();
            if (word == TOPIC_SUM_KEY) {
                logTotals[topic] = value.get();
                if (Double.isInfinite(value.get())) {
                    throw new IllegalArgumentException();
                }
            } else if (topic == LOG_LIKELIHOOD_KEY) {
                ll = value.get();
            } else {
                if (!((topic >= 0) && (word >= 0))) {
                    throw new IllegalArgumentException(topic + " " + word);
                }
                if (pWgT.getQuick(topic, word) != 0.0) {
                    throw new IllegalArgumentException();
                }
                pWgT.setQuick(topic, word, value.get());
                if (Double.isInfinite(pWgT.getQuick(topic, word))) {
                    throw new IllegalArgumentException();
                }
            }
        }
        reader.close();
    }
    return new LDAState(numTopics, numWords, topicSmoothing, pWgT, logTotals, ll);
}
From source file: org.apache.mahout.clustering.lda.LDADriver.java
License: Apache License

private double findLL(Path statePath, Configuration job) throws IOException {
    FileSystem fs = statePath.getFileSystem(job);
    double ll = 0.0;
    IntPairWritable key = new IntPairWritable();
    DoubleWritable value = new DoubleWritable();
    for (FileStatus status : fs.globStatus(new Path(statePath, "part-*"))) {
        Path path = status.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job);
        while (reader.next(key, value)) {
            if (key.getFirst() == LOG_LIKELIHOOD_KEY) {
                ll = value.get();
                break;
            }
        }
        reader.close();
    }
    return ll;
}
From source file: org.apache.mahout.clustering.lda.LDAMapper.java
License: Apache License

@Override
protected void map(WritableComparable<?> key, VectorWritable wordCountsWritable, Context context)
        throws IOException, InterruptedException {
    Vector wordCounts = wordCountsWritable.get();
    LDAInference.InferredDocument doc;
    try {
        doc = infer.infer(wordCounts);
    } catch (ArrayIndexOutOfBoundsException e1) {
        throw new IllegalStateException("This is probably because the --numWords argument is set too small. \n"
                + "\tIt needs to be >= the number of words (terms, actually) in the corpus and can be \n"
                + "\tlarger if some storage inefficiency can be tolerated.", e1);
    }
    double[] logTotals = new double[state.getNumTopics()];
    Arrays.fill(logTotals, Double.NEGATIVE_INFINITY);

    // Output sufficient statistics for each word. == pseudo-log counts.
    DoubleWritable v = new DoubleWritable();
    for (Iterator<Vector.Element> iter = wordCounts.iterateNonZero(); iter.hasNext();) {
        Vector.Element e = iter.next();
        int w = e.index();
        for (int k = 0; k < state.getNumTopics(); ++k) {
            v.set(doc.phi(k, w) + Math.log(e.get()));
            IntPairWritable kw = new IntPairWritable(k, w);
            // output (topic, word)'s logProb contribution
            context.write(kw, v);
            logTotals[k] = LDAUtil.logSum(logTotals[k], v.get());
        }
    }

    // Output the totals for the statistics. This is to make normalizing a lot easier.
    for (int k = 0; k < state.getNumTopics(); ++k) {
        IntPairWritable kw = new IntPairWritable(k, LDADriver.TOPIC_SUM_KEY);
        v.set(logTotals[k]);
        assert !Double.isNaN(v.get());
        context.write(kw, v);
    }

    // Output log-likelihoods.
    IntPairWritable llk = new IntPairWritable(LDADriver.LOG_LIKELIHOOD_KEY, LDADriver.LOG_LIKELIHOOD_KEY);
    v.set(doc.getLogLikelihood());
    context.write(llk, v);
}
From source file: org.apache.mahout.clustering.lda.LDAReducer.java
License: Apache License

@Override
public void reduce(IntPairWritable topicWord, Iterable<DoubleWritable> values, Context context)
        throws java.io.IOException, InterruptedException {
    // sum likelihoods
    if (topicWord.getSecond() == LDADriver.LOG_LIKELIHOOD_KEY) {
        double accum = 0.0;
        for (DoubleWritable vw : values) {
            double v = vw.get();
            if (Double.isNaN(v)) {
                throw new IllegalArgumentException(topicWord.getFirst() + " " + topicWord.getSecond());
            }
            accum += v;
        }
        context.write(topicWord, new DoubleWritable(accum));
    } else {
        // log sum sufficient statistics.
        double accum = Double.NEGATIVE_INFINITY;
        for (DoubleWritable vw : values) {
            double v = vw.get();
            if (Double.isNaN(v)) {
                throw new IllegalArgumentException(topicWord.getFirst() + " " + topicWord.getSecond());
            }
            accum = LDAUtil.logSum(accum, v);
            if (Double.isNaN(accum)) {
                throw new IllegalArgumentException(topicWord.getFirst() + " " + topicWord.getSecond());
            }
        }
        context.write(topicWord, new DoubleWritable(accum));
    }
}
From source file: org.apache.mahout.clustering.lda.LDAWordTopicMapper.java
License: Apache License

@Override
protected void map(WritableComparable<?> key, VectorWritable wordCountsWritable, Context context)
        throws IOException, InterruptedException {
    Vector wordCounts = wordCountsWritable.get();
    LDAInference.InferredDocument doc;
    try {
        doc = infer.infer(wordCounts);
    } catch (ArrayIndexOutOfBoundsException e1) {
        throw new IllegalStateException("This is probably because the --numWords argument is set too small. \n"
                + "\tIt needs to be >= the number of words (terms, actually) in the corpus and can be \n"
                + "\tlarger if some storage inefficiency can be tolerated.", e1);
    }
    double[] logTotals = new double[state.getNumTopics()];
    Arrays.fill(logTotals, Double.NEGATIVE_INFINITY);

    // Output sufficient statistics for each word. == pseudo-log counts.
    DoubleWritable v = new DoubleWritable();
    for (Iterator<Vector.Element> iter = wordCounts.iterateNonZero(); iter.hasNext();) {
        Vector.Element e = iter.next();
        int w = e.index();
        for (int k = 0; k < state.getNumTopics(); ++k) {
            v.set(doc.phi(k, w) + Math.log(e.get()));
            IntPairWritable kw = new IntPairWritable(k, w);
            // output (topic, word)'s logProb contribution
            context.write(kw, v);
            logTotals[k] = LDAUtil.logSum(logTotals[k], v.get());
        }
    }

    // Output the totals for the statistics. This is to make normalizing a lot easier.
    for (int k = 0; k < state.getNumTopics(); ++k) {
        IntPairWritable kw = new IntPairWritable(k, LDADriver.TOPIC_SUM_KEY);
        v.set(logTotals[k]);
        assert !Double.isNaN(v.get());
        context.write(kw, v);
    }

    // Output log-likelihoods.
    IntPairWritable llk = new IntPairWritable(LDADriver.LOG_LIKELIHOOD_KEY, LDADriver.LOG_LIKELIHOOD_KEY);
    v.set(doc.getLogLikelihood());
    context.write(llk, v);
}
From source file: org.apache.mahout.ga.watchmaker.OutputUtils.java
License: Apache License

/**
 * Reads back the evaluations.
 *
 * @param outpath output <code>Path</code>
 * @param evaluations list of evaluations
 */
public static void importEvaluations(FileSystem fs, Configuration conf, Path outpath, List<Double> evaluations)
        throws IOException {
    Sorter sorter = new Sorter(fs, LongWritable.class, DoubleWritable.class, conf);

    // merge and sort the outputs
    Path[] outfiles = listOutputFiles(fs, outpath);
    Path output = new Path(outpath, "output.sorted");
    sorter.merge(outfiles, output);

    // import the evaluations
    LongWritable key = new LongWritable();
    DoubleWritable value = new DoubleWritable();
    Reader reader = new Reader(fs, output, conf);
    try {
        while (reader.next(key, value)) {
            evaluations.add(value.get());
        }
    } finally {
        reader.close();
    }
}
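A hypothetical call site for this helper (the output path is illustrative, not from the source):

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
List<Double> evaluations = new ArrayList<Double>();
OutputUtils.importEvaluations(fs, conf, new Path("ga/output"), evaluations);
// evaluations now holds the unwrapped double fitness values, in sorted key order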
From source file: org.apache.mahout.math.hadoop.stats.StandardDeviationCalculatorReducer.java
License: Apache License

@Override
protected void reduce(IntWritable key, Iterable<DoubleWritable> values, Context context)
        throws IOException, InterruptedException {
    double sum = 0.0;
    for (DoubleWritable value : values) {
        sum += value.get();
    }
    context.write(key, new DoubleWritable(sum));
}
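Because plain summation is associative and commutative, a reducer of this shape can usually double as a combiner. A hypothetical driver fragment (the job name is an assumption; Job.getInstance is the newer-API form):

Job job = Job.getInstance(conf, "std-dev-sums");
job.setReducerClass(StandardDeviationCalculatorReducer.class);
job.setCombinerClass(StandardDeviationCalculatorReducer.class); // safe because sum is associative
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(DoubleWritable.class);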