Example usage for org.apache.hadoop.io DoubleWritable DoubleWritable

List of usage examples for org.apache.hadoop.io DoubleWritable DoubleWritable

Introduction

On this page you can find example usage for the org.apache.hadoop.io DoubleWritable constructor DoubleWritable(double value).

Prototype

public DoubleWritable(double value) 

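A minimal, self-contained sketch of the constructor round trip: build a DoubleWritable from a primitive double, serialize it the way Hadoop does between tasks, and read it back. The class name DoubleWritableDemo and the DataOutputBuffer/DataInputBuffer plumbing are illustrative choices, not taken from the usage examples below.

import java.io.IOException;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.DoubleWritable;

public class DoubleWritableDemo {
    public static void main(String[] args) throws IOException {
        // Wrap a primitive double in a Writable.
        DoubleWritable original = new DoubleWritable(3.14159);

        // Serialize it as Hadoop would when shuffling map output.
        DataOutputBuffer out = new DataOutputBuffer();
        original.write(out);

        // Deserialize into a fresh instance.
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());
        DoubleWritable copy = new DoubleWritable();
        copy.readFields(in);

        System.out.println(copy.get());               // 3.14159
        System.out.println(original.compareTo(copy)); // 0
    }
}
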
Usage

From source file:org.apache.mahout.classifier.sequencelearning.baumwelchmapreduce.BaumWelchMapper.java

License:Apache License

@Override
public void map(LongWritable seqID, IntArrayWritable seq, Context context)
        throws IOException, InterruptedException {

    MapWritable initialDistributionStripe = new MapWritable();
    MapWritable transitionDistributionStripe = new MapWritable();
    MapWritable emissionDistributionStripe = new MapWritable();

    //IntArrayWritable[] writableSequence = (IntArrayWritable[])seq.get();
    //int[] sequence = new int[seq.get().length];
    int[] sequence = new int[seq.get().length];

    int n = 0;
    for (Writable val : seq.get()) {
        sequence[n] = ((IntWritable) val).get();
        n++;
    }

    for (int k = 0; k < sequence.length; k++) {
        log.info("Sequence Array {}", Integer.toString(sequence[k]));
    }

    Matrix alphaFactors = HmmAlgorithms.forwardAlgorithm(Model, sequence, false);
    for (int i = 0; i < alphaFactors.numRows(); i++) {
        for (int j = 0; j < alphaFactors.numCols(); j++) {
            log.info("Alpha Factors Matrix entry ({}, {}) = {}", new Object[] { i, j, alphaFactors.get(i, j) });
        }
    }

    Matrix betaFactors = HmmAlgorithms.backwardAlgorithm(Model, sequence, false);
    for (int i = 0; i < betaFactors.numRows(); i++) {
        for (int j = 0; j < betaFactors.numCols(); j++) {
            log.info("Beta Factors Matrix entry ({}, {}) = {}", new Object[] { i, j, betaFactors.get(i, j) });
        }

        //Initial Distribution
        for (int q = 0; q < nrOfHiddenStates; q++) {
            double alpha_1_q = alphaFactors.get(1, q);
            double beta_1_q = betaFactors.get(1, q);
            initialDistributionStripe.put(new IntWritable(q), new DoubleWritable(alpha_1_q * beta_1_q));
        }

        //Emission Distribution
        /*
        Matrix emissionMatrix = new DenseMatrix(nrOfHiddenStates, sequence.length);
        for (int t = 0; t < sequence.length; t++) {
        HashMap<Integer, Double> innerMap = new HashMap<Integer, Double>();
        for (int q = 0; q < nrOfHiddenStates; q++) {
          double alpha_t_q = alphaFactors.get(t, q);
          double beta_t_q  = betaFactors.get(t, q);
          //innerMap.put(q, alpha_t_q * beta_t_q);
          emissionMatrix.set(q, t, alpha_t_q * beta_t_q);
          }
        }
        for (int q = 0; q < nrOfHiddenStates; q++) {
        Map innerEmissionMap = new MapWritable();
        for (int xt = 0; xt < sequence.length; xt++) {
          innerEmissionMap.put(new IntWritable(xt), new DoubleWritable(emissionMatrix.get(q, xt)));
        }
        emissionDistributionStripe.put(new IntWritable(q), (MapWritable)innerEmissionMap);
        }
        */

        double[][] emissionMatrix = new double[nrOfHiddenStates][nrOfEmittedStates];

        for (int q = 0; q < nrOfHiddenStates; q++) {
            for (int x = 0; x < nrOfEmittedStates; x++) {
                emissionMatrix[q][x] = 0.0;
            }
        }

        for (int t = 0; t < sequence.length; t++) {
            //HashMap<Integer, Double> innerMap = new HashMap<Integer, Double>();
            for (int q = 0; q < nrOfHiddenStates; q++) {
                double alpha_t_q = alphaFactors.get(t, q);
                double beta_t_q = betaFactors.get(t, q);
                //innerMap.put(q, alpha_t_q * beta_t_q);
                //emissionMatrix.set(q, t, alpha_t_q * beta_t_q);
                emissionMatrix[q][sequence[t]] += alpha_t_q * beta_t_q;
            }
        }
        for (int q = 0; q < nrOfHiddenStates; q++) {
            Map innerEmissionMap = new MapWritable();
            for (int xt = 0; xt < sequence.length; xt++) {
                innerEmissionMap.put(new IntWritable(sequence[xt]),
                        new DoubleWritable(emissionMatrix[q][sequence[xt]]));
            }
            emissionDistributionStripe.put(new IntWritable(q), (MapWritable) innerEmissionMap);
        }

        //Transition Distribution
        double[][] transitionMatrix = new double[nrOfHiddenStates][nrOfHiddenStates];
        for (int q = 0; q < nrOfHiddenStates; q++) {
            for (int x = 0; x < nrOfHiddenStates; x++) {
                transitionMatrix[q][x] = 0.0;
            }
        }

        for (int t = 0; t < sequence.length - 1; t++) {
            for (int q = 0; q < nrOfHiddenStates; q++) {
                for (int r = 0; r < nrOfHiddenStates; r++) {
                    double alpha_t_q = alphaFactors.get(t, q);
                    double A_q_r = Model.getTransitionMatrix().get(q, r);
                    double B_r_xtplus1 = Model.getEmissionMatrix().get(r, sequence[t + 1]);
                    double beta_tplus1_r = betaFactors.get(t + 1, r);
                    double transitionProb = alpha_t_q * A_q_r * B_r_xtplus1 * beta_tplus1_r;
                    log.info("Putting into Inner Map of Transition Distribution. Key = {}, Value = {}", q,
                            transitionProb);
                    transitionMatrix[q][r] += transitionProb;
                }
            }
        }
        for (int q = 0; q < nrOfHiddenStates; q++) {
            Map innerTransitionMap = new MapWritable();
            for (int r = 0; r < nrOfHiddenStates; r++) {
                innerTransitionMap.put(new IntWritable(r), new DoubleWritable(transitionMatrix[q][r]));
            }
            transitionDistributionStripe.put(new IntWritable(q), (MapWritable) innerTransitionMap);
        }

        context.write(new Text("INITIAL"), initialDistributionStripe);
        log.info("Context Writing from Mapper the Initial Distribution Stripe. Size = {}  Entries = {}",
                Integer.toString(initialDistributionStripe.size()),
                Integer.toString(initialDistributionStripe.entrySet().size()));
        for (int q = 0; q < nrOfHiddenStates; q++) {
            context.write(new Text("EMIT_" + Integer.toString(q)),
                    (MapWritable) emissionDistributionStripe.get(new IntWritable(q)));
            log.info("Context Writing from Mapper the Emission Distribution Stripe. State = {}  Entries = {}",
                    Integer.toString(q), Integer.toString(
                            ((MapWritable) emissionDistributionStripe.get(new IntWritable(q))).size()));
            for (MapWritable.Entry<Writable, Writable> entry : ((MapWritable) emissionDistributionStripe
                    .get(new IntWritable(q))).entrySet()) {
                log.info("Emission Distribution Stripe Details. Key = {}  Value = {} ",
                        Integer.toString(((IntWritable) entry.getKey()).get()),
                        Double.toString(((DoubleWritable) entry.getValue()).get()));
            }
            context.write(new Text("TRANSIT_" + Integer.toString(q)),
                    (MapWritable) transitionDistributionStripe.get(new IntWritable(q)));
            log.info("Context Writing from Mapper the Transition Distribution Stripe. State = {}  Entries = {}",
                    Integer.toString(q), Integer.toString(
                            ((MapWritable) transitionDistributionStripe.get(new IntWritable(q))).size()));
            for (MapWritable.Entry<Writable, Writable> entry : ((MapWritable) transitionDistributionStripe
                    .get(new IntWritable(q))).entrySet()) {
                log.info("Transition Distribution Stripe Details. Key = {}  Value = {} ",
                        Integer.toString(((IntWritable) entry.getKey()).get()),
                        Double.toString(((DoubleWritable) entry.getValue()).get()));
            }
        }

    }
}

From source file:org.apache.mahout.classifier.sequencelearning.baumwelchmapreduce.BaumWelchReducer.java

License:Apache License

@Override
protected void reduce(Text key, Iterable<MapWritable> stripes, Context context)
        throws IOException, InterruptedException {

    log.info("Entering Reducer. Key = {}", key.toString());
    MapWritable sumOfStripes = new MapWritable();
    boolean isInitial = false;
    boolean isTransit = false;
    boolean isEmit = false;
    int stateID = -1;

    if (key.charAt(0) == 'I') {
        isInitial = true;
    } else if (key.charAt(0) == 'E') {
        isEmit = true;
        stateID = Character.getNumericValue(key.charAt(5));
    } else if (key.charAt(0) == 'T') {
        isTransit = true;
        stateID = Character.getNumericValue(key.charAt(8));
    } else {
        throw new IllegalStateException("Baum Welch Reducer Error Determining the Key Type");
    }

    if (isInitial) {
        Double[] val = new Double[nrOfHiddenStates];
        for (int i = 0; i < nrOfHiddenStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Initial Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Initial Distribution Stripe Entry. Key = {}  Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue())
                        .get();
            }
        }
        for (int i = 0; i < nrOfHiddenStates; i++) {
            log.info("Reducer adding to sumOfStripes for Initial. Key = {}  Value ={}", Integer.toString(i),
                    Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else if (isEmit) {
        Double[] val = new Double[nrOfEmittedStates];
        for (int i = 0; i < nrOfEmittedStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Emission Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Emission Distribution Stripe Entry. Key = {}  Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue())
                        .get();
            }
        }
        for (int i = 0; i < nrOfEmittedStates; i++) {
            log.info("Reducer adding to sumOfStripes for Emission. Key = {}  Value ={}", Integer.toString(i),
                    Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else if (isTransit) {
        Double[] val = new Double[nrOfHiddenStates];
        for (int i = 0; i < nrOfHiddenStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Transition Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Transition Distribution Stripe Entry. Key = {}  Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue())
                        .get();
            }
        }
        for (int i = 0; i < nrOfHiddenStates; i++) {
            log.info("Reducer adding to sumOfStripes for Transition. Key = {}  Value ={}", Integer.toString(i),
                    Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else {
        throw new IllegalStateException("Baum Welch Reducer Error: Unable to aggregate distribution stripes.");
    }

    double sum = 0.0;
    for (MapWritable.Entry<Writable, Writable> sumEntry : sumOfStripes.entrySet()) {
        sum += ((DoubleWritable) sumEntry.getValue()).get();
    }

    //DoubleWritable normalizedSum = new DoubleWritable(0.0);
    //double[] innerValues = new double[sumOfStripes.size()];
    int index = 0;
    MapWritable distributionStripe = new MapWritable();
    for (MapWritable.Entry<Writable, Writable> sumEntry : sumOfStripes.entrySet()) {
        IntWritable state = (IntWritable) sumEntry.getKey();
        double innerValue = ((DoubleWritable) sumEntry.getValue()).get();
        double normalizedSum = innerValue / sum;
        //innerValues[index++] = normalizedSum;
        distributionStripe.put(state, new DoubleWritable(normalizedSum));
        //finalStripe.put(((IntWritable)sumEntry.getKey()), val);
    }

    log.info("Reducer Writing:  Key = {} Value (Stripe) Size = {}", key.toString(), finalStripe.size());
    for (MapWritable.Entry<Writable, Writable> entry : finalStripe.entrySet()) {
        log.info("Distribution Stripe Detail Key = {}, Value ={}", ((IntWritable) entry.getKey()).get(),
                ((DoubleWritable) entry.getValue()).get());
    }
    context.write(key, distributionStripe);

}

From source file:org.apache.mahout.classifier.sequencelearning.baumwelchmapreduce.BaumWelchUtils.java

License:Apache License

protected static void WriteModelToDirectory(HmmModel model, Path modelPath, Configuration conf)
        throws IOException {

    int numHidden = model.getNrOfHiddenStates();
    int numObserved = model.getNrOfOutputStates();
    Matrix emissionMatrix = model.getEmissionMatrix();
    Matrix transitionMatrix = model.getTransitionMatrix();
    Vector initialProbability = model.getInitialProbabilities();

    MapWritable initialDistributionMap = new MapWritable();
    MapWritable transitionDistributionMap = new MapWritable();
    MapWritable emissionDistributionMap = new MapWritable();
    // delete the output directory
    HadoopUtil.delete(conf, modelPath);
    // create new file to store HMM
    FileSystem fs = FileSystem.get(modelPath.toUri(), conf);
    Path outFile = new Path(modelPath, "part-randomSeed");
    boolean newFile = fs.createNewFile(outFile);

    if (newFile) {
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outFile, Text.class,
                MapWritable.class);

        try {

            // construct one MapWritable<IntWritable, DoubleWritable> object
            // and two MapWritable<Text, MapWritable<IntWritable, DoubleWritable >> objects
            for (int i = 0; i < numHidden; i++) {
                IntWritable initialDistributionKey = new IntWritable(i);
                DoubleWritable initialDistributionValue = new DoubleWritable(initialProbability.get(i));
                log.info("BuildRandomModel Initial Distribution Map: State {} = {})",
                        initialDistributionKey.get(), initialDistributionValue.get());
                initialDistributionMap.put(initialDistributionKey, initialDistributionValue);

                Text transitionDistributionKey = new Text("TRANSIT_" + Integer.toString(i));
                MapWritable transitionDistributionValue = new MapWritable();
                for (int j = 0; j < numHidden; j++) {
                    IntWritable transitionDistributionInnerKey = new IntWritable(j);
                    DoubleWritable transitionDistributionInnerValue = new DoubleWritable(
                            transitionMatrix.get(i, j));
                    log.info("BuildRandomModel Transition Distribution Map Inner: ({}, {}) = ({}, {})",
                            new Object[] { i, j, transitionDistributionInnerKey.get(),
                                    transitionDistributionInnerValue.get() });
                    transitionDistributionValue.put(transitionDistributionInnerKey,
                            transitionDistributionInnerValue);
                }
                transitionDistributionMap.put(transitionDistributionKey, transitionDistributionValue);

                Text emissionDistributionKey = new Text("EMIT_" + Integer.toString(i));
                MapWritable emissionDistributionValue = new MapWritable();
                for (int j = 0; j < numObserved; j++) {
                    IntWritable emissionDistributionInnerKey = new IntWritable(j);
                    DoubleWritable emissionDistributionInnerValue = new DoubleWritable(
                            emissionMatrix.get(i, j));
                    log.info("BuildRandomModel Emission Distribution Map Inner: ({}, {}) = ({}, {})",
                            new Object[] { i, j, emissionDistributionInnerKey.get(),
                                    emissionDistributionInnerValue.get() });
                    emissionDistributionValue.put(emissionDistributionInnerKey, emissionDistributionInnerValue);
                }
                emissionDistributionMap.put(emissionDistributionKey, emissionDistributionValue);

            }

            writer.append(new Text("INITIAL"), initialDistributionMap);
            log.info("Wrote random Initial Distribution Map to {}", outFile);

            for (MapWritable.Entry<Writable, Writable> transitionEntry : transitionDistributionMap.entrySet()) {
                log.info("Writing Transition Distribution Map Key, Value = ({}, {})", transitionEntry.getKey(),
                        transitionEntry.getValue());
                writer.append(transitionEntry.getKey(), transitionEntry.getValue());
            }
            log.info("Wrote random Transition Distribution Map to {}", outFile);

            for (MapWritable.Entry<Writable, Writable> emissionEntry : emissionDistributionMap.entrySet()) {
                log.info("Writing Emission Distribution Map Key, Value = ({}, {})", emissionEntry.getKey(),
                        emissionEntry.getValue());
                writer.append(emissionEntry.getKey(), emissionEntry.getValue());
            }
            log.info("Wrote random Emission Distribution Map to {}", outFile);

        } finally {
            Closeables.closeQuietly(writer);
        }

    }

}
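
The listing above only writes the model; a minimal sketch of the inverse, reading the stripes back from the same sequence file, could look like the following. The class name ModelFileDump and the method dumpModel are illustrative, not part of BaumWelchUtils; the key/value layout assumed here is exactly the one produced by the writer above.

import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public final class ModelFileDump {

    /** Prints every (state, probability) entry stored in the encoded model file. */
    public static void dumpModel(Path modelPath, Configuration conf) throws IOException {
        Path modelFile = new Path(modelPath, "part-randomSeed"); // file name used by the writer above
        FileSystem fs = FileSystem.get(modelFile.toUri(), conf);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, modelFile, conf);
        try {
            Text key = new Text();
            MapWritable stripe = new MapWritable();
            while (reader.next(key, stripe)) {
                // "INITIAL", "TRANSIT_i" and "EMIT_i" stripes all map IntWritable -> DoubleWritable
                for (Map.Entry<Writable, Writable> e : stripe.entrySet()) {
                    int state = ((IntWritable) e.getKey()).get();
                    double probability = ((DoubleWritable) e.getValue()).get();
                    System.out.println(key + " " + state + " = " + probability);
                }
            }
        } finally {
            reader.close();
        }
    }
}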

From source file:org.apache.mahout.classifier.sequencelearning.hmm.hadoop.BaumWelchCombiner.java

License:Apache License

@Override
protected void reduce(Text key, Iterable<MapWritable> stripes, Context context)
        throws IOException, InterruptedException {

    MapWritable sumOfStripes = new MapWritable();

    if (scaling.equals("logscaling")) {
        for (MapWritable stripe : stripes) {
            for (Map.Entry e : stripe.entrySet()) {
                double val = ((DoubleWritable) e.getValue()).get();
                if (!sumOfStripes.containsKey(e.getKey())) {
                    sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                } else {
                    double sumSripesVal = ((DoubleWritable) sumOfStripes.get(e.getKey())).get();
                    if (sumSripesVal > Double.NEGATIVE_INFINITY) {
                        val = val + Math.log(1 + Math.exp(sumSripesVal - val));
                    }
                    sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                }
            }
        }
    } else if (scaling.equals("rescaling")) {
        for (MapWritable stripe : stripes) {
            for (Map.Entry e : stripe.entrySet()) {
                if (key.charAt(0) == (int) 'I') {

                    double val = ((DoubleWritable) e.getValue()).get();
                    if (!sumOfStripes.containsKey(e.getKey())) {
                        sumOfStripes.put((IntWritable) e.getKey(), (DoubleWritable) e.getValue());
                    } else {
                        val += ((DoubleWritable) sumOfStripes.get(e.getKey())).get();
                        sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                    }
                } else {
                    double[] pr = BaumWelchUtils.toDoublePair(((BytesWritable) e.getValue()).getBytes());
                    double num = pr[0];
                    double denom = pr[1];
                    if (!sumOfStripes.containsKey(e.getKey())) {
                        sumOfStripes.put((IntWritable) e.getKey(), (BytesWritable) e.getValue());
                    } else {
                        double[] pr1 = BaumWelchUtils
                                .toDoublePair(((BytesWritable) sumOfStripes.get(e.getKey())).getBytes());
                        num += pr1[0];
                        denom += pr1[1];
                        byte[] doublePair1 = BaumWelchUtils.doublePairToByteArray(num, denom);
                        sumOfStripes.put((IntWritable) e.getKey(), new BytesWritable(doublePair1));
                    }
                }
            }
        }
    } else {
        for (MapWritable stripe : stripes) {
            for (Map.Entry e : stripe.entrySet()) {
                double val = ((DoubleWritable) e.getValue()).get();
                if (!sumOfStripes.containsKey(e.getKey())) {
                    sumOfStripes.put((IntWritable) e.getKey(), (DoubleWritable) e.getValue());
                } else {
                    val += ((DoubleWritable) sumOfStripes.get(e.getKey())).get();
                    sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                }
            }
        }
    }
    context.write(key, sumOfStripes);
}

From source file:org.apache.mahout.classifier.sequencelearning.hmm.hadoop.BaumWelchMapper.java

License:Apache License

@Override
public void map(LongWritable seqID, VectorWritable seq, Context context)
        throws IOException, InterruptedException {

    MapWritable initialDistributionStripe = new MapWritable();
    HashMap<Integer, MapWritable> transitionDistributionStripe = new HashMap<Integer, MapWritable>();
    HashMap<Integer, MapWritable> emissionDistributionStripe = new HashMap<Integer, MapWritable>();

    Vector vec = seq.get();
    log.info("Sequence Length = {}", vec.size());
    int[] sequence = new int[vec.size()];

    int n = 0;

    for (int idx = 0; idx < vec.size(); idx++) {
        int val = (int) (vec.getElement(idx)).get();
        sequence[n] = val;
        n++;
    }

    if (scaling == HmmAlgorithms.ScalingMethod.LOGSCALING) {
        Matrix alphaFactors = HmmAlgorithms.forwardAlgorithm(Model, sequence,
                HmmAlgorithms.ScalingMethod.LOGSCALING, null);
        Matrix betaFactors = HmmAlgorithms.backwardAlgorithm(Model, sequence,
                HmmAlgorithms.ScalingMethod.LOGSCALING, null);

        //Initial Distribution
        for (int q = 0; q < nrOfHiddenStates; q++) {
            double alpha_1_q = alphaFactors.get(0, q);
            double beta_1_q = betaFactors.get(0, q);
            if ((alpha_1_q + beta_1_q) > Double.NEGATIVE_INFINITY) {
                initialDistributionStripe.put(new IntWritable(q), new DoubleWritable(alpha_1_q + beta_1_q));
            }
        }

        //Transition Distribution
        double[][] transitionMatrix = new double[nrOfHiddenStates][nrOfHiddenStates];
        for (int q = 0; q < nrOfHiddenStates; q++) {
            for (int x = 0; x < nrOfHiddenStates; x++) {
                transitionMatrix[q][x] = Double.NEGATIVE_INFINITY;
            }
        }

        for (int t = 0; t < sequence.length - 1; t++) {
            for (int q = 0; q < nrOfHiddenStates; q++) {
                for (int r = 0; r < nrOfHiddenStates; r++) {
                    double alpha_t_q = alphaFactors.get(t, q);
                    double A_q_r = Model.getTransitionMatrix().get(q, r) > 0
                            ? Math.log(Model.getTransitionMatrix().get(q, r))
                            : Double.NEGATIVE_INFINITY;
                    double B_r_xtplus1 = Model.getEmissionMatrix().get(r, sequence[t + 1]) > 0
                            ? Math.log(Model.getEmissionMatrix().get(r, sequence[t + 1]))
                            : Double.NEGATIVE_INFINITY;
                    double beta_tplus1_r = betaFactors.get(t + 1, r);
                    double transitionProb = alpha_t_q + A_q_r + B_r_xtplus1 + beta_tplus1_r;
                    if (transitionProb > Double.NEGATIVE_INFINITY) {
                        transitionMatrix[q][r] = transitionProb
                                + Math.log(1 + Math.exp(transitionMatrix[q][r] - transitionProb));
                    }
                }
            }
        }
        for (int q = 0; q < nrOfHiddenStates; q++) {
            MapWritable innerMap = new MapWritable();
            for (int r = 0; r < nrOfHiddenStates; r++) {
                if (transitionMatrix[q][r] > Double.NEGATIVE_INFINITY) {
                    innerMap.put(new IntWritable(r), new DoubleWritable(transitionMatrix[q][r]));
                }
            }
            transitionDistributionStripe.put(q, innerMap);
        }

        //Emission distribution
        double[][] emissionMatrix = new double[nrOfHiddenStates][nrOfEmittedStates];
        for (int q = 0; q < nrOfHiddenStates; q++) {
            for (int x = 0; x < nrOfEmittedStates; x++) {
                emissionMatrix[q][x] = Double.NEGATIVE_INFINITY;
            }
        }
        for (int t = 0; t < sequence.length; t++) {
            for (int q = 0; q < nrOfHiddenStates; q++) {
                double alpha_t_q = alphaFactors.get(t, q);
                double beta_t_q = betaFactors.get(t, q);
                double sum = alpha_t_q + beta_t_q;
                double max = sum > emissionMatrix[q][sequence[t]] ? sum : emissionMatrix[q][sequence[t]];
                if (sum > Double.NEGATIVE_INFINITY) {
                    emissionMatrix[q][sequence[t]] = sum
                            + Math.log(1 + Math.exp(emissionMatrix[q][sequence[t]] - sum));
                }

            }
        }
        for (int q = 0; q < nrOfHiddenStates; q++) {
            MapWritable innerMap = new MapWritable();
            for (int r = 0; r < nrOfEmittedStates; r++) {
                if (emissionMatrix[q][r] > Double.NEGATIVE_INFINITY) {
                    innerMap.put(new IntWritable(r), new DoubleWritable(emissionMatrix[q][r]));
                }
            }
            emissionDistributionStripe.put(q, innerMap);
        }
    } else if (scaling == HmmAlgorithms.ScalingMethod.RESCALING) {
        double[] scalingFactors = new double[vec.size()];

        Matrix alphaFactors = HmmAlgorithms.forwardAlgorithm(Model, sequence,
                HmmAlgorithms.ScalingMethod.RESCALING, scalingFactors);
        Matrix betaFactors = HmmAlgorithms.backwardAlgorithm(Model, sequence,
                HmmAlgorithms.ScalingMethod.RESCALING, scalingFactors);

        //Initial Distribution
        for (int q = 0; q < nrOfHiddenStates; q++) {
            double alpha_1_q = alphaFactors.get(0, q);
            double beta_1_q = betaFactors.get(0, q);
            initialDistributionStripe.put(new IntWritable(q),
                    new DoubleWritable(alpha_1_q * beta_1_q / scalingFactors[0]));
        }

        //Transition Distribution
        double[][] transitionMatrixNum = new double[nrOfHiddenStates][nrOfHiddenStates];
        double[][] transitionMatrixDenom = new double[nrOfHiddenStates][nrOfHiddenStates];
        for (int q = 0; q < nrOfHiddenStates; q++) {
            for (int x = 0; x < nrOfHiddenStates; x++) {
                transitionMatrixNum[q][x] = 0.0;
                transitionMatrixDenom[q][x] = 0.0;
            }
        }

        for (int t = 0; t < sequence.length - 1; t++) {
            for (int q = 0; q < nrOfHiddenStates; q++) {
                for (int r = 0; r < nrOfHiddenStates; r++) {
                    double alpha_t_q = alphaFactors.get(t, q);
                    double A_q_r = Model.getTransitionMatrix().get(q, r);
                    double B_r_xtplus1 = Model.getEmissionMatrix().get(r, sequence[t + 1]);
                    double beta_tplus1_r = betaFactors.get(t + 1, r);
                    double beta_t_q = betaFactors.get(t, q);
                    double transitionProbNum = alpha_t_q * A_q_r * B_r_xtplus1 * beta_tplus1_r;
                    double transitionProbDenom = alpha_t_q * beta_t_q / scalingFactors[t];
                    transitionMatrixNum[q][r] += transitionProbNum;
                    transitionMatrixDenom[q][r] += transitionProbDenom;
                }
            }
        }
        for (int q = 0; q < nrOfHiddenStates; q++) {
            MapWritable innerMap = new MapWritable();
            for (int r = 0; r < nrOfHiddenStates; r++) {
                byte[] doublePair = BaumWelchUtils.doublePairToByteArray(transitionMatrixNum[q][r],
                        transitionMatrixDenom[q][r]);
                innerMap.put(new IntWritable(r), new BytesWritable(doublePair));
            }
            transitionDistributionStripe.put(q, innerMap);
        }

        //Emission distribution
        double[][] emissionMatrixNum = new double[nrOfHiddenStates][nrOfEmittedStates];
        double[][] emissionMatrixDenom = new double[nrOfHiddenStates][nrOfEmittedStates];
        for (int q = 0; q < nrOfHiddenStates; q++) {
            for (int x = 0; x < nrOfEmittedStates; x++) {
                emissionMatrixNum[q][x] = 0.0;
                emissionMatrixDenom[q][x] = 0.0;
            }
        }

        for (int q = 0; q < nrOfHiddenStates; ++q) {
            for (int j = 0; j < nrOfEmittedStates; ++j) {
                double temp = 0;
                double temp1 = 0;
                for (int t = 0; t < sequence.length; ++t) {
                    // delta tensor
                    if (sequence[t] == j) {
                        temp += alphaFactors.get(t, q) * betaFactors.get(t, q) / scalingFactors[t];
                    }
                    temp1 += alphaFactors.get(t, q) * betaFactors.get(t, q) / scalingFactors[t];
                }
                emissionMatrixNum[q][j] += temp;
                emissionMatrixDenom[q][j] += temp1;
            }
        }

        for (int q = 0; q < nrOfHiddenStates; q++) {
            MapWritable innerMap = new MapWritable();
            for (int r = 0; r < nrOfEmittedStates; r++) {
                byte[] doublePair = BaumWelchUtils.doublePairToByteArray(emissionMatrixNum[q][r],
                        emissionMatrixDenom[q][r]);
                innerMap.put(new IntWritable(r), new BytesWritable(doublePair));
            }
            emissionDistributionStripe.put(q, innerMap);
        }
    } else {
        Matrix alphaFactors = HmmAlgorithms.forwardAlgorithm(Model, sequence,
                HmmAlgorithms.ScalingMethod.NOSCALING, null);
        Matrix betaFactors = HmmAlgorithms.backwardAlgorithm(Model, sequence,
                HmmAlgorithms.ScalingMethod.NOSCALING, null);

        //Initial Distribution
        for (int q = 0; q < nrOfHiddenStates; q++) {
            double alpha_1_q = alphaFactors.get(0, q);
            double beta_1_q = betaFactors.get(0, q);
            initialDistributionStripe.put(new IntWritable(q), new DoubleWritable(alpha_1_q * beta_1_q));
        }

        //Transition Distribution
        double[][] transitionMatrix = new double[nrOfHiddenStates][nrOfHiddenStates];
        for (int q = 0; q < nrOfHiddenStates; q++) {
            for (int x = 0; x < nrOfHiddenStates; x++) {
                transitionMatrix[q][x] = 0.0;
            }
        }

        for (int t = 0; t < sequence.length - 1; t++) {
            for (int q = 0; q < nrOfHiddenStates; q++) {
                for (int r = 0; r < nrOfHiddenStates; r++) {
                    double alpha_t_q = alphaFactors.get(t, q);
                    double A_q_r = Model.getTransitionMatrix().get(q, r);
                    double B_r_xtplus1 = Model.getEmissionMatrix().get(r, sequence[t + 1]);
                    double beta_tplus1_r = betaFactors.get(t + 1, r);
                    double transitionProb = alpha_t_q * A_q_r * B_r_xtplus1 * beta_tplus1_r;
                    transitionMatrix[q][r] += transitionProb;
                }
            }
        }
        for (int q = 0; q < nrOfHiddenStates; q++) {
            MapWritable innerMap = new MapWritable();
            for (int r = 0; r < nrOfHiddenStates; r++) {
                innerMap.put(new IntWritable(r), new DoubleWritable(transitionMatrix[q][r]));
            }
            transitionDistributionStripe.put(q, innerMap);
        }

        //Emission distribution
        double[][] emissionMatrix = new double[nrOfHiddenStates][nrOfEmittedStates];
        for (int q = 0; q < nrOfHiddenStates; q++) {
            for (int x = 0; x < nrOfEmittedStates; x++) {
                emissionMatrix[q][x] = 0.0;
            }
        }
        for (int t = 0; t < sequence.length; t++) {
            for (int q = 0; q < nrOfHiddenStates; q++) {

                double alpha_t_q = alphaFactors.get(t, q);
                double beta_t_q = betaFactors.get(t, q);
                emissionMatrix[q][sequence[t]] += alpha_t_q * beta_t_q;

            }
        }
        for (int q = 0; q < nrOfHiddenStates; q++) {
            MapWritable innerMap = new MapWritable();
            for (int r = 0; r < nrOfEmittedStates; r++) {
                innerMap.put(new IntWritable(r), new DoubleWritable(emissionMatrix[q][r]));
            }
            emissionDistributionStripe.put(q, innerMap);
        }

    }

    //push out the associative arrays
    context.write(new Text("INITIAL"), initialDistributionStripe);
    for (int q = 0; q < nrOfHiddenStates; q++) {
        context.write(new Text("EMIT_" + Integer.toString(q)), emissionDistributionStripe.get(q));
        context.write(new Text("TRANSIT_" + Integer.toString(q)), transitionDistributionStripe.get(q));
    }

}

From source file:org.apache.mahout.classifier.sequencelearning.hmm.hadoop.BaumWelchReducer.java

License:Apache License

@Override
protected void reduce(Text key, Iterable<MapWritable> stripes, Context context)
        throws IOException, InterruptedException {

    MapWritable sumOfStripes = new MapWritable();

    // Finish the Expectation Step by aggregating all posterior probabilities for one key
    if (scaling.equals("logscaling")) {
        double totalValSum = Double.NEGATIVE_INFINITY;
        for (MapWritable stripe : stripes) {
            for (Map.Entry e : stripe.entrySet()) {
                double val = ((DoubleWritable) e.getValue()).get();
                double max = totalValSum > val ? totalValSum : val;
                totalValSum = max + Math.log(Math.exp(totalValSum - max) + Math.exp(val - max));
                if (!sumOfStripes.containsKey(e.getKey())) {
                    sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                } else {
                    double sumSripesVal = ((DoubleWritable) sumOfStripes.get(e.getKey())).get();
                    if (sumSripesVal > Double.NEGATIVE_INFINITY) {
                        val = val + Math.log(1 + Math.exp(sumSripesVal - val));
                    }
                    sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                }
            }
        }

        //normalize the aggregate
        for (Map.Entry e : sumOfStripes.entrySet()) {
            double val = ((DoubleWritable) e.getValue()).get();
            if (totalValSum > Double.NEGATIVE_INFINITY) {
                val = val - totalValSum;
            }
            sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(Math.exp(val)));
        }
    } else if (scaling.equals("rescaling")) {
        double totalValSum = 0.0;

        for (MapWritable stripe : stripes) {
            for (Map.Entry e : stripe.entrySet()) {
                if (key.charAt(0) == (int) 'I') {
                    double val = ((DoubleWritable) e.getValue()).get();
                    totalValSum += val;
                    if (!sumOfStripes.containsKey(e.getKey())) {
                        sumOfStripes.put((IntWritable) e.getKey(), (DoubleWritable) e.getValue());
                    } else {
                        val += ((DoubleWritable) sumOfStripes.get(e.getKey())).get();
                        sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                    }
                } else {
                    double[] pr = BaumWelchUtils.toDoublePair(((BytesWritable) e.getValue()).getBytes());
                    double num = pr[0];
                    double denom = pr[1];
                    if (!sumOfStripes.containsKey(e.getKey())) {
                        sumOfStripes.put((IntWritable) e.getKey(), (BytesWritable) e.getValue());
                    } else {
                        double[] pr1 = BaumWelchUtils
                                .toDoublePair(((BytesWritable) sumOfStripes.get(e.getKey())).getBytes());
                        num += pr1[0];
                        denom += pr1[1];
                        byte[] doublePair1 = BaumWelchUtils.doublePairToByteArray(num, denom);
                        sumOfStripes.put((IntWritable) e.getKey(), new BytesWritable(doublePair1));
                    }
                }
            }
        }

        if (key.charAt(0) == (int) 'I') {
            //normalize the aggregate
            for (Map.Entry e : sumOfStripes.entrySet()) {
                double val = ((DoubleWritable) e.getValue()).get();
                if (totalValSum > 0) {
                    val /= totalValSum;
                }
                sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
            }

        } else {
            // compute the probabilities
            for (Map.Entry e : sumOfStripes.entrySet()) {
                double[] pr1 = BaumWelchUtils
                        .toDoublePair(((BytesWritable) sumOfStripes.get(e.getKey())).getBytes());
                sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(pr1[0] / pr1[1]));
            }
        }
    } else {
        double totalValSum = 0.0;

        for (MapWritable stripe : stripes) {
            for (Map.Entry e : stripe.entrySet()) {
                int state = ((IntWritable) e.getKey()).get();
                double val = ((DoubleWritable) e.getValue()).get();
                totalValSum += val;
                if (!sumOfStripes.containsKey(e.getKey())) {
                    sumOfStripes.put((IntWritable) e.getKey(), (DoubleWritable) e.getValue());
                } else {
                    val += ((DoubleWritable) sumOfStripes.get(e.getKey())).get();
                    sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                }
            }
        }

        //normalize the aggregate
        for (Map.Entry e : sumOfStripes.entrySet()) {
            double val = ((DoubleWritable) e.getValue()).get();
            if (totalValSum > 0) {
                val /= totalValSum;
            }
            sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
        }
    }

    //Write the distribution parameter vector to HDFS for the next iteration
    context.write(key, sumOfStripes);

}

From source file:org.apache.mahout.classifier.sequencelearning.hmm.hadoop.BaumWelchUtils.java

License:Apache License

/**
 * Encodes a particular HmmModel as a Sequence File and write it to the specified location.
 *
 * @param model     HmmModel to be encoded
 * @param modelPath Location to store the encoded model
 * @param conf      Configuration object
 * @throws IOException
 */

protected static void writeModelToDirectory(HmmModel model, Path modelPath, Configuration conf)
        throws IOException {

    int numHidden = model.getNrOfHiddenStates();
    int numObserved = model.getNrOfOutputStates();
    Matrix emissionMatrix = model.getEmissionMatrix();
    Matrix transitionMatrix = model.getTransitionMatrix();
    Vector initialProbability = model.getInitialProbabilities();

    MapWritable initialDistributionMap = new MapWritable();
    MapWritable transitionDistributionMap = new MapWritable();
    MapWritable emissionDistributionMap = new MapWritable();
    // delete the output directory
    HadoopUtil.delete(conf, modelPath);
    // create new file to store HMM
    FileSystem fs = FileSystem.get(modelPath.toUri(), conf);
    Path outFile = new Path(modelPath, "part-randomSeed");
    boolean newFile = fs.createNewFile(outFile);

    if (newFile) {
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outFile, Text.class,
                MapWritable.class);

        try {
            for (int i = 0; i < numHidden; i++) {
                IntWritable initialDistributionKey = new IntWritable(i);
                DoubleWritable initialDistributionValue = new DoubleWritable(initialProbability.get(i));
                initialDistributionMap.put(initialDistributionKey, initialDistributionValue);

                Text transitionDistributionKey = new Text("TRANSIT_" + Integer.toString(i));
                MapWritable transitionDistributionValue = new MapWritable();
                for (int j = 0; j < numHidden; j++) {
                    IntWritable transitionDistributionInnerKey = new IntWritable(j);
                    DoubleWritable transitionDistributionInnerValue = new DoubleWritable(
                            transitionMatrix.get(i, j));
                    transitionDistributionValue.put(transitionDistributionInnerKey,
                            transitionDistributionInnerValue);
                }
                transitionDistributionMap.put(transitionDistributionKey, transitionDistributionValue);

                Text emissionDistributionKey = new Text("EMIT_" + Integer.toString(i));
                MapWritable emissionDistributionValue = new MapWritable();
                for (int j = 0; j < numObserved; j++) {
                    IntWritable emissionDistributionInnerKey = new IntWritable(j);
                    DoubleWritable emissionDistributionInnerValue = new DoubleWritable(
                            emissionMatrix.get(i, j));
                    emissionDistributionValue.put(emissionDistributionInnerKey, emissionDistributionInnerValue);
                }
                emissionDistributionMap.put(emissionDistributionKey, emissionDistributionValue);
            }

            writer.append(new Text("INITIAL"), initialDistributionMap);
            log.info("Wrote random Initial Distribution Map to {}", outFile);
            for (MapWritable.Entry<Writable, Writable> transitionEntry : transitionDistributionMap.entrySet()) {

                writer.append(transitionEntry.getKey(), transitionEntry.getValue());
            }
            log.info("Wrote random Transition Distribution Map to {}", outFile);

            for (MapWritable.Entry<Writable, Writable> emissionEntry : emissionDistributionMap.entrySet()) {
                writer.append(emissionEntry.getKey(), emissionEntry.getValue());
            }
            log.info("Wrote random Emission Distribution Map to {}", outFile);

        } finally {
            Closeables.closeQuietly(writer);
        }

    }

}

From source file:org.apache.mahout.clustering.lda.LDAReducer.java

License:Apache License

@Override
public void reduce(IntPairWritable topicWord, Iterable<DoubleWritable> values, Context context)
        throws java.io.IOException, InterruptedException {

    // sum likelihoods
    if (topicWord.getSecond() == LDADriver.LOG_LIKELIHOOD_KEY) {
        double accum = 0.0;
        for (DoubleWritable vw : values) {
            double v = vw.get();
            if (Double.isNaN(v)) {
                throw new IllegalArgumentException(topicWord.getFirst() + " " + topicWord.getSecond());
            }
            accum += v;
        }
        context.write(topicWord, new DoubleWritable(accum));
    } else { // log sum sufficient statistics.
        double accum = Double.NEGATIVE_INFINITY;
        for (DoubleWritable vw : values) {
            double v = vw.get();
            if (Double.isNaN(v)) {
                throw new IllegalArgumentException(topicWord.getFirst() + " " + topicWord.getSecond());
            }
            accum = LDAUtil.logSum(accum, v);
            if (Double.isNaN(accum)) {
                throw new IllegalArgumentException(topicWord.getFirst() + " " + topicWord.getSecond());
            }
        }
        context.write(topicWord, new DoubleWritable(accum));
    }

}
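
LDAUtil.logSum is not shown in this listing; presumably it is the standard numerically stable log-sum-exp. The method below is an illustrative sketch of that operation, not the Mahout implementation.

// Illustrative only: numerically stable log(exp(a) + exp(b)),
// which is what LDAUtil.logSum is presumed to compute above.
static double logSum(double a, double b) {
    if (a == Double.NEGATIVE_INFINITY) {
        return b;
    }
    if (b == Double.NEGATIVE_INFINITY) {
        return a;
    }
    double max = Math.max(a, b);
    return max + Math.log(Math.exp(a - max) + Math.exp(b - max));
}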

From source file:org.apache.mahout.ga.watchmaker.EvalMapper.java

License:Apache License

@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    Object candidate = StringUtils.fromString(value.toString());

    double fitness = evaluator.getFitness(candidate, null);

    context.write(key, new DoubleWritable(fitness));
}

From source file:org.apache.mahout.math.hadoop.similarity.TestVectorDistanceSimilarityJob.java

License:Apache License

@Test
public void testVectorDistanceMapper() throws Exception {
    Mapper<WritableComparable<?>, VectorWritable, StringTuple, DoubleWritable>.Context context = EasyMock
            .createMock(Mapper.Context.class);
    StringTuple tuple = new StringTuple();
    tuple.add("foo");
    tuple.add("123");
    context.write(tuple, new DoubleWritable(Math.sqrt(2.0)));
    tuple = new StringTuple();
    tuple.add("foo2");
    tuple.add("123");
    context.write(tuple, new DoubleWritable(1));

    EasyMock.replay(context);

    Vector vector = new RandomAccessSparseVector(2);
    vector.set(0, 2);
    vector.set(1, 2);

    VectorDistanceMapper mapper = new VectorDistanceMapper();
    setField(mapper, "measure", new EuclideanDistanceMeasure());
    Collection<NamedVector> seedVectors = Lists.newArrayList();
    Vector seed1 = new RandomAccessSparseVector(2);
    seed1.set(0, 1);
    seed1.set(1, 1);
    Vector seed2 = new RandomAccessSparseVector(2);
    seed2.set(0, 2);
    seed2.set(1, 1);

    seedVectors.add(new NamedVector(seed1, "foo"));
    seedVectors.add(new NamedVector(seed2, "foo2"));
    setField(mapper, "seedVectors", seedVectors);

    mapper.map(new IntWritable(123), new VectorWritable(vector), context);

    EasyMock.verify(context);
}