Example usage for org.apache.hadoop.io MapWritable put

List of usage examples for org.apache.hadoop.io MapWritable put

Introduction

On this page you can find example usage for org.apache.hadoop.io MapWritable put.

Prototype

@Override
public Writable put(Writable key, Writable value)
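
A minimal, self-contained sketch of put in action (the class name and the Text/IntWritable key and value types are illustrative assumptions, not taken from the examples below). Per the java.util.Map contract, put returns the value previously mapped to the key, or null if there was none:

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class MapWritablePutSketch {
    public static void main(String[] args) {
        MapWritable map = new MapWritable();
        // put returns the Writable previously mapped to the key, or null if the key was absent
        Writable previous = map.put(new Text("count"), new IntWritable(1));
        System.out.println(previous);                   // null
        previous = map.put(new Text("count"), new IntWritable(2));
        System.out.println(previous);                   // 1
        System.out.println(map.get(new Text("count"))); // 2
    }
}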

Usage

From source file:org.apache.hama.ml.recommendation.cf.OnlineTrainBSP.java

License:Apache License
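Here put builds the reply message: the peer answers each incoming feature request with a MapWritable that pairs the requested user or item id with its stored feature vector.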

private void sendRequiredFeatures(BSPPeer<Text, VectorWritable, Text, VectorWritable, MapWritable> peer)
        throws IOException, SyncException, InterruptedException {

    MapWritable msg = null;
    int senderId = 0;

    while ((msg = peer.getCurrentMessage()) != null) {
        senderId = ((IntWritable) msg.get(OnlineCF.Settings.MSG_SENDER_ID)).get();
        MapWritable resp = new MapWritable();
        if (msg.containsKey(OnlineCF.Settings.MSG_INP_ITEM_FEATURES)) {
            // send item feature
            String itemId = ((Text) msg.get(OnlineCF.Settings.MSG_INP_ITEM_FEATURES)).toString().substring(1);
            resp.put(OnlineCF.Settings.MSG_INP_ITEM_FEATURES, new Text(itemId));
            resp.put(OnlineCF.Settings.MSG_VALUE, inpItemsFeatures.get(itemId));
        } else if (msg.containsKey(OnlineCF.Settings.MSG_INP_USER_FEATURES)) {
            // send user feature
            String userId = ((Text) msg.get(OnlineCF.Settings.MSG_INP_USER_FEATURES)).toString().substring(1);
            resp.put(OnlineCF.Settings.MSG_INP_USER_FEATURES, new Text(userId));
            resp.put(OnlineCF.Settings.MSG_VALUE, inpUsersFeatures.get(userId));
        }
        peer.send(peer.getPeerName(senderId), resp);
    }
}

From source file:org.apache.hama.ml.recommendation.cf.OnlineTrainBSP.java

License:Apache License
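Here put builds the request messages: each MapWritable carries the wanted feature key together with the sender's peer id, and is sent to the peer selected by hashing that key.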

private void askForFeatures(BSPPeer<Text, VectorWritable, Text, VectorWritable, MapWritable> peer,
        HashSet<Text> requiredUserFeatures, HashSet<Text> requiredItemFeatures)
        throws IOException, SyncException, InterruptedException {
    int peerCount = peer.getNumPeers();
    int peerId = peer.getPeerIndex();

    if (requiredUserFeatures != null) {
        Iterator<Text> iter = requiredUserFeatures.iterator();
        Text key = null;
        while (iter.hasNext()) {
            MapWritable msg = new MapWritable();
            key = iter.next();
            msg.put(OnlineCF.Settings.MSG_INP_USER_FEATURES, key);
            msg.put(OnlineCF.Settings.MSG_SENDER_ID, new IntWritable(peerId));
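            // Note: Text.hashCode() can be negative, so key.hashCode() % peerCount may be
            // negative too; a defensive variant (here and in the item loop below) would be
            // Math.floorMod(key.hashCode(), peerCount).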
            peer.send(peer.getPeerName(key.hashCode() % peerCount), msg);
        }
    }

    if (requiredItemFeatures != null) {
        Iterator<Text> iter = requiredItemFeatures.iterator();
        Text key = null;
        while (iter.hasNext()) {
            MapWritable msg = new MapWritable();
            key = iter.next();
            msg.put(OnlineCF.Settings.MSG_INP_ITEM_FEATURES, key);
            msg.put(OnlineCF.Settings.MSG_SENDER_ID, new IntWritable(peerId));
            peer.send(peer.getPeerName(key.hashCode() % peerCount), msg);
        }
    }
}

From source file:org.apache.hive.storage.jdbc.JdbcRecordReader.java

License:Apache License
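Here put copies one database row into the output value, mapping each column name (Text) to its value as an ObjectWritable, or to NullWritable for SQL NULLs.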

@Override
public boolean next(LongWritable key, MapWritable value) throws IOException {
    try {
        LOGGER.debug("JdbcRecordReader.next called");
        if (dbAccessor == null) {
            dbAccessor = DatabaseAccessorFactory.getAccessor(conf);
            iterator = dbAccessor.getRecordIterator(conf, split.getLimit(), split.getOffset());
        }

        if (iterator.hasNext()) {
            LOGGER.debug("JdbcRecordReader has more records to read.");
            key.set(pos);
            pos++;
            Map<String, Object> record = iterator.next();
            if ((record != null) && (!record.isEmpty())) {
                for (Entry<String, Object> entry : record.entrySet()) {
                    value.put(new Text(entry.getKey()), entry.getValue() == null ? NullWritable.get()
                            : new ObjectWritable(entry.getValue()));
                }
                return true;
            } else {
                LOGGER.debug("JdbcRecordReader got null record.");
                return false;
            }
        } else {
            LOGGER.debug("JdbcRecordReader has no more records to read.");
            return false;
        }
    } catch (Exception e) {
        LOGGER.error("An error occurred while reading the next record from DB.", e);
        return false;
    }
}

From source file:org.apache.mahout.classifier.sequencelearning.baumwelchmapreduce.BaumWelchCombiner.java

License:Apache License
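Here put accumulates partial sums: the combiner adds up the initial, emission, or transition stripes for a key element by element and emits the aggregate as a single MapWritable.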

@Override
protected void reduce(Text key, Iterable<MapWritable> stripes, Context context)
        throws IOException, InterruptedException {

    log.info("Entering Reducer. Key = {}", key.toString());
    MapWritable sumOfStripes = new MapWritable();
    boolean isInitial = false;
    boolean isTransit = false;
    boolean isEmit = false;

    if (key.charAt(0) == 'I') {
        isInitial = true;
    } else if (key.charAt(0) == 'E') {
        isEmit = true;
    } else if (key.charAt(0) == 'T') {
        isTransit = true;
    } else {
        throw new IllegalStateException("Baum Welch Reducer Error Determining the Key Type");
    }

    if (isInitial) {
        Double[] val = new Double[nrOfHiddenStates];
        for (int i = 0; i < nrOfHiddenStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Initial Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Initial Distribution Stripe Entry. Key = {}  Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue())
                        .get();
            }
        }
        for (int i = 0; i < nrOfHiddenStates; i++) {
            log.info("Reducer adding to sumOfStripes for Initial. Key = {}  Value ={}", Integer.toString(i),
                    Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else if (isEmit) {
        Double[] val = new Double[nrOfEmittedStates];
        for (int i = 0; i < nrOfEmittedStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Emission Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Emission Distribution Stripe Entry. Key = {}  Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue())
                        .get();
            }
        }
        for (int i = 0; i < nrOfEmittedStates; i++) {
            log.info("Reducer adding to sumOfStripes for Emission. Key = {}  Value ={}", Integer.toString(i),
                    Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else if (isTransit) {
        Double[] val = new Double[nrOfHiddenStates];
        for (int i = 0; i < nrOfHiddenStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Transition Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Transition Distribution Stripe Entry. Key = {}  Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue())
                        .get();
            }
        }
        for (int i = 0; i < nrOfHiddenStates; i++) {
            log.info("Reducer adding to sumOfStripes for Transition. Key = {}  Value ={}", Integer.toString(i),
                    Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else {
        throw new IllegalStateException("Baum Welch Reducer Error: Unable to aggregate distribution stripes.");
    }

    context.write(key, sumOfStripes);

}

From source file:org.apache.mahout.classifier.sequencelearning.baumwelchmapreduce.BaumWelchMapper.java

License:Apache License
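Here put fills the per-state distribution stripes with unnormalized expectation counts derived from the forward (alpha) and backward (beta) matrices of one observation sequence.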

@Override
public void map(LongWritable seqID, IntArrayWritable seq, Context context)
        throws IOException, InterruptedException {

    MapWritable initialDistributionStripe = new MapWritable();
    MapWritable transitionDistributionStripe = new MapWritable();
    MapWritable emissionDistributionStripe = new MapWritable();

    int[] sequence = new int[seq.get().length];

    int n = 0;
    for (Writable val : seq.get()) {
        sequence[n] = ((IntWritable) val).get();
        n++;
    }

    for (int k = 0; k < sequence.length; k++) {
        log.info("Sequence Array {}", Integer.toString(sequence[k]));
    }

    Matrix alphaFactors = HmmAlgorithms.forwardAlgorithm(Model, sequence, false);
    for (int i = 0; i < alphaFactors.numRows(); i++) {
        for (int j = 0; j < alphaFactors.numCols(); j++) {
            log.info("Alpha Factors Matrix entry ({}, {}) = {}", new Object[] { i, j, alphaFactors.get(i, j) });
        }
    }

    Matrix betaFactors = HmmAlgorithms.backwardAlgorithm(Model, sequence, false);
    for (int i = 0; i < betaFactors.numRows(); i++) {
        for (int j = 0; j < betaFactors.numCols(); j++) {
            log.info("Beta Factors Matrix entry ({}, {}) = {}", new Object[] { i, j, betaFactors.get(i, j) });
        }
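        // NOTE: the loop over betaFactors rows opened above is not closed here, so everything
        // below, including the context.write calls, runs once per row of betaFactors; this
        // mirrors the published source but looks like a misplaced brace.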

        //Initial Distribution
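        // Note: this reads time step 1 of the alpha/beta matrices; the newer hmm.hadoop mapper below reads time step 0.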
        for (int q = 0; q < nrOfHiddenStates; q++) {
            double alpha_1_q = alphaFactors.get(1, q);
            double beta_1_q = betaFactors.get(1, q);
            initialDistributionStripe.put(new IntWritable(q), new DoubleWritable(alpha_1_q * beta_1_q));
        }

        //Emission Distribution

        double[][] emissionMatrix = new double[nrOfHiddenStates][nrOfEmittedStates];

        for (int q = 0; q < nrOfHiddenStates; q++) {
            for (int x = 0; x < nrOfEmittedStates; x++) {
                emissionMatrix[q][x] = 0.0;
            }
        }

        for (int t = 0; t < sequence.length; t++) {
            for (int q = 0; q < nrOfHiddenStates; q++) {
                double alpha_t_q = alphaFactors.get(t, q);
                double beta_t_q = betaFactors.get(t, q);
                emissionMatrix[q][sequence[t]] += alpha_t_q * beta_t_q;
            }
        }
        for (int q = 0; q < nrOfHiddenStates; q++) {
            MapWritable innerEmissionMap = new MapWritable();
            for (int xt = 0; xt < sequence.length; xt++) {
                innerEmissionMap.put(new IntWritable(sequence[xt]),
                        new DoubleWritable(emissionMatrix[q][sequence[xt]]));
            }
            emissionDistributionStripe.put(new IntWritable(q), innerEmissionMap);
        }

        //Transition Distribution
        double[][] transitionMatrix = new double[nrOfHiddenStates][nrOfHiddenStates];
        for (int q = 0; q < nrOfHiddenStates; q++) {
            for (int x = 0; x < nrOfHiddenStates; x++) {
                transitionMatrix[q][x] = 0.0;
            }
        }

        for (int t = 0; t < sequence.length - 1; t++) {
            for (int q = 0; q < nrOfHiddenStates; q++) {
                for (int r = 0; r < nrOfHiddenStates; r++) {
                    double alpha_t_q = alphaFactors.get(t, q);
                    double A_q_r = Model.getTransitionMatrix().get(q, r);
                    double B_r_xtplus1 = Model.getEmissionMatrix().get(r, sequence[t + 1]);
                    double beta_tplus1_r = betaFactors.get(t + 1, r);
                    double transitionProb = alpha_t_q * A_q_r * B_r_xtplus1 * beta_tplus1_r;
                    log.info("Putting into Inner Map of Transition Distribution. Key = {}, Value = {}", q,
                            transitionProb);
                    transitionMatrix[q][r] += transitionProb;
                }
            }
        }
        for (int q = 0; q < nrOfHiddenStates; q++) {
            MapWritable innerTransitionMap = new MapWritable();
            for (int r = 0; r < nrOfHiddenStates; r++) {
                innerTransitionMap.put(new IntWritable(r), new DoubleWritable(transitionMatrix[q][r]));
            }
            transitionDistributionStripe.put(new IntWritable(q), innerTransitionMap);
        }

        context.write(new Text("INITIAL"), initialDistributionStripe);
        log.info("Context Writing from Mapper the Initial Distribution Stripe. Size = {}  Entries = {}",
                Integer.toString(initialDistributionStripe.size()),
                Integer.toString(initialDistributionStripe.entrySet().size()));
        for (int q = 0; q < nrOfHiddenStates; q++) {
            context.write(new Text("EMIT_" + Integer.toString(q)),
                    (MapWritable) emissionDistributionStripe.get(new IntWritable(q)));
            log.info("Context Writing from Mapper the Emission Distribution Stripe. State = {}  Entries = {}",
                    Integer.toString(q), Integer.toString(
                            ((MapWritable) emissionDistributionStripe.get(new IntWritable(q))).size()));
            for (MapWritable.Entry<Writable, Writable> entry : ((MapWritable) emissionDistributionStripe
                    .get(new IntWritable(q))).entrySet()) {
                log.info("Emission Distribution Stripe Details. Key = {}  Value = {} ",
                        Integer.toString(((IntWritable) entry.getKey()).get()),
                        Double.toString(((DoubleWritable) entry.getValue()).get()));
            }
            context.write(new Text("TRANSIT_" + Integer.toString(q)),
                    (MapWritable) transitionDistributionStripe.get(new IntWritable(q)));
            log.info("Context Writing from Mapper the Transition Distribution Stripe. State = {}  Entries = {}",
                    Integer.toString(q), Integer.toString(
                            ((MapWritable) transitionDistributionStripe.get(new IntWritable(q))).size()));
            for (MapWritable.Entry<Writable, Writable> entry : ((MapWritable) transitionDistributionStripe
                    .get(new IntWritable(q))).entrySet()) {
                log.info("Transition Distribution Stripe Details. Key = {}  Value = {} ",
                        Integer.toString(((IntWritable) entry.getKey()).get()),
                        Double.toString(((DoubleWritable) entry.getValue()).get()));
            }
        }

    }
}

From source file:org.apache.mahout.classifier.sequencelearning.baumwelchmapreduce.BaumWelchReducer.java

License:Apache License
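Here put is used twice: once to aggregate the incoming stripes into sumOfStripes, and once to store the normalized values in the distribution stripe that is written out for the next iteration.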

@Override
protected void reduce(Text key, Iterable<MapWritable> stripes, Context context)
        throws IOException, InterruptedException {

    log.info("Entering Reducer. Key = {}", key.toString());
    MapWritable sumOfStripes = new MapWritable();
    boolean isInitial = false;
    boolean isTransit = false;
    boolean isEmit = false;
    int stateID = -1;

    if (key.charAt(0) == 'I') {
        isInitial = true;
    } else if (key.charAt(0) == 'E') {
        isEmit = true;
        stateID = Character.getNumericValue(key.charAt(5));
    } else if (key.charAt(0) == 'T') {
        isTransit = true;
        stateID = Character.getNumericValue(key.charAt(8));
    } else {
        throw new IllegalStateException("Baum Welch Reducer Error Determining the Key Type");
    }

    if (isInitial) {
        Double[] val = new Double[nrOfHiddenStates];
        for (int i = 0; i < nrOfHiddenStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Initial Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Initial Distribution Stripe Entry. Key = {}  Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue())
                        .get();
            }
        }
        for (int i = 0; i < nrOfHiddenStates; i++) {
            log.info("Reducer adding to sumOfStripes for Initial. Key = {}  Value ={}", Integer.toString(i),
                    Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else if (isEmit) {
        Double[] val = new Double[nrOfEmittedStates];
        for (int i = 0; i < nrOfEmittedStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Emission Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Emission Distribution Stripe Entry. Key = {}  Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue())
                        .get();
            }
        }
        for (int i = 0; i < nrOfEmittedStates; i++) {
            log.info("Reducer adding to sumOfStripes for Emission. Key = {}  Value ={}", Integer.toString(i),
                    Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else if (isTransit) {
        Double[] val = new Double[nrOfHiddenStates];
        for (int i = 0; i < nrOfHiddenStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Transition Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Transition Distribution Stripe Entry. Key = {}  Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue())
                        .get();
            }
        }
        for (int i = 0; i < nrOfHiddenStates; i++) {
            log.info("Reducer adding to sumOfStripes for Transition. Key = {}  Value ={}", Integer.toString(i),
                    Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else {
        throw new IllegalStateException("Baum Welch Reducer Error: Unable to aggregate distribution stripes.");
    }

    double sum = 0.0;
    for (MapWritable.Entry<Writable, Writable> sumEntry : sumOfStripes.entrySet()) {
        sum += ((DoubleWritable) sumEntry.getValue()).get();
    }

    MapWritable distributionStripe = new MapWritable();
    for (MapWritable.Entry<Writable, Writable> sumEntry : sumOfStripes.entrySet()) {
        IntWritable state = (IntWritable) sumEntry.getKey();
        double innerValue = ((DoubleWritable) sumEntry.getValue()).get();
        double normalizedSum = innerValue / sum;
        distributionStripe.put(state, new DoubleWritable(normalizedSum));
    }

    log.info("Reducer Writing:  Key = {} Value (Stripe) Size = {}", key.toString(), distributionStripe.size());
    for (MapWritable.Entry<Writable, Writable> entry : distributionStripe.entrySet()) {
        log.info("Distribution Stripe Detail Key = {}, Value ={}", ((IntWritable) entry.getKey()).get(),
                ((DoubleWritable) entry.getValue()).get());
    }
    context.write(key, distributionStripe);

}

From source file:org.apache.mahout.classifier.sequencelearning.baumwelchmapreduce.BaumWelchUtils.java

License:Apache License
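Here put serializes an HmmModel: the initial probabilities go into one MapWritable keyed by state, while each row of the transition and emission matrices becomes a nested MapWritable appended to the sequence file.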

protected static void WriteModelToDirectory(HmmModel model, Path modelPath, Configuration conf)
        throws IOException {

    int numHidden = model.getNrOfHiddenStates();
    int numObserved = model.getNrOfOutputStates();
    Matrix emissionMatrix = model.getEmissionMatrix();
    Matrix transitionMatrix = model.getTransitionMatrix();
    Vector initialProbability = model.getInitialProbabilities();

    MapWritable initialDistributionMap = new MapWritable();
    MapWritable transitionDistributionMap = new MapWritable();
    MapWritable emissionDistributionMap = new MapWritable();
    // delete the output directory
    HadoopUtil.delete(conf, modelPath);
    // create new file to store HMM
    FileSystem fs = FileSystem.get(modelPath.toUri(), conf);
    Path outFile = new Path(modelPath, "part-randomSeed");
    boolean newFile = fs.createNewFile(outFile);

    if (newFile) {
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outFile, Text.class,
                MapWritable.class);

        try {

            // construct one MapWritable<IntWritable, DoubleWritable> object
            // and two MapWritable<Text, MapWritable<IntWritable, DoubleWritable >> objects
            for (int i = 0; i < numHidden; i++) {
                IntWritable initialDistributionKey = new IntWritable(i);
                DoubleWritable initialDistributionValue = new DoubleWritable(initialProbability.get(i));
                log.info("BuildRandomModel Initial Distribution Map: State {} = {})",
                        initialDistributionKey.get(), initialDistributionValue.get());
                initialDistributionMap.put(initialDistributionKey, initialDistributionValue);

                Text transitionDistributionKey = new Text("TRANSIT_" + Integer.toString(i));
                MapWritable transitionDistributionValue = new MapWritable();
                for (int j = 0; j < numHidden; j++) {
                    IntWritable transitionDistributionInnerKey = new IntWritable(j);
                    DoubleWritable transitionDistributionInnerValue = new DoubleWritable(
                            transitionMatrix.get(i, j));
                    log.info("BuildRandomModel Transition Distribution Map Inner: ({}, {}) = ({}, {})",
                            new Object[] { i, j, transitionDistributionInnerKey.get(),
                                    transitionDistributionInnerValue.get() });
                    transitionDistributionValue.put(transitionDistributionInnerKey,
                            transitionDistributionInnerValue);
                }
                transitionDistributionMap.put(transitionDistributionKey, transitionDistributionValue);

                Text emissionDistributionKey = new Text("EMIT_" + Integer.toString(i));
                MapWritable emissionDistributionValue = new MapWritable();
                for (int j = 0; j < numObserved; j++) {
                    IntWritable emissionDistributionInnerKey = new IntWritable(j);
                    DoubleWritable emissionDistributionInnerValue = new DoubleWritable(
                            emissionMatrix.get(i, j));
                    log.info("BuildRandomModel Emission Distribution Map Inner: ({}, {}) = ({}, {})",
                            new Object[] { i, j, emissionDistributionInnerKey.get(),
                                    emissionDistributionInnerValue.get() });
                    emissionDistributionValue.put(emissionDistributionInnerKey, emissionDistributionInnerValue);
                }
                emissionDistributionMap.put(emissionDistributionKey, emissionDistributionValue);

            }

            writer.append(new Text("INITIAL"), initialDistributionMap);
            log.info("Wrote random Initial Distribution Map to {}", outFile);

            for (MapWritable.Entry<Writable, Writable> transitionEntry : transitionDistributionMap.entrySet()) {
                log.info("Writing Transition Distribution Map Key, Value = ({}, {})", transitionEntry.getKey(),
                        transitionEntry.getValue());
                writer.append(transitionEntry.getKey(), transitionEntry.getValue());
            }
            log.info("Wrote random Transition Distribution Map to {}", outFile);

            for (MapWritable.Entry<Writable, Writable> emissionEntry : emissionDistributionMap.entrySet()) {
                log.info("Writing Emission Distribution Map Key, Value = ({}, {})", emissionEntry.getKey(),
                        emissionEntry.getValue());
                writer.append(emissionEntry.getKey(), emissionEntry.getValue());
            }
            log.info("Wrote random Emission Distribution Map to {}", outFile);

        } finally {
            Closeables.closeQuietly(writer);
        }

    }

}

From source file:org.apache.mahout.classifier.sequencelearning.hmm.hadoop.BaumWelchCombiner.java

License:Apache License
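Here put merges stripes under three scaling modes: log-space sums for "logscaling", packed numerator/denominator byte pairs for "rescaling", and plain sums otherwise.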

@Override
protected void reduce(Text key, Iterable<MapWritable> stripes, Context context)
        throws IOException, InterruptedException {

    MapWritable sumOfStripes = new MapWritable();

    if (scaling.equals("logscaling")) {
        for (MapWritable stripe : stripes) {
            for (Map.Entry e : stripe.entrySet()) {
                double val = ((DoubleWritable) e.getValue()).get();
                if (!sumOfStripes.containsKey(e.getKey())) {
                    sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                } else {
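                    // log-space addition: log(exp(s) + exp(v)) = v + log(1 + exp(s - v))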
                    double sumStripesVal = ((DoubleWritable) sumOfStripes.get(e.getKey())).get();
                    if (sumStripesVal > Double.NEGATIVE_INFINITY) {
                        val = val + Math.log(1 + Math.exp(sumStripesVal - val));
                    }
                    sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                }
            }
        }
    } else if (scaling.equals("rescaling")) {
        for (MapWritable stripe : stripes) {
            for (Map.Entry e : stripe.entrySet()) {
                if (key.charAt(0) == (int) 'I') {

                    double val = ((DoubleWritable) e.getValue()).get();
                    if (!sumOfStripes.containsKey(e.getKey())) {
                        sumOfStripes.put((IntWritable) e.getKey(), (DoubleWritable) e.getValue());
                    } else {
                        val += ((DoubleWritable) sumOfStripes.get(e.getKey())).get();
                        sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                    }
                } else {
                    double[] pr = BaumWelchUtils.toDoublePair(((BytesWritable) e.getValue()).getBytes());
                    double num = pr[0];
                    double denom = pr[1];
                    if (!sumOfStripes.containsKey(e.getKey())) {
                        sumOfStripes.put((IntWritable) e.getKey(), (BytesWritable) e.getValue());
                    } else {
                        double[] pr1 = BaumWelchUtils
                                .toDoublePair(((BytesWritable) sumOfStripes.get(e.getKey())).getBytes());
                        num += pr1[0];
                        denom += pr1[1];
                        byte[] doublePair1 = BaumWelchUtils.doublePairToByteArray(num, denom);
                        sumOfStripes.put((IntWritable) e.getKey(), new BytesWritable(doublePair1));
                    }
                }
            }
        }
    } else {
        for (MapWritable stripe : stripes) {
            for (Map.Entry e : stripe.entrySet()) {
                double val = ((DoubleWritable) e.getValue()).get();
                if (!sumOfStripes.containsKey(e.getKey())) {
                    sumOfStripes.put((IntWritable) e.getKey(), (DoubleWritable) e.getValue());
                } else {
                    val += ((DoubleWritable) sumOfStripes.get(e.getKey())).get();
                    sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                }
            }
        }
    }
    context.write(key, sumOfStripes);
}

From source file:org.apache.mahout.classifier.sequencelearning.hmm.hadoop.BaumWelchMapper.java

License:Apache License
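Here put records the expectation-step statistics of one sequence; depending on the scaling mode the entry values are log-space sums, packed numerator/denominator pairs, or plain products of the alpha and beta factors.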

@Override
public void map(LongWritable seqID, VectorWritable seq, Context context)
        throws IOException, InterruptedException {

    MapWritable initialDistributionStripe = new MapWritable();
    HashMap<Integer, MapWritable> transitionDistributionStripe = new HashMap<Integer, MapWritable>();
    HashMap<Integer, MapWritable> emissionDistributionStripe = new HashMap<Integer, MapWritable>();

    Vector vec = seq.get();
    log.info("Sequence Length = {}", vec.size());
    int[] sequence = new int[vec.size()];

    int n = 0;

    for (int idx = 0; idx < vec.size(); idx++) {
        int val = (int) (vec.getElement(idx)).get();
        sequence[n] = val;
        n++;
    }

    if (scaling == HmmAlgorithms.ScalingMethod.LOGSCALING) {
        Matrix alphaFactors = HmmAlgorithms.forwardAlgorithm(Model, sequence,
                HmmAlgorithms.ScalingMethod.LOGSCALING, null);
        Matrix betaFactors = HmmAlgorithms.backwardAlgorithm(Model, sequence,
                HmmAlgorithms.ScalingMethod.LOGSCALING, null);

        //Initial Distribution
        for (int q = 0; q < nrOfHiddenStates; q++) {
            double alpha_1_q = alphaFactors.get(0, q);
            double beta_1_q = betaFactors.get(0, q);
            if ((alpha_1_q + beta_1_q) > Double.NEGATIVE_INFINITY) {
                initialDistributionStripe.put(new IntWritable(q), new DoubleWritable(alpha_1_q + beta_1_q));
            }
        }

        //Transition Distribution
        double[][] transitionMatrix = new double[nrOfHiddenStates][nrOfHiddenStates];
        for (int q = 0; q < nrOfHiddenStates; q++) {
            for (int x = 0; x < nrOfHiddenStates; x++) {
                transitionMatrix[q][x] = Double.NEGATIVE_INFINITY;
            }
        }

        for (int t = 0; t < sequence.length - 1; t++) {
            for (int q = 0; q < nrOfHiddenStates; q++) {
                for (int r = 0; r < nrOfHiddenStates; r++) {
                    double alpha_t_q = alphaFactors.get(t, q);
                    double A_q_r = Model.getTransitionMatrix().get(q, r) > 0
                            ? Math.log(Model.getTransitionMatrix().get(q, r))
                            : Double.NEGATIVE_INFINITY;
                    double B_r_xtplus1 = Model.getEmissionMatrix().get(r, sequence[t + 1]) > 0
                            ? Math.log(Model.getEmissionMatrix().get(r, sequence[t + 1]))
                            : Double.NEGATIVE_INFINITY;
                    double beta_tplus1_r = betaFactors.get(t + 1, r);
                    double transitionProb = alpha_t_q + A_q_r + B_r_xtplus1 + beta_tplus1_r;
                    if (transitionProb > Double.NEGATIVE_INFINITY) {
                        transitionMatrix[q][r] = transitionProb
                                + Math.log(1 + Math.exp(transitionMatrix[q][r] - transitionProb));
                    }
                }
            }
        }
        for (int q = 0; q < nrOfHiddenStates; q++) {
            MapWritable innerMap = new MapWritable();
            for (int r = 0; r < nrOfHiddenStates; r++) {
                if (transitionMatrix[q][r] > Double.NEGATIVE_INFINITY) {
                    innerMap.put(new IntWritable(r), new DoubleWritable(transitionMatrix[q][r]));
                }
            }
            transitionDistributionStripe.put(q, innerMap);
        }

        //Emission distribution
        double[][] emissionMatrix = new double[nrOfHiddenStates][nrOfEmittedStates];
        for (int q = 0; q < nrOfHiddenStates; q++) {
            for (int x = 0; x < nrOfEmittedStates; x++) {
                emissionMatrix[q][x] = Double.NEGATIVE_INFINITY;
            }
        }
        for (int t = 0; t < sequence.length; t++) {
            for (int q = 0; q < nrOfHiddenStates; q++) {
                double alpha_t_q = alphaFactors.get(t, q);
                double beta_t_q = betaFactors.get(t, q);
                double sum = alpha_t_q + beta_t_q;
                if (sum > Double.NEGATIVE_INFINITY) {
                    emissionMatrix[q][sequence[t]] = sum
                            + Math.log(1 + Math.exp(emissionMatrix[q][sequence[t]] - sum));
                }

            }
        }
        for (int q = 0; q < nrOfHiddenStates; q++) {
            MapWritable innerMap = new MapWritable();
            for (int r = 0; r < nrOfEmittedStates; r++) {
                if (emissionMatrix[q][r] > Double.NEGATIVE_INFINITY) {
                    innerMap.put(new IntWritable(r), new DoubleWritable(emissionMatrix[q][r]));
                }
            }
            emissionDistributionStripe.put(q, innerMap);
        }
    } else if (scaling == HmmAlgorithms.ScalingMethod.RESCALING) {
        double[] scalingFactors = new double[vec.size()];

        Matrix alphaFactors = HmmAlgorithms.forwardAlgorithm(Model, sequence,
                HmmAlgorithms.ScalingMethod.RESCALING, scalingFactors);
        Matrix betaFactors = HmmAlgorithms.backwardAlgorithm(Model, sequence,
                HmmAlgorithms.ScalingMethod.RESCALING, scalingFactors);

        //Initial Distribution
        for (int q = 0; q < nrOfHiddenStates; q++) {
            double alpha_1_q = alphaFactors.get(0, q);
            double beta_1_q = betaFactors.get(0, q);
            initialDistributionStripe.put(new IntWritable(q),
                    new DoubleWritable(alpha_1_q * beta_1_q / scalingFactors[0]));
        }

        //Transition Distribution
        double[][] transitionMatrixNum = new double[nrOfHiddenStates][nrOfHiddenStates];
        double[][] transitionMatrixDenom = new double[nrOfHiddenStates][nrOfHiddenStates];
        for (int q = 0; q < nrOfHiddenStates; q++) {
            for (int x = 0; x < nrOfHiddenStates; x++) {
                transitionMatrixNum[q][x] = 0.0;
                transitionMatrixDenom[q][x] = 0.0;
            }
        }

        for (int t = 0; t < sequence.length - 1; t++) {
            for (int q = 0; q < nrOfHiddenStates; q++) {
                for (int r = 0; r < nrOfHiddenStates; r++) {
                    double alpha_t_q = alphaFactors.get(t, q);
                    double A_q_r = Model.getTransitionMatrix().get(q, r);
                    double B_r_xtplus1 = Model.getEmissionMatrix().get(r, sequence[t + 1]);
                    double beta_tplus1_r = betaFactors.get(t + 1, r);
                    double beta_t_q = betaFactors.get(t, q);
                    double transitionProbNum = alpha_t_q * A_q_r * B_r_xtplus1 * beta_tplus1_r;
                    double transitionProbDenom = alpha_t_q * beta_t_q / scalingFactors[t];
                    transitionMatrixNum[q][r] += transitionProbNum;
                    transitionMatrixDenom[q][r] += transitionProbDenom;
                }
            }
        }
        for (int q = 0; q < nrOfHiddenStates; q++) {
            MapWritable innerMap = new MapWritable();
            for (int r = 0; r < nrOfHiddenStates; r++) {
                byte[] doublePair = BaumWelchUtils.doublePairToByteArray(transitionMatrixNum[q][r],
                        transitionMatrixDenom[q][r]);
                innerMap.put(new IntWritable(r), new BytesWritable(doublePair));
            }
            transitionDistributionStripe.put(q, innerMap);
        }

        //Emission distribution
        double[][] emissionMatrixNum = new double[nrOfHiddenStates][nrOfEmittedStates];
        double[][] emissionMatrixDenom = new double[nrOfHiddenStates][nrOfEmittedStates];
        for (int q = 0; q < nrOfHiddenStates; q++) {
            for (int x = 0; x < nrOfEmittedStates; x++) {
                emissionMatrixNum[q][x] = 0.0;
                emissionMatrixDenom[q][x] = 0.0;
            }
        }

        for (int q = 0; q < nrOfHiddenStates; ++q) {
            for (int j = 0; j < nrOfEmittedStates; ++j) {
                double temp = 0;
                double temp1 = 0;
                for (int t = 0; t < sequence.length; ++t) {
                    // delta tensor
                    if (sequence[t] == j) {
                        temp += alphaFactors.get(t, q) * betaFactors.get(t, q) / scalingFactors[t];
                    }
                    temp1 += alphaFactors.get(t, q) * betaFactors.get(t, q) / scalingFactors[t];
                }
                emissionMatrixNum[q][j] += temp;
                emissionMatrixDenom[q][j] += temp1;
            }
        }

        for (int q = 0; q < nrOfHiddenStates; q++) {
            MapWritable innerMap = new MapWritable();
            for (int r = 0; r < nrOfEmittedStates; r++) {
                byte[] doublePair = BaumWelchUtils.doublePairToByteArray(emissionMatrixNum[q][r],
                        emissionMatrixDenom[q][r]);
                innerMap.put(new IntWritable(r), new BytesWritable(doublePair));
            }
            emissionDistributionStripe.put(q, innerMap);
        }
    } else {
        Matrix alphaFactors = HmmAlgorithms.forwardAlgorithm(Model, sequence,
                HmmAlgorithms.ScalingMethod.NOSCALING, null);
        Matrix betaFactors = HmmAlgorithms.backwardAlgorithm(Model, sequence,
                HmmAlgorithms.ScalingMethod.NOSCALING, null);

        //Initial Distribution
        for (int q = 0; q < nrOfHiddenStates; q++) {
            double alpha_1_q = alphaFactors.get(0, q);
            double beta_1_q = betaFactors.get(0, q);
            initialDistributionStripe.put(new IntWritable(q), new DoubleWritable(alpha_1_q * beta_1_q));
        }

        //Transition Distribution
        double[][] transitionMatrix = new double[nrOfHiddenStates][nrOfHiddenStates];
        for (int q = 0; q < nrOfHiddenStates; q++) {
            for (int x = 0; x < nrOfHiddenStates; x++) {
                transitionMatrix[q][x] = 0.0;
            }
        }

        for (int t = 0; t < sequence.length - 1; t++) {
            for (int q = 0; q < nrOfHiddenStates; q++) {
                for (int r = 0; r < nrOfHiddenStates; r++) {
                    double alpha_t_q = alphaFactors.get(t, q);
                    double A_q_r = Model.getTransitionMatrix().get(q, r);
                    double B_r_xtplus1 = Model.getEmissionMatrix().get(r, sequence[t + 1]);
                    double beta_tplus1_r = betaFactors.get(t + 1, r);
                    double transitionProb = alpha_t_q * A_q_r * B_r_xtplus1 * beta_tplus1_r;
                    transitionMatrix[q][r] += transitionProb;
                }
            }
        }
        for (int q = 0; q < nrOfHiddenStates; q++) {
            MapWritable innerMap = new MapWritable();
            for (int r = 0; r < nrOfHiddenStates; r++) {
                innerMap.put(new IntWritable(r), new DoubleWritable(transitionMatrix[q][r]));
            }
            transitionDistributionStripe.put(q, innerMap);
        }

        //Emission distribution
        double[][] emissionMatrix = new double[nrOfHiddenStates][nrOfEmittedStates];
        for (int q = 0; q < nrOfHiddenStates; q++) {
            for (int x = 0; x < nrOfEmittedStates; x++) {
                emissionMatrix[q][x] = 0.0;
            }
        }
        for (int t = 0; t < sequence.length; t++) {
            for (int q = 0; q < nrOfHiddenStates; q++) {

                double alpha_t_q = alphaFactors.get(t, q);
                double beta_t_q = betaFactors.get(t, q);
                emissionMatrix[q][sequence[t]] += alpha_t_q * beta_t_q;

            }
        }
        for (int q = 0; q < nrOfHiddenStates; q++) {
            MapWritable innerMap = new MapWritable();
            for (int r = 0; r < nrOfEmittedStates; r++) {
                innerMap.put(new IntWritable(r), new DoubleWritable(emissionMatrix[q][r]));
            }
            emissionDistributionStripe.put(q, innerMap);
        }

    }

    //push out the associative arrays
    context.write(new Text("INITIAL"), initialDistributionStripe);
    for (int q = 0; q < nrOfHiddenStates; q++) {
        context.write(new Text("EMIT_" + Integer.toString(q)), emissionDistributionStripe.get(q));
        context.write(new Text("TRANSIT_" + Integer.toString(q)), transitionDistributionStripe.get(q));
    }

}

From source file:org.apache.mahout.classifier.sequencelearning.hmm.hadoop.BaumWelchReducer.java

License:Apache License
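Here put first aggregates all stripes for a key and then overwrites each entry with its normalized probability before the stripe is written out for the next iteration.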

@Override
protected void reduce(Text key, Iterable<MapWritable> stripes, Context context)
        throws IOException, InterruptedException {

    MapWritable sumOfStripes = new MapWritable();

    // Finish the Expectation Step by aggregating all posterior probabilities for one key
    if (scaling.equals("logscaling")) {
        double totalValSum = Double.NEGATIVE_INFINITY;
        for (MapWritable stripe : stripes) {
            for (Map.Entry e : stripe.entrySet()) {
                double val = ((DoubleWritable) e.getValue()).get();
                double max = totalValSum > val ? totalValSum : val;
                totalValSum = max + Math.log(Math.exp(totalValSum - max) + Math.exp(val - max));
                if (!sumOfStripes.containsKey(e.getKey())) {
                    sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                } else {
                    double sumStripesVal = ((DoubleWritable) sumOfStripes.get(e.getKey())).get();
                    if (sumStripesVal > Double.NEGATIVE_INFINITY) {
                        val = val + Math.log(1 + Math.exp(sumStripesVal - val));
                    }
                    sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                }
            }
        }

        //normalize the aggregate
        for (Map.Entry e : sumOfStripes.entrySet()) {
            double val = ((DoubleWritable) e.getValue()).get();
            if (totalValSum > Double.NEGATIVE_INFINITY) {
                val = val - totalValSum;
            }
            sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(Math.exp(val)));
        }
    } else if (scaling.equals("rescaling")) {
        double totalValSum = 0.0;

        for (MapWritable stripe : stripes) {
            for (Map.Entry e : stripe.entrySet()) {
                if (key.charAt(0) == (int) 'I') {
                    double val = ((DoubleWritable) e.getValue()).get();
                    totalValSum += val;
                    if (!sumOfStripes.containsKey(e.getKey())) {
                        sumOfStripes.put((IntWritable) e.getKey(), (DoubleWritable) e.getValue());
                    } else {
                        val += ((DoubleWritable) sumOfStripes.get(e.getKey())).get();
                        sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                    }
                } else {
                    double[] pr = BaumWelchUtils.toDoublePair(((BytesWritable) e.getValue()).getBytes());
                    double num = pr[0];
                    double denom = pr[1];
                    if (!sumOfStripes.containsKey(e.getKey())) {
                        sumOfStripes.put((IntWritable) e.getKey(), (BytesWritable) e.getValue());
                    } else {
                        double[] pr1 = BaumWelchUtils
                                .toDoublePair(((BytesWritable) sumOfStripes.get(e.getKey())).getBytes());
                        num += pr1[0];
                        denom += pr1[1];
                        byte[] doublePair1 = BaumWelchUtils.doublePairToByteArray(num, denom);
                        sumOfStripes.put((IntWritable) e.getKey(), new BytesWritable(doublePair1));
                    }
                }
            }
        }

        if (key.charAt(0) == (int) 'I') {
            //normalize the aggregate
            for (Map.Entry e : sumOfStripes.entrySet()) {
                double val = ((DoubleWritable) e.getValue()).get();
                if (totalValSum > 0) {
                    val /= totalValSum;
                }
                sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
            }

        } else {
            // compute the probabilities
            for (Map.Entry e : sumOfStripes.entrySet()) {
                double[] pr1 = BaumWelchUtils
                        .toDoublePair(((BytesWritable) sumOfStripes.get(e.getKey())).getBytes());
                sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(pr1[0] / pr1[1]));
            }
        }
    } else {
        double totalValSum = 0.0;

        for (MapWritable stripe : stripes) {
            for (Map.Entry e : stripe.entrySet()) {
                double val = ((DoubleWritable) e.getValue()).get();
                totalValSum += val;
                if (!sumOfStripes.containsKey(e.getKey())) {
                    sumOfStripes.put((IntWritable) e.getKey(), (DoubleWritable) e.getValue());
                } else {
                    val += ((DoubleWritable) sumOfStripes.get(e.getKey())).get();
                    sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                }
            }
        }

        //normalize the aggregate
        for (Map.Entry e : sumOfStripes.entrySet()) {
            double val = ((DoubleWritable) e.getValue()).get();
            if (totalValSum > 0) {
                val /= totalValSum;
            }
            sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
        }
    }

    //Write the distribution parameter vector to HDFS for the next iteration
    context.write(key, sumOfStripes);

}