List of usage examples for the org.apache.hadoop.io.MapWritable constructor
public MapWritable()
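MapWritable is Hadoop's serializable Map<Writable, Writable>; the no-argument constructor creates an empty map whose keys and values may be any Writable type. Before the examples below, here is a minimal standalone sketch (not taken from any of the listed sources) of constructing one, round-tripping it through Writable serialization, and casting a value back out:

import java.io.*;
import org.apache.hadoop.io.*;

public class MapWritableBasics {
    public static void main(String[] args) throws IOException {
        MapWritable map = new MapWritable();               // public MapWritable()
        map.put(new Text("count"), new IntWritable(3));
        map.put(new Text("mean"), new DoubleWritable(2.5));

        // Writable round trip: write() into a buffer, readFields() on a fresh instance.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        map.write(new DataOutputStream(out));
        MapWritable copy = new MapWritable();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(out.toByteArray())));

        // Values come back as Writable and must be cast by the caller.
        System.out.println(((IntWritable) copy.get(new Text("count"))).get());   // prints 3
    }
}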
From source file:org.apache.hama.ml.recommendation.cf.OnlineTrainBSP.java
License:Apache License
private void sendRequiredFeatures(BSPPeer<Text, VectorWritable, Text, VectorWritable, MapWritable> peer)
        throws IOException, SyncException, InterruptedException {
    MapWritable msg = null;
    int senderId = 0;
    while ((msg = peer.getCurrentMessage()) != null) {
        senderId = ((IntWritable) msg.get(OnlineCF.Settings.MSG_SENDER_ID)).get();
        MapWritable resp = new MapWritable();
        if (msg.containsKey(OnlineCF.Settings.MSG_INP_ITEM_FEATURES)) {
            // send item feature
            String itemId = ((Text) msg.get(OnlineCF.Settings.MSG_INP_ITEM_FEATURES)).toString().substring(1);
            resp.put(OnlineCF.Settings.MSG_INP_ITEM_FEATURES, new Text(itemId));
            resp.put(OnlineCF.Settings.MSG_VALUE, inpItemsFeatures.get(itemId));
        } else if (msg.containsKey(OnlineCF.Settings.MSG_INP_USER_FEATURES)) {
            // send user feature
            String userId = ((Text) msg.get(OnlineCF.Settings.MSG_INP_USER_FEATURES)).toString().substring(1);
            resp.put(OnlineCF.Settings.MSG_INP_USER_FEATURES, new Text(userId));
            resp.put(OnlineCF.Settings.MSG_VALUE, inpUsersFeatures.get(userId));
        }
        peer.send(peer.getPeerName(senderId), resp);
    }
}
From source file:org.apache.hama.ml.recommendation.cf.OnlineTrainBSP.java
License:Apache License
private void askForFeatures(BSPPeer<Text, VectorWritable, Text, VectorWritable, MapWritable> peer,
        HashSet<Text> requiredUserFeatures, HashSet<Text> requiredItemFeatures)
        throws IOException, SyncException, InterruptedException {
    int peerCount = peer.getNumPeers();
    int peerId = peer.getPeerIndex();
    if (requiredUserFeatures != null) {
        Iterator<Text> iter = requiredUserFeatures.iterator();
        Text key = null;
        while (iter.hasNext()) {
            MapWritable msg = new MapWritable();
            key = iter.next();
            msg.put(OnlineCF.Settings.MSG_INP_USER_FEATURES, key);
            msg.put(OnlineCF.Settings.MSG_SENDER_ID, new IntWritable(peerId));
            peer.send(peer.getPeerName(key.hashCode() % peerCount), msg);
        }
    }
    if (requiredItemFeatures != null) {
        Iterator<Text> iter = requiredItemFeatures.iterator();
        Text key = null;
        while (iter.hasNext()) {
            MapWritable msg = new MapWritable();
            key = iter.next();
            msg.put(OnlineCF.Settings.MSG_INP_ITEM_FEATURES, key);
            msg.put(OnlineCF.Settings.MSG_SENDER_ID, new IntWritable(peerId));
            peer.send(peer.getPeerName(key.hashCode() % peerCount), msg);
        }
    }
}
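Both Hama methods above treat MapWritable as a schemaless message envelope: well-known Text constants name the fields, containsKey() dispatches on the message type, and the receiver casts each value back to its concrete Writable. A distilled sketch of that dispatch pattern, with hypothetical key constants standing in for OnlineCF.Settings and no BSP plumbing:

import org.apache.hadoop.io.*;

public class EnvelopeDispatch {
    // Hypothetical well-known field names, mirroring the role of OnlineCF.Settings.
    static final Text MSG_SENDER_ID = new Text("senderId");
    static final Text MSG_INP_USER_FEATURES = new Text("userFeatures");
    static final Text MSG_INP_ITEM_FEATURES = new Text("itemFeatures");

    static void handle(MapWritable msg) {
        int senderId = ((IntWritable) msg.get(MSG_SENDER_ID)).get();
        // Dispatch on which optional field is present, as sendRequiredFeatures does.
        if (msg.containsKey(MSG_INP_ITEM_FEATURES)) {
            System.out.println("peer " + senderId + " wants item " + msg.get(MSG_INP_ITEM_FEATURES));
        } else if (msg.containsKey(MSG_INP_USER_FEATURES)) {
            System.out.println("peer " + senderId + " wants user " + msg.get(MSG_INP_USER_FEATURES));
        }
    }

    public static void main(String[] args) {
        MapWritable request = new MapWritable();
        request.put(MSG_SENDER_ID, new IntWritable(2));
        request.put(MSG_INP_USER_FEATURES, new Text("u42"));
        handle(request);   // prints: peer 2 wants user u42
    }
}

One caveat worth noting about the routing expression above: a Java hashCode() can be negative, so key.hashCode() % peerCount can yield a negative peer index; masking the sign bit, (key.hashCode() & Integer.MAX_VALUE) % peerCount, is the usual guard.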
From source file:org.apache.hive.storage.jdbc.JdbcRecordReader.java
License:Apache License
@Override
public MapWritable createValue() {
    return new MapWritable();
}
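Here each JDBC row is surfaced as a MapWritable keyed by column name. A hedged sketch of the consuming side, assuming the classic org.apache.hadoop.mapred.RecordReader contract that matches the createValue() signature above (the dump helper is illustrative, not part of the Hive source; reader construction and error handling are elided):

import java.io.IOException;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.RecordReader;

public class RowDumper {
    // Drains any RecordReader that yields MapWritable rows, e.g. JdbcRecordReader.
    static void dump(RecordReader<LongWritable, MapWritable> reader) throws IOException {
        LongWritable key = reader.createKey();
        MapWritable row = reader.createValue();   // the same objects are reused every call
        while (reader.next(key, row)) {
            for (java.util.Map.Entry<Writable, Writable> col : row.entrySet()) {
                System.out.println(key + ": " + col.getKey() + " = " + col.getValue());
            }
        }
        reader.close();
    }
}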
From source file:org.apache.mahout.classifier.sequencelearning.baumwelchmapreduce.BaumWelchCombiner.java
License:Apache License
@Override
protected void reduce(Text key, Iterable<MapWritable> stripes, Context context)
        throws IOException, InterruptedException {
    log.info("Entering Reducer. Key = {}", key.toString());
    MapWritable sumOfStripes = new MapWritable();
    boolean isInitial = false;
    boolean isTransit = false;
    boolean isEmit = false;
    if (key.charAt(0) == 'I') {
        isInitial = true;
    } else if (key.charAt(0) == 'E') {
        isEmit = true;
    } else if (key.charAt(0) == 'T') {
        isTransit = true;
    } else {
        throw new IllegalStateException("Baum Welch Reducer Error Determining the Key Type");
    }
    if (isInitial) {
        Double[] val = new Double[nrOfHiddenStates];
        for (int i = 0; i < nrOfHiddenStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Initial Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Initial Distribution Stripe Entry. Key = {} Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue()).get();
            }
        }
        for (int i = 0; i < nrOfHiddenStates; i++) {
            log.info("Reducer adding to sumOfStripes for Initial. Key = {} Value = {}",
                    Integer.toString(i), Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else if (isEmit) {
        Double[] val = new Double[nrOfEmittedStates];
        for (int i = 0; i < nrOfEmittedStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Emission Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Emission Distribution Stripe Entry. Key = {} Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue()).get();
            }
        }
        for (int i = 0; i < nrOfEmittedStates; i++) {
            log.info("Reducer adding to sumOfStripes for Emission. Key = {} Value = {}",
                    Integer.toString(i), Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else if (isTransit) {
        Double[] val = new Double[nrOfHiddenStates];
        for (int i = 0; i < nrOfHiddenStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Transition Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Transition Distribution Stripe Entry. Key = {} Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue()).get();
            }
        }
        for (int i = 0; i < nrOfHiddenStates; i++) {
            log.info("Reducer adding to sumOfStripes for Transition. Key = {} Value = {}",
                    Integer.toString(i), Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else {
        throw new IllegalStateException("Baum Welch Reducer Error: Unable to aggregate distribution stripes.");
    }
    context.write(key, sumOfStripes);
}
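The combiner above is the classic "stripes" aggregation: each MapWritable maps a state index to a partial count, and reduction is element-wise addition. A minimal sketch of that merge step on its own, independent of the Hadoop job plumbing:

import org.apache.hadoop.io.*;

public class StripeSum {
    // Adds every (IntWritable -> DoubleWritable) entry of src into acc.
    static void addStripe(MapWritable acc, MapWritable src) {
        for (java.util.Map.Entry<Writable, Writable> e : src.entrySet()) {
            double v = ((DoubleWritable) e.getValue()).get();
            DoubleWritable cur = (DoubleWritable) acc.get(e.getKey());
            // Copy the key rather than aliasing it, guarding against Hadoop's object reuse.
            acc.put(new IntWritable(((IntWritable) e.getKey()).get()),
                    new DoubleWritable(cur == null ? v : cur.get() + v));
        }
    }

    public static void main(String[] args) {
        MapWritable a = new MapWritable(), b = new MapWritable(), sum = new MapWritable();
        a.put(new IntWritable(0), new DoubleWritable(0.25));
        b.put(new IntWritable(0), new DoubleWritable(0.50));
        b.put(new IntWritable(1), new DoubleWritable(1.00));
        addStripe(sum, a);
        addStripe(sum, b);
        System.out.println(((DoubleWritable) sum.get(new IntWritable(0))).get());  // 0.75
    }
}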
From source file:org.apache.mahout.classifier.sequencelearning.baumwelchmapreduce.BaumWelchMapper.java
License:Apache License
@Override
public void map(LongWritable seqID, IntArrayWritable seq, Context context)
        throws IOException, InterruptedException {
    MapWritable initialDistributionStripe = new MapWritable();
    MapWritable transitionDistributionStripe = new MapWritable();
    MapWritable emissionDistributionStripe = new MapWritable();
    //IntArrayWritable[] writableSequence = (IntArrayWritable[]) seq.get();
    int[] sequence = new int[seq.get().length];
    int n = 0;
    for (Writable val : seq.get()) {
        sequence[n] = ((IntWritable) val).get();
        n++;
    }
    for (int k = 0; k < sequence.length; k++) {
        log.info("Sequence Array {}", Integer.toString(sequence[k]));
    }
    Matrix alphaFactors = HmmAlgorithms.forwardAlgorithm(Model, sequence, false);
    for (int i = 0; i < alphaFactors.numRows(); i++) {
        for (int j = 0; j < alphaFactors.numCols(); j++) {
            log.info("Alpha Factors Matrix entry ({}, {}) = {}", new Object[] { i, j, alphaFactors.get(i, j) });
        }
    }
    Matrix betaFactors = HmmAlgorithms.backwardAlgorithm(Model, sequence, false);
    for (int i = 0; i < betaFactors.numRows(); i++) {
        for (int j = 0; j < betaFactors.numCols(); j++) {
            log.info("Beta Factors Matrix entry ({}, {}) = {}", new Object[] { i, j, betaFactors.get(i, j) });
        }
    }

    // Initial Distribution
    for (int q = 0; q < nrOfHiddenStates; q++) {
        double alpha_1_q = alphaFactors.get(1, q);
        double beta_1_q = betaFactors.get(1, q);
        initialDistributionStripe.put(new IntWritable(q), new DoubleWritable(alpha_1_q * beta_1_q));
    }

    // Emission Distribution
    /*
    Matrix emissionMatrix = new DenseMatrix(nrOfHiddenStates, sequence.length);
    for (int t = 0; t < sequence.length; t++) {
        HashMap<Integer, Double> innerMap = new HashMap<Integer, Double>();
        for (int q = 0; q < nrOfHiddenStates; q++) {
            double alpha_t_q = alphaFactors.get(t, q);
            double beta_t_q = betaFactors.get(t, q);
            //innerMap.put(q, alpha_t_q * beta_t_q);
            emissionMatrix.set(q, t, alpha_t_q * beta_t_q);
        }
    }
    for (int q = 0; q < nrOfHiddenStates; q++) {
        Map innerEmissionMap = new MapWritable();
        for (int xt = 0; xt < sequence.length; xt++) {
            innerEmissionMap.put(new IntWritable(xt), new DoubleWritable(emissionMatrix.get(q, xt)));
        }
        emissionDistributionStripe.put(new IntWritable(q), (MapWritable) innerEmissionMap);
    }
    */
    double[][] emissionMatrix = new double[nrOfHiddenStates][nrOfEmittedStates];
    for (int q = 0; q < nrOfHiddenStates; q++) {
        for (int x = 0; x < nrOfEmittedStates; x++) {
            emissionMatrix[q][x] = 0.0;
        }
    }
    for (int t = 0; t < sequence.length; t++) {
        for (int q = 0; q < nrOfHiddenStates; q++) {
            double alpha_t_q = alphaFactors.get(t, q);
            double beta_t_q = betaFactors.get(t, q);
            emissionMatrix[q][sequence[t]] += alpha_t_q * beta_t_q;
        }
    }
    for (int q = 0; q < nrOfHiddenStates; q++) {
        MapWritable innerEmissionMap = new MapWritable();
        for (int xt = 0; xt < sequence.length; xt++) {
            innerEmissionMap.put(new IntWritable(sequence[xt]),
                    new DoubleWritable(emissionMatrix[q][sequence[xt]]));
        }
        emissionDistributionStripe.put(new IntWritable(q), innerEmissionMap);
    }

    // Transition Distribution
    double[][] transitionMatrix = new double[nrOfHiddenStates][nrOfHiddenStates];
    for (int q = 0; q < nrOfHiddenStates; q++) {
        for (int x = 0; x < nrOfHiddenStates; x++) {
            transitionMatrix[q][x] = 0.0;
        }
    }
    for (int t = 0; t < sequence.length - 1; t++) {
        for (int q = 0; q < nrOfHiddenStates; q++) {
            for (int r = 0; r < nrOfHiddenStates; r++) {
                double alpha_t_q = alphaFactors.get(t, q);
                double A_q_r = Model.getTransitionMatrix().get(q, r);
                double B_r_xtplus1 = Model.getEmissionMatrix().get(r, sequence[t + 1]);
                double beta_tplus1_r = betaFactors.get(t + 1, r);
                double transitionProb = alpha_t_q * A_q_r * B_r_xtplus1 * beta_tplus1_r;
                log.info("Putting into Inner Map of Transition Distribution. Key = {}, Value = {}",
                        q, transitionProb);
                transitionMatrix[q][r] += transitionProb;
            }
        }
    }
    for (int q = 0; q < nrOfHiddenStates; q++) {
        MapWritable innerTransitionMap = new MapWritable();
        for (int r = 0; r < nrOfHiddenStates; r++) {
            innerTransitionMap.put(new IntWritable(r), new DoubleWritable(transitionMatrix[q][r]));
        }
        transitionDistributionStripe.put(new IntWritable(q), innerTransitionMap);
    }

    context.write(new Text("INITIAL"), initialDistributionStripe);
    log.info("Context Writing from Mapper the Initial Distribution Stripe. Size = {} Entries = {}",
            Integer.toString(initialDistributionStripe.size()),
            Integer.toString(initialDistributionStripe.entrySet().size()));
    for (int q = 0; q < nrOfHiddenStates; q++) {
        context.write(new Text("EMIT_" + Integer.toString(q)),
                (MapWritable) emissionDistributionStripe.get(new IntWritable(q)));
        log.info("Context Writing from Mapper the Emission Distribution Stripe. State = {} Entries = {}",
                Integer.toString(q),
                Integer.toString(((MapWritable) emissionDistributionStripe.get(new IntWritable(q))).size()));
        for (MapWritable.Entry<Writable, Writable> entry : ((MapWritable) emissionDistributionStripe
                .get(new IntWritable(q))).entrySet()) {
            log.info("Emission Distribution Stripe Details. Key = {} Value = {} ",
                    Integer.toString(((IntWritable) entry.getKey()).get()),
                    Double.toString(((DoubleWritable) entry.getValue()).get()));
        }
        context.write(new Text("TRANSIT_" + Integer.toString(q)),
                (MapWritable) transitionDistributionStripe.get(new IntWritable(q)));
        log.info("Context Writing from Mapper the Transition Distribution Stripe. State = {} Entries = {}",
                Integer.toString(q),
                Integer.toString(((MapWritable) transitionDistributionStripe.get(new IntWritable(q))).size()));
        for (MapWritable.Entry<Writable, Writable> entry : ((MapWritable) transitionDistributionStripe
                .get(new IntWritable(q))).entrySet()) {
            log.info("Transition Distribution Stripe Details. Key = {} Value = {} ",
                    Integer.toString(((IntWritable) entry.getKey()).get()),
                    Double.toString(((DoubleWritable) entry.getValue()).get()));
        }
    }
}
From source file:org.apache.mahout.classifier.sequencelearning.baumwelchmapreduce.BaumWelchReducer.java
License:Apache License
@Override
protected void reduce(Text key, Iterable<MapWritable> stripes, Context context)
        throws IOException, InterruptedException {
    log.info("Entering Reducer. Key = {}", key.toString());
    MapWritable sumOfStripes = new MapWritable();
    boolean isInitial = false;
    boolean isTransit = false;
    boolean isEmit = false;
    int stateID = -1;
    if (key.charAt(0) == 'I') {
        isInitial = true;
    } else if (key.charAt(0) == 'E') {
        isEmit = true;
        stateID = Character.getNumericValue(key.charAt(5));
    } else if (key.charAt(0) == 'T') {
        isTransit = true;
        stateID = Character.getNumericValue(key.charAt(8));
    } else {
        throw new IllegalStateException("Baum Welch Reducer Error Determining the Key Type");
    }
    if (isInitial) {
        Double[] val = new Double[nrOfHiddenStates];
        for (int i = 0; i < nrOfHiddenStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Initial Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Initial Distribution Stripe Entry. Key = {} Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue()).get();
            }
        }
        for (int i = 0; i < nrOfHiddenStates; i++) {
            log.info("Reducer adding to sumOfStripes for Initial. Key = {} Value = {}",
                    Integer.toString(i), Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else if (isEmit) {
        Double[] val = new Double[nrOfEmittedStates];
        for (int i = 0; i < nrOfEmittedStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Emission Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Emission Distribution Stripe Entry. Key = {} Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue()).get();
            }
        }
        for (int i = 0; i < nrOfEmittedStates; i++) {
            log.info("Reducer adding to sumOfStripes for Emission. Key = {} Value = {}",
                    Integer.toString(i), Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else if (isTransit) {
        Double[] val = new Double[nrOfHiddenStates];
        for (int i = 0; i < nrOfHiddenStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Transition Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Transition Distribution Stripe Entry. Key = {} Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue()).get();
            }
        }
        for (int i = 0; i < nrOfHiddenStates; i++) {
            log.info("Reducer adding to sumOfStripes for Transition. Key = {} Value = {}",
                    Integer.toString(i), Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else {
        throw new IllegalStateException("Baum Welch Reducer Error: Unable to aggregate distribution stripes.");
    }
    double sum = 0.0;
    for (MapWritable.Entry<Writable, Writable> sumEntry : sumOfStripes.entrySet()) {
        sum += ((DoubleWritable) sumEntry.getValue()).get();
    }
    MapWritable distributionStripe = new MapWritable();
    for (MapWritable.Entry<Writable, Writable> sumEntry : sumOfStripes.entrySet()) {
        IntWritable state = (IntWritable) sumEntry.getKey();
        double innerValue = ((DoubleWritable) sumEntry.getValue()).get();
        double normalizedSum = innerValue / sum;
        distributionStripe.put(state, new DoubleWritable(normalizedSum));
    }
    log.info("Reducer Writing: Key = {} Value (Stripe) Size = {}", key.toString(), distributionStripe.size());
    for (MapWritable.Entry<Writable, Writable> entry : distributionStripe.entrySet()) {
        log.info("Distribution Stripe Detail Key = {}, Value = {}",
                ((IntWritable) entry.getKey()).get(), ((DoubleWritable) entry.getValue()).get());
    }
    context.write(key, distributionStripe);
}
From source file:org.apache.mahout.classifier.sequencelearning.baumwelchmapreduce.BaumWelchUtils.java
License:Apache License
protected static void WriteModelToDirectory(HmmModel model, Path modelPath, Configuration conf)
        throws IOException {
    int numHidden = model.getNrOfHiddenStates();
    int numObserved = model.getNrOfOutputStates();
    Matrix emissionMatrix = model.getEmissionMatrix();
    Matrix transitionMatrix = model.getTransitionMatrix();
    Vector initialProbability = model.getInitialProbabilities();
    MapWritable initialDistributionMap = new MapWritable();
    MapWritable transitionDistributionMap = new MapWritable();
    MapWritable emissionDistributionMap = new MapWritable();
    // delete the output directory
    HadoopUtil.delete(conf, modelPath);
    // create new file to store HMM
    FileSystem fs = FileSystem.get(modelPath.toUri(), conf);
    Path outFile = new Path(modelPath, "part-randomSeed");
    boolean newFile = fs.createNewFile(outFile);
    if (newFile) {
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outFile, Text.class, MapWritable.class);
        try {
            // construct one MapWritable<IntWritable, DoubleWritable> object
            // and two MapWritable<Text, MapWritable<IntWritable, DoubleWritable>> objects
            for (int i = 0; i < numHidden; i++) {
                IntWritable initialDistributionKey = new IntWritable(i);
                DoubleWritable initialDistributionValue = new DoubleWritable(initialProbability.get(i));
                log.info("BuildRandomModel Initial Distribution Map: State {} = {})",
                        initialDistributionKey.get(), initialDistributionValue.get());
                initialDistributionMap.put(initialDistributionKey, initialDistributionValue);

                Text transitionDistributionKey = new Text("TRANSIT_" + Integer.toString(i));
                MapWritable transitionDistributionValue = new MapWritable();
                for (int j = 0; j < numHidden; j++) {
                    IntWritable transitionDistributionInnerKey = new IntWritable(j);
                    DoubleWritable transitionDistributionInnerValue = new DoubleWritable(transitionMatrix.get(i, j));
                    log.info("BuildRandomModel Transition Distribution Map Inner: ({}, {}) = ({}, {})",
                            new Object[] { i, j, transitionDistributionInnerKey.get(),
                                    transitionDistributionInnerValue.get() });
                    transitionDistributionValue.put(transitionDistributionInnerKey, transitionDistributionInnerValue);
                }
                transitionDistributionMap.put(transitionDistributionKey, transitionDistributionValue);

                Text emissionDistributionKey = new Text("EMIT_" + Integer.toString(i));
                MapWritable emissionDistributionValue = new MapWritable();
                for (int j = 0; j < numObserved; j++) {
                    IntWritable emissionDistributionInnerKey = new IntWritable(j);
                    DoubleWritable emissionDistributionInnerValue = new DoubleWritable(emissionMatrix.get(i, j));
                    log.info("BuildRandomModel Emission Distribution Map Inner: ({}, {}) = ({}, {})",
                            new Object[] { i, j, emissionDistributionInnerKey.get(),
                                    emissionDistributionInnerValue.get() });
                    emissionDistributionValue.put(emissionDistributionInnerKey, emissionDistributionInnerValue);
                }
                emissionDistributionMap.put(emissionDistributionKey, emissionDistributionValue);
            }
            writer.append(new Text("INITIAL"), initialDistributionMap);
            log.info("Wrote random Initial Distribution Map to {}", outFile);
            for (MapWritable.Entry<Writable, Writable> transitionEntry : transitionDistributionMap.entrySet()) {
                log.info("Writing Transition Distribution Map Key, Value = ({}, {})",
                        transitionEntry.getKey(), transitionEntry.getValue());
                writer.append(transitionEntry.getKey(), transitionEntry.getValue());
            }
            log.info("Wrote random Transition Distribution Map to {}", outFile);
            for (MapWritable.Entry<Writable, Writable> emissionEntry : emissionDistributionMap.entrySet()) {
                log.info("Writing Emission Distribution Map Key, Value = ({}, {})",
                        emissionEntry.getKey(), emissionEntry.getValue());
                writer.append(emissionEntry.getKey(), emissionEntry.getValue());
            }
            log.info("Wrote random Emission Distribution Map to {}", outFile);
        } finally {
            Closeables.closeQuietly(writer);
        }
    }
}
From source file:org.apache.mahout.classifier.sequencelearning.hmm.hadoop.BaumWelchCombiner.java
License:Apache License
@Override
protected void reduce(Text key, Iterable<MapWritable> stripes, Context context)
        throws IOException, InterruptedException {
    MapWritable sumOfStripes = new MapWritable();
    if (scaling.equals("logscaling")) {
        for (MapWritable stripe : stripes) {
            for (Map.Entry<Writable, Writable> e : stripe.entrySet()) {
                double val = ((DoubleWritable) e.getValue()).get();
                if (!sumOfStripes.containsKey(e.getKey())) {
                    sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                } else {
                    double sumStripesVal = ((DoubleWritable) sumOfStripes.get(e.getKey())).get();
                    if (sumStripesVal > Double.NEGATIVE_INFINITY) {
                        val = val + Math.log(1 + Math.exp(sumStripesVal - val));
                    }
                    sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                }
            }
        }
    } else if (scaling.equals("rescaling")) {
        for (MapWritable stripe : stripes) {
            for (Map.Entry<Writable, Writable> e : stripe.entrySet()) {
                if (key.charAt(0) == (int) 'I') {
                    double val = ((DoubleWritable) e.getValue()).get();
                    if (!sumOfStripes.containsKey(e.getKey())) {
                        sumOfStripes.put((IntWritable) e.getKey(), (DoubleWritable) e.getValue());
                    } else {
                        val += ((DoubleWritable) sumOfStripes.get(e.getKey())).get();
                        sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                    }
                } else {
                    double[] pr = BaumWelchUtils.toDoublePair(((BytesWritable) e.getValue()).getBytes());
                    double num = pr[0];
                    double denom = pr[1];
                    if (!sumOfStripes.containsKey(e.getKey())) {
                        sumOfStripes.put((IntWritable) e.getKey(), (BytesWritable) e.getValue());
                    } else {
                        double[] pr1 = BaumWelchUtils
                                .toDoublePair(((BytesWritable) sumOfStripes.get(e.getKey())).getBytes());
                        num += pr1[0];
                        denom += pr1[1];
                        byte[] doublePair1 = BaumWelchUtils.doublePairToByteArray(num, denom);
                        sumOfStripes.put((IntWritable) e.getKey(), new BytesWritable(doublePair1));
                    }
                }
            }
        }
    } else {
        for (MapWritable stripe : stripes) {
            for (Map.Entry<Writable, Writable> e : stripe.entrySet()) {
                double val = ((DoubleWritable) e.getValue()).get();
                if (!sumOfStripes.containsKey(e.getKey())) {
                    sumOfStripes.put((IntWritable) e.getKey(), (DoubleWritable) e.getValue());
                } else {
                    val += ((DoubleWritable) sumOfStripes.get(e.getKey())).get();
                    sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                }
            }
        }
    }
    context.write(key, sumOfStripes);
}
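The log-scaling branch above accumulates probabilities in log space via log(e^a + e^b) = a + log(1 + e^(b-a)). A minimal sketch of the numerically safer variant that anchors on the larger operand (the combiner above anchors on the incoming value, so exp() can overflow when the running sum is much larger); the helper class is an illustration, not part of the Mahout source:

public class LogSum {
    // Stable log(exp(a) + exp(b)); Double.NEGATIVE_INFINITY is the identity element.
    static double logAdd(double a, double b) {
        if (a == Double.NEGATIVE_INFINITY) return b;
        if (b == Double.NEGATIVE_INFINITY) return a;
        double max = Math.max(a, b);
        return max + Math.log(1.0 + Math.exp(Math.min(a, b) - max));
    }

    public static void main(String[] args) {
        // log(0.25 + 0.75) == log(1) == 0
        System.out.println(logAdd(Math.log(0.25), Math.log(0.75)));   // ~0.0
    }
}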
From source file:org.apache.mahout.classifier.sequencelearning.hmm.hadoop.BaumWelchMapper.java
License:Apache License
@Override
public void map(LongWritable seqID, VectorWritable seq, Context context)
        throws IOException, InterruptedException {
    MapWritable initialDistributionStripe = new MapWritable();
    HashMap<Integer, MapWritable> transitionDistributionStripe = new HashMap<Integer, MapWritable>();
    HashMap<Integer, MapWritable> emissionDistributionStripe = new HashMap<Integer, MapWritable>();
    Vector vec = seq.get();
    log.info("Sequence Length = {}", vec.size());
    int[] sequence = new int[vec.size()];
    int n = 0;
    for (int idx = 0; idx < vec.size(); idx++) {
        int val = (int) (vec.getElement(idx)).get();
        sequence[n] = val;
        n++;
    }
    if (scaling == HmmAlgorithms.ScalingMethod.LOGSCALING) {
        Matrix alphaFactors = HmmAlgorithms.forwardAlgorithm(Model, sequence,
                HmmAlgorithms.ScalingMethod.LOGSCALING, null);
        Matrix betaFactors = HmmAlgorithms.backwardAlgorithm(Model, sequence,
                HmmAlgorithms.ScalingMethod.LOGSCALING, null);
        // Initial Distribution
        for (int q = 0; q < nrOfHiddenStates; q++) {
            double alpha_1_q = alphaFactors.get(0, q);
            double beta_1_q = betaFactors.get(0, q);
            if ((alpha_1_q + beta_1_q) > Double.NEGATIVE_INFINITY) {
                initialDistributionStripe.put(new IntWritable(q), new DoubleWritable(alpha_1_q + beta_1_q));
            }
        }
        // Transition Distribution
        double[][] transitionMatrix = new double[nrOfHiddenStates][nrOfHiddenStates];
        for (int q = 0; q < nrOfHiddenStates; q++) {
            for (int x = 0; x < nrOfHiddenStates; x++) {
                transitionMatrix[q][x] = Double.NEGATIVE_INFINITY;
            }
        }
        for (int t = 0; t < sequence.length - 1; t++) {
            for (int q = 0; q < nrOfHiddenStates; q++) {
                for (int r = 0; r < nrOfHiddenStates; r++) {
                    double alpha_t_q = alphaFactors.get(t, q);
                    double A_q_r = Model.getTransitionMatrix().get(q, r) > 0
                            ? Math.log(Model.getTransitionMatrix().get(q, r))
                            : Double.NEGATIVE_INFINITY;
                    double B_r_xtplus1 = Model.getEmissionMatrix().get(r, sequence[t + 1]) > 0
                            ? Math.log(Model.getEmissionMatrix().get(r, sequence[t + 1]))
                            : Double.NEGATIVE_INFINITY;
                    double beta_tplus1_r = betaFactors.get(t + 1, r);
                    double transitionProb = alpha_t_q + A_q_r + B_r_xtplus1 + beta_tplus1_r;
                    if (transitionProb > Double.NEGATIVE_INFINITY) {
                        transitionMatrix[q][r] = transitionProb
                                + Math.log(1 + Math.exp(transitionMatrix[q][r] - transitionProb));
                    }
                }
            }
        }
        for (int q = 0; q < nrOfHiddenStates; q++) {
            MapWritable innerMap = new MapWritable();
            for (int r = 0; r < nrOfHiddenStates; r++) {
                if (transitionMatrix[q][r] > Double.NEGATIVE_INFINITY) {
                    innerMap.put(new IntWritable(r), new DoubleWritable(transitionMatrix[q][r]));
                }
            }
            transitionDistributionStripe.put(q, innerMap);
        }
        // Emission distribution
        double[][] emissionMatrix = new double[nrOfHiddenStates][nrOfEmittedStates];
        for (int q = 0; q < nrOfHiddenStates; q++) {
            for (int x = 0; x < nrOfEmittedStates; x++) {
                emissionMatrix[q][x] = Double.NEGATIVE_INFINITY;
            }
        }
        for (int t = 0; t < sequence.length; t++) {
            for (int q = 0; q < nrOfHiddenStates; q++) {
                double alpha_t_q = alphaFactors.get(t, q);
                double beta_t_q = betaFactors.get(t, q);
                double sum = alpha_t_q + beta_t_q;
                if (sum > Double.NEGATIVE_INFINITY) {
                    emissionMatrix[q][sequence[t]] = sum
                            + Math.log(1 + Math.exp(emissionMatrix[q][sequence[t]] - sum));
                }
            }
        }
        for (int q = 0; q < nrOfHiddenStates; q++) {
            MapWritable innerMap = new MapWritable();
            for (int r = 0; r < nrOfEmittedStates; r++) {
                if (emissionMatrix[q][r] > Double.NEGATIVE_INFINITY) {
                    innerMap.put(new IntWritable(r), new DoubleWritable(emissionMatrix[q][r]));
                }
            }
            emissionDistributionStripe.put(q, innerMap);
        }
    } else if (scaling == HmmAlgorithms.ScalingMethod.RESCALING) {
        double[] scalingFactors = new double[vec.size()];
        Matrix alphaFactors = HmmAlgorithms.forwardAlgorithm(Model, sequence,
                HmmAlgorithms.ScalingMethod.RESCALING, scalingFactors);
        Matrix betaFactors = HmmAlgorithms.backwardAlgorithm(Model, sequence,
                HmmAlgorithms.ScalingMethod.RESCALING, scalingFactors);
        // Initial Distribution
        for (int q = 0; q < nrOfHiddenStates; q++) {
            double alpha_1_q = alphaFactors.get(0, q);
            double beta_1_q = betaFactors.get(0, q);
            initialDistributionStripe.put(new IntWritable(q),
                    new DoubleWritable(alpha_1_q * beta_1_q / scalingFactors[0]));
        }
        // Transition Distribution
        double[][] transitionMatrixNum = new double[nrOfHiddenStates][nrOfHiddenStates];
        double[][] transitionMatrixDenom = new double[nrOfHiddenStates][nrOfHiddenStates];
        for (int q = 0; q < nrOfHiddenStates; q++) {
            for (int x = 0; x < nrOfHiddenStates; x++) {
                transitionMatrixNum[q][x] = 0.0;
                transitionMatrixDenom[q][x] = 0.0;
            }
        }
        for (int t = 0; t < sequence.length - 1; t++) {
            for (int q = 0; q < nrOfHiddenStates; q++) {
                for (int r = 0; r < nrOfHiddenStates; r++) {
                    double alpha_t_q = alphaFactors.get(t, q);
                    double A_q_r = Model.getTransitionMatrix().get(q, r);
                    double B_r_xtplus1 = Model.getEmissionMatrix().get(r, sequence[t + 1]);
                    double beta_tplus1_r = betaFactors.get(t + 1, r);
                    double beta_t_q = betaFactors.get(t, q);
                    double transitionProbNum = alpha_t_q * A_q_r * B_r_xtplus1 * beta_tplus1_r;
                    double transitionProbDenom = alpha_t_q * beta_t_q / scalingFactors[t];
                    transitionMatrixNum[q][r] += transitionProbNum;
                    transitionMatrixDenom[q][r] += transitionProbDenom;
                }
            }
        }
        for (int q = 0; q < nrOfHiddenStates; q++) {
            MapWritable innerMap = new MapWritable();
            for (int r = 0; r < nrOfHiddenStates; r++) {
                byte[] doublePair = BaumWelchUtils.doublePairToByteArray(transitionMatrixNum[q][r],
                        transitionMatrixDenom[q][r]);
                innerMap.put(new IntWritable(r), new BytesWritable(doublePair));
            }
            transitionDistributionStripe.put(q, innerMap);
        }
        // Emission distribution
        double[][] emissionMatrixNum = new double[nrOfHiddenStates][nrOfEmittedStates];
        double[][] emissionMatrixDenom = new double[nrOfHiddenStates][nrOfEmittedStates];
        for (int q = 0; q < nrOfHiddenStates; q++) {
            for (int x = 0; x < nrOfEmittedStates; x++) {
                emissionMatrixNum[q][x] = 0.0;
                emissionMatrixDenom[q][x] = 0.0;
            }
        }
        for (int q = 0; q < nrOfHiddenStates; ++q) {
            for (int j = 0; j < nrOfEmittedStates; ++j) {
                double temp = 0;
                double temp1 = 0;
                for (int t = 0; t < sequence.length; ++t) {
                    // delta tensor
                    if (sequence[t] == j) {
                        temp += alphaFactors.get(t, q) * betaFactors.get(t, q) / scalingFactors[t];
                    }
                    temp1 += alphaFactors.get(t, q) * betaFactors.get(t, q) / scalingFactors[t];
                }
                emissionMatrixNum[q][j] += temp;
                emissionMatrixDenom[q][j] += temp1;
            }
        }
        for (int q = 0; q < nrOfHiddenStates; q++) {
            MapWritable innerMap = new MapWritable();
            for (int r = 0; r < nrOfEmittedStates; r++) {
                byte[] doublePair = BaumWelchUtils.doublePairToByteArray(emissionMatrixNum[q][r],
                        emissionMatrixDenom[q][r]);
                innerMap.put(new IntWritable(r), new BytesWritable(doublePair));
            }
            emissionDistributionStripe.put(q, innerMap);
        }
    } else {
        Matrix alphaFactors = HmmAlgorithms.forwardAlgorithm(Model, sequence,
                HmmAlgorithms.ScalingMethod.NOSCALING, null);
        Matrix betaFactors = HmmAlgorithms.backwardAlgorithm(Model, sequence,
                HmmAlgorithms.ScalingMethod.NOSCALING, null);
        // Initial Distribution
        for (int q = 0; q < nrOfHiddenStates; q++) {
            double alpha_1_q = alphaFactors.get(0, q);
            double beta_1_q = betaFactors.get(0, q);
            initialDistributionStripe.put(new IntWritable(q), new DoubleWritable(alpha_1_q * beta_1_q));
        }
        // Transition Distribution
        double[][] transitionMatrix = new double[nrOfHiddenStates][nrOfHiddenStates];
        for (int q = 0; q < nrOfHiddenStates; q++) {
            for (int x = 0; x < nrOfHiddenStates; x++) {
                transitionMatrix[q][x] = 0.0;
            }
        }
        for (int t = 0; t < sequence.length - 1; t++) {
            for (int q = 0; q < nrOfHiddenStates; q++) {
                for (int r = 0; r < nrOfHiddenStates; r++) {
                    double alpha_t_q = alphaFactors.get(t, q);
                    double A_q_r = Model.getTransitionMatrix().get(q, r);
                    double B_r_xtplus1 = Model.getEmissionMatrix().get(r, sequence[t + 1]);
                    double beta_tplus1_r = betaFactors.get(t + 1, r);
                    double transitionProb = alpha_t_q * A_q_r * B_r_xtplus1 * beta_tplus1_r;
                    transitionMatrix[q][r] += transitionProb;
                }
            }
        }
        for (int q = 0; q < nrOfHiddenStates; q++) {
            MapWritable innerMap = new MapWritable();
            for (int r = 0; r < nrOfHiddenStates; r++) {
                innerMap.put(new IntWritable(r), new DoubleWritable(transitionMatrix[q][r]));
            }
            transitionDistributionStripe.put(q, innerMap);
        }
        // Emission distribution
        double[][] emissionMatrix = new double[nrOfHiddenStates][nrOfEmittedStates];
        for (int q = 0; q < nrOfHiddenStates; q++) {
            for (int x = 0; x < nrOfEmittedStates; x++) {
                emissionMatrix[q][x] = 0.0;
            }
        }
        for (int t = 0; t < sequence.length; t++) {
            for (int q = 0; q < nrOfHiddenStates; q++) {
                double alpha_t_q = alphaFactors.get(t, q);
                double beta_t_q = betaFactors.get(t, q);
                emissionMatrix[q][sequence[t]] += alpha_t_q * beta_t_q;
            }
        }
        for (int q = 0; q < nrOfHiddenStates; q++) {
            MapWritable innerMap = new MapWritable();
            for (int r = 0; r < nrOfEmittedStates; r++) {
                innerMap.put(new IntWritable(r), new DoubleWritable(emissionMatrix[q][r]));
            }
            emissionDistributionStripe.put(q, innerMap);
        }
    }
    // push out the associative arrays
    context.write(new Text("INITIAL"), initialDistributionStripe);
    for (int q = 0; q < nrOfHiddenStates; q++) {
        context.write(new Text("EMIT_" + Integer.toString(q)), emissionDistributionStripe.get(q));
        context.write(new Text("TRANSIT_" + Integer.toString(q)), transitionDistributionStripe.get(q));
    }
}
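The rescaling branch carries a (numerator, denominator) pair through a single BytesWritable via BaumWelchUtils.doublePairToByteArray/toDoublePair. The actual byte layout of that utility is not shown in this listing, so the following codec is only a plausible stand-in for how such a pair can be packed:

import java.nio.ByteBuffer;

public class DoublePairCodec {
    // Packs two doubles into 16 bytes; an assumed layout, not necessarily
    // identical to BaumWelchUtils.doublePairToByteArray.
    static byte[] doublePairToByteArray(double num, double denom) {
        return ByteBuffer.allocate(16).putDouble(num).putDouble(denom).array();
    }

    static double[] toDoublePair(byte[] bytes) {
        ByteBuffer buf = ByteBuffer.wrap(bytes);
        return new double[] { buf.getDouble(), buf.getDouble() };
    }

    public static void main(String[] args) {
        double[] pr = toDoublePair(doublePairToByteArray(3.0, 4.0));
        System.out.println(pr[0] + "/" + pr[1]);   // 3.0/4.0
    }
}

Note that BytesWritable.getBytes() returns the backing buffer, which may be longer than getLength(); for a fixed-width 16-byte encoding like this, decoding from the front of that buffer, as the reducers above do, still works.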
From source file:org.apache.mahout.classifier.sequencelearning.hmm.hadoop.BaumWelchReducer.java
License:Apache License
@Override
protected void reduce(Text key, Iterable<MapWritable> stripes, Context context)
        throws IOException, InterruptedException {
    MapWritable sumOfStripes = new MapWritable();
    // Finish the Expectation Step by aggregating all posterior probabilities for one key
    if (scaling.equals("logscaling")) {
        double totalValSum = Double.NEGATIVE_INFINITY;
        for (MapWritable stripe : stripes) {
            for (Map.Entry<Writable, Writable> e : stripe.entrySet()) {
                double val = ((DoubleWritable) e.getValue()).get();
                double max = totalValSum > val ? totalValSum : val;
                totalValSum = max + Math.log(Math.exp(totalValSum - max) + Math.exp(val - max));
                if (!sumOfStripes.containsKey(e.getKey())) {
                    sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                } else {
                    double sumStripesVal = ((DoubleWritable) sumOfStripes.get(e.getKey())).get();
                    if (sumStripesVal > Double.NEGATIVE_INFINITY) {
                        val = val + Math.log(1 + Math.exp(sumStripesVal - val));
                    }
                    sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                }
            }
        }
        // normalize the aggregate
        for (Map.Entry<Writable, Writable> e : sumOfStripes.entrySet()) {
            double val = ((DoubleWritable) e.getValue()).get();
            if (totalValSum > Double.NEGATIVE_INFINITY) {
                val = val - totalValSum;
            }
            sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(Math.exp(val)));
        }
    } else if (scaling.equals("rescaling")) {
        double totalValSum = 0.0;
        for (MapWritable stripe : stripes) {
            for (Map.Entry<Writable, Writable> e : stripe.entrySet()) {
                if (key.charAt(0) == (int) 'I') {
                    double val = ((DoubleWritable) e.getValue()).get();
                    totalValSum += val;
                    if (!sumOfStripes.containsKey(e.getKey())) {
                        sumOfStripes.put((IntWritable) e.getKey(), (DoubleWritable) e.getValue());
                    } else {
                        val += ((DoubleWritable) sumOfStripes.get(e.getKey())).get();
                        sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                    }
                } else {
                    double[] pr = BaumWelchUtils.toDoublePair(((BytesWritable) e.getValue()).getBytes());
                    double num = pr[0];
                    double denom = pr[1];
                    if (!sumOfStripes.containsKey(e.getKey())) {
                        sumOfStripes.put((IntWritable) e.getKey(), (BytesWritable) e.getValue());
                    } else {
                        double[] pr1 = BaumWelchUtils
                                .toDoublePair(((BytesWritable) sumOfStripes.get(e.getKey())).getBytes());
                        num += pr1[0];
                        denom += pr1[1];
                        byte[] doublePair1 = BaumWelchUtils.doublePairToByteArray(num, denom);
                        sumOfStripes.put((IntWritable) e.getKey(), new BytesWritable(doublePair1));
                    }
                }
            }
        }
        if (key.charAt(0) == (int) 'I') {
            // normalize the aggregate
            for (Map.Entry<Writable, Writable> e : sumOfStripes.entrySet()) {
                double val = ((DoubleWritable) e.getValue()).get();
                if (totalValSum > 0) {
                    val /= totalValSum;
                }
                sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
            }
        } else {
            // compute the probabilities
            for (Map.Entry<Writable, Writable> e : sumOfStripes.entrySet()) {
                double[] pr1 = BaumWelchUtils
                        .toDoublePair(((BytesWritable) sumOfStripes.get(e.getKey())).getBytes());
                sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(pr1[0] / pr1[1]));
            }
        }
    } else {
        double totalValSum = 0.0;
        for (MapWritable stripe : stripes) {
            for (Map.Entry<Writable, Writable> e : stripe.entrySet()) {
                double val = ((DoubleWritable) e.getValue()).get();
                totalValSum += val;
                if (!sumOfStripes.containsKey(e.getKey())) {
                    sumOfStripes.put((IntWritable) e.getKey(), (DoubleWritable) e.getValue());
                } else {
                    val += ((DoubleWritable) sumOfStripes.get(e.getKey())).get();
                    sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                }
            }
        }
        // normalize the aggregate
        for (Map.Entry<Writable, Writable> e : sumOfStripes.entrySet()) {
            double val = ((DoubleWritable) e.getValue()).get();
            if (totalValSum > 0) {
                val /= totalValSum;
            }
            sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
        }
    }
    // Write the distribution parameter vector to HDFS for the next iteration
    context.write(key, sumOfStripes);
}
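Every branch of this reducer ends the same way: sum the stripe's values, then divide each entry by the total so the stripe becomes a probability distribution. That final step, distilled into a standalone helper (a sketch; the reducer above additionally special-cases log-space and packed-pair values):

import org.apache.hadoop.io.*;

public class Normalize {
    // Rescales an IntWritable -> DoubleWritable stripe in place so its values sum to 1.
    static void normalize(MapWritable stripe) {
        double total = 0.0;
        for (Writable v : stripe.values()) {
            total += ((DoubleWritable) v).get();
        }
        if (total > 0) {
            for (java.util.Map.Entry<Writable, Writable> e : stripe.entrySet()) {
                e.setValue(new DoubleWritable(((DoubleWritable) e.getValue()).get() / total));
            }
        }
    }

    public static void main(String[] args) {
        MapWritable s = new MapWritable();
        s.put(new IntWritable(0), new DoubleWritable(2.0));
        s.put(new IntWritable(1), new DoubleWritable(6.0));
        normalize(s);
        System.out.println(((DoubleWritable) s.get(new IntWritable(0))).get());   // 0.25
    }
}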