Example usage for org.apache.hadoop.io Text charAt

List of usage examples for org.apache.hadoop.io Text charAt

Introduction

On this page you can find example usages of the charAt method of org.apache.hadoop.io.Text.

Prototype

public int charAt(int position) 

Source Link

Document

Returns the Unicode Scalar Value (32-bit integer value) for the character at position.

Usage

From source file:org.apache.mahout.classifier.sequencelearning.baumwelchmapreduce.BaumWelchUtils.java

License:Apache License

/**
 * Builds an {@link HmmModel} from the sequence files stored under {@code modelPath}.
 *
 * <p>Every record is read as a {@code (Text, MapWritable)} pair. The first character of the
 * key selects the distribution being filled: {@code 'I'} for the initial probabilities,
 * {@code 'T'} for one row of the transition matrix (keys of the form {@code TRANSIT_<state>})
 * and {@code 'E'} for one row of the emission matrix (keys of the form {@code EMIT_<state>}).
 * Each map entry is read as an {@code IntWritable} index and a {@code DoubleWritable} value.
 *
 * <p>The state ID is the complete integer following the underscore, so IDs of 10 and above
 * are handled; reading a single character at a fixed offset would truncate them.
 *
 * @param nrOfHiddenStates number of hidden states; sizes the initial vector and both matrices
 * @param nrOfOutputStates number of output states; column count of the emission matrix
 * @param modelPath        directory whose children are globbed with {@code "*"} and filtered
 *                         through {@code PathFilters.partFilter()}
 * @param conf             configuration used to resolve the file system and read the files
 * @return the assembled model, after it has been passed to {@code HmmUtils.validate}
 * @throws IOException           declared for the file-system and sequence-file access done here
 * @throws IllegalStateException if a key starts with a character other than I, T or E
 * @throws NumberFormatException if the text after the underscore of a T/E key is not an integer
 */
public static HmmModel CreateHmmModel(int nrOfHiddenStates, int nrOfOutputStates, Path modelPath,
        Configuration conf) throws IOException {

    log.info("Entering Create Hmm Model. Model Path = {}", modelPath.toUri());
    Vector initialProbabilities = new DenseVector(nrOfHiddenStates);
    Matrix transitionMatrix = new DenseMatrix(nrOfHiddenStates, nrOfHiddenStates);
    Matrix emissionMatrix = new DenseMatrix(nrOfHiddenStates, nrOfOutputStates);

    // Get the path location where the seq files encoding model are stored
    Path modelFilesPath = new Path(modelPath, "*");
    log.info("Create Hmm Model. ModelFiles Path = {}", modelFilesPath.toUri());
    Collection<Path> result = new ArrayList<Path>();

    // get all filtered file names in result list
    FileSystem fs = modelFilesPath.getFileSystem(conf);
    log.info("Create Hmm Model. File System = {}", fs);
    FileStatus[] matches = fs.listStatus(
            FileUtil.stat2Paths(fs.globStatus(modelFilesPath, PathFilters.partFilter())),
            PathFilters.partFilter());

    for (FileStatus match : matches) {
        log.info("CreateHmmmModel Adding File Match {}", match.getPath().toString());
        result.add(fs.makeQualified(match.getPath()));
    }

    // iterate through the result path list
    for (Path path : result) {
        for (Pair<Writable, MapWritable> pair : new SequenceFileIterable<Writable, MapWritable>(path, true,
                conf)) {
            Text key = (Text) pair.getFirst();
            log.info("CreateHmmModel Matching Seq File Key = {}", key);
            MapWritable valueMap = pair.getSecond();
            if (key.charAt(0) == 'I') {
                // initial distribution stripe
                for (MapWritable.Entry<Writable, Writable> entry : valueMap.entrySet()) {
                    int index = ((IntWritable) entry.getKey()).get();
                    double probability = ((DoubleWritable) entry.getValue()).get();
                    log.info("CreateHmmModel Initial Prob Adding  Key, Value  = ({} {})", index, probability);
                    initialProbabilities.set(index, probability);
                }
            } else if (key.charAt(0) == 'T') {
                // transition distribution stripe
                // key is of the form TRANSIT_0, TRANSIT_1 etc; the state ID is the whole
                // number after the underscore, not just its first digit
                int stateID = Integer.parseInt(key.toString().split("_")[1]);
                log.info("CreateHmmModel stateID = {}", stateID);
                for (MapWritable.Entry<Writable, Writable> entry : valueMap.entrySet()) {
                    int index = ((IntWritable) entry.getKey()).get();
                    double probability = ((DoubleWritable) entry.getValue()).get();
                    log.info("CreateHmmModel Transition Matrix ({}, {}) = {}",
                            new Object[] { stateID, index, probability });
                    transitionMatrix.set(stateID, index, probability);
                }
            } else if (key.charAt(0) == 'E') {
                // emission distribution stripe
                // key is of the form EMIT_0, EMIT_1 etc; the state ID is the whole
                // number after the underscore, not just its first digit
                int stateID = Integer.parseInt(key.toString().split("_")[1]);
                for (MapWritable.Entry<Writable, Writable> entry : valueMap.entrySet()) {
                    int index = ((IntWritable) entry.getKey()).get();
                    double probability = ((DoubleWritable) entry.getValue()).get();
                    log.info("CreateHmmModel Emission Matrix ({}, {}) = {}",
                            new Object[] { stateID, index, probability });
                    emissionMatrix.set(stateID, index, probability);
                }
            } else {
                throw new IllegalStateException("Error creating HmmModel from Sequence File Path");
            }
        }
    }
    HmmModel model = new HmmModel(transitionMatrix, emissionMatrix, initialProbabilities);
    HmmUtils.validate(model);
    return model;
}

From source file:org.apache.mahout.classifier.sequencelearning.hmm.hadoop.BaumWelchCombiner.java

License:Apache License

/**
 * Combines all stripes received for one key into a single stripe and writes it out.
 *
 * <p>The aggregation depends on the {@code scaling} field (declared outside this snippet):
 * <ul>
 *   <li>{@code "logscaling"}: values are treated as logarithms and combined per entry key as
 *       {@code log(exp(a) + exp(b))};</li>
 *   <li>{@code "rescaling"}: when the reduce key starts with {@code 'I'} the
 *       {@code DoubleWritable} values are added; otherwise values are {@code BytesWritable}
 *       pairs decoded with {@code BaumWelchUtils.toDoublePair} and added component-wise;</li>
 *   <li>anything else: the {@code DoubleWritable} values are added.</li>
 * </ul>
 *
 * @param key     the reduce key, written back unchanged
 * @param stripes the stripes to merge; entry keys are cast to {@code IntWritable}
 * @param context receives the single merged stripe
 * @throws IOException          declared for {@code context.write}
 * @throws InterruptedException declared for {@code context.write}
 */
@Override
protected void reduce(Text key, Iterable<MapWritable> stripes, Context context)
        throws IOException, InterruptedException {

    MapWritable sumOfStripes = new MapWritable();

    if (scaling.equals("logscaling")) {
        for (MapWritable stripe : stripes) {
            for (Map.Entry<?, ?> e : stripe.entrySet()) {
                double val = ((DoubleWritable) e.getValue()).get();
                DoubleWritable previous = (DoubleWritable) sumOfStripes.get(e.getKey());
                if (previous != null && previous.get() > Double.NEGATIVE_INFINITY) {
                    // Anchor on the larger operand: the exponent is then <= 0, so exp() cannot
                    // overflow, and a new value of -Infinity no longer yields -inf + inf = NaN.
                    double larger = Math.max(val, previous.get());
                    double smaller = Math.min(val, previous.get());
                    val = larger + Math.log1p(Math.exp(smaller - larger));
                }
                sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
            }
        }
    } else if (scaling.equals("rescaling")) {
        // The reduce key does not change while iterating, so classify it once.
        boolean initialStripe = key.charAt(0) == (int) 'I';
        for (MapWritable stripe : stripes) {
            for (Map.Entry<?, ?> e : stripe.entrySet()) {
                if (initialStripe) {
                    DoubleWritable previous = (DoubleWritable) sumOfStripes.get(e.getKey());
                    if (previous == null) {
                        sumOfStripes.put((IntWritable) e.getKey(), (DoubleWritable) e.getValue());
                    } else {
                        double val = ((DoubleWritable) e.getValue()).get() + previous.get();
                        sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                    }
                } else {
                    double[] pr = BaumWelchUtils.toDoublePair(((BytesWritable) e.getValue()).getBytes());
                    BytesWritable previous = (BytesWritable) sumOfStripes.get(e.getKey());
                    if (previous == null) {
                        sumOfStripes.put((IntWritable) e.getKey(), (BytesWritable) e.getValue());
                    } else {
                        double[] pr1 = BaumWelchUtils.toDoublePair(previous.getBytes());
                        double num = pr[0] + pr1[0];
                        double denom = pr[1] + pr1[1];
                        byte[] doublePair1 = BaumWelchUtils.doublePairToByteArray(num, denom);
                        sumOfStripes.put((IntWritable) e.getKey(), new BytesWritable(doublePair1));
                    }
                }
            }
        }
    } else {
        for (MapWritable stripe : stripes) {
            for (Map.Entry<?, ?> e : stripe.entrySet()) {
                DoubleWritable previous = (DoubleWritable) sumOfStripes.get(e.getKey());
                if (previous == null) {
                    sumOfStripes.put((IntWritable) e.getKey(), (DoubleWritable) e.getValue());
                } else {
                    double val = ((DoubleWritable) e.getValue()).get() + previous.get();
                    sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                }
            }
        }
    }
    context.write(key, sumOfStripes);
}

From source file:org.apache.mahout.classifier.sequencelearning.hmm.hadoop.BaumWelchReducer.java

License:Apache License

/**
 * Aggregates all stripes received for one key, normalizes the result and writes it out.
 *
 * <p>The aggregation depends on the {@code scaling} field (declared outside this snippet):
 * <ul>
 *   <li>{@code "logscaling"}: values are treated as logarithms; they are combined per entry
 *       key as {@code log(exp(a) + exp(b))}, the log of the grand total is subtracted, and the
 *       exponentiated result is emitted;</li>
 *   <li>{@code "rescaling"}: when the reduce key starts with {@code 'I'} the
 *       {@code DoubleWritable} values are summed and divided by their total; otherwise values
 *       are {@code BytesWritable} pairs decoded with {@code BaumWelchUtils.toDoublePair},
 *       summed component-wise, and emitted as first component divided by second;</li>
 *   <li>anything else: the {@code DoubleWritable} values are summed and divided by their
 *       total.</li>
 * </ul>
 *
 * @param key     the reduce key, written back unchanged
 * @param stripes the stripes to merge; entry keys are cast to {@code IntWritable}
 * @param context receives the single normalized stripe
 * @throws IOException          declared for {@code context.write}
 * @throws InterruptedException declared for {@code context.write}
 */
@Override
protected void reduce(Text key, Iterable<MapWritable> stripes, Context context)
        throws IOException, InterruptedException {

    MapWritable sumOfStripes = new MapWritable();

    // Finish the Expectation Step by aggregating all posterior probabilities for one key
    if (scaling.equals("logscaling")) {
        double totalValSum = Double.NEGATIVE_INFINITY;
        for (MapWritable stripe : stripes) {
            for (Map.Entry<?, ?> e : stripe.entrySet()) {
                double val = ((DoubleWritable) e.getValue()).get();
                // A value of -Infinity (log of zero) adds nothing to the total. Skipping it also
                // avoids -inf - (-inf) = NaN while the total itself is still -Infinity.
                if (val != Double.NEGATIVE_INFINITY) {
                    double max = Math.max(totalValSum, val);
                    totalValSum = max + Math.log(Math.exp(totalValSum - max) + Math.exp(val - max));
                }
                DoubleWritable previous = (DoubleWritable) sumOfStripes.get(e.getKey());
                if (previous != null && previous.get() > Double.NEGATIVE_INFINITY) {
                    // Anchor on the larger operand: the exponent is then <= 0, so exp() cannot
                    // overflow, and a new value of -Infinity no longer yields -inf + inf = NaN.
                    double larger = Math.max(val, previous.get());
                    double smaller = Math.min(val, previous.get());
                    val = larger + Math.log1p(Math.exp(smaller - larger));
                }
                sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
            }
        }

        //normalize the aggregate
        for (Map.Entry<?, ?> e : sumOfStripes.entrySet()) {
            double val = ((DoubleWritable) e.getValue()).get();
            if (totalValSum > Double.NEGATIVE_INFINITY) {
                val = val - totalValSum;
            }
            // Replacing the value of an existing key; no entry is added during iteration.
            sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(Math.exp(val)));
        }
    } else if (scaling.equals("rescaling")) {
        double totalValSum = 0.0;
        // The reduce key does not change while iterating, so classify it once.
        boolean initialStripe = key.charAt(0) == (int) 'I';

        for (MapWritable stripe : stripes) {
            for (Map.Entry<?, ?> e : stripe.entrySet()) {
                if (initialStripe) {
                    double val = ((DoubleWritable) e.getValue()).get();
                    totalValSum += val;
                    DoubleWritable previous = (DoubleWritable) sumOfStripes.get(e.getKey());
                    if (previous == null) {
                        sumOfStripes.put((IntWritable) e.getKey(), (DoubleWritable) e.getValue());
                    } else {
                        sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val + previous.get()));
                    }
                } else {
                    double[] pr = BaumWelchUtils.toDoublePair(((BytesWritable) e.getValue()).getBytes());
                    BytesWritable previous = (BytesWritable) sumOfStripes.get(e.getKey());
                    if (previous == null) {
                        sumOfStripes.put((IntWritable) e.getKey(), (BytesWritable) e.getValue());
                    } else {
                        double[] pr1 = BaumWelchUtils.toDoublePair(previous.getBytes());
                        double num = pr[0] + pr1[0];
                        double denom = pr[1] + pr1[1];
                        byte[] doublePair1 = BaumWelchUtils.doublePairToByteArray(num, denom);
                        sumOfStripes.put((IntWritable) e.getKey(), new BytesWritable(doublePair1));
                    }
                }
            }
        }

        if (initialStripe) {
            //normalize the aggregate
            for (Map.Entry<?, ?> e : sumOfStripes.entrySet()) {
                double val = ((DoubleWritable) e.getValue()).get();
                if (totalValSum > 0) {
                    val /= totalValSum;
                }
                sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
            }
        } else {
            // compute the probabilities
            for (Map.Entry<?, ?> e : sumOfStripes.entrySet()) {
                double[] pr1 = BaumWelchUtils.toDoublePair(((BytesWritable) e.getValue()).getBytes());
                // NOTE(review): a zero second component produces NaN or Infinity here;
                // confirm the mapper guarantees a non-zero denominator.
                sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(pr1[0] / pr1[1]));
            }
        }
    } else {
        double totalValSum = 0.0;

        for (MapWritable stripe : stripes) {
            for (Map.Entry<?, ?> e : stripe.entrySet()) {
                double val = ((DoubleWritable) e.getValue()).get();
                totalValSum += val;
                DoubleWritable previous = (DoubleWritable) sumOfStripes.get(e.getKey());
                if (previous == null) {
                    sumOfStripes.put((IntWritable) e.getKey(), (DoubleWritable) e.getValue());
                } else {
                    sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val + previous.get()));
                }
            }
        }

        //normalize the aggregate
        for (Map.Entry<?, ?> e : sumOfStripes.entrySet()) {
            double val = ((DoubleWritable) e.getValue()).get();
            if (totalValSum > 0) {
                val /= totalValSum;
            }
            sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
        }
    }

    //Write the distribution parameter vector to HDFS for the next iteration
    context.write(key, sumOfStripes);

}

From source file:org.apache.mahout.classifier.sequencelearning.hmm.hadoop.BaumWelchUtils.java

License:Apache License

/**
 * Converts the sequence files present in a directory to a {@link HmmModel} model.
 *
 * <p>Every record is read as a {@code (Text, MapWritable)} pair. The first character of the
 * key selects the distribution being filled: {@code 'I'} for the initial probabilities,
 * {@code 'T'} for one row of the transition matrix (keys of the form {@code TRANSIT_<state>})
 * and {@code 'E'} for one row of the emission matrix (keys of the form {@code EMIT_<state>}).
 * Each map entry is read as an {@code IntWritable} index and a {@code DoubleWritable} value.
 *
 * @param nrOfHiddenStates Number of hidden states
 * @param nrOfOutputStates Number of output states
 * @param modelPath        Location of the sequence files containing the model's distributions
 * @param conf             Configuration object
 * @return HmmModel the encoded model
 * @throws IOException           declared for the file-system and sequence-file access done here
 * @throws IllegalStateException if a key starts with a character other than I, T or E
 */
public static HmmModel createHmmModel(int nrOfHiddenStates, int nrOfOutputStates, Path modelPath,
        Configuration conf) throws IOException {

    log.info("Entering Create Hmm Model. Model Path = {}", modelPath.toUri());
    Vector initialProbabilities = new DenseVector(nrOfHiddenStates);
    Matrix transitionMatrix = new DenseMatrix(nrOfHiddenStates, nrOfHiddenStates);
    Matrix emissionMatrix = new DenseMatrix(nrOfHiddenStates, nrOfOutputStates);

    // Get the path location where the seq files encoding model are stored
    Path modelFilesPath = new Path(modelPath, "*");

    // list every child of the model directory accepted by the part-file filter
    FileSystem fs = modelFilesPath.getFileSystem(conf);
    FileStatus[] matches = fs.listStatus(
            FileUtil.stat2Paths(fs.globStatus(modelFilesPath, PathFilters.partFilter())),
            PathFilters.partFilter());

    // read each matched file and route its records by key prefix
    for (FileStatus match : matches) {
        Path path = fs.makeQualified(match.getPath());
        for (Pair<Writable, MapWritable> pair : new SequenceFileIterable<Writable, MapWritable>(path, true,
                conf)) {
            Text key = (Text) pair.getFirst();
            MapWritable valueMap = pair.getSecond();
            if (key.charAt(0) == (int) 'I') {
                // initial distribution stripe
                for (MapWritable.Entry<Writable, Writable> entry : valueMap.entrySet()) {
                    initialProbabilities.set(((IntWritable) entry.getKey()).get(),
                            ((DoubleWritable) entry.getValue()).get());
                }
            } else if (key.charAt(0) == (int) 'T') {
                // transition distribution stripe
                // key is of the form TRANSIT_0, TRANSIT_1 etc
                int stateID = Integer.parseInt(key.toString().split("_")[1]);
                for (MapWritable.Entry<Writable, Writable> entry : valueMap.entrySet()) {
                    transitionMatrix.set(stateID, ((IntWritable) entry.getKey()).get(),
                            ((DoubleWritable) entry.getValue()).get());
                }
            } else if (key.charAt(0) == (int) 'E') {
                // emission distribution stripe
                // key is of the form EMIT_0, EMIT_1 etc
                int stateID = Integer.parseInt(key.toString().split("_")[1]);
                for (MapWritable.Entry<Writable, Writable> entry : valueMap.entrySet()) {
                    emissionMatrix.set(stateID, ((IntWritable) entry.getKey()).get(),
                            ((DoubleWritable) entry.getValue()).get());
                }
            } else {
                throw new IllegalStateException("Error creating HmmModel from Sequence File Path");
            }
        }
    }

    // A constructor call never yields null, so the model is returned without a null check.
    return new HmmModel(transitionMatrix, emissionMatrix, initialProbabilities);
}