Example usage for org.apache.hadoop.io DefaultStringifier toString

Introduction

On this page you can find example usages of org.apache.hadoop.io.DefaultStringifier#toString(T).

Prototype

@Override
public String toString(T obj) throws IOException
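
Before turning to the examples below, here is a minimal, self-contained round-trip sketch (the class name StringifierDemo and the sample map are illustrative only, not part of Hadoop or Mahout): toString(T) serializes an object through Hadoop's configured serialization factory and Base64-encodes the bytes, and fromString(String) reverses the process. For non-Writable types such as HashMap, io.serializations must include JavaSerialization.

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DefaultStringifier;
import org.apache.hadoop.util.GenericsUtil;

public class StringifierDemo {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // DefaultStringifier looks up a serializer for the target class in this
        // list; JavaSerialization handles plain Serializable types like HashMap.
        conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
                + "org.apache.hadoop.io.serializer.WritableSerialization");

        Map<String, Double> weights = new HashMap<String, Double>();
        weights.put("label", 0.5);

        DefaultStringifier<Map<String, Double>> stringifier = new DefaultStringifier<Map<String, Double>>(
                conf, GenericsUtil.getClass(weights));

        String serialized = stringifier.toString(weights); // serialize to a Base64 string
        Map<String, Double> restored = stringifier.fromString(serialized); // round trip back
        System.out.println(restored);
    }
}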

Usage

From source file: org.apache.mahout.classifier.cbayes.CBayesNormalizedWeightMapper.java

License: Apache License

@Override
public void configure(JobConf job) {
    try {
        if (thetaNormalizer == null) {
            thetaNormalizer = new HashMap<String, Double>();

            DefaultStringifier<Map<String, Double>> mapStringifier = new DefaultStringifier<Map<String, Double>>(
                    job, GenericsUtil.getClass(thetaNormalizer));

            String thetaNormalizationsString = mapStringifier.toString(thetaNormalizer);
            thetaNormalizationsString = job.get("cnaivebayes.thetaNormalizations", thetaNormalizationsString);
            thetaNormalizer = mapStringifier.fromString(thetaNormalizationsString);

        }
    } catch (IOException ex) {
        log.warn(ex.toString(), ex);
    }
}
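
Note the idiom in configure(): an empty map is serialized first so that the resulting string can serve as the default for JobConf#get(String, String). If the driver never set cnaivebayes.thetaNormalizations, fromString() simply reproduces the empty map instead of failing on a null value.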

From source file: org.apache.mahout.classifier.cbayes.CBayesThetaDriver.java

License: Apache License

/**
 * Run the job
 *
 * @param input  the input pathname String
 * @param output the output pathname String
 */
public static void runJob(String input, String output) throws IOException {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(CBayesThetaDriver.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(DoubleWritable.class);

    FileInputFormat.addInputPath(conf, new Path(output + "/trainer-weights/Sigma_j"));
    FileInputFormat.addInputPath(conf, new Path(output + "/trainer-tfIdf/trainer-tfIdf"));
    Path outPath = new Path(output + "/trainer-theta");
    FileOutputFormat.setOutputPath(conf, outPath);
    //conf.setNumMapTasks(1);
    //conf.setNumReduceTasks(1);
    conf.setMapperClass(CBayesThetaMapper.class);
    conf.setInputFormat(SequenceFileInputFormat.class);
    //conf.setCombinerClass(CBayesThetaReducer.class);    
    conf.setReducerClass(CBayesThetaReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    conf.set("io.serializations",
            "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
    // Don't ever forget this. People should keep track of how Hadoop conf parameters can make or break a piece of code.
    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
        dfs.delete(outPath, true);
    }

    Path Sigma_kFiles = new Path(output + "/trainer-weights/Sigma_k/*");
    Map<String, Double> labelWeightSum = SequenceFileModelReader.readLabelSums(dfs, Sigma_kFiles, conf);
    DefaultStringifier<Map<String, Double>> mapStringifier = new DefaultStringifier<Map<String, Double>>(conf,
            GenericsUtil.getClass(labelWeightSum));
    String labelWeightSumString = mapStringifier.toString(labelWeightSum);

    log.info("Sigma_k for Each Label");
    Map<String, Double> c = mapStringifier.fromString(labelWeightSumString);
    log.info("{}", c);
    conf.set("cnaivebayes.sigma_k", labelWeightSumString);

    Path sigma_kSigma_jFile = new Path(output + "/trainer-weights/Sigma_kSigma_j/*");
    double sigma_jSigma_k = SequenceFileModelReader.readSigma_jSigma_k(dfs, sigma_kSigma_jFile, conf);
    DefaultStringifier<Double> stringifier = new DefaultStringifier<Double>(conf, Double.class);
    String sigma_jSigma_kString = stringifier.toString(sigma_jSigma_k);

    log.info("Sigma_kSigma_j for each Label and for each Features");
    double retSigma_jSigma_k = stringifier.fromString(sigma_jSigma_kString);
    log.info("{}", retSigma_jSigma_k);
    conf.set("cnaivebayes.sigma_jSigma_k", sigma_jSigma_kString);

    Path vocabCountFile = new Path(output + "/trainer-tfIdf/trainer-vocabCount/*");
    double vocabCount = SequenceFileModelReader.readVocabCount(dfs, vocabCountFile, conf);
    String vocabCountString = stringifier.toString(vocabCount);

    log.info("Vocabulary Count");
    conf.set("cnaivebayes.vocabCount", vocabCountString);
    double retvocabCount = stringifier.fromString(vocabCountString);
    log.info("{}", retvocabCount);

    client.setConf(conf);

    JobClient.runJob(conf);
}
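
A note on the io.serializations line above (repeated in the other drivers and helpers on this page): DefaultStringifier resolves a serializer for the target class through Hadoop's SerializationFactory, and WritableSerialization alone cannot handle plain Serializable types such as HashMap and Double. Without JavaSerialization in the list, constructing the stringifier fails.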

From source file: org.apache.mahout.classifier.cbayes.CBayesThetaMapper.java

License: Apache License

@Override
public void configure(JobConf job) {
    try {
        if (labelWeightSum == null) {
            labelWeightSum = new HashMap<String, Double>();

            DefaultStringifier<Map<String, Double>> mapStringifier = new DefaultStringifier<Map<String, Double>>(
                    job, GenericsUtil.getClass(labelWeightSum));

            String labelWeightSumString = mapStringifier.toString(labelWeightSum);
            labelWeightSumString = job.get("cnaivebayes.sigma_k", labelWeightSumString);
            labelWeightSum = mapStringifier.fromString(labelWeightSumString);

            DefaultStringifier<Double> stringifier = new DefaultStringifier<Double>(job,
                    GenericsUtil.getClass(sigma_jSigma_k));
            String sigma_jSigma_kString = stringifier.toString(sigma_jSigma_k);
            sigma_jSigma_kString = job.get("cnaivebayes.sigma_jSigma_k", sigma_jSigma_kString);
            sigma_jSigma_k = stringifier.fromString(sigma_jSigma_kString);

            String vocabCountString = stringifier.toString(vocabCount);
            vocabCountString = job.get("cnaivebayes.vocabCount", vocabCountString);
            vocabCount = stringifier.fromString(vocabCountString);

        }
    } catch (IOException ex) {
        log.info(ex.toString(), ex);
    }
}

From source file: org.apache.mahout.classifier.cbayes.CBayesThetaNormalizerDriver.java

License: Apache License

/**
 * Run the job
 *
 * @param input  the input pathname String
 * @param output the output pathname String
 */
public static void runJob(String input, String output) throws IOException {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(CBayesThetaNormalizerDriver.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(DoubleWritable.class);
    FileInputFormat.addInputPath(conf, new Path(output + "/trainer-weights/Sigma_j"));
    FileInputFormat.addInputPath(conf, new Path(output + "/trainer-tfIdf/trainer-tfIdf"));
    Path outPath = new Path(output + "/trainer-thetaNormalizer");
    FileOutputFormat.setOutputPath(conf, outPath);
    conf.setNumMapTasks(100);
    //conf.setNumReduceTasks(1);
    conf.setMapperClass(CBayesThetaNormalizerMapper.class);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setCombinerClass(CBayesThetaNormalizerReducer.class);
    conf.setReducerClass(CBayesThetaNormalizerReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.set("io.serializations",
            "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
    // Don't ever forget this. People should keep track of how Hadoop conf parameters can make or break a piece of code.

    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
        dfs.delete(outPath, true);
    }

    Path Sigma_kFiles = new Path(output + "/trainer-weights/Sigma_k/*");
    Map<String, Double> labelWeightSum = SequenceFileModelReader.readLabelSums(dfs, Sigma_kFiles, conf);
    DefaultStringifier<Map<String, Double>> mapStringifier = new DefaultStringifier<Map<String, Double>>(conf,
            GenericsUtil.getClass(labelWeightSum));
    String labelWeightSumString = mapStringifier.toString(labelWeightSum);

    log.info("Sigma_k for Each Label");
    Map<String, Double> c = mapStringifier.fromString(labelWeightSumString);
    log.info("{}", c);
    conf.set("cnaivebayes.sigma_k", labelWeightSumString);

    Path sigma_kSigma_jFile = new Path(output + "/trainer-weights/Sigma_kSigma_j/*");
    double sigma_jSigma_k = SequenceFileModelReader.readSigma_jSigma_k(dfs, sigma_kSigma_jFile, conf);
    DefaultStringifier<Double> stringifier = new DefaultStringifier<Double>(conf, Double.class);
    String sigma_jSigma_kString = stringifier.toString(sigma_jSigma_k);

    log.info("Sigma_kSigma_j for each Label and for each Features");
    double retSigma_jSigma_k = stringifier.fromString(sigma_jSigma_kString);
    log.info("{}", retSigma_jSigma_k);
    conf.set("cnaivebayes.sigma_jSigma_k", sigma_jSigma_kString);

    Path vocabCountFile = new Path(output + "/trainer-tfIdf/trainer-vocabCount/*");
    double vocabCount = SequenceFileModelReader.readVocabCount(dfs, vocabCountFile, conf);
    String vocabCountString = stringifier.toString(vocabCount);

    log.info("Vocabulary Count");
    conf.set("cnaivebayes.vocabCount", vocabCountString);
    double retvocabCount = stringifier.fromString(vocabCountString);
    log.info("{}", retvocabCount);

    client.setConf(conf);

    JobClient.runJob(conf);

}

From source file: org.apache.mahout.common.Parameters.java

License: Apache License

@Override
public String toString() {
    Configuration conf = new Configuration();
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    DefaultStringifier<Map<String, String>> mapStringifier = new DefaultStringifier<Map<String, String>>(conf,
            GenericsUtil.getClass(params));
    try {
        return mapStringifier.toString(params);
    } catch (IOException e) {
        log.info("Encountered IOException while deserializing returning empty string", e);
        return "";
    }

}

From source file: org.apache.mahout.fpm.pfpgrowth.PFPGrowth.java

License: Apache License

/**
 * Generates the fList from the serialized string representation
 *
 * @param params the Parameters instance holding the serialized list
 * @param key the key under which the serialized list is stored
 * @param conf the Hadoop Configuration (its io.serializations setting is overwritten here)
 * @return Deserialized Feature Frequency List
 * @throws IOException
 */
public static List<Pair<String, Long>> deserializeList(Parameters params, String key, Configuration conf)
        throws IOException {
    List<Pair<String, Long>> list = new ArrayList<Pair<String, Long>>();
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");

    DefaultStringifier<List<Pair<String, Long>>> listStringifier = new DefaultStringifier<List<Pair<String, Long>>>(
            conf, GenericsUtil.getClass(list));
    String serializedString = params.get(key, listStringifier.toString(list));
    list = listStringifier.fromString(serializedString);
    return list;
}

From source file: org.apache.mahout.fpm.pfpgrowth.PFPGrowth.java

License: Apache License

/**
 * Generates the gList (Group ID mapping of various frequent features) Map from the corresponding
 * serialized representation
 *
 * @param params the Parameters instance holding the serialized map
 * @param key the key under which the serialized map is stored
 * @param conf the Hadoop Configuration (its io.serializations setting is overwritten here)
 * @return Deserialized Group List
 * @throws IOException
 */
public static Map<String, Long> deserializeMap(Parameters params, String key, Configuration conf)
        throws IOException {
    Map<String, Long> map = new HashMap<String, Long>();
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");

    DefaultStringifier<Map<String, Long>> mapStringifier = new DefaultStringifier<Map<String, Long>>(conf,
            GenericsUtil.getClass(map));
    String gListString = params.get(key, mapStringifier.toString(map));
    map = mapStringifier.fromString(gListString);
    return map;
}

From source file: org.apache.mahout.fpm.pfpgrowth.PFPGrowth.java

License: Apache License

/**
 * Serializes the fList and returns the string representation of the List
 *
 * @param list the Feature Frequency List to serialize
 * @param conf the Hadoop Configuration (its io.serializations setting is overwritten here)
 * @return Serialized String representation of List
 * @throws IOException
 */
private static String serializeList(List<Pair<String, Long>> list, Configuration conf) throws IOException {
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    DefaultStringifier<List<Pair<String, Long>>> listStringifier = new DefaultStringifier<List<Pair<String, Long>>>(
            conf, GenericsUtil.getClass(list));
    return listStringifier.toString(list);
}

From source file: org.apache.mahout.fpm.pfpgrowth.PFPGrowth.java

License: Apache License

/**
 * Converts a given Map into a String using Hadoop's DefaultStringifier
 *
 * @param map the gList Map to serialize
 * @param conf the Hadoop Configuration (its io.serializations setting is overwritten here)
 * @return Serialized String representation of the GList Map
 * @throws IOException
 */
private static String serializeMap(Map<String, Long> map, Configuration conf) throws IOException {
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    DefaultStringifier<Map<String, Long>> mapStringifier = new DefaultStringifier<Map<String, Long>>(conf,
            GenericsUtil.getClass(map));
    return mapStringifier.toString(map);
}
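
The serializeMap/deserializeMap pair above (and likewise serializeList/deserializeList) is meant to be used together across the driver/worker boundary. A hedged sketch of that round trip follows; the key name "gList", the sample contents, and the Parameters#set(String, String) call are assumptions for illustration, and since serializeMap is private to PFPGrowth the driver side inlines the same stringifier pattern it uses internally.

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DefaultStringifier;
import org.apache.hadoop.util.GenericsUtil;
import org.apache.mahout.common.Parameters;
import org.apache.mahout.fpm.pfpgrowth.PFPGrowth;

public class GListRoundTripSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
                + "org.apache.hadoop.io.serializer.WritableSerialization");

        Map<String, Long> gList = new HashMap<String, Long>();
        gList.put("featureA", 1L);

        // Driver side: serializeMap(Map, Configuration) is private, so this
        // repeats the same DefaultStringifier pattern it uses internally.
        DefaultStringifier<Map<String, Long>> mapStringifier = new DefaultStringifier<Map<String, Long>>(
                conf, GenericsUtil.getClass(gList));
        Parameters params = new Parameters();
        params.set("gList", mapStringifier.toString(gList)); // assumed Parameters#set(String, String)

        // Worker side: recover the map from its serialized representation.
        Map<String, Long> recovered = PFPGrowth.deserializeMap(params, "gList", conf);
        System.out.println(recovered);
    }
}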

From source file: org.apache.mahout.text.wikipedia.WikipediaDatasetCreatorMapper.java

License: Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);

    Configuration conf = context.getConfiguration();

    if (inputCategories == null) {
        Set<String> newCategories = Sets.newHashSet();
        DefaultStringifier<Set<String>> setStringifier = new DefaultStringifier<Set<String>>(conf,
                GenericsUtil.getClass(newCategories));
        String categoriesStr = conf.get("wikipedia.categories", setStringifier.toString(newCategories));
        Set<String> inputCategoriesSet = setStringifier.fromString(categoriesStr);
        inputCategories = Lists.newArrayList(inputCategoriesSet);
        inputCategoryPatterns = Lists.newArrayListWithCapacity(inputCategories.size());
        for (String inputCategory : inputCategories) {
            inputCategoryPatterns.add(Pattern.compile(".*\\b" + inputCategory + "\\b.*"));
        }

    }

    exactMatchOnly = conf.getBoolean("exact.match.only", false);

    if (analyzer == null) {
        String analyzerStr = conf.get("analyzer.class", WikipediaAnalyzer.class.getName());
        analyzer = ClassUtils.instantiateAs(analyzerStr, Analyzer.class);
    }

    log.info("Configure: Input Categories size: {} Exact Match: {} Analyzer: {}", inputCategories.size(),
            exactMatchOnly, analyzer.getClass().getName());
}
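
For the setup() above to find anything under "wikipedia.categories", a driver must have stored the category set with the same stringifier. A minimal sketch of that producer side follows; the category values are illustrative, and this is not Mahout's actual driver code.

import java.io.IOException;
import java.util.Set;

import com.google.common.collect.Sets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DefaultStringifier;
import org.apache.hadoop.util.GenericsUtil;

public class WikipediaCategoriesSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
                + "org.apache.hadoop.io.serializer.WritableSerialization");

        // Serialize the category set under the key the mapper reads in setup().
        Set<String> categories = Sets.newHashSet("science", "history");
        DefaultStringifier<Set<String>> setStringifier = new DefaultStringifier<Set<String>>(
                conf, GenericsUtil.getClass(categories));
        conf.set("wikipedia.categories", setStringifier.toString(categories));
    }
}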