List of usage examples for org.apache.hadoop.conf.Configurable#setConf
// Signature of the method under discussion: each example below passes a Configuration to a Configurable through this call.
void setConf(Configuration conf);
From source file:com.asakusafw.runtime.directio.hadoop.SequenceFileFormat.java
License:Apache License
private void configure(Object object) { if (object instanceof Configurable) { Configurable configurable = (Configurable) object; if (configurable.getConf() == null) { configurable.setConf(getConf()); }//from www. j a v a 2 s . com } }
From source file:com.ebay.erl.mobius.util.SerializableUtil.java
License:Apache License
public static Object deserializeFromBase64(String base64String, Configuration conf) throws IOException { ObjectInputStream ois = null; try {// w ww. ja v a2s. c o m byte[] objBinary = Base64.decodeBase64(base64String.getBytes()); ois = new ObjectInputStream(new ByteArrayInputStream(objBinary)); Object object = ois.readObject(); if (conf != null) { if (object instanceof Configurable) { ((Configurable) object).setConf(conf); } else if (object instanceof Configurable[]) { Configurable[] confArray = (Configurable[]) object; for (Configurable aConfigurable : confArray) { aConfigurable.setConf(conf); } } } return object; } catch (ClassNotFoundException e) { throw new RuntimeException(e); } finally { if (ois != null) { try { ois.close(); } catch (Throwable e) { } } } }
From source file:com.nearinfinity.blur.mapreduce.BlurReducer.java
License:Apache License
/**
 * Reflectively instantiates the compression codec named by
 * {@code compressionClass} through its no-argument constructor and, when the
 * codec is {@link Configurable}, passes it {@code _configuration}.
 *
 * @param compressionClass fully qualified name of the codec class to load
 * @return the newly created codec
 * @throws IOException wrapping any failure to load, instantiate, cast or
 *                     configure the codec
 */
protected CompressionCodec getInstance(String compressionClass) throws IOException {
    try {
        // Class.newInstance() is deprecated and lets checked constructor exceptions
        // escape undeclared; go through the Constructor object instead.
        Object instance = Class.forName(compressionClass).getDeclaredConstructor().newInstance();
        CompressionCodec codec = (CompressionCodec) instance;
        if (codec instanceof Configurable) {
            ((Configurable) codec).setConf(_configuration);
        }
        return codec;
    } catch (Exception e) {
        // Boundary catch: reflection, cast and configuration failures are all
        // reported uniformly to callers as IOException with the cause preserved.
        throw new IOException(e);
    }
}
From source file:com.nearinfinity.blur.utils.BlurUtil.java
License:Apache License
/**
 * Gives {@code t} a freshly created {@link Configuration} when it implements
 * {@link Configurable}; otherwise does nothing.
 *
 * @param t   the object to configure
 * @param <T> the object's type
 * @return the same instance that was passed in
 */
public static <T> T configure(T t) {
    if (t instanceof Configurable) {
        ((Configurable) t).setConf(new Configuration());
    }
    return t;
}
From source file:org.apache.mahout.classifier.bayes.mapreduce.bayes.BayesClassifierDriver.java
License:Apache License
/** * Run the job/*from w w w . j a v a 2 s. c o m*/ * * @param params * The Job parameters containing the gramSize, input output folders, defaultCat, encoding */ public static void runJob(Parameters params) throws IOException { Configurable client = new JobClient(); JobConf conf = new JobConf(BayesClassifierDriver.class); conf.setJobName("Bayes Classifier Driver running over input: " + params.get("testDirPath")); conf.setOutputKeyClass(StringTuple.class); conf.setOutputValueClass(DoubleWritable.class); FileInputFormat.setInputPaths(conf, new Path(params.get("testDirPath"))); Path outPath = new Path(params.get("testDirPath") + "-output"); FileOutputFormat.setOutputPath(conf, outPath); conf.setInputFormat(KeyValueTextInputFormat.class); conf.setMapperClass(BayesClassifierMapper.class); conf.setCombinerClass(BayesClassifierReducer.class); conf.setReducerClass(BayesClassifierReducer.class); conf.setOutputFormat(SequenceFileOutputFormat.class); conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization," + "org.apache.hadoop.io.serializer.WritableSerialization"); HadoopUtil.overwriteOutput(outPath); conf.set("bayes.parameters", params.toString()); client.setConf(conf); JobClient.runJob(conf); Path outputFiles = new Path(outPath, "part*"); FileSystem dfs = FileSystem.get(outPath.toUri(), conf); ConfusionMatrix matrix = readResult(dfs, outputFiles, conf, params); log.info("{}", matrix.summarize()); }
From source file:org.apache.mahout.classifier.bayes.mapreduce.bayes.BayesThetaNormalizerDriver.java
License:Apache License
@Override public void runJob(Path input, Path output, BayesParameters params) throws IOException { Configurable client = new JobClient(); JobConf conf = new JobConf(BayesThetaNormalizerDriver.class); conf.setJobName("Bayes Theta Normalizer Driver running over input: " + input); conf.setOutputKeyClass(StringTuple.class); conf.setOutputValueClass(DoubleWritable.class); FileInputFormat.addInputPath(conf, new Path(output, "trainer-tfIdf/trainer-tfIdf")); Path outPath = new Path(output, "trainer-thetaNormalizer"); FileOutputFormat.setOutputPath(conf, outPath); // conf.setNumMapTasks(100); // conf.setNumReduceTasks(1); conf.setMapperClass(BayesThetaNormalizerMapper.class); conf.setInputFormat(SequenceFileInputFormat.class); conf.setCombinerClass(BayesThetaNormalizerReducer.class); conf.setReducerClass(BayesThetaNormalizerReducer.class); conf.setOutputFormat(SequenceFileOutputFormat.class); conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization," + "org.apache.hadoop.io.serializer.WritableSerialization"); // Dont ever forget this. 
People should keep track of how hadoop conf // parameters and make or break a piece of code HadoopUtil.overwriteOutput(outPath); FileSystem dfs = FileSystem.get(outPath.toUri(), conf); Path sigmaKFiles = new Path(output, "trainer-weights/Sigma_k/*"); Map<String, Double> labelWeightSum = SequenceFileModelReader.readLabelSums(dfs, sigmaKFiles, conf); DefaultStringifier<Map<String, Double>> mapStringifier = new DefaultStringifier<Map<String, Double>>(conf, GenericsUtil.getClass(labelWeightSum)); String labelWeightSumString = mapStringifier.toString(labelWeightSum); log.info("Sigma_k for Each Label"); Map<String, Double> c = mapStringifier.fromString(labelWeightSumString); log.info("{}", c); conf.set("cnaivebayes.sigma_k", labelWeightSumString); Path sigmaJSigmaKFile = new Path(output, "trainer-weights/Sigma_kSigma_j/*"); double sigmaJSigmaK = SequenceFileModelReader.readSigmaJSigmaK(dfs, sigmaJSigmaKFile, conf); DefaultStringifier<Double> stringifier = new DefaultStringifier<Double>(conf, Double.class); String sigmaJSigmaKString = stringifier.toString(sigmaJSigmaK); log.info("Sigma_kSigma_j for each Label and for each Features"); double retSigmaJSigmaK = stringifier.fromString(sigmaJSigmaKString); log.info("{}", retSigmaJSigmaK); conf.set("cnaivebayes.sigma_jSigma_k", sigmaJSigmaKString); Path vocabCountFile = new Path(output, "trainer-tfIdf/trainer-vocabCount/*"); double vocabCount = SequenceFileModelReader.readVocabCount(dfs, vocabCountFile, conf); String vocabCountString = stringifier.toString(vocabCount); log.info("Vocabulary Count"); conf.set("cnaivebayes.vocabCount", vocabCountString); double retvocabCount = stringifier.fromString(vocabCountString); log.info("{}", retvocabCount); conf.set("bayes.parameters", params.toString()); conf.set("output.table", output.toString()); client.setConf(conf); JobClient.runJob(conf);/* www .j a va 2 s . com*/ }
From source file:org.apache.mahout.classifier.bayes.mapreduce.cbayes.CBayesThetaNormalizerDriver.java
License:Apache License
@Override public void runJob(Path input, Path output, BayesParameters params) throws IOException { Configurable client = new JobClient(); JobConf conf = new JobConf(CBayesThetaNormalizerDriver.class); conf.setJobName("Complementary Bayes Theta Normalizer Driver running over input: " + input); conf.setOutputKeyClass(StringTuple.class); conf.setOutputValueClass(DoubleWritable.class); FileInputFormat.addInputPath(conf, new Path(output, "trainer-weights/Sigma_j")); FileInputFormat.addInputPath(conf, new Path(output, "trainer-tfIdf/trainer-tfIdf")); Path outPath = new Path(output, "trainer-thetaNormalizer"); FileOutputFormat.setOutputPath(conf, outPath); // conf.setNumMapTasks(100); // conf.setNumReduceTasks(1); conf.setMapperClass(CBayesThetaNormalizerMapper.class); conf.setInputFormat(SequenceFileInputFormat.class); conf.setCombinerClass(CBayesThetaNormalizerReducer.class); conf.setReducerClass(CBayesThetaNormalizerReducer.class); conf.setOutputFormat(SequenceFileOutputFormat.class); conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization"); // Dont ever forget this. 
People should keep track of how hadoop conf // parameters and make or break a piece of code FileSystem dfs = FileSystem.get(outPath.toUri(), conf); HadoopUtil.overwriteOutput(outPath); Path sigmaKFiles = new Path(output, "trainer-weights/Sigma_k/*"); Map<String, Double> labelWeightSum = SequenceFileModelReader.readLabelSums(dfs, sigmaKFiles, conf); DefaultStringifier<Map<String, Double>> mapStringifier = new DefaultStringifier<Map<String, Double>>(conf, GenericsUtil.getClass(labelWeightSum)); String labelWeightSumString = mapStringifier.toString(labelWeightSum); log.info("Sigma_k for Each Label"); Map<String, Double> c = mapStringifier.fromString(labelWeightSumString); log.info("{}", c); conf.set("cnaivebayes.sigma_k", labelWeightSumString); Path sigmaKSigmaJFile = new Path(output, "trainer-weights/Sigma_kSigma_j/*"); double sigmaJSigmaK = SequenceFileModelReader.readSigmaJSigmaK(dfs, sigmaKSigmaJFile, conf); DefaultStringifier<Double> stringifier = new DefaultStringifier<Double>(conf, Double.class); String sigmaJSigmaKString = stringifier.toString(sigmaJSigmaK); log.info("Sigma_kSigma_j for each Label and for each Features"); double retSigmaJSigmaK = stringifier.fromString(sigmaJSigmaKString); log.info("{}", retSigmaJSigmaK); conf.set("cnaivebayes.sigma_jSigma_k", sigmaJSigmaKString); Path vocabCountFile = new Path(output, "trainer-tfIdf/trainer-vocabCount/*"); double vocabCount = SequenceFileModelReader.readVocabCount(dfs, vocabCountFile, conf); String vocabCountString = stringifier.toString(vocabCount); log.info("Vocabulary Count"); conf.set("cnaivebayes.vocabCount", vocabCountString); double retvocabCount = stringifier.fromString(vocabCountString); log.info("{}", retvocabCount); conf.set("bayes.parameters", params.toString()); conf.set("output.table", output.toString()); client.setConf(conf); JobClient.runJob(conf);//from w w w . j a va2 s .c o m }
From source file:org.apache.mahout.classifier.bayes.mapreduce.common.BayesFeatureDriver.java
License:Apache License
@Override public void runJob(Path input, Path output, BayesParameters params) throws IOException { Configurable client = new JobClient(); JobConf conf = new JobConf(BayesFeatureDriver.class); conf.setJobName("Bayes Feature Driver running over input: " + input); conf.setOutputKeyClass(StringTuple.class); conf.setOutputValueClass(DoubleWritable.class); conf.setPartitionerClass(FeaturePartitioner.class); conf.setOutputKeyComparatorClass(FeatureLabelComparator.class); FileInputFormat.setInputPaths(conf, input); FileOutputFormat.setOutputPath(conf, output); conf.setMapperClass(BayesFeatureMapper.class); conf.setInputFormat(KeyValueTextInputFormat.class); conf.setCombinerClass(BayesFeatureCombiner.class); conf.setReducerClass(BayesFeatureReducer.class); conf.setOutputFormat(BayesFeatureOutputFormat.class); conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization"); // this conf parameter needs to be set enable serialisation of conf values HadoopUtil.overwriteOutput(output);/*ww w .j a v a2 s . co m*/ conf.set("bayes.parameters", params.toString()); client.setConf(conf); JobClient.runJob(conf); }
From source file:org.apache.mahout.classifier.bayes.mapreduce.common.BayesTfIdfDriver.java
License:Apache License
@Override public void runJob(Path input, Path output, BayesParameters params) throws IOException { Configurable client = new JobClient(); JobConf conf = new JobConf(BayesWeightSummerDriver.class); conf.setJobName("TfIdf Driver running over input: " + input); conf.setOutputKeyClass(StringTuple.class); conf.setOutputValueClass(DoubleWritable.class); FileInputFormat.addInputPath(conf, new Path(output, "trainer-termDocCount")); FileInputFormat.addInputPath(conf, new Path(output, "trainer-wordFreq")); FileInputFormat.addInputPath(conf, new Path(output, "trainer-featureCount")); Path outPath = new Path(output, "trainer-tfIdf"); FileOutputFormat.setOutputPath(conf, outPath); // conf.setNumMapTasks(100); conf.setJarByClass(BayesTfIdfDriver.class); conf.setMapperClass(BayesTfIdfMapper.class); conf.setInputFormat(SequenceFileInputFormat.class); conf.setCombinerClass(BayesTfIdfReducer.class); conf.setReducerClass(BayesTfIdfReducer.class); conf.setOutputFormat(BayesTfIdfOutputFormat.class); conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization"); // Dont ever forget this. People should keep track of how hadoop conf // parameters and make or break a piece of code FileSystem dfs = FileSystem.get(outPath.toUri(), conf); HadoopUtil.overwriteOutput(outPath); Path interimFile = new Path(output, "trainer-docCount/part-*"); Map<String, Double> labelDocumentCounts = SequenceFileModelReader.readLabelDocumentCounts(dfs, interimFile, conf);//from www. jav a 2 s . 
c o m DefaultStringifier<Map<String, Double>> mapStringifier = new DefaultStringifier<Map<String, Double>>(conf, GenericsUtil.getClass(labelDocumentCounts)); String labelDocumentCountString = mapStringifier.toString(labelDocumentCounts); log.info("Counts of documents in Each Label"); Map<String, Double> c = mapStringifier.fromString(labelDocumentCountString); log.info("{}", c); conf.set("cnaivebayes.labelDocumentCounts", labelDocumentCountString); log.info(params.print()); if (params.get("dataSource").equals("hbase")) { String tableName = output.toString(); HBaseConfiguration hc = new HBaseConfiguration(new Configuration()); HTableDescriptor ht = new HTableDescriptor(tableName); HColumnDescriptor hcd = new HColumnDescriptor(BayesConstants.HBASE_COLUMN_FAMILY + ':'); hcd.setBloomfilter(true); hcd.setInMemory(true); hcd.setMaxVersions(1); hcd.setBlockCacheEnabled(true); ht.addFamily(hcd); log.info("Connecting to hbase..."); HBaseAdmin hba = new HBaseAdmin(hc); log.info("Creating Table {}", output); if (hba.tableExists(tableName)) { hba.disableTable(tableName); hba.deleteTable(tableName); hba.majorCompact(".META."); } hba.createTable(ht); conf.set("output.table", tableName); } conf.set("bayes.parameters", params.toString()); client.setConf(conf); JobClient.runJob(conf); }
From source file:org.apache.mahout.classifier.bayes.mapreduce.common.BayesWeightSummerDriver.java
License:Apache License
@Override public void runJob(Path input, Path output, BayesParameters params) throws IOException { Configurable client = new JobClient(); JobConf conf = new JobConf(BayesWeightSummerDriver.class); conf.setJobName("Bayes Weight Summer Driver running over input: " + input); conf.setOutputKeyClass(StringTuple.class); conf.setOutputValueClass(DoubleWritable.class); FileInputFormat.addInputPath(conf, new Path(output, "trainer-tfIdf/trainer-tfIdf")); Path outPath = new Path(output, "trainer-weights"); FileOutputFormat.setOutputPath(conf, outPath); HadoopUtil.overwriteOutput(outPath); // conf.setNumReduceTasks(1); // conf.setNumMapTasks(100); conf.setMapperClass(BayesWeightSummerMapper.class); // see the javadoc for the spec for file input formats: first token is key, // rest is input. Whole document on one line conf.setInputFormat(SequenceFileInputFormat.class); conf.setCombinerClass(BayesWeightSummerReducer.class); conf.setReducerClass(BayesWeightSummerReducer.class); conf.setOutputFormat(BayesWeightSummerOutputFormat.class); conf.set("bayes.parameters", params.toString()); conf.set("output.table", output.toString()); client.setConf(conf); JobClient.runJob(conf);//from w ww . ja v a2s . c o m }