List of usage examples for org.apache.hadoop.mapred JobConf getInt
public int getInt(String name, int defaultValue)
Returns the value of the name property as an int; if the property is not set or cannot be parsed as an int, the given defaultValue is returned.
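A minimal sketch of the lookup-with-default behavior before the real-world examples below (the property name "example.num.partitions" and the values used here are illustrative only, not taken from the examples that follow):

import org.apache.hadoop.mapred.JobConf;

public class GetIntSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        conf.setInt("example.num.partitions", 8);             // store an int property
        int set = conf.getInt("example.num.partitions", 4);   // property present: returns 8
        int missing = conf.getInt("example.not.set", 4);      // property absent: falls back to the default, 4
        System.out.println(set + " " + missing);              // prints "8 4"
    }
}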
From source file:edu.ucsb.cs.hybrid.io.Splitter.java
License:Apache License
public static void configure(JobConf job, Path inputPath) throws IOException {
    hdfs = FileSystem.get(job);
    long initial_S = job.getInt(Config.MAP_S_PROPERTY, Config.MAP_S_VALUE);
    long nVectors = Collector.countDirVectors(hdfs, inputPath, job);
    if (initial_S > nVectors)
        try {
            throw new UnsupportedEncodingException(
                    "WARNING: Mapper's host partition \"S\" is larger than the total number of input vectors!\n"
                            + Config.MAP_S_PROPERTY + " is set to " + nVectors);
        } catch (UnsupportedEncodingException e) {
            initial_S = nVectors;
        }
    if (job.getBoolean(Config.SINGLE_MAP_PROPERTY, Config.SINGLE_MAP_VALUE)) {
        prepareOneMap(job, inputPath, initial_S);
    } else {
        long S_size = initial_S; // get_max_S_size(nVectors, initial_S);
        Path splitsDir = splitAll(job, S_size, inputPath);
        hdfs.delete(inputPath, true);
        hdfs.rename(splitsDir, inputPath);
    }
}
From source file:edu.ucsb.cs.hybrid.mappers.SingleS_Mapper.java
License:Apache License
@Override
public void configure(JobConf job) {
    blockSize = job.getInt(Config.COMP_BLOCK_PROPERTY, Config.COMP_BLOCK_VALUE);
    threshold = job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE);
}
From source file:edu.ucsb.cs.hybrid.mappers.SingleS_Runner.java
License:Apache License
public static Reader getReader(JobConf conf) throws IOException {
    boolean oneMap = conf.getBoolean(Config.SINGLE_MAP_PROPERTY, Config.SINGLE_MAP_VALUE);
    boolean splittable = conf.getBoolean(Config.SPLITABLE_PROPERTY, Config.SPLITABLE_VALUE);
    if (!oneMap || splittable)
        return new Reader(conf, new Path(conf.get("map.input.file")),
                conf.getInt(Config.COMP_BLOCK_PROPERTY, Config.COMP_BLOCK_VALUE));
    else
        return new OneMapReader(conf, new Path(conf.get("map.input.file")),
                conf.getInt(Config.COMP_BLOCK_PROPERTY, Config.COMP_BLOCK_VALUE));
}
From source file:edu.ucsb.cs.lsh.minhash.LshMapper.java
License:Apache License
@Override
public void configure(JobConf job) {
    l = job.getInt(MinHashLshDriver.L_PROPERTY, MinHashLshDriver.L_VALUE);
    try {
        Path[] localFiles = DistributedCache.getLocalCacheFiles(job);
        // System.out.println("local:" + localFiles[0].getName());
        // FileSystem fs = localFiles[0].getFileSystem(job);
        FileSystem fs = FileSystem.get(job);
        // Reader reader = new SequenceFile.Reader(fs, localFiles[0], job);
        Reader reader = new SequenceFile.Reader(fs, new Path("lshfile"), job);
        reader.next(lsh);
        reader.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:edu.ucsb.cs.lsh.minhash.MinHashLshDriver.java
License:Apache License
public static void main(String args[]) throws ParseException, IOException {
    JobConf job = new JobConf();
    job.setJarByClass(MinHashLshDriver.class);
    job.setJobName(MinHashLshDriver.class.getSimpleName());
    GenericOptionsParser gop = new GenericOptionsParser(job, args);
    args = gop.getRemainingArgs();

    job.setMapperClass(LshMapper.class);
    job.setMapOutputKeyClass(IntArrayWritable.class); // signatures
    job.setMapOutputValueClass(LongWritable.class); // doc IDs
    job.setNumReduceTasks(job.getInt(NUM_REDUCERS_PROPERTY, NUM_REDUCERS_VALUE));
    job.setReducerClass(LshReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    String inputDir = args[0];
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input directory not set.");
    }
    FileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path("lsh-jaccard-buckets");
    FileOutputFormat.setOutputPath(job, outputPath);
    FileSystem.get(job).delete(outputPath, true);

    LshTable lshTable = new LshTable(job.getInt(K_PROPERTY, K_VALUE), job.getInt(L_PROPERTY, L_VALUE), 1024,
            job.getLong(NUM_FEATURES_PROPERTY, NUM_FEATURES_VALUE),
            job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE));
    writeLsh(job, outputPath.getFileSystem(job), lshTable);

    JobSubmitter.run(job, "LSH", job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE));
}
From source file:edu.ucsb.cs.lsh.projection.ProjectionsGenerator.java
License:Apache License
public static void main(JobConf job) throws IOException {
    int nBits/* D */, nFeatures/* K */, nReducers;
    job.setJobName(ProjectionsGenerator.class.getSimpleName());
    FileSystem fs = FileSystem.get(job);
    nBits = job.getInt(ProjectionLshDriver.LSH_NBITS_PROPERTY, ProjectionLshDriver.LSH_NBITS_VALUE);
    nFeatures = readCollectionFeatureCount(fs, job);
    setParameters(nBits, nFeatures);
    nReducers = job.getInt(ProjectionLshDriver.LSH_NREDUCER_PROPERTY, ProjectionLshDriver.LSH_NREDUCER_VALUE);

    Path inputPath = new Path(INPUT_DIR);
    Path outputPath = new Path(OUTPUT_DIR);
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    if (fs.exists(inputPath))
        fs.delete(inputPath, true);

    SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, new Path(inputPath.toString() + "/file"),
            IntWritable.class, IntWritable.class);
    for (int i = 0; i < nReducers; i++)
        writer.append(new IntWritable(i), new IntWritable(i));
    writer.close();

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    FileOutputFormat.setCompressOutput(job, false);

    job.set("mapred.child.java.opts", "-Xmx2048m");
    job.setInt("mapred.map.max.attempts", 10);
    job.setInt("mapred.reduce.max.attempts", 10);
    job.setNumMapTasks(1);
    job.setNumReduceTasks(nReducers);
    job.setMapperClass(IdentityMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setReducerClass(ProjectionReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(RandomVector.class);

    JobSubmitter.run(job, "LSH", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));
}
From source file:edu.ucsb.cs.lsh.projection.ProjectionsGenerator.java
License:Apache License
public static int readCollectionFeatureCount(FileSystem hdfs, JobConf job) throws IOException {
    Path nFeaturesPath = new Path(Properties.NUM_FEATURES_FILE);
    if (hdfs.exists(nFeaturesPath)) {
        BufferedReader br = new BufferedReader(
                new InputStreamReader(new DataInputStream(new FileInputStream(nFeaturesPath.toString()))));
        String line;
        if ((line = br.readLine()) != null)
            job.setInt(ProjectionLshDriver.LSH_NFEATURES_PROPERTY, Integer.parseInt(line));
    }
    return job.getInt(ProjectionLshDriver.LSH_NFEATURES_PROPERTY, ProjectionLshDriver.LSH_NFEATURES_VALUE);
}
From source file:edu.ucsb.cs.lsh.projection.SignaturesGenerator.java
License:Apache License
public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(SignaturesGenerator.class);
    new GenericOptionsParser(job, args);
    job.setJobName(SignaturesGenerator.class.getSimpleName());
    int nBits = job.getInt(ProjectionLshDriver.LSH_NBITS_PROPERTY, ProjectionLshDriver.LSH_NBITS_VALUE);
    setParameters();
    FileSystem fs = FileSystem.get(job);
    prepareDistributedCache(job, fs, new Path(ProjectionsGenerator.OUTPUT_DIR));
    Path outputPath = new Path(OUTPUT_DIR);
    if (fs.exists(outputPath))
        fs.delete(outputPath);

    FileInputFormat.setInputPaths(job, INPUT_DIR); // Path(INPUT_DIR));
    FileOutputFormat.setOutputPath(job, outputPath);
    // FileOutputFormat.setCompressOutput(job, false);
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);

    job.set("mapred.child.java.opts", "-Xmx2048m");
    job.setInt("mapred.map.max.attempts", 10);
    job.setInt("mapred.reduce.max.attempts", 10);
    job.setInt("mapred.task.timeout", 6000000);

    job.setMapperClass(SigMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(BitSignature.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BitSignature.class);

    JobSubmitter.run(job, "LSH", -1);
}
From source file:edu.ucsb.cs.partitioning.cosine.CosinePartitioning.java
License:Apache License
/**
 * Sets MapReduce input configurations for the core cosine partitioning job.
 */
public static JobConf setMapReduce(JobConf job, Class mapper, Class reducer) {
    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(IntIntWritable.class);
    job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class);
    job.setNumReduceTasks(job.getInt(Config.NUM_PARTITIONS_PROPERTY, Config.NUM_PARTITIONS_VALUE));
    job.setReducerClass(reducer);
    job.setOutputKeyClass(IntIntWritable.class);
    job.setOutputValueClass(IdFeatureWeightArrayWritable.class);
    return job;
}
From source file:edu.ucsb.cs.partitioning.cosine.Partitioner.java
License:Apache License
/**
 * Uniformly partitions the sequence vectors given the number of partitions
 * set in the configuration file. It also writes information about its
 * partitions (maximum p-norms, weights, or norms/weights/lengths) to a file
 * that guides the core static partitioning which follows, enabling skipping.
 *
 * @param norm_weight_all
 * @return
 */
public static JobConf main(String[] args, int norm_weight_all) throws IOException {
    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    job.setJarByClass(Partitioner.class);
    System.out.println(
            JobSubmitter.stars() + "\n Running partitioner to prepare uniform partitions (Single JVM) ");

    String inputDir = SortDriver.OUTPUT_DIR, maxDir;
    if (norm_weight_all == 1)
        maxDir = "/maxpnorm"; // maxDir = inputDir + "/maxpnorm";
    else if (norm_weight_all == 2)
        maxDir = "/maxweight"; // maxDir = inputDir + "/maxweight";
    else
        maxDir = "/maxall"; // maxDir = inputDir + "/maxall";

    if (!(new Path(inputDir).getFileSystem(job)).exists(new Path(inputDir)))
        throw new UnsupportedOperationException("ERROR: " + inputDir + " directory not set.");

    job.set(MAX_DIR_PATH, maxDir);
    job.set(Config.NUM_PARTITIONS_PROPERTY,
            Integer.toString(produceStaticParitions(job, inputDir, OUTPUT_DIR, maxDir,
                    job.getInt(Config.NUM_PARTITIONS_PROPERTY, Config.NUM_PARTITIONS_VALUE), norm_weight_all)));
    return job;
}