List of usage examples for org.apache.hadoop.mapred JobConf getBoolean
public boolean getBoolean(String name, boolean defaultValue)
Gets the value of the `name` property as a boolean.
From source file:edu.uci.ics.fuzzyjoin.hadoop.tokens.array.ReduceAggregate.java
License:Apache License
/**
 * Initializes this task from the job configuration.
 *
 * <p>Reads {@code FuzzyJoinDriver.TOKENS_LENGTHSTATS_PROPERTY} as a boolean (falling back to
 * {@code FuzzyJoinDriver.TOKENS_LENGTHSTATS_VALUE}) and stores it in {@code lengthStats}.
 *
 * @param job the job configuration to read from
 */
@Override
public void configure(JobConf job) {
    final boolean configured = job.getBoolean(
            FuzzyJoinDriver.TOKENS_LENGTHSTATS_PROPERTY,
            FuzzyJoinDriver.TOKENS_LENGTHSTATS_VALUE);
    lengthStats = configured;
}
From source file:edu.uci.ics.fuzzyjoin.hadoop.tokens.array.ReduceSelect.java
License:Apache License
@Override public void configure(JobConf job) { conf = job;//from ww w.ja v a2 s . c om lengthStats = job.getBoolean(FuzzyJoinDriver.TOKENS_LENGTHSTATS_PROPERTY, FuzzyJoinDriver.TOKENS_LENGTHSTATS_VALUE); }
From source file:edu.ucsb.cs.hybrid.HybridDriver.java
License:Apache License
public static void main(String args[]) throws ParseException, IOException { // job.set("mapred.job.tracker", "local"); // job.set("fs.default.name", "file:///"); JobConf job = new JobConf(); job.setJarByClass(HybridDriver.class); new GenericOptionsParser(job, args); setMapperAndRunner(job);/*from www . j av a2 s . co m*/ job.setMapOutputKeyClass(DocDocWritable.class); job.setMapOutputValueClass(FloatWritable.class); job.setNumReduceTasks(0); job.setOutputKeyClass(DocDocWritable.class); job.setOutputValueClass(FloatWritable.class); Path inputPath = new Path(INPUT_DIR); CustomSequenceFileInputFormat.addInputPath(job, inputPath); Path outputPath = new Path(OUTPUT_DIR); job.setOutputFormat(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputPath(job, outputPath); FileSystem.get(job).delete(outputPath, true); job.setBoolean("fs.hdfs.impl.disable.cache", true); //xun not sure if needed if (job.getBoolean(Config.SPLITABLE_PROPERTY, Config.SPLITABLE_VALUE)) { job.setInputFormat(CustomSequenceFileInputFormat.class); Long splitMB = job.getLong(Config.SPLIT_MB_PROPERTY, Config.SPLIT_MB_VALUE) * 1024 * 1024; job.setLong("mapred.min.split.size", splitMB); job.setLong("mapred.max.split.size", splitMB); job.setLong("dfs.block.size", splitMB); } else { // Comment the following of splitter for www experiments it assumes no splitting // of partitions for load balancing, should be fixed. 
Splitter.configure(job, inputPath);// remove comment unless for www job.setInputFormat(NonSplitableSequenceInputFormat.class); //remove comment } //SIGIR'14 two-stage balancing //not yet fully incorporated if (job.getInt(Config.LOAD_BALANCE_PROPERTY, Config.LOAD_BALANCE_VALUE) != 0) { TwoStageLoadbalancing.main(job.getInt(Config.LOAD_BALANCE_PROPERTY, Config.LOAD_BALANCE_VALUE), new Path(PartDriver.OUTPUT_DIR), job); } JobSubmitter.run(job, "SIMILARITY", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE)); if (job.getBoolean(Config.CONVERT_TEXT_PROPERTY, Config.CONVERT_TEXT_VALUE)) IDMappingJob(args); }
From source file:edu.ucsb.cs.hybrid.HybridDriver.java
License:Apache License
/** * @param job : passed by reference to set its mapper class. *///from w ww. jav a 2s .c o m public static void setMapperAndRunner(JobConf job) { int numSplits = job.getInt(Config.NUMBER_SPLITS_PROPERTY, Config.NUMBER_SPLITS_VALUE); int PSSChoice = job.getInt(Config.BLOCK_CHOICE_PROPERTY, Config.BLOCK_CHOICE_VALUE);//1,2 String name = "PSS"; if (numSplits > 1) { //check can I set # splits for runner here? job.setMapRunnerClass(MultipleS_Runner.class); if (job.getBoolean(Config.MULTI_THREADS_PROPERTY, Config.MULTI_THREADS_VALUE)) { // threads testing job.setMapperClass(PSS1_Threaded_Mapper.class);// naming } else if (PSSChoice == 1) { name += "1"; job.setMapperClass(PSS1_Mapper.class); } else if (PSSChoice == 2) { name += "2"; job.setMapperClass(PSS2_Mapper.class);// MultipleS_Block1_Mapper } else ;//For future implementations } else { job.setMapRunnerClass(SingleS_Runner.class); if (job.getBoolean(Config.MULTI_THREADS_PROPERTY, Config.MULTI_THREADS_VALUE)) // threads throw new RuntimeException( "ERROR: Single S with multithreads! Set hybrid.threads.property to false."); if (PSSChoice == 1) { job.setMapperClass(PSS_Mapper.class); if (job.getBoolean(Config.BAYADRO_SKIP_PROPERTY, Config.BAYADRO_SKIP_VALUE)) { name += "/Bayardo_Dynamic_filter"; job.setMapperClass(PSS_Bayardo_Mapper.class);//PSS+Bayardo WWW'07 } } else if (PSSChoice == 2) { name += "2/SingleS"; job.setMapperClass(PSS2_SingleS_Mapper.class); } else job.setMapperClass(PSS3_SingleS_Mapper.class); //what is this? } job.setJobName(name); }
From source file:edu.ucsb.cs.hybrid.io.Splitter.java
License:Apache License
public static void configure(JobConf job, Path inputPath) throws IOException { hdfs = FileSystem.get(job);//from w w w. j a v a 2 s . c o m long initial_S = job.getInt(Config.MAP_S_PROPERTY, Config.MAP_S_VALUE); long nVectors = Collector.countDirVectors(hdfs, inputPath, job); if (initial_S > nVectors) try { throw new UnsupportedEncodingException( "WARNING: Mapper's host partition \"S\" is larger than the total number of input vectors!\n" + Config.MAP_S_PROPERTY + " is set to " + nVectors); } catch (UnsupportedEncodingException e) { initial_S = nVectors; } if (job.getBoolean(Config.SINGLE_MAP_PROPERTY, Config.SINGLE_MAP_VALUE)) { prepareOneMap(job, inputPath, initial_S); } else { long S_size = initial_S; //get_max_S_size(nVectors, initial_S); Path splitsDir = splitAll(job, S_size, inputPath); hdfs.delete(inputPath, true); hdfs.rename(splitsDir, inputPath); } }
From source file:edu.ucsb.cs.hybrid.io.TwoStageLoadbalancing.java
License:Apache License
public static void main(int step, Path inputDir, JobConf job) throws IOException { FileSystem hdfs = inputDir.getFileSystem(job); if (!hdfs.exists(Collector.partitionSizesPath)) { System.out.println("Partition sizes file does not exists!"); return;/* w w w .j ava 2 s . co m*/ } debugStages = job.getBoolean(Config.DEBUG_STAGES_PROPERTY, Config.DEBUG_STAGES_VALUE); MapFile.Reader partitionSizeReader = new MapFile.Reader(hdfs, Collector.partitionSizesPath.getName(), new JobConf()); Text partitionK = new Text(); LongWritable partSizeV = new LongWritable(); try { while (partitionSizeReader.next(partitionK, partSizeV)) { partitionsNames.add(partitionK.toString()); // useless? partitionsSizes.put(partitionK.toString(), partSizeV.get()); } } catch (Exception e) { ; } for (int i = 0; i < partitionsNames.size(); i++) { System.out.println("Partition " + partitionsNames.get(i) + " has " + partitionsSizes.get(partitionsNames.get(i)) + " vectors."); } if (partitionsNames.size() <= 1) return; stage0(); printUndirectedNeighbors("Stage0"); printPartitionsStat("Stage0"); printCircularPartitionsWeight("\nCircular"); calcCWStandardDeviation(); stage1(); printDirectedNeighbors("Stage1"); System.out.println("Stage 1 final weights: "); printPartitionsWeights("Stage1"); if ((step == 2) || (step == 12)) { stage2(); printDirectedNeighbors("Stage2"); System.out.println("Stage 2 final weights: "); printPartitionsWeights("Stage2"); } // stage3(job, hdfs); writeComparisonList(job, hdfs); // printComparisonList(job, hdfs);// remove }
From source file:edu.ucsb.cs.hybrid.mappers.PSS2_Mapper.java
License:Apache License
/**
 * Initializes this mapper from the job configuration.
 *
 * <p>Delegates to the superclass first, then reads {@code LOOPSLOOPB_PROPERTY} (default
 * {@code LOOPSLOOPB_VALUE}) into {@code loopsloopb} and calls {@code allocateCurrentB} with
 * {@code currentB} and {@code blockSize}.
 *
 * @param job the job configuration to read from
 */
@Override
public void configure(JobConf job) {
    super.configure(job);
    final boolean configured = job.getBoolean(LOOPSLOOPB_PROPERTY, LOOPSLOOPB_VALUE);
    loopsloopb = configured;
    allocateCurrentB(currentB, blockSize);
}
From source file:edu.ucsb.cs.hybrid.mappers.PSS2_SingleS_Mapper.java
License:Apache License
@Override public void configure(JobConf job) { super.configure(job); // idComparison set ? loopsloopb = job.getBoolean(LOOPSLOOPB_PROPERTY, LOOPSLOOPB_VALUE); currentB = new IndexFeatureWeight[blockSize]; for (i = 0; i < blockSize; i++) currentB[i] = new IndexFeatureWeight(0, Long.MAX_VALUE, 0); currentBpointers = new int[blockSize]; }
From source file:edu.ucsb.cs.hybrid.mappers.SingleS_Runner.java
License:Apache License
public static Reader getReader(JobConf conf) throws IOException { boolean oneMap = conf.getBoolean(Config.SINGLE_MAP_PROPERTY, Config.SINGLE_MAP_VALUE); boolean splittable = conf.getBoolean(Config.SPLITABLE_PROPERTY, Config.SPLITABLE_VALUE); if (!oneMap || splittable) return new Reader(conf, new Path(conf.get("map.input.file")), conf.getInt(Config.COMP_BLOCK_PROPERTY, Config.COMP_BLOCK_VALUE)); else// ww w .ja v a 2 s . c om return new OneMapReader(conf, new Path(conf.get("map.input.file")), conf.getInt(Config.COMP_BLOCK_PROPERTY, Config.COMP_BLOCK_VALUE)); }
From source file:edu.ucsb.cs.partitioning.cosine.Organizer.java
License:Apache License
/**
 * Copies the partition files under {@code input} into sequence files under {@code output},
 * starting a new output file whenever {@code isCombined} reports that the current partition
 * is not combined with the previous one.
 *
 * <p>When {@code Config.PRINT_DISTRIBUTION_PROPERTY} is true, the p-norm of every document is
 * also written, one per line, to the local file {@code "p-norm-distribution" + output}.
 *
 * <p>NOTE(review): {@code unused} and {@code document} are not declared in this method,
 * presumably they are fields of the enclosing class.
 *
 * @param input  directory containing the partition files
 * @param output output directory name; also used as the suffix of the distribution file
 * @param job    job configuration
 * @throws IOException if reading, writing or closing any of the files fails
 */
public static void readCombineCopy(Path input, String output, JobConf job) throws IOException {
    boolean printDist = job.getBoolean(Config.PRINT_DISTRIBUTION_PROPERTY,
            Config.PRINT_DISTRIBUTION_VALUE);
    BufferedWriter distout = null;
    SequenceFile.Writer out = null;
    try {
        if (printDist) {
            distout = new BufferedWriter(new FileWriter("p-norm-distribution" + output));
        }
        int pc = 0, pr = 0;
        float pChoice = job.getFloat(NormSortMain.P_NORM_PROPERTY, NormSortMain.P_NORM_VALUE);
        FileSystem hdfs = input.getFileSystem(new JobConf());
        FileStatus[] files = Partitioner.setFiles(hdfs, input);
        ArrayList<String> partitions = arrangeNames(files);

        for (int i = 0; i < partitions.size(); i++) {
            Path inputPath = new Path(input.toString() + "/" + partitions.get(i));
            if (hdfs.isDirectory(inputPath)) {
                continue;
            }
            SequenceFile.Reader in = new SequenceFile.Reader(hdfs, inputPath, job);
            try {
                if (!isCombined(pr, pc, getRow(inputPath.getName()),
                        getCol(inputPath.getName()), partitions)) {
                    if (out != null) {
                        out.close();
                        // Avoid a second close in the finally block if createWriter fails.
                        out = null;
                    }
                    pr = getRow(inputPath.getName());
                    pc = getCol(inputPath.getName());
                    out = SequenceFile.createWriter(hdfs, job,
                            new Path(output + "/" + inputPath.getName()), LongWritable.class,
                            FeatureWeightArrayWritable.class, SequenceFile.CompressionType.NONE);
                }
                while (in.next(unused, document)) {
                    out.append(new LongWritable(document.id),
                            new FeatureWeightArrayWritable(document.vectorSize, document.vector));
                    if (printDist) {
                        distout.write(document.getPNorm(pChoice) + " \n");
                    }
                }
            } finally {
                // Close the reader even when copying fails part-way.
                in.close();
            }
        }
    } finally {
        try {
            if (out != null) {
                out.close();
            }
        } finally {
            // Previously never closed, so buffered distribution lines could be lost.
            if (distout != null) {
                distout.close();
            }
        }
    }
}