List of usage examples for org.apache.hadoop.mapred.JobConf.get

public String get(String name, String defaultValue)

Returns the value of the name property, or defaultValue if no such property exists.
name - the property name
defaultValue - the value returned when the property is not set
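A minimal standalone sketch of the two-argument get, using a hypothetical property name (not taken from any of the examples below):

JobConf conf = new JobConf();
conf.set("my.app.mode", "cluster");                     // hypothetical property, normally set by the driver
String mode = conf.get("my.app.mode", "local");         // property is set, returns "cluster"
String fallback = conf.get("my.app.missing", "local");  // property is absent, returns the default "local"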
From source file:edu.uci.ics.fuzzyjoin.hadoop.ridrecordpairs.token.ReduceVerifyListSelfJoin.java
License:Apache License
@Override
public void configure(JobConf job) {
    //
    // set SimilarityFilters
    //
    String similarityName = job.get(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_NAME_VALUE);
    similarityThreshold = job.getFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE);
    similarityFilters = SimilarityFiltersFactory.getSimilarityFilters(similarityName, similarityThreshold);
    similarityMetric = SimilarityMetricFactory.getSimilarityMetric(similarityName);
}
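The configure() above only reads these similarity properties; the job driver is expected to have set them first. A hedged driver-side sketch (the values "jaccard" and 0.8f are illustrative, only the constant names come from the code above):

JobConf job = new JobConf(FuzzyJoinConfig.class);
job.set(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY, "jaccard");       // illustrative value
job.setFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY, 0.8f);  // illustrative value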
From source file:edu.uci.ics.fuzzyjoin.hadoop.tokens.array.Map.java
License:Apache License
@Override
public void configure(JobConf job) {
    tokenizer = TokenizerFactory.getTokenizer(
            job.get(FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE),
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX, FuzzyJoinConfig.TOKEN_SEPARATOR);
    IntWritable one = new IntWritable(1);
    countArray.set(new IntWritable[] { one });
    countLengthArray.set(new IntWritable[] { one, new IntWritable() });
    //
    // set dataColumn
    //
    dataColumns = FuzzyJoinUtil.getDataColumns(
            job.get(FuzzyJoinConfig.RECORD_DATA_PROPERTY, FuzzyJoinConfig.RECORD_DATA_VALUE));
}
From source file:edu.uci.ics.fuzzyjoin.hadoop.tokens.scalar.Map.java
License:Apache License
@Override
public void configure(JobConf job) {
    tokenizer = TokenizerFactory.getTokenizer(
            job.get(FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE),
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX, FuzzyJoinConfig.TOKEN_SEPARATOR);
    //
    // set dataColumn
    //
    dataColumns = FuzzyJoinUtil.getDataColumns(
            job.get(FuzzyJoinConfig.RECORD_DATA_PROPERTY, FuzzyJoinConfig.RECORD_DATA_VALUE));
}
From source file:edu.ucsb.cs.partitioning.PartDriver.java
License:Apache License
public static void main(String args[]) throws UnsupportedEncodingException {
    int exitCode = -1;
    ProgramDriver pgd = new ProgramDriver();
    try {
        JobConf job = new JobConf();
        new GenericOptionsParser(job, args);
        String metric = job.get(Config.METRIC_PROPERTY, Config.METRIC_VALUE).toLowerCase();
        if (metric.contains("j")) {
            JaccardCoarsePartitionMain.main(args);
        } else
            HolderCosinePartitionMain.main(args);
        // pgd.addClass("cpartitionw", CosineWeightPartitionMain.class,
        //         "\tCosine static partitioning on weight sorted documents");
        // pgd.addClass("cpartitiona", CosineAllPartitionMain.class,
        //         "\tCosine static partitioning on ALL sorted documents");
        // pgd.driver(args);
        exitCode = 0;
    } catch (Throwable e) {
        e.printStackTrace();
    }
    System.exit(exitCode);
}
From source file:edu.umn.cs.spatialHadoop.core.GridRecordWriter.java
License:Open Source License
/**
 * Creates a new GridRecordWriter that will write all data files to the
 * given directory
 * @param outDir The directory in which all files will be stored
 * @param job The MapReduce job associated with this output
 * @param prefix A unique prefix to be associated with files of this writer
 * @param cells Cells to partition the file
 * @throws IOException
 */
public GridRecordWriter(Path outDir, JobConf job, String prefix, CellInfo[] cells) throws IOException {
    if (job != null) {
        this.sindex = job.get("sindex", "heap");
        this.pack = PackedIndexes.contains(sindex);
        this.expand = ExpandedIndexes.contains(sindex);
    }
    this.prefix = prefix;
    this.fileSystem = outDir == null ? FileOutputFormat.getOutputPath(job).getFileSystem(job)
            : outDir.getFileSystem(job != null ? job : new Configuration());
    this.outDir = outDir;
    this.jobConf = job;

    if (cells != null) {
        // Make sure cellIndex maps to array index. This is necessary for calls that
        // call directly write(int, Text)
        int highest_index = 0;
        for (CellInfo cell : cells) {
            if (cell.cellId > highest_index)
                highest_index = (int) cell.cellId;
        }

        // Create a master file that contains meta information about partitions
        masterFile = fileSystem.create(getMasterFilePath());

        this.cells = new CellInfo[highest_index + 1];
        for (CellInfo cell : cells)
            this.cells[(int) cell.cellId] = cell;

        // Prepare arrays that hold cells information
        intermediateCellStreams = new OutputStream[this.cells.length];
        intermediateCellPath = new Path[this.cells.length];
        cellsMbr = new Rectangle[this.cells.length];
        // Initialize the counters for each cell
        intermediateCellRecordCount = new int[this.cells.length];
        intermediateCellSize = new int[this.cells.length];
    } else {
        intermediateCellStreams = new OutputStream[1];
        intermediateCellPath = new Path[1];
        cellsMbr = new Rectangle[1];
        intermediateCellSize = new int[1];
        intermediateCellRecordCount = new int[1];
    }
    for (int i = 0; i < cellsMbr.length; i++) {
        cellsMbr[i] = new Rectangle(Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE, -Double.MAX_VALUE);
    }
    this.blockSize = fileSystem.getDefaultBlockSize(outDir);
    closingThreads = new ArrayList<Thread>();
    text = new Text();
}
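The constructor above only consumes the "sindex" property, falling back to "heap" when it is absent; a job that wants a different index would set it beforehand. A hedged sketch (the value "grid" is illustrative):

JobConf job = new JobConf(GridRecordWriter.class);
job.set("sindex", "grid");   // illustrative; GridRecordWriter reads this with job.get("sindex", "heap")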
From source file:edu.umn.cs.spatialHadoop.operations.PyramidPlot.java
License:Apache License
/**
 * Plot a file to a set of images in different zoom levels using a MapReduce
 * program.
 * @param <S> type of shapes stored in file
 * @param inFile - Path to the input file(s)
 * @param outFile - Path to the output file (image)
 * @param shape - A sample object to be used for parsing input file
 * @param tileWidth - Width of each tile
 * @param tileHeight - Height of each tile
 * @param vflip - Set to <code>true</code> to flip the whole image vertically
 * @param color - Color used to draw single shapes
 * @param numLevels - Number of zoom levels to plot
 * @throws IOException
 */
private static <S extends Shape> RunningJob plotMapReduce(Path inFile, Path outFile, OperationsParams params)
        throws IOException {
    Color color = params.getColor("color", Color.BLACK);

    String hdfDataset = (String) params.get("dataset");
    Shape shape = hdfDataset != null ? new NASARectangle() : params.getShape("shape");
    Shape plotRange = params.getShape("rect");

    boolean background = params.is("background");

    JobConf job = new JobConf(params, PyramidPlot.class);
    job.setJobName("PlotPyramid");

    String partition = job.get("partition", "space").toLowerCase();
    if (partition.equals("space")) {
        job.setMapperClass(SpacePartitionMap.class);
        job.setReducerClass(SpacePartitionReduce.class);
        job.setMapOutputKeyClass(TileIndex.class);
        job.setMapOutputValueClass(shape.getClass());
        job.setInputFormat(ShapeInputFormat.class);
    } else {
        job.setMapperClass(DataPartitionMap.class);
        job.setReducerClass(DataPartitionReduce.class);
        job.setMapOutputKeyClass(TileIndex.class);
        job.setMapOutputValueClass(ImageWritable.class);
        job.setInputFormat(ShapeArrayInputFormat.class);
    }

    job.setInt("color", color.getRGB());
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));

    if (shape instanceof Point && job.getBoolean("sample", false)) {
        // Enable adaptive sampling
        int imageWidthRoot = job.getInt("tilewidth", 256);
        int imageHeightRoot = job.getInt("tileheight", 256);
        long recordCount = FileMBR.fileMBR(inFile, params).recordCount;
        float sampleRatio = params.getFloat(GeometricPlot.AdaptiveSampleFactor, 1.0f) * imageWidthRoot
                * imageHeightRoot / recordCount;
        job.setFloat(GeometricPlot.AdaptiveSampleRatio, sampleRatio);
    }

    Rectangle fileMBR;
    if (hdfDataset != null) {
        // Input is HDF
        job.set(HDFRecordReader.DatasetName, hdfDataset);
        job.setBoolean(HDFRecordReader.SkipFillValue, true);
        job.setClass("shape", NASARectangle.class, Shape.class);
        // Determine the range of values by opening one of the HDF files
        Aggregate.MinMax minMax = Aggregate.aggregate(new Path[] { inFile }, params);
        job.setInt(MinValue, minMax.minValue);
        job.setInt(MaxValue, minMax.maxValue);
        //fileMBR = new Rectangle(-180, -90, 180, 90);
        fileMBR = plotRange != null ? plotRange.getMBR() : new Rectangle(-180, -140, 180, 169);
        // job.setClass(HDFRecordReader.ProjectorClass, MercatorProjector.class,
        //         GeoProjector.class);
    } else {
        fileMBR = FileMBR.fileMBR(inFile, params);
    }

    boolean keepAspectRatio = params.is("keep-ratio", true);
    if (keepAspectRatio) {
        // Expand input file to a rectangle for compatibility with the pyramid structure
        if (fileMBR.getWidth() > fileMBR.getHeight()) {
            fileMBR.y1 -= (fileMBR.getWidth() - fileMBR.getHeight()) / 2;
            fileMBR.y2 = fileMBR.y1 + fileMBR.getWidth();
        } else {
            fileMBR.x1 -= (fileMBR.getHeight() - fileMBR.getWidth()) / 2;
            fileMBR.x2 = fileMBR.x1 + fileMBR.getHeight();
        }
    }

    SpatialSite.setRectangle(job, InputMBR, fileMBR);

    // Set input and output
    ShapeInputFormat.addInputPath(job, inFile);
    if (plotRange != null) {
        job.setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    }

    job.setOutputFormat(PyramidOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outFile);
    job.setOutputCommitter(PlotPyramidOutputCommitter.class);

    if (background) {
        JobClient jc = new JobClient(job);
        return lastSubmittedJob = jc.submitJob(job);
    } else {
        return lastSubmittedJob = JobClient.runJob(job);
    }
}
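plotMapReduce switches its map/reduce classes on the "partition" property, defaulting to "space". The property would normally arrive through the params object the JobConf is built from; as a sketch, setting it on any Configuration that feeds the job selects the data-partitioned path:

Configuration conf = new Configuration();
conf.set("partition", "data");   // anything other than "space" selects DataPartitionMap/DataPartitionReduce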
From source file:eu.stratosphere.myriad.driver.hadoop.MyriadInputFormat.java
License:Apache License
public static String getDGenInstallDir(JobConf conf) {
    String nodePath = conf.get("mapred.myriad.dgen.node.path", "");
    if (nodePath.isEmpty()) {
        throw new IllegalArgumentException("Bad `mapred.myriad.dgen.node.path` parameter value");
    }
    return nodePath;
}
From source file:eu.stratosphere.myriad.driver.hadoop.MyriadInputFormat.java
License:Apache License
public static String getStage(JobConf conf) {
    String stage = conf.get("mapred.myriad.dgen.stage", "");
    if (stage.isEmpty()) {
        throw new IllegalArgumentException("Bad `mapred.myriad.dgen.stage` parameter value");
    }
    return stage;
}
From source file:eu.stratosphere.myriad.driver.hadoop.MyriadInputFormat.java
License:Apache License
public static String getOutputBase(JobConf conf) {
    String nodePath = conf.get("mapred.myriad.dgen.output.base", "");
    if (nodePath.isEmpty()) {
        throw new IllegalArgumentException("Bad `mapred.myriad.dgen.output.base` parameter value");
    }
    return nodePath;
}
From source file:eu.stratosphere.myriad.driver.hadoop.MyriadInputFormat.java
License:Apache License
public static String getDatasetID(JobConf conf) {
    String nodePath = conf.get("mapred.myriad.dgen.dataset.id", "");
    if (nodePath.isEmpty()) {
        throw new IllegalArgumentException("Bad `mapred.myriad.dgen.dataset.id` parameter value");
    }
    return nodePath;
}
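Each of these getters throws IllegalArgumentException when its property is empty, so a driver is expected to populate all four keys up front. A sketch with illustrative values (only the property keys come from the code above):

JobConf conf = new JobConf(MyriadInputFormat.class);
conf.set("mapred.myriad.dgen.node.path", "/opt/myriad");        // illustrative install path
conf.set("mapred.myriad.dgen.stage", "lineitem");               // illustrative stage name
conf.set("mapred.myriad.dgen.output.base", "/user/me/myriad");  // illustrative output base
conf.set("mapred.myriad.dgen.dataset.id", "dataset-01");        // illustrative dataset id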