Usage examples for org.apache.hadoop.mapred.JobConf.setMapOutputKeyClass
public void setMapOutputKeyClass(Class<?> theClass)
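Before the examples, a quick orientation: setMapOutputKeyClass (together with setMapOutputValueClass) declares the intermediate key/value types the mapper emits. It is only needed when those types differ from the job's final output types set via setOutputKeyClass/setOutputValueClass, which otherwise also cover the map output. The following is a minimal self-contained sketch, not taken from the examples below; LengthMapper and LengthReducer are hypothetical placeholder classes invented for illustration.

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class MapOutputKeyClassDemo {

    // Emits <IntWritable, Text>: line length -> line.
    public static class LengthMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, IntWritable, Text> {
        public void map(LongWritable offset, Text line,
                OutputCollector<IntWritable, Text> out, Reporter reporter) throws IOException {
            out.collect(new IntWritable(line.getLength()), line);
        }
    }

    // Emits <Text, Text>: the reduce output key type differs from the map output key type.
    public static class LengthReducer extends MapReduceBase
            implements Reducer<IntWritable, Text, Text, Text> {
        public void reduce(IntWritable length, Iterator<Text> lines,
                OutputCollector<Text, Text> out, Reporter reporter) throws IOException {
            while (lines.hasNext())
                out.collect(new Text("len=" + length), lines.next());
        }
    }

    public static void main(String[] args) throws IOException {
        JobConf job = new JobConf(MapOutputKeyClassDemo.class);
        job.setJobName("map-output-key-class-demo");
        job.setMapperClass(LengthMapper.class);
        job.setReducerClass(LengthReducer.class);

        // Intermediate (map output) types: required here because they differ
        // from the final output types declared below.
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(Text.class);

        // Final (reduce output) types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        JobClient.runJob(job);
    }
}

The real-world examples below follow the same pattern: each one calls setMapOutputKeyClass exactly when the mapper's key type (IntWritable, Text, Kmer, etc.) is not the job's final output key type.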
From source file:edu.umn.cs.spatialHadoop.operations.Within.java
License:Open Source License
public static <S extends Shape> long within(Path[] inFiles, Path userOutputPath, OperationsParams params)
        throws IOException, InterruptedException {
    JobConf job = new JobConf(params, Within.class);
    LOG.info("Within journey starts ....");
    FileSystem inFs = inFiles[0].getFileSystem(job);
    Path outputPath = userOutputPath;
    if (outputPath == null) {
        FileSystem outFs = FileSystem.get(job);
        do {
            outputPath = new Path(inFiles[0].getName() + ".sjmr_" + (int) (Math.random() * 1000000));
        } while (outFs.exists(outputPath));
    }
    FileSystem outFs = outputPath.getFileSystem(job);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setJobName("Within");
    job.setMapperClass(WithinMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IndexedText.class);
    job.setNumMapTasks(5 * Math.max(1, clusterStatus.getMaxMapTasks()));
    job.setLong("mapred.min.split.size",
            Math.max(inFs.getFileStatus(inFiles[0]).getBlockSize(), inFs.getFileStatus(inFiles[1]).getBlockSize()));

    job.setReducerClass(WithinReduce.class);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));

    job.setInputFormat(ShapeLineInputFormat.class);
    if (job.getBoolean("output", true))
        job.setOutputFormat(TextOutputFormat.class);
    else
        job.setOutputFormat(NullOutputFormat.class);

    ShapeLineInputFormat.setInputPaths(job, inFiles);

    // Calculate and set the dimensions of the grid to use in the map phase
    long total_size = 0;
    Rectangle mbr = new Rectangle(Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE, -Double.MAX_VALUE);
    for (Path file : inFiles) {
        FileSystem fs = file.getFileSystem(params);
        Rectangle file_mbr = FileMBR.fileMBR(file, params);
        mbr.expand(file_mbr);
        total_size += FileUtil.getPathSize(fs, file);
    }
    // If the largest file is globally indexed, use its partitions
    total_size += total_size * job.getFloat(SpatialSite.INDEXING_OVERHEAD, 0.2f);
    int sjmrPartitioningGridFactor = params.getInt(PartitioiningFactor, 20);
    int num_cells = (int) Math.max(1,
            total_size * sjmrPartitioningGridFactor / outFs.getDefaultBlockSize(outputPath));
    LOG.info("Number of cells is configured to be " + num_cells);

    OperationsParams.setInactiveModeFlag(job, InactiveMode, isReduceInactive);
    OperationsParams.setJoiningThresholdPerOnce(job, JoiningThresholdPerOnce, joiningThresholdPerOnce);
    OperationsParams.setFilterOnlyModeFlag(job, isFilterOnlyMode, isFilterOnly);

    GridInfo gridInfo = new GridInfo(mbr.x1, mbr.y1, mbr.x2, mbr.y2);
    gridInfo.calculateCellDimensions(num_cells);
    OperationsParams.setShape(job, PartitionGrid, gridInfo);

    TextOutputFormat.setOutputPath(job, outputPath);

    if (OperationsParams.isLocal(job, inFiles)) {
        // Enforce local execution if explicitly set by user or for small files
        job.set("mapred.job.tracker", "local");
    }

    // Start the job
    RunningJob runningJob = JobClient.runJob(job);
    Counters counters = runningJob.getCounters();
    Counter outputRecordCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
    final long resultCount = outputRecordCounter.getValue();
    return resultCount;
}
From source file:edu.umn.cs.spatialHadoop.RandomSpatialGenerator.java
License:Open Source License
private static void generateMapReduce(Path outFile, OperationsParams params) throws IOException {
    JobConf job = new JobConf(params, RandomSpatialGenerator.class);
    job.setJobName("Generator");
    Shape shape = params.getShape("shape");
    FileSystem outFs = outFile.getFileSystem(job);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();

    // Set input format and map class
    job.setInputFormat(RandomInputFormat.class);
    job.setMapperClass(Repartition.RepartitionMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(shape.getClass());
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    String sindex = params.get("sindex");
    Rectangle mbr = params.getShape("mbr").getMBR();

    CellInfo[] cells;
    if (sindex == null) {
        cells = new CellInfo[] { new CellInfo(1, mbr) };
    } else if (sindex.equals("grid")) {
        GridInfo gridInfo = new GridInfo(mbr.x1, mbr.y1, mbr.x2, mbr.y2);
        FileSystem fs = outFile.getFileSystem(job);
        long blocksize = fs.getDefaultBlockSize(outFile);
        long size = params.getSize("size");
        int numOfCells = Repartition.calculateNumberOfPartitions(job, size, fs, outFile, blocksize);
        gridInfo.calculateCellDimensions(numOfCells);
        cells = gridInfo.getAllCells();
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    SpatialSite.setCells(job, cells);

    // Do not set a reduce function. Use the default identity reduce function
    if (cells.length == 1) {
        // All objects are in one partition. No need for a reduce phase
        job.setNumReduceTasks(0);
    } else {
        // More than one partition. Need a reduce phase to group shapes of the
        // same partition together
        job.setReducerClass(RepartitionReduce.class);
        job.setNumReduceTasks(
                Math.max(1, Math.min(cells.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));
    }

    // Set output path
    FileOutputFormat.setOutputPath(job, outFile);
    if (sindex == null || sindex.equals("grid")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    JobClient.runJob(job);

    // TODO move the following part to OutputCommitter
    // Concatenate all master files into one file
    FileStatus[] resultFiles = outFs.listStatus(outFile, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().contains("_master");
        }
    });
    String ext = resultFiles[0].getPath().getName()
            .substring(resultFiles[0].getPath().getName().lastIndexOf('.'));
    Path masterPath = new Path(outFile, "_master" + ext);
    OutputStream destOut = outFs.create(masterPath);
    byte[] buffer = new byte[4096];
    for (FileStatus f : resultFiles) {
        InputStream in = outFs.open(f.getPath());
        int bytes_read;
        do {
            bytes_read = in.read(buffer);
            if (bytes_read > 0)
                destOut.write(buffer, 0, bytes_read);
        } while (bytes_read > 0);
        in.close();
        outFs.delete(f.getPath(), false);
    }
    destOut.close();
}
From source file:edu.umn.cs.spatialHadoop.temporal.RepartitionTemporal.java
License:Apache License
public static void repartitionMapReduce(Path[] inputPaths, Path outputPath, OperationsParams params)
        throws IOException, InterruptedException {
    String sindex = params.get("sindex");
    boolean overwrite = params.getBoolean("overwrite", false);
    Shape stockShape = params.getShape("shape");

    FileSystem outFs = outputPath.getFileSystem(params);

    @SuppressWarnings("deprecation")
    final long blockSize = outFs.getDefaultBlockSize();

    // Calculate the dimensions of each partition based on gindex type
    CellInfo[] cellInfos;
    if (sindex.equals("grid")) {
        Rectangle inputMBR = FileMBR.fileMBR(inputPaths[0], params);
        long inputFileSize = FileMBR.sizeOfLastProcessedFile;
        for (int i = 1; i < inputPaths.length; i++) {
            Rectangle currentInputMBR = FileMBR.fileMBR(inputPaths[i], params);
            inputMBR.expand(currentInputMBR);
            inputFileSize = inputFileSize + FileMBR.sizeOfLastProcessedFile;
        }

        int num_partitions = calculateNumberOfPartitions(new Configuration(), inputFileSize, outFs,
                outputPath, blockSize);
        GridInfo gridInfo = new GridInfo(inputMBR.x1, inputMBR.y1, inputMBR.x2, inputMBR.y2);
        gridInfo.calculateCellDimensions(num_partitions);
        cellInfos = gridInfo.getAllCells();
    } else if (sindex.equals("rtree") || sindex.equals("r+tree") || sindex.equals("str")
            || sindex.equals("str+")) {
        // Pack in rectangles using an RTree
        cellInfos = packInRectangles(inputPaths, outputPath, params, null);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    JobConf job = new JobConf(params, RepartitionTemporal.class);
    job.setJobName("RepartitionTemporal");

    // Overwrite output file
    if (outFs.exists(outputPath)) {
        if (overwrite)
            outFs.delete(outputPath, true);
        else
            throw new RuntimeException(
                    "Output file '" + outputPath + "' already exists and overwrite flag is not set");
    }

    // Decide which map function to use depending on the type of global index
    if (sindex.equals("rtree") || sindex.equals("str")) {
        // Repartition without replication
        job.setMapperClass(RepartitionMapNoReplication.class);
    } else {
        // Repartition with replication (grid, str+, and r+tree)
        job.setMapperClass(RepartitionMap.class);
    }
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(stockShape.getClass());
    CombinedSpatialInputFormat.setInputPaths(job, inputPaths);
    job.setInputFormat(CombinedSpatialInputFormat.class);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    FileOutputFormat.setOutputPath(job, outputPath);
    if (sindex.equals("grid") || sindex.equals("str") || sindex.equals("str+")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else if (sindex.equals("rtree") || sindex.equals("r+tree")) {
        // For now, the two types of local index are the same
        job.setOutputFormat(RTreeGridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    SpatialSite.setCells(job, cellInfos);
    job.setBoolean(SpatialSite.OVERWRITE, overwrite);

    // Set reduce function
    job.setReducerClass(RepartitionReduce.class);
    job.setNumReduceTasks(
            Math.max(1, Math.min(cellInfos.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));

    // Set output committer that combines output files together
    job.setOutputCommitter(RepartitionOutputCommitter.class);

    JobClient.runJob(job);
}
From source file:edu.umn.cs.spatialHadoop.temporal.RepartitionTemporal.java
License:Apache License
public static void repartitionMapReduce(Path[] inputPaths, Path outputPath, Shape stockShape, long blockSize,
        CellInfo[] cellInfos, String sindex, boolean overwrite) throws IOException {
    JobConf job = new JobConf(Repartition.class);
    job.setJobName("RepartitionTemporal");
    FileSystem outFs = outputPath.getFileSystem(job);

    // Overwrite output file
    if (outFs.exists(outputPath)) {
        if (overwrite)
            outFs.delete(outputPath, true);
        else
            throw new RuntimeException(
                    "Output file '" + outputPath + "' already exists and overwrite flag is not set");
    }

    // Decide which map function to use depending on the type of global index
    if (sindex.equals("rtree") || sindex.equals("str")) {
        // Repartition without replication
        job.setMapperClass(RepartitionMapNoReplication.class);
    } else {
        // Repartition with replication (grid and r+tree)
        job.setMapperClass(RepartitionMap.class);
    }
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(stockShape.getClass());
    CombinedSpatialInputFormat.setInputPaths(job, inputPaths);
    job.setInputFormat(CombinedSpatialInputFormat.class);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    FileOutputFormat.setOutputPath(job, outputPath);
    if (sindex.equals("grid") || sindex.equals("str") || sindex.equals("str+")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else if (sindex.equals("rtree") || sindex.equals("r+tree")) {
        // For now, the two types of local index are the same
        job.setOutputFormat(RTreeGridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    SpatialSite.setCells(job, cellInfos);
    job.setBoolean(SpatialSite.OVERWRITE, overwrite);

    // Set reduce function
    job.setReducerClass(RepartitionReduce.class);
    job.setNumReduceTasks(
            Math.max(1, Math.min(cellInfos.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));

    // Set output committer that combines output files together
    job.setOutputCommitter(RepartitionOutputCommitter.class);

    if (blockSize != 0) {
        job.setLong("dfs.block.size", blockSize);
        job.setLong("fs.local.block.size", blockSize);
    }

    JobClient.runJob(job);
}
From source file:edu.yale.cs.hadoopdb.dataloader.GlobalHasher.java
License:Apache License
@Override
protected JobConf configureJob(String... args) throws Exception {
    JobConf conf = new JobConf(getConf(), this.getClass());
    conf.setJobName("GlobalHasher");

    conf.setMapOutputKeyClass(UnsortableInt.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(NullWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(GlobalHasher.Map.class);
    conf.setReducerClass(GlobalHasher.Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    if (args.length < 5) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    FileInputFormat.setInputPaths(conf, new Path(args[0]));

    // OUTPUT properties
    Path outputPath = new Path(args[1]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    int partNo = Integer.parseInt(args[2]);
    conf.setNumReduceTasks(partNo);

    conf.set(DELIMITER_PARAM, args[3]);

    int hashFieldPos = Integer.parseInt(args[4]);
    conf.setInt(HASH_FIELD_POS_PARAM, hashFieldPos);

    return conf;
}
From source file:findstableweatherstate.FindStableWeatherState.java
public String call() throws Exception {
    Path firstOutputPath = new Path("input/firstOutput");
    Path secondOutputPath = new Path("input/secondOutput");
    long startTime, stopTime, elapsedTime;

    JobConf job = new JobConf();
    job.setJarByClass(getClass());
    job.setJobName("invertedindex");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setReducerClass(JoinReducer.class);

    MultipleInputs.addInputPath(job, new Path(getInputPathStation()), TextInputFormat.class,
            StationMapper.class);
    MultipleInputs.addInputPath(job, new Path(getInputPathReadings()), TextInputFormat.class,
            ReadingsMapper.class);
    FileOutputFormat.setOutputPath(job, firstOutputPath);

    JobConf job2 = new JobConf();
    job2.setJarByClass(getClass());
    job2.setJobName("secondJob");

    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);

    //job2.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    FileInputFormat.setInputPaths(job2, firstOutputPath);

    job2.setMapperClass(CalculateMinMaxTemperatureMapper.class);
    job2.setReducerClass(CalculateMaxMinTemperatureReducer.class);

    if (getOutputPath() != null) {
        FileOutputFormat.setOutputPath(job2, secondOutputPath);
    }

    JobConf job3 = new JobConf();
    job3.setJarByClass(getClass());
    job3.setJobName("thirdJob");

    job3.setOutputKeyClass(Text.class);
    job3.setOutputValueClass(Text.class);
    job3.setMapOutputKeyClass(DoubleWritable.class);
    job3.setMapOutputValueClass(Text.class);

    //job2.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    FileInputFormat.setInputPaths(job3, secondOutputPath);

    job3.setMapperClass(SortStateMapper.class);
    job3.setReducerClass(SortStateReducer.class);

    if (getOutputPath() != null) {
        FileOutputFormat.setOutputPath(job3, new Path(getOutputPath()));
    }

    startTime = System.currentTimeMillis();
    JobClient.runJob(job);
    stopTime = System.currentTimeMillis();
    elapsedTime = stopTime - startTime;
    System.out.println("******************** First Job : " + elapsedTime / 1000);

    startTime = System.currentTimeMillis();
    JobClient.runJob(job2);
    stopTime = System.currentTimeMillis();
    elapsedTime = stopTime - startTime;
    System.out.println("******************** Second Job : " + elapsedTime / 1000);

    startTime = System.currentTimeMillis();
    JobClient.runJob(job3);
    stopTime = System.currentTimeMillis();
    elapsedTime = stopTime - startTime;
    System.out.println("******************** Third Job : " + elapsedTime / 1000);

    return "";
}
From source file:findstableweatherstate.FindStableWeatherState.java
@SuppressWarnings("unchecked") // <editor-fold defaultstate="collapsed" // desc="Generated Code">//GEN-BEGIN:initJobConf public static void initJobConf(JobConf conf) { // Generating code using Karmasphere Protocol for Hadoop 0.18 // CG_GLOBAL/* www .j a va 2s .co m*/ // CG_INPUT_HIDDEN conf.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class); // CG_MAPPER_HIDDEN conf.setMapperClass(StationMapper.class); // CG_MAPPER conf.setMapOutputKeyClass(org.apache.hadoop.io.Text.class); //conf.setReducerClass(IndexReducer.class); // CG_REDUCER // conf.setNumReduceTasks(1); conf.setOutputKeyClass(Text.class); // CG_OUTPUT_HIDDEN conf.setOutputFormat(org.apache.hadoop.mapred.TextOutputFormat.class); conf.setMapOutputValueClass(Text.class); conf.setOutputValueClass(Text.class); // CG_OUTPUT // Others }
From source file:gov.nih.ncgc.hadoop.BioIsostere.java
public int run(String[] args) throws Exception {
    JobConf jobConf = new JobConf(getConf(), BioIsostere.class);
    jobConf.setJobName(BioIsostere.class.getSimpleName());

    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(IntWritable.class);

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(MoleculePairWritable.class);

    jobConf.setMapperClass(BioisostereMapper.class);
    jobConf.setReducerClass(MoleculePairReducer.class);

    // jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setInt("mapred.line.input.format.linespermap", 10);
    jobConf.setInputFormat(NLineInputFormat.class);
    jobConf.setOutputFormat(TextOutputFormat.class);

    if (args.length != 3) {
        System.err.println("Usage: bisos <datafile> <out> <license file>");
        System.exit(2);
    }

    FileInputFormat.setInputPaths(jobConf, new Path(args[0]));
    FileOutputFormat.setOutputPath(jobConf, new Path(args[1]));

    // make the license file available via the distributed cache
    DistributedCache.addCacheFile(new Path(args[2]).toUri(), jobConf);

    long start = System.currentTimeMillis();
    JobClient.runJob(jobConf);
    double duration = (System.currentTimeMillis() - start) / 1000.0;
    System.out.println("Total runtime was " + duration + "s");

    return 0;
}
From source file:gr.forth.ics.isl.grouprecsmr.multiuser.MultiUserMain.java
public static void main(String[] args) {
    // paths and input handling
    Path inputRatingsPath = new Path(args[0]); // movieid, userid, rating (text files)
    Path job1OutputPath = new Path("/user/hduser/partialResults");
    Path partialDistancesPath = new Path("/user/hduser/partialResults/part-*"); // member_nonMember \t partialDistance (sequence files)
    Path candidateMoviesPath = new Path("/user/hduser/partialResults/candidateMovies-*"); // candidateMovieId, nonMemberUserId_rating (text files)
    Path userSimilaritiesPath = new Path("/user/hduser/userSimilarities"); // similarity of each group member to his friends (text files)
    Path finalScoresPath = new Path(args[1]); // movieId \t outputScore

    int numReduceTasks = 56; // default value
    if (args.length == 3) {
        numReduceTasks = Integer.parseInt(args[2]);
    }

    final float friendsSimThresh = 0.8f;

    String groupFilePath = "/user/hduser/group.txt"; // one-line csv file with user ids (text file)

    if (args.length < 2 || args.length > 3) {
        System.err.println(
                "Incorrect input. Example usage: hadoop jar ~/GroupRecs/MultiUser.jar inputPath outputPath [numReduceTasks]");
        return;
    }

    //JOB 1//
    JobClient client = new JobClient();
    JobConf conf = new JobConf(gr.forth.ics.isl.grouprecsmr.multiuser.MultiUserMain.class);

    try {
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(job1OutputPath)) {
            fs.delete(job1OutputPath, true);
        }
        if (fs.exists(userSimilaritiesPath)) {
            fs.delete(userSimilaritiesPath, true);
        }
        if (fs.exists(finalScoresPath)) {
            fs.delete(finalScoresPath, true);
        }
    } catch (IOException ex) {
        Logger.getLogger(MultiUserMain.class.getName()).log(Level.SEVERE, null, ex);
    }

    conf.setJobName("Multi-user approach - Job 1");
    System.out.println("Starting Job 1 (Multi-user approach)...");

    conf.setMapOutputKeyClass(VIntWritable.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(ByteWritable.class);

    conf.setInputFormat(TextInputFormat.class);
    //conf.setOutputFormat(TextOutputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);

    FileInputFormat.setInputPaths(conf, inputRatingsPath); // user ratings
    FileOutputFormat.setOutputPath(conf, job1OutputPath); // partial distances

    MultipleOutputs.addNamedOutput(conf, "candidateMovies", SequenceFileOutputFormat.class,
            VIntWritable.class, Text.class); // movieId, userId_rating

    conf.setMapperClass(gr.forth.ics.isl.grouprecsmr.job1.Job1Mapper.class);
    conf.setReducerClass(gr.forth.ics.isl.grouprecsmr.job1.Job1Reducer.class);

    conf.setNumReduceTasks(numReduceTasks);

    try {
        DistributedCache.addCacheFile(new URI(groupFilePath), conf); // group
    } catch (URISyntaxException e1) {
        System.err.println(e1.toString());
    }

    conf.setInt("mapred.task.timeout", 6000000);

    client.setConf(conf);
    RunningJob job;
    try {
        job = JobClient.runJob(conf);
        job.waitForCompletion();
    } catch (Exception e) {
        System.err.println(e);
    }

    //JOB 2//
    System.out.println("Starting Job 2 (Multi-user approach)...");
    JobClient client2 = new JobClient();
    JobConf conf2 = new JobConf(gr.forth.ics.isl.grouprecsmr.multiuser.MultiUserMain.class);
    conf2.setJobName("Multi-user approach - Job 2");

    conf2.setMapOutputKeyClass(Text.class); // user pair (member_nonMember), where nonMember is in friends
    conf2.setMapOutputValueClass(ByteWritable.class); // similarity part unsquared
    conf2.setOutputKeyClass(Text.class); // user pair (member_nonMember), where nonMember is in friends
    conf2.setOutputValueClass(DoubleWritable.class); // similarity

    conf2.setInputFormat(SequenceFileInputFormat.class);
    //conf2.setInputFormat(TextInputFormat.class);
    conf2.setOutputFormat(TextOutputFormat.class);
    //conf2.setOutputFormat(SequenceFileOutputFormat.class);
    //SequenceFileOutputFormat.setOutputCompressionType(conf2, SequenceFile.CompressionType.BLOCK);

    FileInputFormat.setInputPaths(conf2, partialDistancesPath); // Job 1 output
    FileOutputFormat.setOutputPath(conf2, userSimilaritiesPath); // Job 2 output (similarity of each group member to his friends)

    conf2.setMapperClass(IdentityMapper.class);
    conf2.setReducerClass(gr.forth.ics.isl.grouprecsmr.job2.Job2ReducerMulti.class);

    int numSimilaritiesPartitions = numReduceTasks;
    conf2.setNumReduceTasks(numSimilaritiesPartitions);

    conf2.setFloat("friendsSimThreshold", friendsSimThresh);

    conf2.setInt("mapred.task.timeout", 6000000);
    conf2.set("io.sort.mb", "500");

    client2.setConf(conf2);
    RunningJob job2;
    try {
        job2 = JobClient.runJob(conf2);
        job2.waitForCompletion();
    } catch (Exception e) {
        System.err.println(e);
    }

    //JOB 3//
    System.out.println("Starting Job 3 (Multi-user approach)...");
    JobClient client3 = new JobClient();
    JobConf conf3 = new JobConf(gr.forth.ics.isl.grouprecsmr.multiuser.MultiUserMain.class);
    conf3.setJobName("Multi-user approach - Job 3");

    conf3.setMapOutputKeyClass(VIntWritable.class);
    conf3.setMapOutputValueClass(Text.class);
    conf3.setOutputKeyClass(VIntWritable.class);
    conf3.setOutputValueClass(DoubleWritable.class);

    conf3.setInputFormat(SequenceFileInputFormat.class);
    //conf3.setInputFormat(TextInputFormat.class);
    conf3.setOutputFormat(TextOutputFormat.class);
    //conf3.setOutputFormat(SequenceFileOutputFormat.class);
    //SequenceFileOutputFormat.setOutputCompressionType(conf3, SequenceFile.CompressionType.BLOCK);

    try {
        DistributedCache.addCacheFile(new URI(groupFilePath), conf3);
    } catch (URISyntaxException ex) {
        System.err.println("Could not add group file to distributed cache. " + ex);
    }

    for (int i = 0; i < numSimilaritiesPartitions; i++) {
        String reduceId = String.format("%05d", i); // 5-digit int with leading zeros
        try {
            DistributedCache.addCacheFile(new URI(userSimilaritiesPath.toString() + "/part-" + reduceId),
                    conf3);
        } catch (URISyntaxException ex) {
            System.err.println("Could not add similarities files to distributed cache. " + ex);
        }
    }

    FileInputFormat.setInputPaths(conf3, candidateMoviesPath); // Job 1 output (candidate movies)
    FileOutputFormat.setOutputPath(conf3, finalScoresPath); // Job 3 output (movie \t outputScore)

    // conf3.setMapperClass(IdentityMapper.class);
    conf3.setMapperClass(gr.forth.ics.isl.grouprecsmr.job3.Job3MapperMulti.class); // filtering out ratings from non-Friends
    conf3.setReducerClass(gr.forth.ics.isl.grouprecsmr.job3.Job3ReducerMulti.class);

    conf3.setInt("mapred.task.timeout", 6000000);
    conf3.set("io.sort.mb", "500");

    conf3.setNumReduceTasks(numReduceTasks);

    client3.setConf(conf3);
    RunningJob job3;
    try {
        job3 = JobClient.runJob(conf3);
        job3.waitForCompletion();
    } catch (Exception e) {
        System.err.println(e);
    }
}
From source file:graphbuilding.GenomixDriver.java
License:Apache License
public void run(String inputPath, String outputPath, int numReducers, int sizeKmer, String defaultConfPath)
        throws IOException {
    JobConf conf = new JobConf(GenomixDriver.class);
    conf.setInt("sizeKmer", sizeKmer);
    if (defaultConfPath != null) {
        conf.addResource(new Path(defaultConfPath));
    }

    conf.setJobName("Genomix Graph Building");
    conf.setMapperClass(GenomixMapper.class);
    conf.setReducerClass(GenomixReducer.class);
    conf.setCombinerClass(GenomixCombiner.class);

    conf.setMapOutputKeyClass(Kmer.class);
    conf.setMapOutputValueClass(KmerCountValue.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(Kmer.class);
    conf.setOutputValueClass(KmerCountValue.class);
    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    conf.setNumReduceTasks(numReducers);

    FileSystem dfs = FileSystem.get(conf);
    dfs.delete(new Path(outputPath), true);
    JobClient.runJob(conf);
}