List of usage examples for org.apache.hadoop.mapreduce Job isSuccessful
public boolean isSuccessful() throws IOException
From source file:edu.umn.cs.spatialHadoop.operations.ClosestPair.java
License:Open Source License
public static Job closestPairMapReduce(Path[] inPaths, Path outPath, OperationsParams params) throws IOException, InterruptedException, ClassNotFoundException { Job job = new Job(params, "Closest Pair"); job.setJarByClass(ClosestPair.class); Shape shape = params.getShape("shape"); // Set map and reduce job.setMapperClass(ClosestPairMap.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(shape.getClass()); job.setReducerClass(ClosestPairReduce.class); // Set input and output job.setInputFormatClass(SpatialInputFormat3.class); SpatialInputFormat3.setInputPaths(job, inPaths); job.setOutputFormatClass(ClosestPairOutputFormat.class); TextOutputFormat.setOutputPath(job, outPath); // Set column boundaries to define the boundaries of each reducer SpatialSite.splitReduceSpace(job, inPaths, params); // Submit the job if (!params.getBoolean("background", false)) { job.waitForCompletion(params.getBoolean("verbose", false)); if (!job.isSuccessful()) throw new RuntimeException("Job failed!"); } else {//from ww w. jav a 2s .c o m job.submit(); } return job; }
From source file:edu.umn.cs.spatialHadoop.operations.RangeQuery.java
License:Open Source License
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { final OperationsParams params = new OperationsParams(new GenericOptionsParser(args)); final Path[] paths = params.getPaths(); if (paths.length <= 1 && !params.checkInput()) { printUsage();/*ww w. ja v a 2 s . c o m*/ System.exit(1); } if (paths.length >= 2 && !params.checkInputOutput()) { printUsage(); System.exit(1); } if (params.get("rect") == null) { System.err.println("You must provide a query range"); printUsage(); System.exit(1); } final Path inPath = params.getInputPath(); final Path outPath = params.getOutputPath(); final Rectangle[] queryRanges = params.getShapes("rect", new Rectangle()); // All running jobs final Vector<Long> resultsCounts = new Vector<Long>(); Vector<Job> jobs = new Vector<Job>(); Vector<Thread> threads = new Vector<Thread>(); long t1 = System.currentTimeMillis(); for (int i = 0; i < queryRanges.length; i++) { final OperationsParams queryParams = new OperationsParams(params); OperationsParams.setShape(queryParams, "rect", queryRanges[i]); if (OperationsParams.isLocal(new JobConf(queryParams), inPath)) { // Run in local mode final Rectangle queryRange = queryRanges[i]; final Shape shape = queryParams.getShape("shape"); final Path output = outPath == null ? null : (queryRanges.length == 1 ? outPath : new Path(outPath, String.format("%05d", i))); Thread thread = new Thread() { @Override public void run() { FSDataOutputStream outFile = null; final byte[] newLine = System.getProperty("line.separator", "\n").getBytes(); try { ResultCollector<Shape> collector = null; if (output != null) { FileSystem outFS = output.getFileSystem(queryParams); final FSDataOutputStream foutFile = outFile = outFS.create(output); collector = new ResultCollector<Shape>() { final Text tempText = new Text2(); @Override public synchronized void collect(Shape r) { try { tempText.clear(); r.toText(tempText); foutFile.write(tempText.getBytes(), 0, tempText.getLength()); foutFile.write(newLine); } catch (IOException e) { e.printStackTrace(); } } }; } else { outFile = null; } long resultCount = rangeQueryLocal(inPath, queryRange, shape, queryParams, collector); resultsCounts.add(resultCount); } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } finally { try { if (outFile != null) outFile.close(); } catch (IOException e) { e.printStackTrace(); } } } }; thread.start(); threads.add(thread); } else { // Run in MapReduce mode queryParams.setBoolean("background", true); Job job = rangeQueryMapReduce(inPath, outPath, queryParams); jobs.add(job); } } while (!jobs.isEmpty()) { Job firstJob = jobs.firstElement(); firstJob.waitForCompletion(false); if (!firstJob.isSuccessful()) { System.err.println("Error running job " + firstJob); System.err.println("Killing all remaining jobs"); for (int j = 1; j < jobs.size(); j++) jobs.get(j).killJob(); System.exit(1); } Counters counters = firstJob.getCounters(); Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS); resultsCounts.add(outputRecordCounter.getValue()); jobs.remove(0); } while (!threads.isEmpty()) { try { Thread thread = threads.firstElement(); thread.join(); threads.remove(0); } catch (InterruptedException e) { e.printStackTrace(); } } long t2 = System.currentTimeMillis(); System.out.println("Time for " + queryRanges.length + " jobs is " + (t2 - t1) + " millis"); System.out.println("Results counts: " + resultsCounts); }
From source file:edu.umn.cs.spatialHadoop.operations.UltimateUnion.java
License:Open Source License
private static Job ultimateUnionMapReduce(Path input, Path output, OperationsParams params) throws IOException, InterruptedException, ClassNotFoundException { Job job = new Job(params, "UltimateUnion"); job.setJarByClass(UltimateUnion.class); // Set map and reduce job.setMapperClass(UltimateUnionMap.class); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(OGCJTSShape.class); job.setNumReduceTasks(0);//from ww w . j a v a 2 s . c om // Set input and output job.setInputFormatClass(SpatialInputFormat3.class); SpatialInputFormat3.addInputPath(job, input); job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, output); // Submit the job if (!params.getBoolean("background", false)) { job.waitForCompletion(false); if (!job.isSuccessful()) throw new RuntimeException("Job failed!"); } else { job.submit(); } return job; }
From source file:edu.umn.cs.spatialHadoop.operations.Union.java
License:Open Source License
private static Job unionMapReduce(Path input, Path output, OperationsParams params) throws IOException, InterruptedException, ClassNotFoundException { Job job = new Job(params, "BasicUnion"); job.setJarByClass(Union.class); // Set map and reduce job.setMapperClass(UnionMap.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(OGCJTSShape.class); job.setReducerClass(UnionReduce.class); SpatialSite.splitReduceSpace(job, new Path[] { input }, params); // Set input and output job.setInputFormatClass(SpatialInputFormat3.class); SpatialInputFormat3.addInputPath(job, input); job.setOutputFormatClass(UnionOutputFormat.class); TextOutputFormat.setOutputPath(job, output); // Submit the job if (!params.getBoolean("background", false)) { job.waitForCompletion(false);//from ww w. j a va 2 s . co m if (!job.isSuccessful()) throw new RuntimeException("Job failed!"); } else { job.submit(); } return job; }
From source file:edu.umn.cs.sthadoop.operations.STRangeQuery.java
License:Open Source License
public static void rangeQueryOperation(OperationsParams parameters) throws Exception { final OperationsParams params = parameters; final Path[] paths = params.getPaths(); if (paths.length <= 1 && !params.checkInput()) { printUsage();//from w w w . j ava 2 s. c om System.exit(1); } if (paths.length >= 2 && !params.checkInputOutput()) { printUsage(); System.exit(1); } if (params.get("rect") == null) { String x1 = "-" + Double.toString(Double.MAX_VALUE); String y1 = "-" + Double.toString(Double.MAX_VALUE); String x2 = Double.toString(Double.MAX_VALUE); String y2 = Double.toString(Double.MAX_VALUE); System.out.println(x1 + "," + y1 + "," + x2 + "," + y2); params.set("rect", x1 + "," + y1 + "," + x2 + "," + y2); // System.err.println("You must provide a query range"); // printUsage(); // System.exit(1); } if (params.get("interval") == null) { System.err.println("Temporal range missing"); printUsage(); System.exit(1); } TextSerializable inObj = params.getShape("shape"); if (!(inObj instanceof STPoint) && !(inObj instanceof STRectangle)) { LOG.error("Shape is not instance of STPoint or STRectangle"); printUsage(); System.exit(1); } // Get spatio-temporal slices. List<Path> STPaths = getIndexedSlices(params); final Path outPath = params.getOutputPath(); final Rectangle[] queryRanges = params.getShapes("rect", new Rectangle()); // All running jobs final Vector<Long> resultsCounts = new Vector<Long>(); Vector<Job> jobs = new Vector<Job>(); Vector<Thread> threads = new Vector<Thread>(); long t1 = System.currentTimeMillis(); for (Path stPath : STPaths) { final Path inPath = stPath; for (int i = 0; i < queryRanges.length; i++) { final OperationsParams queryParams = new OperationsParams(params); OperationsParams.setShape(queryParams, "rect", queryRanges[i]); if (OperationsParams.isLocal(new JobConf(queryParams), inPath)) { // Run in local mode final Rectangle queryRange = queryRanges[i]; final Shape shape = queryParams.getShape("shape"); final Path output = outPath == null ? null : (queryRanges.length == 1 ? outPath : new Path(outPath, String.format("%05d", i))); Thread thread = new Thread() { @Override public void run() { FSDataOutputStream outFile = null; final byte[] newLine = System.getProperty("line.separator", "\n").getBytes(); try { ResultCollector<Shape> collector = null; if (output != null) { FileSystem outFS = output.getFileSystem(queryParams); final FSDataOutputStream foutFile = outFile = outFS.create(output); collector = new ResultCollector<Shape>() { final Text tempText = new Text2(); @Override public synchronized void collect(Shape r) { try { tempText.clear(); r.toText(tempText); foutFile.write(tempText.getBytes(), 0, tempText.getLength()); foutFile.write(newLine); } catch (IOException e) { e.printStackTrace(); } } }; } else { outFile = null; } long resultCount = rangeQueryLocal(inPath, queryRange, shape, queryParams, collector); resultsCounts.add(resultCount); } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } finally { try { if (outFile != null) outFile.close(); } catch (IOException e) { e.printStackTrace(); } } } }; thread.start(); threads.add(thread); } else { // Run in MapReduce mode Path outTempPath = outPath == null ? null : new Path(outPath, String.format("%05d", i) + "-" + inPath.getName()); queryParams.setBoolean("background", true); Job job = rangeQueryMapReduce(inPath, outTempPath, queryParams); jobs.add(job); } } } while (!jobs.isEmpty()) { Job firstJob = jobs.firstElement(); firstJob.waitForCompletion(false); if (!firstJob.isSuccessful()) { System.err.println("Error running job " + firstJob); System.err.println("Killing all remaining jobs"); for (int j = 1; j < jobs.size(); j++) jobs.get(j).killJob(); System.exit(1); } Counters counters = firstJob.getCounters(); Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS); resultsCounts.add(outputRecordCounter.getValue()); jobs.remove(0); } while (!threads.isEmpty()) { try { Thread thread = threads.firstElement(); thread.join(); threads.remove(0); } catch (InterruptedException e) { e.printStackTrace(); } } long t2 = System.currentTimeMillis(); System.out.println("QueryPlan:"); for (Path stPath : STPaths) { System.out.println(stPath.getName()); } System.out.println("Time for " + queryRanges.length + " jobs is " + (t2 - t1) + " millis"); System.out.println("Results counts: " + resultsCounts); }
From source file:edu.umn.cs.sthadoop.operations.STRangeQuery.java
License:Open Source License
public static void main(String[] args) throws Exception { // args = new String[7]; // args[0] = "/home/louai/nyc-taxi/yellowIndex"; // args[1] = "/home/louai/nyc-taxi/resultSTRQ"; // args[2] = "shape:edu.umn.cs.sthadoop.core.STPoint"; // args[3] = "rect:-74.98451232910156,35.04014587402344,-73.97936248779295,41.49399566650391"; // args[4] = "interval:2015-01-01,2015-01-02"; // args[5] = "-overwrite"; // args[6] = "-no-local"; // Query for test with output // args = new String[6]; // args[0] = "/home/louai/nyc-taxi/yellowIndex"; // args[1] = "shape:edu.umn.cs.sthadoop.core.STPoint"; // args[2] = "rect:-74.98451232910156,35.04014587402344,-73.97936248779295,41.49399566650391"; // args[3] = "interval:2015-01-01,2015-01-03"; // args[4] = "-overwrite"; // args[5 ] = "-no-local"; final OperationsParams params = new OperationsParams(new GenericOptionsParser(args)); final Path[] paths = params.getPaths(); if (paths.length <= 1 && !params.checkInput()) { printUsage();/*w w w .ja va2 s . c o m*/ System.exit(1); } if (paths.length >= 2 && !params.checkInputOutput()) { printUsage(); System.exit(1); } if (params.get("rect") == null) { String x1 = "-" + Double.toString(Double.MAX_VALUE); String y1 = "-" + Double.toString(Double.MAX_VALUE); String x2 = Double.toString(Double.MAX_VALUE); String y2 = Double.toString(Double.MAX_VALUE); System.out.println(x1 + "," + y1 + "," + x2 + "," + y2); params.set("rect", x1 + "," + y1 + "," + x2 + "," + y2); // System.err.println("You must provide a query range"); // printUsage(); // System.exit(1); } if (params.get("interval") == null) { System.err.println("Temporal range missing"); printUsage(); System.exit(1); } TextSerializable inObj = params.getShape("shape"); if (!(inObj instanceof STPoint) && !(inObj instanceof STRectangle)) { LOG.error("Shape is not instance of STPoint or STRectangle"); printUsage(); System.exit(1); } // Get spatio-temporal slices. List<Path> STPaths = getIndexedSlices(params); final Path outPath = params.getOutputPath(); final Rectangle[] queryRanges = params.getShapes("rect", new Rectangle()); // All running jobs final Vector<Long> resultsCounts = new Vector<Long>(); Vector<Job> jobs = new Vector<Job>(); Vector<Thread> threads = new Vector<Thread>(); long t1 = System.currentTimeMillis(); for (Path stPath : STPaths) { final Path inPath = stPath; for (int i = 0; i < queryRanges.length; i++) { final OperationsParams queryParams = new OperationsParams(params); OperationsParams.setShape(queryParams, "rect", queryRanges[i]); if (OperationsParams.isLocal(new JobConf(queryParams), inPath)) { // Run in local mode final Rectangle queryRange = queryRanges[i]; final Shape shape = queryParams.getShape("shape"); final Path output = outPath == null ? null : (queryRanges.length == 1 ? outPath : new Path(outPath, String.format("%05d", i))); Thread thread = new Thread() { @Override public void run() { FSDataOutputStream outFile = null; final byte[] newLine = System.getProperty("line.separator", "\n").getBytes(); try { ResultCollector<Shape> collector = null; if (output != null) { FileSystem outFS = output.getFileSystem(queryParams); final FSDataOutputStream foutFile = outFile = outFS.create(output); collector = new ResultCollector<Shape>() { final Text tempText = new Text2(); @Override public synchronized void collect(Shape r) { try { tempText.clear(); r.toText(tempText); foutFile.write(tempText.getBytes(), 0, tempText.getLength()); foutFile.write(newLine); } catch (IOException e) { e.printStackTrace(); } } }; } else { outFile = null; } long resultCount = rangeQueryLocal(inPath, queryRange, shape, queryParams, collector); resultsCounts.add(resultCount); } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } finally { try { if (outFile != null) outFile.close(); } catch (IOException e) { e.printStackTrace(); } } } }; thread.start(); threads.add(thread); } else { // Run in MapReduce mode Path outTempPath = outPath == null ? null : new Path(outPath, String.format("%05d", i) + "-" + inPath.getName()); queryParams.setBoolean("background", true); Job job = rangeQueryMapReduce(inPath, outTempPath, queryParams); jobs.add(job); } } } while (!jobs.isEmpty()) { Job firstJob = jobs.firstElement(); firstJob.waitForCompletion(false); if (!firstJob.isSuccessful()) { System.err.println("Error running job " + firstJob); System.err.println("Killing all remaining jobs"); for (int j = 1; j < jobs.size(); j++) jobs.get(j).killJob(); System.exit(1); } Counters counters = firstJob.getCounters(); Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS); resultsCounts.add(outputRecordCounter.getValue()); jobs.remove(0); } while (!threads.isEmpty()) { try { Thread thread = threads.firstElement(); thread.join(); threads.remove(0); } catch (InterruptedException e) { e.printStackTrace(); } } long t2 = System.currentTimeMillis(); System.out.println("QueryPlan:"); for (Path stPath : STPaths) { System.out.println(stPath.getName()); } System.out.println("Time for " + queryRanges.length + " jobs is " + (t2 - t1) + " millis"); System.out.println("Results counts: " + resultsCounts); }
From source file:edu.umn.cs.sthadoop.trajectory.TrajectoryOverlap.java
License:Open Source License
public static void main(String[] args) throws Exception { // args = new String[8]; // args[0] = "/export/scratch/mntgData/geolifeGPS/geolife_Trajectories_1.3/HDFS/index_geolife"; // args[1] = "/export/scratch/mntgData/geolifeGPS/geolife_Trajectories_1.3/HDFS/knn-dis-result"; // args[2] = "shape:edu.umn.cs.sthadoop.trajectory.GeolifeTrajectory"; // args[3] = "interval:2008-05-01,2008-05-30"; // args[4] = "time:month"; // args[5] = "traj:39.9119983,116.606835;39.9119783,116.6065483;39.9119599,116.6062649;39.9119416,116.6059899;39.9119233,116.6057282;39.9118999,116.6054783;39.9118849,116.6052366;39.9118666,116.6050099;39.91185,116.604775;39.9118299,116.604525;39.9118049,116.6042649;39.91177,116.6040166;39.9117516,116.6037583;39.9117349,116.6035066;39.9117199,116.6032666;39.9117083,116.6030232;39.9117,116.6027566;39.91128,116.5969383;39.9112583,116.5966766;39.9112383,116.5964232;39.9112149,116.5961699;39.9111933,116.5959249;39.9111716,116.5956883"; // args[6] = "-overwrite"; // args[7] = "-local";//"-no-local"; final OperationsParams params = new OperationsParams(new GenericOptionsParser(args)); final Path[] paths = params.getPaths(); if (paths.length <= 1 && !params.checkInput()) { printUsage();//from w ww . ja v a 2 s. c o m System.exit(1); } if (paths.length >= 2 && !params.checkInputOutput()) { printUsage(); System.exit(1); } if (params.get("traj") == null) { System.err.println("Trajectory query is missing"); printUsage(); System.exit(1); } // Invoke method to compute the trajectory MBR. String rectangle = getTrajectoryRectangle(params.get("traj")); params.set("rect", rectangle); if (params.get("rect") == null) { System.err.println("You must provide a Trajectory Query"); printUsage(); System.exit(1); } if (params.get("interval") == null) { System.err.println("Temporal range missing"); printUsage(); System.exit(1); } TextSerializable inObj = params.getShape("shape"); if (!(inObj instanceof STPoint)) { LOG.error("Shape is not instance of STPoint"); printUsage(); System.exit(1); } // Get spatio-temporal slices. List<Path> STPaths = getIndexedSlices(params); final Path outPath = params.getOutputPath(); final Rectangle[] queryRanges = params.getShapes("rect", new Rectangle()); // All running jobs final Vector<Long> resultsCounts = new Vector<Long>(); Vector<Job> jobs = new Vector<Job>(); Vector<Thread> threads = new Vector<Thread>(); long t1 = System.currentTimeMillis(); for (Path stPath : STPaths) { final Path inPath = stPath; for (int i = 0; i < queryRanges.length; i++) { final OperationsParams queryParams = new OperationsParams(params); OperationsParams.setShape(queryParams, "rect", queryRanges[i]); if (OperationsParams.isLocal(new JobConf(queryParams), inPath)) { // Run in local mode final Rectangle queryRange = queryRanges[i]; final Shape shape = queryParams.getShape("shape"); final Path output = outPath == null ? null : (queryRanges.length == 1 ? outPath : new Path(outPath, String.format("%05d", i))); Thread thread = new Thread() { @Override public void run() { FSDataOutputStream outFile = null; final byte[] newLine = System.getProperty("line.separator", "\n").getBytes(); try { ResultCollector<Shape> collector = null; if (output != null) { FileSystem outFS = output.getFileSystem(queryParams); final FSDataOutputStream foutFile = outFile = outFS.create(output); collector = new ResultCollector<Shape>() { final Text tempText = new Text2(); @Override public synchronized void collect(Shape r) { try { tempText.clear(); r.toText(tempText); foutFile.write(tempText.getBytes(), 0, tempText.getLength()); foutFile.write(newLine); } catch (IOException e) { e.printStackTrace(); } } }; } else { outFile = null; } long resultCount = rangeQueryLocal(inPath, queryRange, shape, queryParams, collector); resultsCounts.add(resultCount); } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } finally { try { if (outFile != null) outFile.close(); } catch (IOException e) { e.printStackTrace(); } } } }; thread.start(); threads.add(thread); } else { // Run in MapReduce mode Path outTempPath = outPath == null ? null : new Path(outPath, String.format("%05d", i) + "-" + inPath.getName()); queryParams.setBoolean("background", true); Job job = rangeQueryMapReduce(inPath, outTempPath, queryParams); jobs.add(job); } } } while (!jobs.isEmpty()) { Job firstJob = jobs.firstElement(); firstJob.waitForCompletion(false); if (!firstJob.isSuccessful()) { System.err.println("Error running job " + firstJob); System.err.println("Killing all remaining jobs"); for (int j = 1; j < jobs.size(); j++) jobs.get(j).killJob(); System.exit(1); } Counters counters = firstJob.getCounters(); Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS); resultsCounts.add(outputRecordCounter.getValue()); jobs.remove(0); } while (!threads.isEmpty()) { try { Thread thread = threads.firstElement(); thread.join(); threads.remove(0); } catch (InterruptedException e) { e.printStackTrace(); } } long t2 = System.currentTimeMillis(); System.out.println("QueryPlan:"); for (Path stPath : STPaths) { System.out.println(stPath.getName()); } System.out.println("Time for " + queryRanges.length + " jobs is " + (t2 - t1) + " millis"); System.out.println("Results counts: " + resultsCounts); }
From source file:fr.ens.biologie.genomique.eoulsan.modules.mgmt.upload.DataFileDistCp.java
License:LGPL
public void copy(final Map<DataFile, DataFile> entries) throws IOException { if (entries == null || entries.size() == 0) { return;// www. ja va 2 s . c o m } final Configuration conf = this.conf; final Path tmpInputDir = PathUtils.createTempPath(this.jobPath, "distcp-in-", "", conf); final Path tmpOutputDir = PathUtils.createTempPath(this.jobPath, "distcp-out-", "", conf); // // Create entries for distcp // final FileSystem fs = tmpInputDir.getFileSystem(conf); fs.mkdirs(tmpInputDir); // Sort files by size final List<DataFile> inFiles = Lists.newArrayList(entries.keySet()); sortInFilesByDescSize(inFiles); // Set the format for the id of the copy task final NumberFormat nf = NumberFormat.getInstance(); nf.setMinimumIntegerDigits(Integer.toString(inFiles.size()).length()); nf.setGroupingUsed(false); int count = 0; for (DataFile inFile : inFiles) { count++; final DataFile outFile = entries.get(inFile); final Path f = new Path(tmpInputDir, "distcp-" + nf.format(count) + ".cp"); getLogger().info("Task copy " + inFile + " in " + f.toString()); BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fs.create(f), CHARSET)); bw.write(inFile.getSource() + "\t" + outFile.getSource() + "\n"); bw.close(); } final Job job = createJobConf(conf, tmpInputDir, tmpOutputDir); try { job.waitForCompletion(false); } catch (InterruptedException | ClassNotFoundException e) { throw new EoulsanRuntimeException("Error while distcp: " + e.getMessage(), e); } // Remove tmp directory PathUtils.fullyDelete(tmpInputDir, conf); PathUtils.fullyDelete(tmpOutputDir, conf); if (!job.isSuccessful()) { throw new IOException("Unable to copy files using DataFileDistCp."); } }
From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.MapReduceUtils.java
License:LGPL
/** * Wait the completion of a job./*from ww w . jav a 2 s. c om*/ * @param job the job to submit * @param jobDescription the description of the job * @param waitTimeInMillis waiting time between 2 checks of the completion of * jobs * @param status step status * @param counterGroup group of the counter to log * @throws EoulsanException if the job fail or if an exception occurs while * submitting or waiting the end of the job */ public static void submitAndWaitForJob(final Job job, final String jobDescription, final int waitTimeInMillis, final TaskStatus status, final String counterGroup) throws EoulsanException { if (job == null) { throw new NullPointerException("The job is null"); } if (jobDescription == null) { throw new NullPointerException("The jobDescription is null"); } try { // Set the description of the context status.setDescription(job.getJobName()); // Submit the job job.submit(); // Add the Hadoop job to the list of job to kill if workflow fails HadoopJobEmergencyStopTask.addHadoopJobEmergencyStopTask(job); // Job the completion of the job (non verbose mode) job.waitForCompletion(false); // Remove the Hadoop job to the list of job to kill if workflow fails HadoopJobEmergencyStopTask.removeHadoopJobEmergencyStopTask(job); // Check if the job has been successfully executed if (!job.isSuccessful()) { status.setProgressMessage("FAILED"); throw new EoulsanException("Fail of the Hadoop job: " + job.getJobFile()); } // Set the counters status.setCounters(new HadoopReporter(job.getCounters()), counterGroup); } catch (ClassNotFoundException | InterruptedException | IOException e) { throw new EoulsanException(e); } }
From source file:gaffer.accumulo.bulkimport.BulkImportDriver.java
License:Apache License
public int run(String[] args) throws Exception { // Usage/* w w w . jav a 2s . c o m*/ if (args.length < 3) { System.err.println("Usage: " + BulkImportDriver.class.getName() + " <inputpath> <output_path> <accumulo_properties_file>"); return 1; } // Gets paths Path inputPath = new Path(args[0]); Path outputPath = new Path(args[1] + "/data_for_accumulo/"); Path splitsFilePath = new Path(args[1] + "/splits_file"); String accumuloPropertiesFile = args[2]; // Hadoop configuration Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); // Connect to Accumulo AccumuloConfig accConf = new AccumuloConfig(accumuloPropertiesFile); Connector conn = Accumulo.connect(accConf); String tableName = accConf.getTable(); // Check if the table exists if (!conn.tableOperations().exists(tableName)) { System.err.println("Table " + tableName + " does not exist - create the table before running this"); return 1; } // Get the current splits from the table. // (This assumes that we have already created the table using <code>InitialiseTable</code>.) Collection<Text> splits = conn.tableOperations().getSplits(tableName); int numSplits = splits.size(); System.out.println("Number of splits in table is " + numSplits); // Write current splits to a file (this is needed so that the following MapReduce // job can move them to the DistributedCache). IngestUtils.createSplitsFile(conn, tableName, fs, splitsFilePath); // Run MapReduce to output data suitable for bulk import to Accumulo // Conf and job conf.setBoolean("mapred.compress.map.output", true); conf.setClass("mapred.map.output.compression.codec", SnappyCodec.class, CompressionCodec.class); Job job = new Job(conf); job.setJarByClass(getClass()); job.setJobName("Convert data to Accumulo format: input = " + inputPath + ", output = " + outputPath); // Input job.setInputFormatClass(SequenceFileInputFormat.class); SequenceFileInputFormat.addInputPath(job, inputPath); // Mapper job.setMapperClass(BulkImportMapper.class); job.setMapOutputKeyClass(Key.class); job.setMapOutputValueClass(Value.class); // Partitioner job.setPartitionerClass(KeyRangePartitioner.class); KeyRangePartitioner.setSplitFile(job, splitsFilePath.toString()); // Reducer job.setReducerClass(BulkImportReducer.class); job.setOutputKeyClass(Key.class); job.setOutputValueClass(Value.class); job.setNumReduceTasks(numSplits + 1); // Output job.setOutputFormatClass(AccumuloFileOutputFormat.class); AccumuloFileOutputFormat.setOutputPath(job, outputPath); // Run job job.waitForCompletion(true); // Successful? if (!job.isSuccessful()) { System.err.println("Error running job"); return 1; } return 0; }