List of usage examples for the Path constructor of org.apache.hadoop.fs.Path
public Path(URI aUri)
From source file:Assignment4_P4_MemoryConscious.MovieRatingMemConsciousDriver.java
/** * @param args the command line arguments *///from w ww .j ava 2s . co m public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "Movie Rating Mem Conscious Standard Deviation"); job.setJarByClass(MovieRatingMemConsciousDriver.class); job.setMapperClass(MovieRatingMemConscious_Mapper.class); job.setCombinerClass(MovingRatingMemConscious_Combiner.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(SortedMapWritable.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); job.setReducerClass(MovieRatingMemConscious_Reducer.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:Assignment5_P2_DistinctIPAddress.DistinctIPAddressDriver.java
/**
 * Configures the "IP Address Count" job, runs it and terminates the JVM with the
 * job's outcome.
 *
 * @param args the command line arguments; {@code args[0]} is used as the input path
 *             and {@code args[1]} as the output path
 * @throws IOException            declared by the Hadoop calls made here
 * @throws InterruptedException   declared by {@code waitForCompletion}
 * @throws ClassNotFoundException declared by {@code waitForCompletion}
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    final Configuration configuration = new Configuration();
    final Job distinctIpJob = Job.getInstance(configuration, "IP Address Count");
    distinctIpJob.setJarByClass(DistinctIPAddressDriver.class);

    distinctIpJob.setMapperClass(DistinctIPAddress_Mapper.class);
    // The reducer class is registered as the combiner as well.
    distinctIpJob.setCombinerClass(DistinctIPAddress_Reducer.class);
    distinctIpJob.setReducerClass(DistinctIPAddress_Reducer.class);

    // Output records are (Text, NullWritable) pairs.
    distinctIpJob.setOutputKeyClass(Text.class);
    distinctIpJob.setOutputValueClass(NullWritable.class);

    final Path inputPath = new Path(args[0]);
    FileInputFormat.addInputPath(distinctIpJob, inputPath);
    final Path outputPath = new Path(args[1]);
    FileOutputFormat.setOutputPath(distinctIpJob, outputPath);

    // Exit status 0 when the job reports success, 1 otherwise.
    final boolean succeeded = distinctIpJob.waitForCompletion(true);
    if (succeeded) {
        System.exit(0);
    } else {
        System.exit(1);
    }
}
From source file:Assignment5_P3_PartitionPattern.Partition_IPAddress_By_MonthDriver.java
/** * @param args the command line arguments *//*from www .java 2 s . c o m*/ public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "IP Address By Date"); job.setJarByClass(Partition_IPAddress_By_MonthDriver.class); job.setMapperClass(Partition_IPAddress_By_Month_Mapper.class); //job.setCombinerClass(Partition_IPAddress_By_Month_Reducer.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(Text.class); // partitioner class inclusion job.setPartitionerClass(Partition_IPAddress_By_Month_Partitioner.class); // set num of reduce tasks based on partition we need (here we need 12 cos total no.of months in a year) job.setNumReduceTasks(12); job.setReducerClass(Partition_IPAddress_By_Month_Reducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(NullWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:Assignment5_P4_BinningPattern.Binning_IPAddress_By_DayDriver.java
/** * @param args the command line arguments */// ww w . ja v a 2s. co m public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "IP Address By Hour"); job.setJarByClass(Binning_IPAddress_By_DayDriver.class); job.setMapperClass(Binning_IPAddress_By_Day_Mapper.class); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(Text.class); MultipleOutputs.addNamedOutput(job, "textualBins", TextOutputFormat.class, NullWritable.class, Text.class); MultipleOutputs.addNamedOutput(job, "massaBins", TextOutputFormat.class, NullWritable.class, Text.class); MultipleOutputs.setCountersEnabled(job, true); // set num of reduce tasks to 0 job.setNumReduceTasks(0); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:Assignment5_P6_StructureToHierarchyPattern.Structure_HierarchyDriver.java
/** * @param args the command line arguments *//*from w ww . ja v a 2 s.co m*/ public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "Structure to Hierarchy"); job.setJarByClass(Structure_HierarchyDriver.class); // pass file 1 to this mapper in Text format MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, Structure_Hierarchy_Movie_Mapper.class); // pass file 2 to this mapper in Text format MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, Structure_Hierarchy_Tag_Mapper.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setReducerClass(Structure_Hierarchy_Reducer.class); FileOutputFormat.setOutputPath(job, new Path(args[2])); System.exit(job.waitForCompletion(true) ? 0 : 2); }
From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java
License:Apache License
/**
 * Sets the temporary output base path from a path string.
 *
 * <p>The string is turned into a {@link Path} and qualified against the file system
 * obtained from {@code conf}. If obtaining the file system fails with an
 * {@link IOException}, {@code outputTmpBasePath} keeps its previous value and the
 * failure is logged; the exception is not propagated.
 *
 * @param outPathString path string to use as the temporary output base path
 */
public void setOutputTempPathString(String outPathString) {
    try {
        outputTmpBasePath = FileSystem.get(conf).makeQualified(new Path(outPathString));
    } catch (IOException ioe) {
        // Pass the values as arguments so the {} placeholders are actually filled in
        // (the original concatenated them after the unformatted template), and hand
        // over the exception so the cause is not lost.
        // NOTE(review): assumes `log` is an SLF4J Logger, as the {} placeholders
        // suggest - confirm against the field declaration.
        log.error("Unable to set outputBasePath to {}, leaving as {}", outPathString, outputTmpBasePath, ioe);
    }
}
From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.MatrixMultiplicationBenchmark.java
License:Apache License
private void verify() throws Exception { // Create NOT transposed matrix A for verification check DistributedRowMatrix.createRandomDistributedRowMatrix(m_conf, n, n, new Random(42L), m_matrixAPath, false); DistributedRowMatrix matrixA = new DistributedRowMatrix(m_matrixAPath, CONF_INPUT_DIR, n, n); matrixA.setConf(m_conf);//from ww w . ja v a 2 s . c o m Path matrixDPath = new Path(CONF_INPUT_DIR + "/MatrixD.seq"); DistributedRowMatrix matrixD = matrixA.multiplyJava(m_matrixB, matrixDPath); // Load MapReduce result matrix C DistributedRowMatrix matrixC = new DistributedRowMatrix(m_matrixCPath, CONF_OUTPUT_DIR, n, n); matrixC.setConf(m_conf); if (matrixC.verify(matrixD)) { System.out.println("Verify PASSED!"); } else { System.out.println("Verify FAILED!"); } // matrixC.printDistributedRowMatrix(); // matrixD.printDistributedRowMatrix(); }
From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java
License:Apache License
/********************************* CPU *********************************/ @Override/* w w w. j av a 2 s. c o m*/ public void setup( BSPPeer<PipesVectorWritable, NullWritable, IntWritable, PipesVectorWritable, CenterMessage> peer) throws IOException { this.m_conf = peer.getConfiguration(); this.m_timeMeasurement = m_conf.getBoolean(CONF_TIME, false); this.m_isDebuggingEnabled = m_conf.getBoolean(CONF_DEBUG, false); this.m_maxIterations = m_conf.getInt(CONF_MAX_ITERATIONS, -1); // Init logging if (m_isDebuggingEnabled) { try { FileSystem fs = FileSystem.get(m_conf); m_logger = fs.create(new Path(FileOutputFormat.getOutputPath(new BSPJob((HamaConfiguration) m_conf)) + "/BSP_" + peer.getTaskId() + ".log")); } catch (IOException e) { e.printStackTrace(); } } long startTime = 0; if (m_timeMeasurement) { startTime = System.currentTimeMillis(); } // Init center vectors Path centroids = new Path(m_conf.get(CONF_CENTER_IN_PATH)); FileSystem fs = FileSystem.get(m_conf); final ArrayList<DoubleVector> centers = new ArrayList<DoubleVector>(); SequenceFile.Reader reader = null; try { reader = new SequenceFile.Reader(fs, centroids, m_conf); PipesVectorWritable key = new PipesVectorWritable(); NullWritable value = NullWritable.get(); while (reader.next(key, value)) { DoubleVector center = key.getVector(); centers.add(center); } } catch (IOException e) { throw new RuntimeException(e); } finally { if (reader != null) { reader.close(); } } Preconditions.checkArgument(centers.size() > 0, "Centers file must contain at least a single center!"); this.m_centers_cpu = centers.toArray(new DoubleVector[centers.size()]); long stopTime = 0; if (m_timeMeasurement) { stopTime = System.currentTimeMillis(); LOG.info("# setupTime: " + ((stopTime - startTime) / 1000.0) + " sec"); if (m_isDebuggingEnabled) { m_logger.writeChars("PiEstimatorHybrid,setupTime: " + ((stopTime - startTime) / 1000.0) + " sec\n"); } } }
From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java
License:Apache License
private void recalculateAssignmentsAndWrite( BSPPeer<PipesVectorWritable, NullWritable, IntWritable, PipesVectorWritable, CenterMessage> peer) throws IOException { IntWritable keyWrite = new IntWritable(); for (DoubleVector v : m_cache) { final int lowestDistantCenter = getNearestCenter(v); keyWrite.set(lowestDistantCenter); peer.write(keyWrite, new PipesVectorWritable(v)); }//w ww.j a v a2 s .co m // just on the first task write the centers to filesystem to prevent // collisions if (peer.getPeerName().equals(peer.getPeerName(0))) { String pathString = m_conf.get(CONF_CENTER_OUT_PATH); if (pathString != null) { final SequenceFile.Writer dataWriter = SequenceFile.createWriter(FileSystem.get(m_conf), m_conf, new Path(pathString), PipesVectorWritable.class, NullWritable.class, CompressionType.NONE); final NullWritable value = NullWritable.get(); for (DoubleVector center : m_centers_cpu) { dataWriter.append(new PipesVectorWritable(center), value); } dataWriter.close(); } } }
From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java
License:Apache License
/********************************* GPU *********************************/ @Override/*from w ww. ja v a 2s . c o m*/ public void setupGpu( BSPPeer<PipesVectorWritable, NullWritable, IntWritable, PipesVectorWritable, CenterMessage> peer) throws IOException, SyncException, InterruptedException { this.m_conf = peer.getConfiguration(); this.m_timeMeasurement = m_conf.getBoolean(CONF_TIME, false); this.m_isDebuggingEnabled = m_conf.getBoolean(CONF_DEBUG, false); this.m_maxIterations = m_conf.getInt(CONF_MAX_ITERATIONS, -1); this.m_blockSize = Integer.parseInt(this.m_conf.get(CONF_BLOCKSIZE)); this.m_gridSize = Integer.parseInt(this.m_conf.get(CONF_GRIDSIZE)); // Init logging if (m_isDebuggingEnabled) { try { FileSystem fs = FileSystem.get(m_conf); m_logger = fs.create(new Path(FileOutputFormat.getOutputPath(new BSPJob((HamaConfiguration) m_conf)) + "/BSP_" + peer.getTaskId() + ".log")); } catch (IOException e) { e.printStackTrace(); } } long startTime = 0; if (m_timeMeasurement) { startTime = System.currentTimeMillis(); } // Init center vectors Path centroids = new Path(m_conf.get(CONF_CENTER_IN_PATH)); FileSystem fs = FileSystem.get(m_conf); final List<double[]> centers = new ArrayList<double[]>(); SequenceFile.Reader reader = null; try { reader = new SequenceFile.Reader(fs, centroids, m_conf); PipesVectorWritable key = new PipesVectorWritable(); NullWritable value = NullWritable.get(); while (reader.next(key, value)) { centers.add(key.getVector().toArray()); } } catch (IOException e) { throw new RuntimeException(e); } finally { if (reader != null) { reader.close(); } } Preconditions.checkArgument(centers.size() > 0, "Centers file must contain at least a single center!"); // build centers_gpu double[][] this.m_centers_gpu = new double[centers.size()][centers.get(0).length]; for (int i = 0; i < centers.size(); i++) { double[] vector = centers.get(i); for (int j = 0; j < vector.length; j++) { this.m_centers_gpu[i][j] = vector[j]; } } long stopTime = 0; if 
(m_timeMeasurement) { stopTime = System.currentTimeMillis(); LOG.info("# setupGpuTime: " + ((stopTime - startTime) / 1000.0) + " sec"); if (m_isDebuggingEnabled) { m_logger.writeChars( "PiEstimatorHybrid,setupGpuTime: " + ((stopTime - startTime) / 1000.0) + " sec\n"); } } }