List of usage examples for org.apache.hadoop.fs.Path.getName()
public String getName()
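For orientation before the examples: getName() returns only the final component of a Path, with parent directories and any scheme/authority stripped. A minimal sketch (the path below is hypothetical):

import org.apache.hadoop.fs.Path;

public class PathGetNameExample {
    public static void main(String[] args) {
        Path p = new Path("hdfs://namenode:8020/user/alice/part-00000");
        // getName() returns only the last path component.
        System.out.println(p.getName());             // part-00000
        // Parent components are reachable via getParent().
        System.out.println(p.getParent().getName()); // alice
        // The root path has no final component.
        System.out.println(new Path("/").getName()); // empty string
    }
}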
From source file: com.twitter.algebra.nmf.ReindexerJob.java
License: Apache License

public Job run(Configuration conf, Path matrixInputPath, Path matrixOutputPath)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);
    conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", "\t");

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(ReindexerJob.class);
    job.setJobName(ReindexerJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);

    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setReducerClass(MyReducer.class);
    // This makes the reindexing very slow, but is necessary to have total order.
    job.setNumReduceTasks(1);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(IntWritable.class);

    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res)
        throw new IOException("Job failed!");
    return job;
}
From source file: com.twitter.algebra.nmf.RowSquareSumJob.java
License: Apache License

public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath, int aRows)
        throws IOException, InterruptedException, ClassNotFoundException {
    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(RowSquareSumJob.class);
    job.setJobName(RowSquareSumJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);

    int numReducers = 1;
    job.setNumReduceTasks(numReducers);

    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.setMapperClass(SumMapper.class);
    job.setCombinerClass(MergeVectorsReducer.class);
    job.setReducerClass(MergeVectorsReducer.class);
    // RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, aRows);

    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res)
        throw new IOException("Job failed!");
}
From source file: com.twitter.algebra.nmf.SampleColsJob.java
License: Apache License

public void run(Configuration conf, Path matrixInputPath, int cols, Path matrixOutputPath, float sampleRate)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);
    conf.setFloat(SAMPLERATE, sampleRate);
    conf.setInt(COLS, cols);

    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPath, "samplecol");

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(SampleColsJob.class);
    job.setJobName(SampleColsJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);

    job.setMapperClass(MyMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res)
        throw new IOException("Job failed!");
}
From source file: com.twitter.algebra.nmf.SampleRowsJob.java
License: Apache License

public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath, float sampleRate)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);
    conf.setFloat(SAMPLERATE, sampleRate);

    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPath, "samplerows");

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(SampleRowsJob.class);
    job.setJobName(SampleRowsJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);

    job.setMapperClass(MyMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res)
        throw new IOException("Job failed!");
}
From source file: com.twitter.algebra.nmf.XtXJob.java
License: Apache License

public void run(Configuration conf, Path matrixInputPath, int numCols, String xmPath, Path matrixOutputPath)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);
    conf.setInt(MATRIXCOLS, numCols);
    // conf.set(XMPATH, xmPath);

    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    NMFCommon.setNumberOfMapSlots(conf, fs, new Path[] { matrixInputPath }, "xtx");

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJobName("XtXJob-" + matrixOutputPath.getName());
    job.setJarByClass(XtXJob.class);

    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    int numReducers = NMFCommon.getNumberOfReduceSlots(conf, "xtx");
    job.setNumReduceTasks(numReducers);
    // Ensures total order (when used with {@link MatrixOutputFormat}).
    RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, numCols);

    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.submit();
    job.waitForCompletion(true);
}
From source file: com.twitter.elephanttwin.util.HdfsUtils.java
License: Apache License

public static File downloadFileFromHdfs(final FileSystem hdfs, final String hdfsFilePath,
        final String localDirName) throws IOException {
    Preconditions.checkNotNull(hdfs);
    MorePreconditions.checkNotBlank(hdfsFilePath);
    File localDir = new File(MorePreconditions.checkNotBlank(localDirName));
    Preconditions.checkArgument(localDir.exists(), "Local directory does not exist: " + localDirName);
    Preconditions.checkArgument(localDir.isDirectory(), "Not a directory: " + localDirName);

    Path path = new Path(hdfsFilePath);
    FSDataInputStream remoteStream = hdfs.open(path);
    String localFileName = localDirName.endsWith("/") ? localDirName + path.getName()
            : localDirName + "/" + path.getName();
    File localFile = new File(localFileName);
    FileOutputStream localStream = new FileOutputStream(localFile);
    try {
        IOUtils.copy(remoteStream, localStream);
    } finally {
        IOUtils.closeQuietly(remoteStream);
        IOUtils.closeQuietly(localStream);
    }
    return localFile;
}
From source file: com.twitter.elephanttwin.util.HdfsUtils.java
License: Apache License

public static File downloadFileFromHdfs(Configuration conf, String fileUri, String localDirName,
        boolean overwrite) throws IOException {
    Path path = new Path(fileUri);
    FileSystem hdfs = path.getFileSystem(conf);
    FSDataInputStream remoteStream = hdfs.open(path);
    File localFile = new File(localDirName, path.getName());
    if (overwrite && localFile.exists()) {
        boolean success = localFile.delete();
        if (!success) {
            LOG.warning("Failed to delete file to be overwritten: " + localFile);
        }
    }
    FileOutputStream localStream = new FileOutputStream(localFile);
    try {
        IOUtils.copy(remoteStream, localStream);
    } finally {
        IOUtils.closeQuietly(remoteStream);
        IOUtils.closeQuietly(localStream);
    }
    return localFile;
}
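A hypothetical call to this overload, showing how getName() determines the local file name (the URI and local directory below are made up for illustration):

import java.io.File;
import org.apache.hadoop.conf.Configuration;
import com.twitter.elephanttwin.util.HdfsUtils;

public class DownloadExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Downloads the remote file into /tmp, overwriting any existing copy.
        File f = HdfsUtils.downloadFileFromHdfs(conf, "hdfs://namenode:8020/logs/app.log", "/tmp", true);
        // Prints /tmp/app.log -- the final component comes from Path.getName().
        System.out.println("Downloaded to " + f.getAbsolutePath());
    }
}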
From source file: com.twitter.heron.statefulstorage.hdfs.HDFSStorageTest.java
License: Open Source License

@Test
public void testDisposePartial() throws Exception {
    Path mockPath = mock(Path.class);
    when(mockPath.getName()).thenReturn("0");

    FileStatus mockFS1 = mock(FileStatus.class);
    when(mockFS1.getPath()).thenReturn(mockPath);
    FileStatus mockFS2 = mock(FileStatus.class);
    when(mockFS2.getPath()).thenReturn(mockPath);

    FileStatus[] mockFileStatus = { mockFS1, mockFS2 };
    FileStatus[] emptyFileStatus = new FileStatus[0];
    when(mockFileSystem.listStatus(any(Path.class))).thenReturn(mockFileStatus).thenReturn(emptyFileStatus);

    hdfsStorage.dispose(StatefulStorageTestContext.TOPOLOGY_NAME,
            StatefulStorageTestContext.CHECKPOINT_ID, false);

    verify(mockFileSystem, times(2)).delete(any(Path.class), eq(true));
}
From source file: com.twitter.hraven.etl.FileLister.java
License: Apache License

/**
 * Extracts the job id from a Path.
 * @param aPath the input Path
 * @return job id as a string
 */
static String getJobIdFromPath(Path aPath) {
    String fileName = aPath.getName();
    JobFile jf = new JobFile(fileName);
    String jobId = jf.getJobid();
    if (jobId == null) {
        throw new ProcessingException("job id is null for " + aPath.toUri());
    }
    return jobId;
}
From source file: com.twitter.hraven.etl.JobFileModifiedRangePathFilter.java
License: Apache License

@Override
public boolean accept(Path path) {
    if (!super.accept(path)) {
        return false;
    }

    JobFile jobFile = new JobFile(path.getName());
    if (jobFile.isJobConfFile() || jobFile.isJobHistoryFile()) {
        try {
            FileSystem fs = path.getFileSystem(myConf);
            FileStatus fileStatus = fs.getFileStatus(path);
            long fileModificationTimeMillis = fileStatus.getModificationTime();
            return accept(fileModificationTimeMillis);
        } catch (IOException e) {
            throw new ImportException("Cannot determine file modification time of " + path.getName(), e);
        }
    } else {
        // Reject anything that does not match a job conf filename.
        LOG.info(" Not a valid job conf / job history file " + path.getName());
        return false;
    }
}