Example usage for org.apache.hadoop.fs Path getName

List of usage examples for org.apache.hadoop.fs Path getName

Introduction

On this page you can find example usages of org.apache.hadoop.fs.Path.getName().

Prototype

public String getName() 

Source Link

Document

Returns the final component of this path.

Usage

From source file:com.twitter.algebra.nmf.ReindexerJob.java

License:Apache License

/**
 * Configures, submits and waits for the reindexing MapReduce job.
 *
 * <p>Input is read as key/value text lines split on a tab character; output is written as a
 * sequence file with {@code LongWritable} keys and {@code IntWritable} values. The job is named
 * after this class's simple name plus the final component of the output path.
 *
 * @param conf base configuration; job-specific settings are applied to a copy of it
 * @param matrixInputPath input location; also selects the file system used to qualify both paths
 * @param matrixOutputPath output location
 * @return the job, once it has completed successfully
 * @throws IOException if the job reports failure or job setup hits an I/O error
 * @throws InterruptedException propagated from submitting or waiting for the job
 * @throws ClassNotFoundException propagated from submitting or waiting for the job
 */
public Job run(Configuration conf, Path matrixInputPath, Path matrixOutputPath)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration jobConf = new Configuration(conf);
    jobConf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", "\t");

    @SuppressWarnings("deprecation")
    Job job = new Job(jobConf);
    job.setJarByClass(ReindexerJob.class);
    job.setJobName(ReindexerJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), jobConf);
    Path qualifiedInput = fs.makeQualified(matrixInputPath);
    Path qualifiedOutput = fs.makeQualified(matrixOutputPath);

    // Input side.
    FileInputFormat.addInputPath(job, qualifiedInput);
    job.setInputFormatClass(KeyValueTextInputFormat.class);

    // Output side.
    FileOutputFormat.setOutputPath(job, qualifiedOutput);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(IntWritable.class);

    // Map stage.
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(NullWritable.class);

    // Reduce stage: a single reducer makes the reindexing very slow, but it is required to
    // obtain a total order.
    job.setReducerClass(MyReducer.class);
    job.setNumReduceTasks(1);

    job.submit();
    if (!job.waitForCompletion(true)) {
        throw new IOException("Job failed!");
    }
    return job;
}

From source file:com.twitter.algebra.nmf.RowSquareSumJob.java

License:Apache License

/**
 * Configures, submits and waits for the row-square-sum MapReduce job.
 *
 * <p>Input is read from a sequence file and the result is written through
 * {@code MatrixOutputFormat} with {@code IntWritable} keys and {@code VectorWritable} values.
 * {@code MergeVectorsReducer} serves as both combiner and reducer, and exactly one reduce task
 * is used.
 *
 * @param conf configuration the job is created from
 * @param matrixInputPath input location; also selects the file system used to qualify both paths
 * @param matrixOutputPath output location; its final component becomes part of the job name
 * @param aRows not read by this method; it is only referenced by the disabled row-partitioner
 *        setup noted in the body
 * @throws IOException if the job reports failure or job setup hits an I/O error
 * @throws InterruptedException propagated from submitting or waiting for the job
 * @throws ClassNotFoundException propagated from submitting or waiting for the job
 */
public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath, int aRows)
        throws IOException, InterruptedException, ClassNotFoundException {
    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(RowSquareSumJob.class);
    job.setJobName(RowSquareSumJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    Path qualifiedInput = fs.makeQualified(matrixInputPath);
    Path qualifiedOutput = fs.makeQualified(matrixOutputPath);

    // Input side.
    FileInputFormat.addInputPath(job, qualifiedInput);
    job.setInputFormatClass(SequenceFileInputFormat.class);

    // Output side.
    FileOutputFormat.setOutputPath(job, qualifiedOutput);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    // Processing classes; everything funnels into one reduce task.
    job.setNumReduceTasks(1);
    job.setMapperClass(SumMapper.class);
    job.setCombinerClass(MergeVectorsReducer.class);
    job.setReducerClass(MergeVectorsReducer.class);

    // Row partitioning (RowPartitioner.IntRowPartitioner sized by aRows) is intentionally
    // left disabled here.

    job.submit();
    if (!job.waitForCompletion(true)) {
        throw new IOException("Job failed!");
    }
}

From source file:com.twitter.algebra.nmf.SampleColsJob.java

License:Apache License

/**
 * Configures, submits and waits for the map-only column-sampling job.
 *
 * <p>The sample rate and column count are stored in a copy of the configuration under the
 * {@code SAMPLERATE} and {@code COLS} keys before the job is created. Input is read from a
 * sequence file and output is written through {@code MatrixOutputFormat} with
 * {@code IntWritable} keys and {@code VectorWritable} values.
 *
 * @param conf base configuration; job-specific settings are applied to a copy of it
 * @param matrixInputPath input location; also selects the file system used to qualify both paths
 * @param cols value stored under the {@code COLS} configuration key
 * @param matrixOutputPath output location; its final component becomes part of the job name
 * @param sampleRate value stored under the {@code SAMPLERATE} configuration key
 * @throws IOException if the job reports failure or job setup hits an I/O error
 * @throws InterruptedException propagated from submitting or waiting for the job
 * @throws ClassNotFoundException propagated from submitting or waiting for the job
 */
public void run(Configuration conf, Path matrixInputPath, int cols, Path matrixOutputPath, float sampleRate)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration jobConf = new Configuration(conf);
    jobConf.setFloat(SAMPLERATE, sampleRate);
    jobConf.setInt(COLS, cols);

    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), jobConf);
    // Must run before the Job is built so the job is created from the adjusted configuration.
    NMFCommon.setNumberOfMapSlots(jobConf, fs, matrixInputPath, "samplecol");

    @SuppressWarnings("deprecation")
    Job job = new Job(jobConf);
    job.setJarByClass(SampleColsJob.class);
    job.setJobName(SampleColsJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

    Path qualifiedInput = fs.makeQualified(matrixInputPath);
    Path qualifiedOutput = fs.makeQualified(matrixOutputPath);

    // Input side.
    FileInputFormat.addInputPath(job, qualifiedInput);
    job.setInputFormatClass(SequenceFileInputFormat.class);

    // Output side.
    FileOutputFormat.setOutputPath(job, qualifiedOutput);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    // Map-only: zero reduce tasks.
    job.setMapperClass(MyMapper.class);
    job.setNumReduceTasks(0);

    job.submit();
    if (!job.waitForCompletion(true)) {
        throw new IOException("Job failed!");
    }
}

From source file:com.twitter.algebra.nmf.SampleRowsJob.java

License:Apache License

/**
 * Configures, submits and waits for the map-only row-sampling job.
 *
 * <p>The sample rate is stored in a copy of the configuration under the {@code SAMPLERATE} key
 * before the job is created. Input is read from a sequence file and output is written through
 * {@code MatrixOutputFormat} with {@code IntWritable} keys and {@code VectorWritable} values.
 *
 * @param conf base configuration; job-specific settings are applied to a copy of it
 * @param matrixInputPath input location; also selects the file system used to qualify both paths
 * @param matrixOutputPath output location; its final component becomes part of the job name
 * @param sampleRate value stored under the {@code SAMPLERATE} configuration key
 * @throws IOException if the job reports failure or job setup hits an I/O error
 * @throws InterruptedException propagated from submitting or waiting for the job
 * @throws ClassNotFoundException propagated from submitting or waiting for the job
 */
public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath, float sampleRate)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration jobConf = new Configuration(conf);
    jobConf.setFloat(SAMPLERATE, sampleRate);

    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), jobConf);
    // Must run before the Job is built so the job is created from the adjusted configuration.
    NMFCommon.setNumberOfMapSlots(jobConf, fs, matrixInputPath, "samplerows");

    @SuppressWarnings("deprecation")
    Job job = new Job(jobConf);
    job.setJarByClass(SampleRowsJob.class);
    job.setJobName(SampleRowsJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

    Path qualifiedInput = fs.makeQualified(matrixInputPath);
    Path qualifiedOutput = fs.makeQualified(matrixOutputPath);

    // Input side.
    FileInputFormat.addInputPath(job, qualifiedInput);
    job.setInputFormatClass(SequenceFileInputFormat.class);

    // Output side.
    FileOutputFormat.setOutputPath(job, qualifiedOutput);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    // Map-only: zero reduce tasks.
    job.setMapperClass(MyMapper.class);
    job.setNumReduceTasks(0);

    job.submit();
    if (!job.waitForCompletion(true)) {
        throw new IOException("Job failed!");
    }
}

From source file:com.twitter.algebra.nmf.XtXJob.java

License:Apache License

/**
 * Configures, submits and waits for the XtX MapReduce job.
 *
 * <p>The column count is stored in a copy of the configuration under the {@code MATRIXCOLS} key
 * before the job is created. Input is read from a sequence file; map output and final output
 * both use {@code IntWritable} keys and {@code VectorWritable} values, and the result is written
 * through {@code MatrixOutputFormat}.
 *
 * @param conf base configuration; job-specific settings are applied to a copy of it
 * @param matrixInputPath input location; also selects the file system used to qualify both paths
 * @param numCols value stored under {@code MATRIXCOLS} and passed to the row partitioner
 * @param xmPath not read by this method (the code that stored it in the configuration is
 *        disabled)
 * @param matrixOutputPath output location; its final component becomes part of the job name
 * @throws IOException if the job reports failure or job setup hits an I/O error
 * @throws InterruptedException propagated from submitting or waiting for the job
 * @throws ClassNotFoundException propagated from submitting or waiting for the job
 */
public void run(Configuration conf, Path matrixInputPath, int numCols, String xmPath, Path matrixOutputPath)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);

    conf.setInt(MATRIXCOLS, numCols);
    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    // Must run before the Job is built so the job is created from the adjusted configuration.
    NMFCommon.setNumberOfMapSlots(conf, fs, new Path[] { matrixInputPath }, "xtx");

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJobName("XtXJob-" + matrixOutputPath.getName());
    job.setJarByClass(XtXJob.class);
    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);
    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    int numReducers = NMFCommon.getNumberOfReduceSlots(conf, "xtx");
    job.setNumReduceTasks(numReducers);
    // ensures total order (when used with {@link MatrixOutputFormat}),
    RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, numCols);

    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.submit();
    // Surface a failed job to the caller instead of returning as if it had succeeded.
    if (!job.waitForCompletion(true)) {
        throw new IOException("Job failed!");
    }
}

From source file:com.twitter.elephanttwin.util.HdfsUtils.java

License:Apache License

/**
 * Copies one file from the given file system into an existing local directory.
 *
 * <p>The local file takes the final component of {@code hdfsFilePath} as its name.
 *
 * @param hdfs file system to read from; must not be null
 * @param hdfsFilePath path of the file to download; must not be blank
 * @param localDirName existing local directory that receives the file; must not be blank
 * @return the local file that was written
 * @throws IOException if the remote file cannot be opened, the local file cannot be created,
 *         or copying fails
 */
public static File downloadFileFromHdfs(final FileSystem hdfs, final String hdfsFilePath,
        final String localDirName) throws IOException {
    Preconditions.checkNotNull(hdfs);
    MorePreconditions.checkNotBlank(hdfsFilePath);

    File localDir = new File(MorePreconditions.checkNotBlank(localDirName));
    Preconditions.checkArgument(localDir.exists(), "Local directory does not exist: " + localDirName);
    Preconditions.checkArgument(localDir.isDirectory(), "Not a directory: " + localDirName);

    Path path = new Path(hdfsFilePath);
    // File(parent, child) joins the two parts with a single separator, whether or not
    // localDirName ends with "/".
    File localFile = new File(localDir, path.getName());

    FSDataInputStream remoteStream = null;
    FileOutputStream localStream = null;
    try {
        // Both streams are opened inside the try so the remote stream is still closed when
        // creating the local file fails. The remote stream is opened first, so a missing
        // remote file does not leave an empty local file behind.
        remoteStream = hdfs.open(path);
        localStream = new FileOutputStream(localFile);
        IOUtils.copy(remoteStream, localStream);
    } finally {
        IOUtils.closeQuietly(remoteStream);
        IOUtils.closeQuietly(localStream);
    }
    return localFile;
}

From source file:com.twitter.elephanttwin.util.HdfsUtils.java

License:Apache License

/**
 * Copies one file, addressed by URI, into a local directory.
 *
 * <p>The file system is resolved from {@code fileUri} and {@code conf}; the local file takes the
 * final component of the URI's path as its name.
 *
 * @param conf configuration used to resolve the file system for {@code fileUri}
 * @param fileUri location of the file to download
 * @param localDirName local directory that receives the file
 * @param overwrite when true, an existing local file is deleted before writing; a failed delete
 *        is only logged as a warning
 * @return the local file that was written
 * @throws IOException if the remote file cannot be opened, the local file cannot be created,
 *         or copying fails
 */
public static File downloadFileFromHdfs(Configuration conf, String fileUri, String localDirName,
        boolean overwrite) throws IOException {

    Path path = new Path(fileUri);
    FileSystem hdfs = path.getFileSystem(conf);
    File localFile = new File(localDirName, path.getName());

    // The remote stream is opened before the local file is touched, so a missing remote file
    // never causes an existing local copy to be deleted.
    FSDataInputStream remoteStream = hdfs.open(path);
    FileOutputStream localStream = null;
    try {
        // Everything after opening the remote stream runs inside the try, so that stream is
        // closed even when deleting or creating the local file throws.
        if (overwrite && localFile.exists()) {
            boolean success = localFile.delete();
            if (!success) {
                LOG.warning("Failed to delete file to be overwritten: " + localFile);
            }
        }
        // NOTE(review): with overwrite == false an existing local file is still truncated by
        // FileOutputStream — confirm whether that is intended.
        localStream = new FileOutputStream(localFile);
        IOUtils.copy(remoteStream, localStream);
    } finally {
        IOUtils.closeQuietly(remoteStream);
        IOUtils.closeQuietly(localStream);
    }
    return localFile;
}

From source file:com.twitter.heron.statefulstorage.hdfs.HDFSStorageTest.java

License:Open Source License

/**
 * Verifies that a dispose call with the final flag set to {@code false} issues exactly two
 * recursive deletes when the first directory listing returns two entries and the next one
 * returns none.
 */
@Test
public void testDisposePartial() throws Exception {
    // Both listed entries resolve to the same path, whose final component is "0".
    Path checkpointPath = mock(Path.class);
    when(checkpointPath.getName()).thenReturn("0");

    FileStatus firstStatus = mock(FileStatus.class);
    when(firstStatus.getPath()).thenReturn(checkpointPath);
    FileStatus secondStatus = mock(FileStatus.class);
    when(secondStatus.getPath()).thenReturn(checkpointPath);

    // First listing yields the two entries; the following listing yields nothing.
    FileStatus[] populatedListing = { firstStatus, secondStatus };
    FileStatus[] emptyListing = new FileStatus[0];
    when(mockFileSystem.listStatus(any(Path.class)))
            .thenReturn(populatedListing)
            .thenReturn(emptyListing);

    hdfsStorage.dispose(StatefulStorageTestContext.TOPOLOGY_NAME,
            StatefulStorageTestContext.CHECKPOINT_ID, false);

    verify(mockFileSystem, times(2)).delete(any(Path.class), eq(true));
}

From source file:com.twitter.hraven.etl.FileLister.java

License:Apache License

/**
 * extracts the job id from a Path/*ww w .  jav a 2s .c om*/
 * @param input Path
 * @return job id as string
 */
static String getJobIdFromPath(Path aPath) {
    String fileName = aPath.getName();
    JobFile jf = new JobFile(fileName);
    String jobId = jf.getJobid();
    if (jobId == null) {
        throw new ProcessingException("job id is null for " + aPath.toUri());
    }
    return jobId;
}

From source file:com.twitter.hraven.etl.JobFileModifiedRangePathFilter.java

License:Apache License

/**
 * Accepts a path only if the parent filter accepts it, its file name is a job conf or job
 * history file, and its modification time passes {@code accept(long)}.
 *
 * @param path candidate path
 * @return {@code true} if the path passes all three checks
 * @throws ImportException if the file's modification time cannot be read
 */
@Override
public boolean accept(Path path) {
    if (!super.accept(path)) {
        return false;
    }

    JobFile jobFile = new JobFile(path.getName());
    boolean isJobConfOrHistory = jobFile.isJobConfFile() || jobFile.isJobHistoryFile();
    if (!isJobConfOrHistory) {
        // Reject anything that does not match a job conf or job history filename.
        LOG.info(" Not a valid job conf / job history file " + path.getName());
        return false;
    }

    try {
        FileStatus status = path.getFileSystem(myConf).getFileStatus(path);
        return accept(status.getModificationTime());
    } catch (IOException e) {
        throw new ImportException("Cannot determine file modification time of " + path.getName(), e);
    }
}