Example usage for org.apache.hadoop.fs Path getName

List of usage examples for org.apache.hadoop.fs Path getName

Introduction

On this page you can find example usages of org.apache.hadoop.fs.Path#getName.

Prototype

public String getName() 

Document

Returns the final component of this path.
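
For illustration, a minimal sketch of the behavior (the paths here are hypothetical): getName() returns only the final component of the path, with no parent directories, scheme, or authority.

import org.apache.hadoop.fs.Path;

Path p = new Path("hdfs://namenode:8020/user/alice/logs/app.log");
String name = p.getName();                        // "app.log"
String dir = new Path("/user/alice").getName();   // "alice"
String root = new Path("/").getName();            // "" (the root has no final component)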

Usage

From source file:com.twitter.hraven.etl.JobFileModifiedRangeSubstringPathFilter.java

License:Apache License

@Override
public boolean accept(Path path) {
    if (!super.accept(path)) {
        return false;
    }

    JobFile jobFile = new JobFile(path.getName());
    if (jobFile.isJobConfFile() || jobFile.isJobHistoryFile()) {
        if (jobFile.isJobHistoryFile()) {
            if (!includesPathSubstrings(path) || !excludesPathSubstrings(path)) {
                return false;
            }
        }
        try {
            FileSystem fs = path.getFileSystem(myConf);
            FileStatus fileStatus = fs.getFileStatus(path);
            long fileModificationTimeMillis = fileStatus.getModificationTime();
            return accept(fileModificationTimeMillis);
        } catch (IOException e) {
            throw new ImportException("Cannot determine file modification time of " + path.getName(), e);
        }
    } else {
        // Reject anything that does not match a job conf filename.
        LOG.info(" Not a valid job conf / job history file " + path.getName());
        return false;
    }
}

From source file:com.twitter.hraven.etl.JobFilePartitioner.java

License:Apache License

/**
 * @param hdfs
 *          FileSystem handle
 * @param outputPath
 *          base directory where files are to be written
 * @param fileModTime
 *          modification time of the file that needs to be moved/copied to HDFS
 * @return the path in HDFS to write the file to. Will be created if it does
 *         not exist.
 * @throws IOException
 *           if the year/month/day directory cannot be created in outputPath.
 */
private Path getTargetDirectory(FileSystem hdfs, Path outputPath, long fileModTime) throws IOException {
    String year = YEAR_FORMAT.format(new Date(fileModTime));
    String month = MONTH_FORMAT.format(new Date(fileModTime));
    String day = DAY_FORMAT.format(new Date(fileModTime));

    Path yearDir = new Path(outputPath, year);
    Path monthDir = new Path(yearDir, month);
    Path dayDir = new Path(monthDir, day);

    // Create the target directory if it does not already exist.
    if (!hdfs.exists(dayDir)) {
        if (hdfs.mkdirs(dayDir)) {
            LOG.info("Created: " + dayDir.toString());
        } else {
            throw new IOException("Unable to create target directory with date: " + dayDir.getName());
        }
    }
    return dayDir;
}
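
YEAR_FORMAT, MONTH_FORMAT, and DAY_FORMAT are fields of the class that are not shown in this snippet; presumably they are date formatters along these lines (an assumption for illustration, not the actual hRaven source):

// Hypothetical formatter definitions matching the year/month/day layout above.
private static final SimpleDateFormat YEAR_FORMAT = new SimpleDateFormat("yyyy");
private static final SimpleDateFormat MONTH_FORMAT = new SimpleDateFormat("MM");
private static final SimpleDateFormat DAY_FORMAT = new SimpleDateFormat("dd");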

From source file:com.twitter.hraven.etl.JobFilePartitioner.java

License:Apache License

/**
 * @param hdfs
 *          FileSystem handle
 * @param f
 *          file to copy to HDFS
 * @param outputPath
 * @param skipExisting
 *          skip if the file already exists in the target. The file will be
 *          overwritten if it is already there and this argument is false.
 * @throws IOException
 *           if target directory cannot be created or file cannot be copied to
 *           target directory.
 */
private void processPlainFile(FileSystem hdfs, File f, Path outputPath, boolean skipExisting)
        throws IOException {
    long fileModTime = f.lastModified();
    Path targetDir = getTargetDirectory(hdfs, outputPath, fileModTime);

    boolean doCopy = true;
    Path sourceFile = new Path(f.getPath());
    if (skipExisting) {
        Path target = new Path(targetDir, sourceFile.getName());
        if (hdfs.exists(target)) {
            doCopy = false;
        }
    }
    if (doCopy) {
        hdfs.copyFromLocalFile(sourceFile, targetDir);
    }
}

From source file:com.twitter.hraven.etl.JobFilePathFilter.java

License:Apache License

@Override
public boolean accept(Path path) {
    // Ideally we want to do this
    // JobFile jobFile = new JobFile(path.getName());
    // return (jobFile.isJobConfFile() || jobFile.isJobHistoryFile());
    // Aside from that not being efficient, it also chokes on input directories.

    // Therefore, allow anything but CRC files. The record reader will have to deal with the rest.
    return !((path == null) || (path.getName().endsWith(".crc")));
}
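
A filter like this is typically handed to FileSystem#listStatus, which calls accept() on each candidate path; a minimal usage sketch (the directory path and configuration are assumptions for illustration):

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
// Returns everything in the directory except .crc files.
FileStatus[] jobFiles = fs.listStatus(new Path("/mapred/history/done"), new JobFilePathFilter());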

From source file:com.twitter.hraven.etl.ProcessRecordService.java

License:Apache License

/**
 * @param initialProcessFile
 *          The path to the file to be moved.
 * @param outputPath
 *          The path where this file is to be moved to.
 * @return the new path of the moved process file.
 * @throws IOException
 *           when bad things happen.
 * @throws ProcessingException
 *           when the file cannot be moved.
 */
public Path moveProcessFile(Path initialProcessFile, Path outputPath) throws IOException {
    String processFileName = initialProcessFile.getName();
    Path processFile = new Path(outputPath, processFileName);

    boolean success = fs.rename(initialProcessFile, processFile);
    if (!success) {
        throw new ProcessingException(
                "Unable to move processing file " + initialProcessFile + " to " + processFile);
    }
    return processFile;
}

From source file:com.twitter.hraven.etl.TestFileLister.java

License:Apache License

@Test
public void testPruneFileListBySize() throws IOException {

    long maxFileSize = 20L;
    FileStatus[] origList = new FileStatus[2];
    FileSystem hdfs = FileSystem.get(UTIL.getConfiguration());
    Path inputPath = new Path("/inputdir_filesize");
    boolean os = hdfs.mkdirs(inputPath);
    assertTrue(os);
    assertTrue(hdfs.exists(inputPath));

    final String JOB_HISTORY_FILE_NAME = "src/test/resources/job_1329348432655_0001-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist";
    File jobHistoryfile = new File(JOB_HISTORY_FILE_NAME);
    Path srcPath = new Path(jobHistoryfile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputPath);
    Path expPath = new Path(inputPath.toUri() + "/" + srcPath.getName());
    assertTrue(hdfs.exists(expPath));
    origList[0] = hdfs.getFileStatus(expPath);

    final String JOB_CONF_FILE_NAME = "src/test/resources/job_1329348432655_0001_conf.xml";
    File jobConfFile = new File(JOB_CONF_FILE_NAME);
    srcPath = new Path(jobConfFile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputPath);
    expPath = new Path(inputPath.toUri() + "/" + srcPath.getName());
    assertTrue(hdfs.exists(expPath));
    origList[1] = hdfs.getFileStatus(expPath);

    FileStatus[] prunedList = FileLister.pruneFileListBySize(maxFileSize, origList, hdfs, inputPath);
    assertNotNull(prunedList);
    assertTrue(prunedList.length == 0);

    Path emptyFile = new Path(
            inputPath.toUri() + "/" + "job_1329341111111_0101-1329111113227-user2-Sleep.jhist");
    os = hdfs.createNewFile(emptyFile);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyFile));
    origList[0] = hdfs.getFileStatus(emptyFile);

    Path emptyConfFile = new Path(inputPath.toUri() + "/" + "job_1329341111111_0101_conf.xml");
    os = hdfs.createNewFile(emptyConfFile);

    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile));
    origList[1] = hdfs.getFileStatus(emptyConfFile);

    prunedList = FileLister.pruneFileListBySize(maxFileSize, origList, hdfs, inputPath);
    assertNotNull(prunedList);
    assertTrue(prunedList.length == 2);

}
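
As an aside, the expected paths in this test are built by concatenating the directory URI, a "/", and the name; the two-argument Path(parent, child) constructor used in the earlier examples does the same job without manual separator handling:

// Equivalent to: new Path(inputPath.toUri() + "/" + srcPath.getName())
Path expPath = new Path(inputPath, srcPath.getName());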

From source file:com.twitter.hraven.etl.TestFileLister.java

License:Apache License

/**
 * Removes a conf file which has already been put in the pruned list.
 *
 * @throws IOException
 */
@Test
public void testPruneFileListRemovingConfFromPruneList() throws IOException {

    long maxFileSize = 20L;
    FileStatus[] origList = new FileStatus[2];
    FileSystem hdfs = FileSystem.get(UTIL.getConfiguration());
    Path inputPath = new Path("/inputdir_filesize_pruneList");
    boolean os = hdfs.mkdirs(inputPath);
    assertTrue(os);
    assertTrue(hdfs.exists(inputPath));

    Path relocationPath = new Path("/relocation_filesize_pruneList");
    os = hdfs.mkdirs(relocationPath);
    assertTrue(os);
    assertTrue(hdfs.exists(relocationPath));

    Path emptyConfFile = new Path(inputPath.toUri() + "/" + "job_1329348432655_0001_conf.xml");
    os = hdfs.createNewFile(emptyConfFile);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile));
    origList[0] = hdfs.getFileStatus(emptyConfFile);

    final String JOB_HISTORY_FILE_NAME = "src/test/resources/job_1329348432655_0001-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist";
    File jobHistoryfile = new File(JOB_HISTORY_FILE_NAME);
    Path srcPath = new Path(jobHistoryfile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputPath);
    Path expPath = new Path(inputPath.toUri() + "/" + srcPath.getName());
    assertTrue(hdfs.exists(expPath));
    origList[1] = hdfs.getFileStatus(expPath);

    FileStatus[] prunedList = FileLister.pruneFileListBySize(maxFileSize, origList, hdfs, inputPath);
    assertNotNull(prunedList);
    assertTrue(prunedList.length == 0);
}

From source file:com.twitter.hraven.etl.TestFileLister.java

License:Apache License

/**
 * Tests the case where several files are spread out in the directory and need
 * to be removed.
 *
 * @throws IOException
 */
@Test
public void testPruneFileListMultipleFilesAlreadyMovedCases() throws IOException {

    long maxFileSize = 20L;
    FileStatus[] origList = new FileStatus[12];
    FileSystem hdfs = FileSystem.get(UTIL.getConfiguration());
    Path inputPath = new Path("/inputdir_filesize_multiple");
    boolean os = hdfs.mkdirs(inputPath);
    assertTrue(os);
    assertTrue(hdfs.exists(inputPath));

    Path relocationPath = new Path("/relocation_filesize_multiple");
    os = hdfs.mkdirs(relocationPath);
    assertTrue(os);
    assertTrue(hdfs.exists(relocationPath));

    Path emptyFile = new Path(
            inputPath.toUri() + "/" + "job_1329341111111_0101-1329111113227-user2-Sleep.jhist");
    os = hdfs.createNewFile(emptyFile);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyFile));
    origList[0] = hdfs.getFileStatus(emptyFile);

    Path emptyConfFile = new Path(inputPath.toUri() + "/" + "job_1329341111111_0101_conf.xml");
    os = hdfs.createNewFile(emptyConfFile);

    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile));
    origList[1] = hdfs.getFileStatus(emptyConfFile);

    final String JOB_HISTORY_FILE_NAME = "src/test/resources/job_1329348432655_0001-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist";
    File jobHistoryfile = new File(JOB_HISTORY_FILE_NAME);
    Path srcPath = new Path(jobHistoryfile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputPath);
    Path expPath = new Path(inputPath.toUri() + "/" + srcPath.getName());
    assertTrue(hdfs.exists(expPath));
    origList[2] = hdfs.getFileStatus(expPath);

    final String JOB_CONF_FILE_NAME = "src/test/resources/job_1329348432655_0001_conf.xml";
    File jobConfFile = new File(JOB_CONF_FILE_NAME);
    srcPath = new Path(jobConfFile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputPath);
    expPath = new Path(inputPath.toUri() + "/" + srcPath.getName());
    assertTrue(hdfs.exists(expPath));
    origList[3] = hdfs.getFileStatus(expPath);

    Path inputPath2 = new Path(inputPath.toUri() + "/"
            + "job_1311222222255_0221-1311111143227-user10101-WordCount-1-SUCCEEDED-default.jhist");
    hdfs.copyFromLocalFile(srcPath, inputPath2);
    assertTrue(hdfs.exists(inputPath2));
    origList[4] = hdfs.getFileStatus(inputPath2);

    Path inputPath3 = new Path(inputPath.toUri() + "/"
            + "job_1399999999155_0991-1311111143227-user3321-TeraGen-1-SUCCEEDED-default.jhist");
    hdfs.copyFromLocalFile(srcPath, inputPath3);
    assertTrue(hdfs.exists(inputPath3));
    origList[5] = hdfs.getFileStatus(inputPath3);

    Path inputPath4 = new Path(inputPath.toUri() + "/"
            + "job_1399977777177_0771-1311111143227-user3321-TeraSort-1-SUCCEEDED-default.jhist");
    hdfs.copyFromLocalFile(srcPath, inputPath4);
    assertTrue(hdfs.exists(inputPath4));
    origList[6] = hdfs.getFileStatus(inputPath4);

    Path emptyFile2 = new Path(
            inputPath.toUri() + "/" + "job_1329343333333_5551-1329111113227-user2-SomethingElse.jhist");
    os = hdfs.createNewFile(emptyFile2);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyFile2));
    origList[7] = hdfs.getFileStatus(emptyFile2);

    Path emptyConfFile2 = new Path(inputPath.toUri() + "/" + "job_1329343333333_5551_conf.xml");
    os = hdfs.createNewFile(emptyConfFile2);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile2));
    origList[8] = hdfs.getFileStatus(emptyConfFile2);

    // this is an empty file which tests the toBeRemovedFileList
    // at the end of function pruneFileListBySize
    Path emptyConfFile3 = new Path(inputPath.toUri() + "/" + "job_1399999999155_0991_conf.xml");
    os = hdfs.createNewFile(emptyConfFile3);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile3));
    origList[9] = hdfs.getFileStatus(emptyConfFile3);

    Path inputConfPath2 = new Path(inputPath.toUri() + "/" + "job_1311222222255_0221_conf.xml");
    srcPath = new Path(jobConfFile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputConfPath2);
    assertTrue(hdfs.exists(inputConfPath2));
    origList[10] = hdfs.getFileStatus(inputConfPath2);

    // this is an empty file which tests the toBeRemovedFileList
    // at the end of function pruneFileListBySize
    Path emptyConfFile4 = new Path(inputPath.toUri() + "/" + "job_1399977777177_0771_conf.xml");
    os = hdfs.createNewFile(emptyConfFile4);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile4));
    origList[11] = hdfs.getFileStatus(emptyConfFile4);

    FileStatus[] prunedList = FileLister.pruneFileListBySize(maxFileSize, origList, hdfs, inputPath);
    assertNotNull(prunedList);
    assertTrue(prunedList.length == 4);
}

From source file:com.twitter.hraven.mapreduce.JobFileTableMapper.java

License:Apache License

/**
 * Calculates the cost of this job based on mbMillis, the machine type,
 * and cost details from the properties file.
 * @param mbMillis
 * @param currentConf
 * @return cost of the job
 */
private Double getJobCost(Long mbMillis, Configuration currentConf) {
    Double computeTco = 0.0;
    Long machineMemory = 0L;
    Properties prop = null;
    String machineType = currentConf.get(Constants.HRAVEN_MACHINE_TYPE, "default");
    LOG.debug(" machine type " + machineType);
    try {
        Path[] cacheFiles = DistributedCache.getLocalCacheFiles(currentConf);
        if (null != cacheFiles && cacheFiles.length > 0) {
            for (Path cachePath : cacheFiles) {
                LOG.debug(" distributed cache path " + cachePath);
                if (cachePath.getName().equals(Constants.COST_PROPERTIES_FILENAME)) {
                    prop = loadCostProperties(cachePath, machineType);
                    break;
                }
            }
        } else {
            LOG.error("Unable to find anything (" + Constants.COST_PROPERTIES_FILENAME
                    + ") in distributed cache, continuing with defaults");
        }

    } catch (IOException ioe) {
        LOG.error("IOException reading from distributed cache for " + Constants.COST_PROPERTIES_HDFS_DIR
                + ", continuing with defaults" + ioe.toString());
    }
    if (prop != null) {
        String computeTcoStr = prop.getProperty(machineType + ".computecost");
        try {
            computeTco = Double.parseDouble(computeTcoStr);
        } catch (NumberFormatException nfe) {
            LOG.error("error in conversion to long for compute tco " + computeTcoStr
                    + " using default value of 0");
        }
        String machineMemStr = prop.getProperty(machineType + ".machinememory");
        try {
            machineMemory = Long.parseLong(machineMemStr);
        } catch (NumberFormatException nfe) {
            LOG.error("error in conversion to long for machine memory  " + machineMemStr
                    + " using default value of 0");
        }
    } else {
        LOG.error("Could not load properties file, using defaults");
    }

    Double jobCost = JobHistoryFileParserBase.calculateJobCost(mbMillis, computeTco, machineMemory);
    LOG.info("from cost properties file, jobCost is " + jobCost + " based on compute tco: " + computeTco
            + " machine memory: " + machineMemory + " for machine type " + machineType);
    return jobCost;
}

From source file:com.twitter.pig.backend.hadoop.executionengine.tez.TezJobControlCompiler.java

License:Apache License

public static void setupDistributedCache(PigContext pigContext, Configuration conf, String[] paths,
        boolean shipToCluster) throws IOException {
    // Turn on the symlink feature
    DistributedCache.createSymlink(conf);

    for (String path : paths) {
        path = path.trim();
        if (path.length() != 0) {
            Path src = new Path(path);

            // Ensure that 'src' is a valid URI
            URI srcURI = toURI(src);

            // Ship it to the cluster if necessary and add to the
            // DistributedCache
            if (shipToCluster) {
                Path dst = new Path(FileLocalizer.getTemporaryPath(pigContext).toString());
                FileSystem fs = dst.getFileSystem(conf);
                fs.copyFromLocalFile(src, dst);

                // Construct the dst#srcName uri for DistributedCache
                URI dstURI = null;
                try {
                    dstURI = new URI(dst.toString() + "#" + src.getName());
                } catch (URISyntaxException ue) {
                    byte errSrc = pigContext.getErrorSource();
                    int errCode = 0;
                    switch (errSrc) {
                    case PigException.REMOTE_ENVIRONMENT:
                        errCode = 6004;
                        break;
                    case PigException.USER_ENVIRONMENT:
                        errCode = 4004;
                        break;
                    default:
                        errCode = 2037;
                        break;
                    }
                    String msg = "Invalid ship specification. " + "File doesn't exist: " + dst;
                    throw new ExecException(msg, errCode, errSrc);
                }
                DistributedCache.addCacheFile(dstURI, conf);
            } else {
                DistributedCache.addCacheFile(srcURI, conf);
            }
        }
    }
}
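
The dst + "#" + src.getName() URI built above uses the DistributedCache fragment convention: because createSymlink(conf) was called at the top of the method, the framework exposes each cached file in the task's working directory under a symlink named by the fragment, i.e. the file's original name. A task can then open the shipped file without knowing where it was localized (a sketch with a hypothetical file name):

// Inside a task: the #fragment symlink makes the shipped file visible
// under its original name ("udf.jar" is a hypothetical example).
File shipped = new File("udf.jar");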