Example usage for org.apache.hadoop.fs Path getName

List of usage examples for org.apache.hadoop.fs Path getName

Introduction

This page collects usage examples for the getName() method of org.apache.hadoop.fs.Path.

Prototype

public String getName() 

Source Link

Document

Returns the final component of this path.

Usage

From source file:com.indeed.imhotep.builder.tsv.EasyIndexBuilderFromTSV.java

License:Apache License

/**
 * Builds the reader used to parse the configured input file.
 *
 * The choice is driven purely by the file name: names ending in ".csv" or
 * ".csv.gz" are read as CSV, every other name is read as unescaped TSV.
 *
 * @return a CSV reader or a non-escaping TSV reader wrapping the opened input file
 */
protected InputReader getInputReader() {
    final Path source = getInputFilePath();
    final BufferedReader reader = getInputFileReader(source);
    final String name = source.getName();

    final boolean looksLikeCsv = name.endsWith(".csv") || name.endsWith(".csv.gz");
    if (!looksLikeCsv) {
        // Should we also using the escaped TSV in some situations? What would be the trigger?
        return new TSVInputReaderNoEscaping(reader);
    }
    return new CSVInputReader(reader);
}

From source file:com.indeed.imhotep.builder.tsv.EasyIndexBuilderFromTSV.java

License:Apache License

/**
 * Opens the given input file as UTF-8 text, wrapping it in a gzip decoder
 * when the file name ends with ".gz".
 *
 * @param inputFile path of the file to read
 * @return a buffered reader over the (possibly decompressed) file contents
 * @throws RuntimeException if the file does not exist; any IOException raised
 *         while resolving or opening the file is rethrown via Throwables.propagate
 */
private BufferedReader getInputFileReader(Path inputFile) {
    try {
        final FileSystem hdfs = getHDFS(inputFile);
        final Path qualifiedInputFile = inputFile.makeQualified(hdfs);
        if (!hdfs.exists(inputFile)) {
            throw new RuntimeException("The provided input file doesn't exist " + qualifiedInputFile
                    + "\nFor hdfs files use 'hdfs:' prefix like hdfs:/tmp/file.tsv");
        }
        log.info("Reading TSV data from " + qualifiedInputFile);
        final InputStream rawStream = hdfs.open(inputFile);
        InputStream inputStream = rawStream;
        if (inputFile.getName().endsWith(".gz")) {
            try {
                inputStream = new GZIPInputStream(rawStream);
            } catch (IOException e) {
                // GZIPInputStream reads the gzip header in its constructor, so a
                // truncated or non-gzip file fails here. Nobody else holds the raw
                // stream at this point, so it must be released before rethrowing.
                try {
                    rawStream.close();
                } catch (IOException ignored) {
                    // The header failure is the more useful error to report.
                }
                throw e;
            }
        }
        return new BufferedReader(new InputStreamReader(inputStream, Charsets.UTF_8));
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
}

From source file:com.indeed.imhotep.builder.tsv.TsvConverter.java

License:Apache License

/**
 * Walks the directories directly under {@code toIndexPath} and collects the
 * files inside them that carry one of the supported extensions.
 *
 * Directories whose name does not match {@code ALLOWED_INDEX_NAMES} are
 * skipped (and logged), as are nested directories and files whose name ends
 * with none of {@code ALLOWED_FILE_EXT} (also logged). For an accepted file
 * the first matching extension is stripped from the name before it is
 * recorded.
 *
 * @return one FileToIndex per accepted file, in listing order
 */
private List<FileToIndex> findNewFilesToIndex() {
    final List<FileToIndex> found = Lists.newArrayList();
    try {
        for (FileStatus indexDirStatus : inputFS.listStatus(toIndexPath)) {
            if (!indexDirStatus.isDir()) {
                continue;
            }
            final Path indexDir = indexDirStatus.getPath();
            final String indexName = indexDir.getName();
            if (!indexName.matches(ALLOWED_INDEX_NAMES)) {
                log.info("Skipped directory " + indexDir + ". Index names should match regex "
                        + ALLOWED_INDEX_NAMES);
                continue;
            }

            for (FileStatus candidate : inputFS.listStatus(indexDir)) {
                if (candidate.isDir()) {
                    continue;
                }
                final Path candidatePath = candidate.getPath();
                final String candidateName = candidatePath.getName();

                // Find the first supported extension this file name ends with, if any.
                String matchedExt = null;
                for (String ext : ALLOWED_FILE_EXT) {
                    if (candidateName.endsWith(ext)) {
                        matchedExt = ext;
                        break;
                    }
                }

                if (matchedExt == null) {
                    log.info("Not one of supported extensions (" + StringUtils.join(ALLOWED_FILE_EXT, ", ")
                            + ") file: " + candidatePath);
                    continue;
                }

                final String baseName = candidateName.substring(0,
                        candidateName.length() - matchedExt.length());
                found.add(new FileToIndex(baseName, indexName, candidatePath));
            }
        }
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
    return found;
}

From source file:com.inmobi.conduit.AbstractService.java

License:Apache License

/**
 * Derives a timestamp from the directory names found by repeatedly calling
 * {@code getLatestDir}, starting at {@code destDir/category}.
 *
 * Up to five levels are descended and each directory name is parsed as an
 * integer, in the order year, month, day, hour, minute. Descent stops at the
 * first level where {@code getLatestDir} returns null; the components below
 * that level stay 0.
 *
 * @param fs       file system to query
 * @param destDir  parent directory of the category directory
 * @param category name of the category directory
 * @return the time in milliseconds of the calendar built from the parsed
 *         components, or -1 when no directory is found directly under the
 *         category directory
 * @throws Exception propagated from {@code getLatestDir}
 */
private long getPreviousRuntime(FileSystem fs, String destDir, String category) throws Exception {
    final String categoryDir = destDir + File.separator + category;
    LOG.warn("Querying Directory [" + categoryDir + "]");

    Path current = getLatestDir(fs, new Path(categoryDir));
    if (current == null) {
        return -1;
    }

    // Indices: 0 = year, 1 = month, 2 = day, 3 = hour, 4 = minute.
    final int[] parts = new int[5];
    int level = 0;
    while (current != null) {
        parts[level] = Integer.parseInt(current.getName());
        level++;
        if (level == parts.length) {
            // The minute level is the deepest one; nothing below it is inspected.
            break;
        }
        current = getLatestDir(fs, current);
    }

    LOG.debug("Date Found " + parts[0] + File.separator + parts[1] + File.separator + parts[2]
            + File.separator + parts[3] + File.separator + parts[4]);
    return CalendarHelper.getDateHourMinute(parts[0], parts[1], parts[2], parts[3], parts[4])
            .getTimeInMillis();
}

From source file:com.inmobi.conduit.distcp.DistcpBaseService.java

License:Apache License

/**
 * Extracts the first path component that follows the destination cluster's
 * final destination root in the given path.
 *
 * The root prefix is removed by length only (the string form of the root
 * path is not compared against the start of {@code destnPath}), and the
 * remainder is walked upwards until a path of depth 1 is reached.
 *
 * @param destnPath a path located below the final destination root
 * @return the name of the depth-1 ancestor of the remainder
 */
protected String getTopicNameFromDestnPath(Path destnPath) {
    final String fullPath = destnPath.toString();
    final int rootLength = new Path(destCluster.getFinalDestDirRoot()).toString().length();

    Path cursor = new Path(fullPath.substring(rootLength));
    while (cursor.depth() != 1) {
        cursor = cursor.getParent();
    }
    return cursor.getName();
}

From source file:com.inmobi.conduit.distcp.MergedStreamService.java

License:Apache License

/**
 * Maps every file to be committed to its final destination path.
 *
 * For each category, the destination directory is obtained from the
 * destination cluster for the given commit time, and each file keeps its own
 * name inside that directory. The result preserves the iteration order of
 * the input.
 *
 * @param tmpOut             not used by this method
 * @param commitTime         commit time passed to the destination cluster lookup
 * @param categoriesToCommit files to commit, keyed by category
 * @return source path to destination path, in insertion order
 * @throws Exception declared for callers; propagated from the destination lookup
 */
public Map<Path, Path> createLocalCommitPaths(Path tmpOut, long commitTime,
        Map<String, List<Path>> categoriesToCommit) throws Exception {

    final Map<Path, Path> moves = new LinkedHashMap<Path, Path>();
    for (Map.Entry<String, List<Path>> categoryFiles : categoriesToCommit.entrySet()) {
        final String category = categoryFiles.getKey();
        for (Path source : categoryFiles.getValue()) {
            final Path destDir = new Path(getDestCluster().getFinalDestDir(category, commitTime));
            moves.put(source, new Path(destDir, source.getName()));
        }
    }
    return moves;
}

From source file:com.inmobi.conduit.distcp.MirrorStreamService.java

License:Apache License

/**
 * Fills {@code commitPaths} with the final destination for every entry of
 * {@code streamPaths}.
 *
 * The destination is rebuilt under the destination cluster's final root from
 * the last six directory names of the source (stream, year, month, day, hour,
 * minute), followed by the file name when the entry is a file.
 *
 * @param commitPaths receives one mapping per entry, in iteration order
 * @param streamPaths files, or empty minute directories, to be committed
 */
private void createCommitPaths(LinkedHashMap<FileStatus, Path> commitPaths, List<FileStatus> streamPaths) {
    /*
     * An entry of streamPaths is either a file, e.g.
     *   /conduit/system/distcp_mirror_<srcCluster>_<destCluster>/conduit/streams
     *   /<streamName>/2012/1/13/15/7/<hostname>-<streamName>-2012-01-16-07-21_00000.gz
     *
     * or an empty minute directory, e.g.
     *   /conduit/system/distcp_mirror_<srcCluster>_<destCluster>/conduit/streams
     *   /<streamName>/2012/1/13/15/7/
     */
    for (FileStatus status : streamPaths) {
        // A directory entry is itself the minute directory; a file lives inside it.
        final boolean isEmptyDir = status.isDir();
        final String fileName = isEmptyDir ? null : status.getPath().getName();
        final Path minuteDir = isEmptyDir ? status.getPath() : status.getPath().getParent();

        final Path hourDir = minuteDir.getParent();
        final Path dayDir = hourDir.getParent();
        final Path monthDir = dayDir.getParent();
        final Path yearDir = monthDir.getParent();
        final Path streamDir = yearDir.getParent();

        final StringBuilder destination = new StringBuilder();
        destination.append(getDestCluster().getFinalDestDirRoot());
        destination.append(File.separator).append(streamDir.getName());
        destination.append(File.separator).append(yearDir.getName());
        destination.append(File.separator).append(monthDir.getName());
        destination.append(File.separator).append(dayDir.getName());
        destination.append(File.separator).append(hourDir.getName());
        destination.append(File.separator).append(minuteDir.getName());
        if (fileName != null) {
            destination.append(File.separator).append(fileName);
        }
        final String finalPath = destination.toString();

        commitPaths.put(status, new Path(finalPath));
        LOG.debug("Going to commit [" + status.getPath() + "] to [" + finalPath + "]");
    }
}

From source file:com.inmobi.conduit.distcp.tools.DistCp.java

License:Apache License

/**
 * Setup ssl configuration on the job configuration to enable hsftp access
 * from map job. Also copy the ssl configuration file to Distributed cache
 *
 * @param configuration - Reference to job's configuration handle
 * @throws java.io.IOException - Exception if unable to locate ssl config file
 */
private void setupSSLConfig(Configuration configuration) throws IOException {
    final String sslConfigFile = inputOptions.getSslConfigurationFile();

    // getResource yields null when the named resource cannot be found; report
    // that as the documented IOException instead of failing on a null dereference.
    final java.net.URL sslConfigUrl = configuration.getResource(sslConfigFile);
    if (sslConfigUrl == null) {
        throw new IOException("Unable to locate SSL configuration file: " + sslConfigFile);
    }
    Path sslConfigPath = new Path(sslConfigUrl.toString());

    addSSLFilesToDistCache(configuration, sslConfigPath);

    // Both labels are set to the bare file name of the SSL configuration file.
    configuration.set(DistCpConstants.CONF_LABEL_SSL_CONF, sslConfigPath.getName());
    configuration.set(DistCpConstants.CONF_LABEL_SSL_KEYSTORE, sslConfigPath.getName());
}

From source file:com.inmobi.conduit.distcp.tools.DistCp.java

License:Apache License

/**
 * Registers the SSL trust store, key store and SSL configuration file with
 * the distributed cache.
 *
 * For each store, the location is resolved from the SSL configuration file,
 * the qualified local path is added to the cache, and the corresponding
 * property on the job configuration is overwritten with just the file name.
 *
 * @param configuration - Job configuration
 * @param sslConfigPath - ssl Configuration file specified through options
 * @throws IOException - If any
 */
private void addSSLFilesToDistCache(Configuration configuration, Path sslConfigPath) throws IOException {
    final FileSystem localFS = FileSystem.getLocal(configuration);

    final Configuration sslConf = new Configuration(false);
    sslConf.addResource(sslConfigPath);

    // Trust store first, then key store.
    final String[] storeLocationKeys = { "ssl.client.truststore.location", "ssl.client.keystore.location" };
    for (String locationKey : storeLocationKeys) {
        final Path storePath = getLocalStorePath(sslConf, locationKey);
        DistributedCache.addCacheFile(storePath.makeQualified(localFS).toUri(), configuration);
        configuration.set(locationKey, storePath.getName());
    }

    DistributedCache.addCacheFile(sslConfigPath.makeQualified(localFS).toUri(), configuration);
}

From source file:com.inmobi.conduit.distcp.tools.DistCp.java

License:Apache License

/**
 * Setup output format appropriately/*from  w w w  . j  a  va  2 s. c o  m*/
 *
 * @param job - Job handle
 * @throws IOException - Exception if any
 */
private void configureOutputFormat(Job job) throws IOException {
    final Configuration configuration = job.getConfiguration();
    Path targetPath = inputOptions.getTargetPath();
    targetPath = targetPath.makeQualified(targetPath.getFileSystem(configuration));

    if (inputOptions.shouldAtomicCommit()) {
        Path workDir = inputOptions.getAtomicWorkPath();
        if (workDir == null) {
            workDir = targetPath.getParent();
        }
        workDir = new Path(workDir, WIP_PREFIX + targetPath.getName() + rand.nextInt());
        FileSystem workFS = workDir.getFileSystem(configuration);
        FileSystem targetFS = targetPath.getFileSystem(configuration);
        if (!DistCpUtils.compareFs(targetFS, workFS)) {
            throw new IllegalArgumentException("Work path " + workDir + " and target path " + targetPath
                    + " are in different file system");
        }
        CopyOutputFormat.setWorkingDirectory(job, workDir);
    } else {
        CopyOutputFormat.setWorkingDirectory(job, targetPath);
    }
    CopyOutputFormat.setCommitDirectory(job, targetPath);

    Path counterFilePath = inputOptions.getOutPutDirectory();
    if (counterFilePath == null) {
        LOG.error("Output directory is null for distcp");
    } else {
        LOG.info("DistCp output directory path: " + counterFilePath);
        CopyOutputFormat.setOutputPath(job, counterFilePath);
    }

}