Example usage for org.apache.hadoop.fs Path makeQualified

List of usage examples for org.apache.hadoop.fs Path makeQualified

Introduction

On this page you can find example usages of org.apache.hadoop.fs.Path.makeQualified.

Prototype

@Deprecated
public Path makeQualified(FileSystem fs) 

Source Link

Document

Returns a qualified path object for the FileSystem's working directory.

Usage

From source file:com.indeed.imhotep.builder.tsv.EasyIndexBuilderFromTSV.java

License:Apache License

/**
 * Opens {@code inputFile} as UTF-8 text, wrapping the stream in a
 * {@link GZIPInputStream} when the file name ends with ".gz".
 *
 * @param inputFile path of the TSV file to read
 * @return a buffered reader over the (possibly decompressed) file contents
 * @throws RuntimeException if the file does not exist; an {@link IOException}
 *         raised while opening the file is rethrown through
 *         {@code Throwables.propagate}
 */
private BufferedReader getInputFileReader(Path inputFile) {
    try {
        final FileSystem hdfs = getHDFS(inputFile);
        final Path qualifiedInputFile = inputFile.makeQualified(hdfs);
        if (!hdfs.exists(inputFile)) {
            throw new RuntimeException("The provided input file doesn't exist " + qualifiedInputFile
                    + "\nFor hdfs files use 'hdfs:' prefix like hdfs:/tmp/file.tsv");
        }
        log.info("Reading TSV data from " + qualifiedInputFile);
        final InputStream rawStream = hdfs.open(inputFile);
        InputStream inputStream = rawStream;
        if (inputFile.getName().endsWith(".gz")) {
            try {
                // The constructor reads the gzip header and can fail on a
                // truncated or non-gzip file.
                inputStream = new GZIPInputStream(rawStream);
            } catch (IOException e) {
                // Release the already-opened file handle before reporting the failure.
                try {
                    rawStream.close();
                } catch (IOException ignored) {
                    // The header failure is the error worth reporting.
                }
                throw e;
            }
        }
        return new BufferedReader(new InputStreamReader(inputStream, Charsets.UTF_8));
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
}

From source file:com.indeed.imhotep.builder.tsv.TsvConverter.java

License:Apache License

/**
 * Verifies that {@code path} exists on the file system returned by
 * {@code getFS(path)}.
 *
 * Any exception raised while resolving the file system or checking the path
 * is treated as "does not exist", but is kept as the cause of the thrown
 * exception so the underlying failure is not lost.
 *
 * @param path the path to check
 * @throws RuntimeException if the path does not exist or could not be checked
 */
private void checkPathExists(Path path) {
    boolean exists;
    Path qualifiedPath = path;
    Exception lookupFailure = null;
    try {
        final FileSystem fs = getFS(path);
        qualifiedPath = path.makeQualified(fs);
        exists = fs.exists(path);
    } catch (Exception e) {
        exists = false;
        lookupFailure = e;
    }
    if (!exists) {
        // Plain concatenation (no explicit toString()) so a null path still
        // yields a readable message instead of a NullPointerException.
        final String message = "The provided path doesn't exist " + qualifiedPath
                + "\nFor hdfs files use 'hdfs:' prefix like hdfs:/tmp/file.tsv"
                + "\nFor local files use 'file://' prefix like file:/tmp/file.tsv";
        if (lookupFailure == null) {
            throw new RuntimeException(message);
        }
        throw new RuntimeException(message, lookupFailure);
    }
}

From source file:com.inmobi.conduit.distcp.MirrorStreamService.java

License:Apache License

/**
 * Builds the ordered map of paths to commit from the distcp output under
 * {@code tmpOut}.
 *
 * @param tmpOut temporary distcp output directory, e.g.
 *               /conduit/system/tmp/distcp_mirror_&lt;srcCluster&gt;_&lt;destCluster&gt;/
 * @return commit paths in insertion order; empty when the stream root could
 *         not be listed
 * @throws Exception if listing or building the commit paths fails
 */
LinkedHashMap<FileStatus, Path> prepareForCommit(Path tmpOut) throws Exception {
    /*
     * After distcp, files inside tmpOut look like e.g.
     *
     *   <tmpOut>/conduit/streams/<streamName>/2012/1/13/15/7/
     *   <hostname>-<streamName>-2012-01-16-07-21_00000.gz
     *
     * so the stream root is tmpOut followed by the source cluster's
     * unqualified final destination root, e.g. <tmpOut>/conduit/streams/
     */
    final String qualifiedTmpOut = tmpOut.makeQualified(getDestFs()).toString();
    final Path tmpStreamRoot = new Path(
            qualifiedTmpOut + File.separator + getSrcCluster().getUnqaulifiedReadUrlFinalDestDirRoot());
    LOG.debug("tmpStreamRoot [" + tmpStreamRoot + "]");

    /*
     * Several streams can be mirrored from the same cluster. Streams may be
     * processed in any order, but the order of paths within a stream must be
     * retained.
     */
    FileStatus[] streamRoots;
    try {
        streamRoots = getDestFs().listStatus(tmpStreamRoot);
    } catch (FileNotFoundException ignored) {
        // A missing stream root is handled like an empty listing.
        streamRoots = null;
    }

    // LinkedHashMap keeps the commit paths in insertion order.
    final LinkedHashMap<FileStatus, Path> commitPaths = new LinkedHashMap<FileStatus, Path>();
    if (streamRoots == null) {
        return commitPaths;
    }

    for (FileStatus streamRoot : streamRoots) {
        // For each stream, list its paths in order of YYYY/mm/DD/HH/MM.
        final Path streamRootPath = streamRoot.getPath();
        LOG.debug("StreamRoot [" + streamRootPath + "] streamName [" + streamRootPath.getName()
                + "]");
        final List<FileStatus> streamPaths = new ArrayList<FileStatus>();
        createListing(getDestFs(), streamRoot, streamPaths);
        Collections.sort(streamPaths, new DatePathComparator());
        LOG.debug("createListing size: [" + streamPaths.size() + "]");
        createCommitPaths(commitPaths, streamPaths);
    }
    return commitPaths;
}

From source file:com.inmobi.conduit.distcp.tools.DistCp.java

License:Apache License

/**
 * Add SSL files to distributed cache. Trust store, key store and ssl config xml
 *
 * @param configuration - Job configuration
 * @param sslConfigPath - ssl Configuration file specified through options
 * @throws IOException - If any/*from w w  w .  ja  v  a  2 s.  c o  m*/
 */
private void addSSLFilesToDistCache(Configuration configuration, Path sslConfigPath) throws IOException {
    FileSystem localFS = FileSystem.getLocal(configuration);

    Configuration sslConf = new Configuration(false);
    sslConf.addResource(sslConfigPath);

    Path localStorePath = getLocalStorePath(sslConf, "ssl.client.truststore.location");
    DistributedCache.addCacheFile(localStorePath.makeQualified(localFS).toUri(), configuration);
    configuration.set("ssl.client.truststore.location", localStorePath.getName());

    localStorePath = getLocalStorePath(sslConf, "ssl.client.keystore.location");
    DistributedCache.addCacheFile(localStorePath.makeQualified(localFS).toUri(), configuration);
    configuration.set("ssl.client.keystore.location", localStorePath.getName());

    DistributedCache.addCacheFile(sslConfigPath.makeQualified(localFS).toUri(), configuration);
}

From source file:com.inmobi.conduit.distcp.tools.DistCp.java

License:Apache License

/**
 * Setup output format appropriately/*from   w w w .j av a2  s . c  o  m*/
 *
 * @param job - Job handle
 * @throws IOException - Exception if any
 */
private void configureOutputFormat(Job job) throws IOException {
    final Configuration configuration = job.getConfiguration();
    Path targetPath = inputOptions.getTargetPath();
    targetPath = targetPath.makeQualified(targetPath.getFileSystem(configuration));

    if (inputOptions.shouldAtomicCommit()) {
        Path workDir = inputOptions.getAtomicWorkPath();
        if (workDir == null) {
            workDir = targetPath.getParent();
        }
        workDir = new Path(workDir, WIP_PREFIX + targetPath.getName() + rand.nextInt());
        FileSystem workFS = workDir.getFileSystem(configuration);
        FileSystem targetFS = targetPath.getFileSystem(configuration);
        if (!DistCpUtils.compareFs(targetFS, workFS)) {
            throw new IllegalArgumentException("Work path " + workDir + " and target path " + targetPath
                    + " are in different file system");
        }
        CopyOutputFormat.setWorkingDirectory(job, workDir);
    } else {
        CopyOutputFormat.setWorkingDirectory(job, targetPath);
    }
    CopyOutputFormat.setCommitDirectory(job, targetPath);

    Path counterFilePath = inputOptions.getOutPutDirectory();
    if (counterFilePath == null) {
        LOG.error("Output directory is null for distcp");
    } else {
        LOG.info("DistCp output directory path: " + counterFilePath);
        CopyOutputFormat.setOutputPath(job, counterFilePath);
    }

}

From source file:com.inmobi.conduit.distcp.tools.SimpleCopyListing.java

License:Apache License

/**
 * Qualifies {@code path} against the file system it resolves to under the
 * current configuration.
 *
 * @param path the path to qualify
 * @return the qualified path
 * @throws IOException if the file system for the path cannot be obtained
 */
private Path makeQualified(Path path) throws IOException {
    final FileSystem owningFs = path.getFileSystem(getConf());
    return path.makeQualified(owningFs);
}

From source file:com.inmobi.conduit.distcp.tools.TestDistCp.java

License:Apache License

/**
 * Checks, for every entry of {@code pathList}, that the matching target
 * path exists and that source and target agree on being a file.
 *
 * The target path is derived by substituting {@code SOURCE_PATH} with
 * {@code TARGET_PATH} in the qualified source path.
 *
 * @throws Exception if a file system call fails
 */
private static void verifyResults() throws Exception {
    for (Path path : pathList) {
        FileSystem fs = cluster.getFileSystem();

        Path sourcePath = path.makeQualified(fs);
        // String.replace substitutes literally; replaceAll would interpret
        // SOURCE_PATH as a regex and TARGET_PATH as a replacement pattern.
        Path targetPath = new Path(sourcePath.toString().replace(SOURCE_PATH, TARGET_PATH));

        Assert.assertTrue(fs.exists(targetPath));
        Assert.assertEquals(fs.isFile(sourcePath), fs.isFile(targetPath));
    }
}

From source file:com.inmobi.conduit.local.LocalStreamServiceTest.java

License:Apache License

/**
 * Stubs the given (mock) file system so that listing the data directory,
 * the stream directories and the collector directories returns canned
 * {@code FileStatus} arrays built by {@code createTestData}.
 *
 * @param fs      file system mock to stub
 * @param cluster cluster whose data directory is used for the top-level listing
 * @throws Exception if building the test data or the URI fails
 */
private void createMockForFileSystem(FileSystem fs, Cluster cluster) throws Exception {
    // Canned listings, one array per directory that gets stubbed below.
    FileStatus[] files = createTestData(2, "/conduit/data/stream", true);

    FileStatus[] stream1 = createTestData(2, "/conduit/data/stream1/collector", true);

    FileStatus[] stream3 = createTestData(NUMBER_OF_FILES, "/conduit/data/stream1/collector1/file", true);

    FileStatus[] stream4 = createTestData(NUMBER_OF_FILES, "/conduit/data/stream1/collector2/file", true);

    FileStatus[] stream2 = createTestData(2, "/conduit/data/stream2/collector", true);

    FileStatus[] stream5 = createTestData(NUMBER_OF_FILES, "/conduit/data/stream2/collector1/file", true);

    FileStatus[] stream6 = createTestData(NUMBER_OF_FILES, "/conduit/data/stream2/collector2/file", true);

    // Basic file system identity, then the data-dir and per-stream listings.
    when(fs.getWorkingDirectory()).thenReturn(new Path("/tmp/"));
    when(fs.getUri()).thenReturn(new URI("localhost"));
    when(fs.listStatus(cluster.getDataDir())).thenReturn(files);
    when(fs.listStatus(new Path("/conduit/data/stream1"))).thenReturn(stream1);

    // Collector-level listings go through the two-argument listStatus with a path filter.
    // NOTE(review): these stubs pass a concrete Path next to an any(...) matcher;
    // Mockito normally expects every argument to be a matcher (e.g. eq(...)) once
    // one is used - confirm this works with the Mockito version in use.
    when(fs.listStatus(new Path("/conduit/data/stream1/collector1"),
            any(LocalStreamService.CollectorPathFilter.class))).thenReturn(stream3);
    when(fs.listStatus(new Path("/conduit/data/stream2"))).thenReturn(stream2);
    when(fs.listStatus(new Path("/conduit/data/stream1/collector2"),
            any(LocalStreamService.CollectorPathFilter.class))).thenReturn(stream4);
    when(fs.listStatus(new Path("/conduit/data/stream2/collector1"),
            any(LocalStreamService.CollectorPathFilter.class))).thenReturn(stream5);
    when(fs.listStatus(new Path("/conduit/data/stream2/collector2"),
            any(LocalStreamService.CollectorPathFilter.class))).thenReturn(stream6);

    // NOTE(review): this Path mock is local to the method and is not passed to
    // anything visible here - confirm whether the stub has any effect.
    Path file = mock(Path.class);
    when(file.makeQualified(any(FileSystem.class))).thenReturn(new Path("/conduit/data/stream1/collector1/"));
}

From source file:com.inmobi.databus.distcp.DistcpBaseService.java

License:Apache License

/**
 * Qualifies {@code tmpPath} against the destination file system, records it
 * in {@code consumePaths} and returns it.
 *
 * @param tmpPath      source file for distcp; may be {@code null}
 * @param consumePaths map that receives the qualified path, keyed to the
 *                     destination file system
 * @return the qualified path, or {@code null} when {@code tmpPath} is
 *         {@code null}
 * @throws IOException declared for callers; not raised by the visible code
 */
private Path getFinalPathForDistCP(Path tmpPath, Map<Path, FileSystem> consumePaths) throws IOException {
    if (tmpPath == null) {
        // No valid paths to return.
        return null;
    }
    LOG.warn("Source File For distCP [" + tmpPath + "]");
    final Path qualifiedTmpPath = tmpPath.makeQualified(destFs);
    consumePaths.put(qualifiedTmpPath, destFs);
    return qualifiedTmpPath;
}

From source file:com.inmobi.databus.distcp.MergedStreamService.java

License:Apache License

/**
 * Moves every file directly under {@code tmpOut} into a per-category
 * sub-directory of {@code tmpOut} and returns the moved paths grouped by
 * category.
 *
 * Files whose name yields no category from {@code getCategoryFromFileName}
 * are left untouched.
 *
 * @param tmpOut temporary output directory to scan
 * @return map from category to the qualified intermediate paths of its
 *         files; empty when the directory listing yields nothing
 * @throws Exception if a rename fails (the whole prepare step is aborted so
 *                   no data is lost; a retry is expected on the next run) or
 *                   a file system call fails
 */
private Map<String, List<Path>> prepareForCommit(Path tmpOut) throws Exception {
    Map<String, List<Path>> categoriesToCommit = new HashMap<String, List<Path>>();
    FileStatus[] allFiles = getDestFs().listStatus(tmpOut);
    if (allFiles == null) {
        // Some file system implementations report a missing directory as a
        // null listing; there is nothing to commit in that case.
        return categoriesToCommit;
    }
    for (FileStatus file : allFiles) {
        String fileName = file.getPath().getName();
        if (fileName == null) {
            continue;
        }
        String category = getCategoryFromFileName(fileName);
        if (category == null) {
            continue;
        }
        Path intermediatePath = new Path(tmpOut, category);
        if (!getDestFs().exists(intermediatePath)) {
            getDestFs().mkdirs(intermediatePath);
        }
        Path source = file.getPath().makeQualified(getDestFs());

        Path intermediateFilePath = new Path(
                intermediatePath.makeQualified(getDestFs()).toString() + File.separator + fileName);
        if (!getDestFs().rename(source, intermediateFilePath)) {
            LOG.warn("Failed to Rename [" + source + "] to [" + intermediateFilePath + "]");
            LOG.warn("Aborting Tranasction prepareForCommit to avoid data "
                    + "LOSS. Retry would happen in next run");
            throw new Exception("Rename [" + source + "] to [" + intermediateFilePath + "]");
        }
        LOG.debug("Moving [" + source + "] to intermediateFilePath [" + intermediateFilePath + "]");
        List<Path> fileList = categoriesToCommit.get(category);
        if (fileList == null) {
            fileList = new ArrayList<Path>();
            categoriesToCommit.put(category, fileList);
        }
        // Qualify every entry the same way, not only the first one of a category.
        fileList.add(intermediateFilePath.makeQualified(getDestFs()));
    }
    return categoriesToCommit;
}