Example usage for org.apache.hadoop.fs Path toUri

List of usage examples for org.apache.hadoop.fs Path toUri

Introduction

In this page you can find the example usage for org.apache.hadoop.fs Path toUri.

Prototype

public URI toUri() 

Source Link

Document

Convert this Path to a URI.

Usage

From source file:com.facebook.presto.hive.PrestoS3FileSystem.java

License:Apache License

private static String keyFromPath(Path path) {
    checkArgument(path.isAbsolute(), "Path is not absolute: %s", path);
    String key = nullToEmpty(path.toUri().getPath());
    if (key.startsWith("/")) {
        key = key.substring(1);/*from   w w w.  j a va2s .c o  m*/
    }
    if (key.endsWith("/")) {
        key = key.substring(0, key.length() - 1);
    }
    return key;
}

From source file:com.facebook.presto.hive.s3.PrestoS3FileSystem.java

License:Apache License

private static String keyFromPath(Path path) {
    checkArgument(path.isAbsolute(), "Path is not absolute: %s", path);
    String key = nullToEmpty(path.toUri().getPath());
    if (key.startsWith(PATH_SEPARATOR)) {
        key = key.substring(PATH_SEPARATOR.length());
    }/*from   w  w  w  .  j a va 2  s  .  c  o  m*/
    if (key.endsWith(PATH_SEPARATOR)) {
        key = key.substring(0, key.length() - PATH_SEPARATOR.length());
    }
    return key;
}

From source file:com.facebook.presto.hive.S3SelectCsvRecordReader.java

License:Apache License

@Override
public SelectObjectContentRequest buildSelectObjectRequest(Properties schema, String query, Path path) {
    SelectObjectContentRequest selectObjectRequest = new SelectObjectContentRequest();
    URI uri = path.toUri();
    selectObjectRequest.setBucketName(PrestoS3FileSystem.getBucketName(uri));
    selectObjectRequest.setKey(PrestoS3FileSystem.keyFromPath(path));
    selectObjectRequest.setExpression(query);
    selectObjectRequest.setExpressionType(ExpressionType.SQL);

    String fieldDelimiter = getFieldDelimiter(schema);
    String quoteChar = schema.getProperty(QUOTE_CHAR, null);
    String escapeChar = schema.getProperty(ESCAPE_CHAR, null);

    CSVInput selectObjectCSVInputSerialization = new CSVInput();
    selectObjectCSVInputSerialization.setRecordDelimiter(lineDelimiter);
    selectObjectCSVInputSerialization.setFieldDelimiter(fieldDelimiter);
    selectObjectCSVInputSerialization.setComments(COMMENTS_CHAR_STR);
    selectObjectCSVInputSerialization.setQuoteCharacter(quoteChar);
    selectObjectCSVInputSerialization.setQuoteEscapeCharacter(escapeChar);
    InputSerialization selectObjectInputSerialization = new InputSerialization();

    CompressionCodec codec = compressionCodecFactory.getCodec(path);
    if (codec instanceof GzipCodec) {
        selectObjectInputSerialization.setCompressionType(CompressionType.GZIP);
    } else if (codec instanceof BZip2Codec) {
        selectObjectInputSerialization.setCompressionType(CompressionType.BZIP2);
    } else if (codec != null) {
        throw new PrestoException(NOT_SUPPORTED, "Compression extension not supported for S3 Select: " + path);
    }//from  w w  w.  j  a  v a  2  s .  co  m

    selectObjectInputSerialization.setCsv(selectObjectCSVInputSerialization);
    selectObjectRequest.setInputSerialization(selectObjectInputSerialization);

    OutputSerialization selectObjectOutputSerialization = new OutputSerialization();
    CSVOutput selectObjectCSVOutputSerialization = new CSVOutput();
    selectObjectCSVOutputSerialization.setRecordDelimiter(lineDelimiter);
    selectObjectCSVOutputSerialization.setFieldDelimiter(fieldDelimiter);
    selectObjectCSVOutputSerialization.setQuoteCharacter(quoteChar);
    selectObjectCSVOutputSerialization.setQuoteEscapeCharacter(escapeChar);
    selectObjectOutputSerialization.setCsv(selectObjectCSVOutputSerialization);
    selectObjectRequest.setOutputSerialization(selectObjectOutputSerialization);

    return selectObjectRequest;
}

From source file:com.flipkart.fdp.migration.distcp.core.MirrorUtils.java

License:Apache License

public static String getSimplePath(Path path) {
    return path.toUri().getPath();
}

From source file:com.flipkart.fdp.migration.distcp.utils.HistoricFileCleanUpDriver.java

License:Apache License

public List<String> getAllFilePath(Path filePath) throws IOException {
    List<String> fileList = new ArrayList<String>();
    FileStatus[] fileStatus = fs.listStatus(filePath);
    for (FileStatus fileStat : fileStatus) {
        if (fileStat.isDirectory()) {
            if (fileStat.getModificationTime() >= startTS && fileStat.getModificationTime() <= endTS
                    && filePath.toUri().getPath().toString() != rootpath)
                fileList.add(fileStat.getPath().toUri().getPath());
            else//from   w ww  .ja  va2  s  . com
                fileList.addAll(getAllFilePath(fileStat.getPath()));
        }
        // } else {
        // if (fileStat.getModificationTime() >= startTS
        // && fileStat.getModificationTime() <= endTS)
        // fileList.add(fileStat.getPath().toUri().getPath());
        // }
    }
    return fileList;
}

From source file:com.flyhz.avengers.framework.AvengersClient.java

License:Apache License

/**
 * Main run function for the client/*from   w  w  w.  j a  va2 s  . c  o  m*/
 * 
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
private boolean run(String appName, List<String> commands) throws IOException, YarnException {

    LOG.info("Running Client");

    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // if needed
    // If we do not have min/max, we may not be able to correctly request
    // the required resources from the RM for the app master
    // Memory ask has to be a multiple of min and less than max.
    // Dump out information about cluster capability as seen by the resource
    // manager
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();
    appContext.setApplicationName(appName);

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of
    // the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    FileSystem fs = DistributedFileSystem.get(conf);
    Path src = new Path(appJar);
    Path dst = new Path(fs.getHomeDirectory(), "avengers/" + batchId + "/avengers.jar");
    if (copy) {
        LOG.info("copy local jar to hdfs");
        fs.copyFromLocalFile(false, true, src, dst);
        copy = false;
    }
    this.hdfsPath = dst.toUri().toString();
    LOG.info("hdfs hdfsPath = {}", dst);
    FileStatus destStatus = fs.getFileStatus(dst);
    LocalResource amJarRsrc = Records.newRecord(LocalResource.class);

    amJarRsrc.setType(LocalResourceType.FILE);
    amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    LOG.info("YarnURLFromPath ->{}", ConverterUtils.getYarnUrlFromPath(dst));
    amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
    amJarRsrc.setTimestamp(destStatus.getModificationTime());
    amJarRsrc.setSize(destStatus.getLen());
    localResources.put("avengers.jar", amJarRsrc);

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        Path log4jSrc = new Path(log4jPropFile);
        Path log4jDst = new Path(fs.getHomeDirectory(), "log4j.props");
        fs.copyFromLocalFile(false, true, log4jSrc, log4jDst);
        FileStatus log4jFileStatus = fs.getFileStatus(log4jDst);
        LocalResource log4jRsrc = Records.newRecord(LocalResource.class);
        log4jRsrc.setType(LocalResourceType.FILE);
        log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
        log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri()));
        log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime());
        log4jRsrc.setSize(log4jFileStatus.getLen());
        localResources.put("log4j.properties", log4jRsrc);
    }

    // The shell script has to be made available on the final container(s)
    // where it will be executed.
    // To do this, we need to first copy into the filesystem that is visible
    // to the yarn framework.
    // We do not need to set this as a local resource for the application
    // master as the application master does not need it.

    // Set local resource info into app master container launch context
    amContainer.setLocalResources(localResources);

    // Set the necessary security tokens as needed
    // amContainer.setContainerTokens(containerToken);

    // Set the env variables to be setup in the env where the application
    // master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$()).append(File.pathSeparatorChar);
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
        classPathEnv.append(File.pathSeparatorChar);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(File.pathSeparatorChar).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }
    LOG.info("CLASSPATH -> " + classPathEnv);
    env.put("CLASSPATH", classPathEnv.toString());

    amContainer.setEnvironment(env);

    for (String cmd : commands) {
        LOG.info("run command {},appId {}", cmd, appId.getId());
    }

    amContainer.setCommands(commands);

    // Set up resource type requirements
    // For now, only memory is supported so we set memory requirements
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(amMemory);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    Priority pri = Records.newRecord(Priority.class);
    pri.setPriority(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp =
    // applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on
    // success
    // or an exception thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);

    // Try submitting the same request again
    // app submission failure?

    // Monitor the application
    return monitorApplication(appId);

}

From source file:com.github.hdl.tensorflow.yarn.app.TFAmContainer.java

License:Apache License

public void addToLocalResources(FileSystem fs, Path dst, String fileDstPath,
        Map<String, LocalResource> localResources) throws IOException {
    FileStatus scFileStatus = fs.getFileStatus(dst);
    LocalResource scRsrc = LocalResource.newInstance(URL.fromURI(dst.toUri()), LocalResourceType.FILE,
            LocalResourceVisibility.APPLICATION, scFileStatus.getLen(), scFileStatus.getModificationTime());
    localResources.put(fileDstPath, scRsrc);
}

From source file:com.github.hdl.tensorflow.yarn.app.TFContainer.java

License:Apache License

public void addToLocalResources(FileSystem fs, Path dst, String fileDstPath,
        Map<String, LocalResource> localResources) throws IOException {
    FileStatus scFileStatus = fs.getFileStatus(dst);
    LOG.info("Path " + dst.toString() + "->" + " " + fileDstPath);
    LocalResource scRsrc = LocalResource.newInstance(URL.fromURI(dst.toUri()), LocalResourceType.FILE,
            LocalResourceVisibility.APPLICATION, scFileStatus.getLen(), scFileStatus.getModificationTime());
    localResources.put(fileDstPath, scRsrc);
}

From source file:com.github.hdl.tensorflow.yarn.app.TFContainer.java

License:Apache License

public void addToLocalResources(FileSystem fs, String fileSrcPath, String fileDstPath, String appId,
        Map<String, LocalResource> localResources, String resources) throws IOException {

    execCmd("pwd");
    execCmd("ls -l");
    String suffix = appName + "/" + appId + "/" + fileDstPath;
    Path dst = new Path(fs.getHomeDirectory(), suffix);
    LOG.info("copy: " + fileSrcPath + " ===> " + dst.toString());
    if (fileSrcPath == null) {
        FSDataOutputStream ostream = null;
        try {//from w ww.  j  av  a  2  s . c  o  m
            ostream = FileSystem.create(fs, dst, new FsPermission((short) 0710));
            ostream.writeUTF(resources);
        } finally {
            IOUtils.closeQuietly(ostream);
        }
    } else {
        fs.copyFromLocalFile(new Path(fileSrcPath), dst);
    }

    FileStatus scFileStatus = fs.getFileStatus(dst);
    LocalResource scRsrc = LocalResource.newInstance(URL.fromURI(dst.toUri()), LocalResourceType.FILE,
            LocalResourceVisibility.APPLICATION, scFileStatus.getLen(), scFileStatus.getModificationTime());
    localResources.put(fileDstPath, scRsrc);
}

From source file:com.github.sakserv.minicluster.yarn.InJvmContainerExecutor.java

License:Apache License

/**
 * Extracts {@link LocalResource}s from the {@link Container}.
 *///  ww  w .  ja  v  a 2 s.co m
@SuppressWarnings("unchecked")
private Set<Path> extractUserProvidedClassPathEntries(Container container) {
    Map<Path, List<String>> localizedResources;
    try {
        Field lf = container.getClass().getDeclaredField("localizedResources");
        lf.setAccessible(true);
        localizedResources = (Map<Path, List<String>>) lf.get(container);
        Set<Path> paths = localizedResources.keySet();
        // Needed for Tez
        for (Path path : paths) {
            if (path.toString().endsWith("tez-conf.pb") || path.toString().endsWith("tez-dag.pb")) {
                File sourceFile = new File(path.toUri());

                File targetFile = new File(System.getenv(Environment.PWD.name()) + "/" + sourceFile.getName());
                FileUtils.copyFile(sourceFile, targetFile);

                //         System.out.println("######## Copied file: " + targetFile);
                //         FileInputStream fis = new FileInputStream(new File(System.getenv(Environment.PWD.name()), targetFile.getName()));
                //         System.out.println(fis.available());
                //         fis.close();
                //         break;
            }
        }
        return paths;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}