Example usage for org.apache.hadoop.fs Path toUri

Introduction

This page lists usage examples for org.apache.hadoop.fs.Path#toUri.

Prototype

public URI toUri() 

Document

Convert this Path to a URI.
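
The returned java.net.URI only carries a scheme and authority when the Path was built with them (for example via FileSystem#makeQualified); a relative Path yields a bare, scheme-less URI. A minimal sketch of that behavior (the host name and paths are illustrative, not taken from the examples below):

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class PathToUriDemo {
    public static void main(String[] args) {
        // A fully qualified path keeps its scheme and authority in the URI.
        URI qualified = new Path("hdfs://namenode:8020/user/alice/data.txt").toUri();
        System.out.println(qualified.getScheme());    // hdfs
        System.out.println(qualified.getAuthority()); // namenode:8020
        System.out.println(qualified.getPath());      // /user/alice/data.txt

        // A relative path yields a URI with no scheme or authority.
        URI relative = new Path("data/part-00000").toUri();
        System.out.println(relative.getScheme());     // null
        System.out.println(relative.getPath());       // data/part-00000
    }
}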

Usage

From source file:com.chinamobile.bcbsp.client.BSPJobClient.java

License:Apache License

/**
 * Submit a new job to run.
 * @param job BSPJob
 * @return the handle to the running job
 *
 * Review comments: (1) The content of submitJobDir is decided by the
 * client. I think it is dangerous because two different clients may
 * generate the same submitJobDir. Review time: 2011-11-30; Reviewer:
 * Hongxu Zhang. Fix log: (1) In order to avoid the conflict, I use the
 * jobId to generate the submitJobDir. Because the jobId is unique, this
 * problem can be solved. Fix time: 2011-12-04; Programmer: Zhigang Wang.
 * Review comments: (2) Here, the client must submit relevant information
 * about the job. There may be some exceptions during this process. When
 * exceptions occur, this job should not be executed and the relevant
 * submitJobDir must be cleaned up. Review time: 2011-12-04; Reviewer:
 * Hongxu Zhang. Fix log: (2) The process of submitting files has been
 * surrounded by try-catch. The submitJobDir will be cleaned up in the
 * catch block. Fix time: 2011-12-04; Programmer: Zhigang Wang.
 */
public RunningJob submitJobInternal(BSPJob job) {
    BSPJobID jobId = null;
    Path submitJobDir = null;
    try {
        jobId = jobSubmitClient.getNewJobId();
        submitJobDir = new Path(getSystemDir(), "submit_" + jobId.toString());
        Path submitJarFile = null;
        LOG.info("debug: job type is " + job.getJobType());
        if (Constants.USER_BC_BSP_JOB_TYPE_C.equals(job.getJobType())) {
            submitJarFile = new Path(submitJobDir, "jobC");
            LOG.info("debug:" + submitJarFile.toString());
        } else {
            LOG.info("debug: before  submitJarFile = new " + "Path(submitJobDir,job.jar);");
            submitJarFile = new Path(submitJobDir, "job.jar");
            LOG.info("debug:" + submitJarFile.toString());
        }
        Path submitJobFile = new Path(submitJobDir, "job.xml");
        Path submitSplitFile = new Path(submitJobDir, "job.split");
        // set this user's id in job configuration, so later job files can
        // be accessed using this user's id
        UnixUserGroupInformation ugi = getUGI(job.getConf());
        // Create a number of filenames in the BSPController's fs namespace
        FileSystem files = getFs();
        files.delete(submitJobDir, true);
        submitJobDir = files.makeQualified(submitJobDir);
        submitJobDir = new Path(submitJobDir.toUri().getPath());
        BSPFsPermission bspSysPerms = new BSPFspermissionImpl(2);
        FileSystem.mkdirs(files, submitJobDir, bspSysPerms.getFp());
        files.mkdirs(submitJobDir);
        short replication = (short) job.getInt("bsp.submit.replication", 10);
        String originalJarPath = null;
        LOG.info("debug: job type is " + job.getJobType());
        if (Constants.USER_BC_BSP_JOB_TYPE_C.equals(job.getJobType())) {
            LOG.info("debug: originalJarPath = job.getJobExe();" + job.getJobExe());
            originalJarPath = job.getJobExe();
            LOG.info("debug:" + submitJarFile.toString());
            job.setJobExe(submitJarFile.toString());
        } else {
            LOG.info("debug: jar");
            originalJarPath = job.getJar();
            job.setJar(submitJarFile.toString());
        }
        if (originalJarPath != null) {
            // copy jar to BSPController's fs
            // use jar name if job is not named.
            if ("".equals(job.getJobName())) {
                job.setJobName(new Path(originalJarPath).getName());
            }
            // job.setJar(submitJarFile.toString());
            fs.copyFromLocalFile(new Path(originalJarPath), submitJarFile);
            fs.setReplication(submitJarFile, replication);
            fs.setPermission(submitJarFile, new BSPFspermissionImpl(0).getFp());
        } else {
            LOG.warn("No job jar file set.  User classes may not be found. "
                    + "See BSPJob#setJar(String) or check Your jar file.");
        }
        // Set the user's name and working directory
        job.setUser(ugi.getUserName());
        if (ugi.getGroupNames().length > 0) {
            job.set("group.name", ugi.getGroupNames()[0]);
        }
        if (new BSPHdfsImpl().getWorkingDirectory() == null) {
            job.setWorkingDirectory(fs.getWorkingDirectory());
        }
        int maxClusterStaffs = jobSubmitClient.getClusterStatus(false).getMaxClusterStaffs();
        if (job.getNumPartition() == 0) {
            job.setNumPartition(maxClusterStaffs);
        }
        if (job.getNumPartition() > maxClusterStaffs) {
            job.setNumPartition(maxClusterStaffs);
        }
        job.setNumBspStaff(job.getNumPartition());
        int splitNum = 0;
        splitNum = writeSplits(job, submitSplitFile);
        if (splitNum > job.getNumPartition() && splitNum <= maxClusterStaffs) {
            job.setNumPartition(splitNum);
            job.setNumBspStaff(job.getNumPartition());
        }
        if (splitNum > maxClusterStaffs) {
            LOG.error("Sorry, the number of files is more than maxClusterStaffs:" + maxClusterStaffs);
            throw new IOException("Could not launch job");
        }
        job.set(Constants.USER_BC_BSP_JOB_SPLIT_FILE, submitSplitFile.toString());
        LOG.info("[Max Staff Number] " + maxClusterStaffs);
        LOG.info("The number of splits for the job is: " + splitNum);
        LOG.info("The number of staffs for the job is: " + job.getNumBspStaff());
        BSPFSDataOutputStream bspout = new BSPFSDataOutputStreamImpl(fs, submitJobFile,
                new BSPFspermissionImpl(0).getFp());
        try {
            job.writeXml(bspout.getOut());
        } finally {
            bspout.close();
        }
        // Now, actually submit the job (using the submit name)
        JobStatus status = jobSubmitClient.submitJob(jobId, submitJobFile.toString());
        if (status != null) {
            return new NetworkedJob(status);
        } else {
            throw new IOException("Could not launch job");
        }
    } catch (FileNotFoundException fnfE) {
        LOG.error("Exception has been caught in BSPJobClient--submitJobInternal!", fnfE);
        Fault f = new Fault(Fault.Type.SYSTEMSERVICE, Fault.Level.INDETERMINATE, "null", fnfE.toString());
        jobSubmitClient.recordFault(f);
        jobSubmitClient.recovery(jobId);
        try {
            FileSystem files = getFs();
            files.delete(submitJobDir, true);
        } catch (IOException e) {
            throw new RuntimeException("Failed to cleanup the submitJobDir", e);
        }
        return null;
    } catch (ClassNotFoundException cnfE) {
        LOG.error("Exception has been caught in BSPJobClient--submitJobInternal!", cnfE);
        Fault f = new Fault(Fault.Type.SYSTEMSERVICE, Fault.Level.WARNING, "null", cnfE.toString());
        jobSubmitClient.recordFault(f);
        jobSubmitClient.recovery(jobId);
        try {
            FileSystem files = getFs();
            files.delete(submitJobDir, true);
        } catch (IOException e) {
            throw new RuntimeException("Failed to cleanup the submitJobDir", e);
        }
        return null;
    } catch (InterruptedException iE) {
        LOG.error("Exception has been caught in BSPJobClient--submitJobInternal!", iE);
        Fault f = new Fault(Fault.Type.SYSTEMSERVICE, Fault.Level.CRITICAL, "null", iE.toString());
        jobSubmitClient.recordFault(f);
        jobSubmitClient.recovery(jobId);
        try {
            FileSystem files = getFs();
            files.delete(submitJobDir, true);
        } catch (IOException e) {
            throw new RuntimeException("Failed to cleanup the submitJobDir", e);
        }
        return null;
    } catch (Exception ioE) {
        LOG.error("Exception has been caught in BSPJobClient--submitJobInternal!", ioE);
        Fault f = new Fault(Fault.Type.DISK, Fault.Level.CRITICAL, "null", ioE.toString());
        jobSubmitClient.recordFault(f);
        jobSubmitClient.recovery(jobId);
        try {
            FileSystem files = getFs();
            files.delete(submitJobDir, true);
        } catch (IOException e) {
            throw new RuntimeException("Failed to cleanup the submitJobDir", e);
        }
        return null;
    }
}
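
The line worth pausing on in this example is submitJobDir = new Path(submitJobDir.toUri().getPath()): after qualifying the directory, rebuilding the Path from toUri().getPath() deliberately strips the scheme and authority again, leaving a scheme-less absolute path. A hedged sketch of that round trip (the host and directory names are invented):

import org.apache.hadoop.fs.Path;

public class StripSchemeDemo {
    public static void main(String[] args) {
        Path qualified = new Path("hdfs://controller:9000/system/submit_job_0001");
        // toUri().getPath() keeps only the path component and drops
        // "hdfs://controller:9000".
        Path bare = new Path(qualified.toUri().getPath());
        System.out.println(bare); // /system/submit_job_0001
    }
}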

From source file:com.cloudera.cdk.data.TestDatasetDescriptor.java

License:Apache License

@Test
public void testSchemaFromHdfs() throws IOException {
    FileSystem fs = getDFS();

    // copy a schema to HDFS
    Path schemaPath = fs.makeQualified(new Path("schema.avsc"));
    FSDataOutputStream out = fs.create(schemaPath);
    IOUtils.copyBytes(USER_SCHEMA_URL.toURL().openStream(), out, fs.getConf());
    out.close();

    // build a schema using the HDFS path and check it's the same
    Schema schema = new DatasetDescriptor.Builder().schemaUri(schemaPath.toUri()).build().getSchema();

    Assert.assertEquals(USER_SCHEMA, schema);
}
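
DatasetDescriptor.Builder#schemaUri accepts a java.net.URI, which is why the test hands over schemaPath.toUri(). Because the path was first passed through fs.makeQualified, that URI carries the hdfs:// scheme and authority and stays resolvable without the original FileSystem handle. A small sketch of what qualification adds (the namenode address and home directory are assumptions):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class QualifyDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        Path relative = new Path("schema.avsc");
        System.out.println(relative.toUri());                   // schema.avsc
        System.out.println(fs.makeQualified(relative).toUri());
        // e.g. hdfs://namenode:8020/user/alice/schema.avsc
    }
}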

From source file:com.cloudera.cdk.tools.JobClasspathHelper.java

License:Apache License

/**
 * @param conf
 *            Configuration object for the Job. Used to get the FileSystem associated with it.
 * @param libDir
 *            Destination directory in the FileSystem (Usually HDFS) where to upload and look for the libs.
 * @param classesToInclude
 *            Classes that are needed by the job. JarFinder will look for the jar containing these classes.
 * @throws Exception
 */
public void prepareClasspath(final Configuration conf, final Path libDir, Class<?>... classesToInclude)
        throws Exception {
    FileSystem fs = null;
    List<Class<?>> classList = new ArrayList<Class<?>>(Arrays.asList(classesToInclude));
    fs = FileSystem.get(conf);
    Map<String, String> jarMd5Map = new TreeMap<String, String>();
    // For each class, we use JarFinder to locate its jar on the local classpath.
    for (Class<?> clz : classList) {
        if (clz != null) {
            String localJarPath = JarFinder.getJar(clz);
            // we don't want to upload the same jar twice
            if (!jarMd5Map.containsKey(localJarPath)) {
                // We should not push core Hadoop classes with this tool.
                // Should it be the responsibility of the developer or we let
                // this fence here?
                if (!clz.getName().startsWith("org.apache.hadoop.")) {
                    // we compute the MD5 sum of the local jar
                    InputStream in = new FileInputStream(localJarPath);
                    boolean threw = true;
                    try {
                        String md5sum = DigestUtils.md5Hex(in);
                        jarMd5Map.put(localJarPath, md5sum);
                        threw = false;
                    } finally {
                        Closeables.close(in, threw);
                    }
                } else {
                    logger.info("Ignoring {}, since it looks like it's from Hadoop's core libs", localJarPath);
                }
            }
        }
    }

    for (Entry<String, String> entry : jarMd5Map.entrySet()) {
        Path localJarPath = new Path(entry.getKey());
        String jarFilename = localJarPath.getName();
        String localMd5sum = entry.getValue();
        logger.info("Jar {}. MD5 : [{}]", localJarPath, localMd5sum);

        Path remoteJarPath = new Path(libDir, jarFilename);
        Path remoteMd5Path = new Path(libDir, jarFilename + ".md5");

        // If the jar file does not exist in HDFS or if the MD5 file does not exist in HDFS,
        // we force the upload of the jar.
        if (!fs.exists(remoteJarPath) || !fs.exists(remoteMd5Path)) {
            copyJarToHDFS(fs, localJarPath, localMd5sum, remoteJarPath, remoteMd5Path);
        } else {
            // If the jar exists, we validate the MD5 file.
            // If the MD5 sums differ, we upload the jar again.
            FSDataInputStream md5FileStream = null;

            String remoteMd5sum = "";
            try {
                md5FileStream = fs.open(remoteMd5Path);
                byte[] md5bytes = new byte[32];
                if (32 == md5FileStream.read(md5bytes)) {
                    remoteMd5sum = new String(md5bytes, Charsets.UTF_8);
                }
            } finally {
                if (md5FileStream != null) {
                    md5FileStream.close();
                }
            }

            if (localMd5sum.equals(remoteMd5sum)) {
                logger.info("Jar {} already exists [{}] and md5sum are equals", jarFilename,
                        remoteJarPath.toUri().toASCIIString());
            } else {
                logger.info("Jar {} already exists [{}] and md5sum are different!", jarFilename,
                        remoteJarPath.toUri().toASCIIString());
                copyJarToHDFS(fs, localJarPath, localMd5sum, remoteJarPath, remoteMd5Path);
            }

        }
        // In all cases we add the jar to the DistributedCache's classpath.
        DistributedCache.addFileToClassPath(remoteJarPath, conf, fs);
    }
    // and we create the symlink (was necessary in earlier versions of Hadoop)
    DistributedCache.createSymlink(conf);
}

From source file:com.cloudera.cdk.tools.JobClasspathHelper.java

License:Apache License

/**
 * @param fs
 *            File system where to upload the jar.
 * @param localJarPath
 *            The local path where we find the jar.
 * @param md5sum
 *            The MD5 sum of the local jar.
 * @param remoteJarPath
 *            The remote path where to upload the jar.
 * @param remoteMd5Path
 *            The remote path where to create the MD5 file.
 * 
 * @throws IOException
 */
private void copyJarToHDFS(FileSystem fs, Path localJarPath, String md5sum, Path remoteJarPath,
        Path remoteMd5Path) throws IOException {

    logger.info("Copying {} to {}", localJarPath.toUri().toASCIIString(),
            remoteJarPath.toUri().toASCIIString());
    fs.copyFromLocalFile(localJarPath, remoteJarPath);
    // create the MD5 file for this jar.
    createMd5SumFile(fs, md5sum, remoteMd5Path);

    // We need to clean up the tmp files created by JarFinder when the JVM exits.
    if (remoteJarPath.getName().startsWith(JarFinder.TMP_HADOOP)) {
        fs.deleteOnExit(remoteJarPath);
    }
    // same for the MD5 file.
    if (remoteMd5Path.getName().startsWith(JarFinder.TMP_HADOOP)) {
        fs.deleteOnExit(remoteMd5Path);
    }
}
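
Both log statements in this helper format the destination with toUri().toASCIIString() rather than plain toString(). The difference only shows up with non-ASCII characters: toASCIIString() percent-encodes them, keeping log lines 7-bit clean. A small sketch with an invented file name:

import org.apache.hadoop.fs.Path;

public class AsciiStringDemo {
    public static void main(String[] args) {
        Path p = new Path("hdfs://namenode:8020/lib/caf\u00e9.jar");
        System.out.println(p.toUri().toString());      // ...:8020/lib/café.jar
        System.out.println(p.toUri().toASCIIString()); // ...:8020/lib/caf%C3%A9.jar
    }
}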

From source file:com.cloudera.crunch.util.DistCache.java

License:Open Source License

public static void write(Configuration conf, Path path, Object value) throws IOException {
    ObjectOutputStream oos = new ObjectOutputStream(FileSystem.get(conf).create(path));
    oos.writeObject(value);
    oos.close();

    DistributedCache.addCacheFile(path.toUri(), conf);
}
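
DistributedCache.addCacheFile takes a java.net.URI rather than a Path, which is the reason for toUri() here. A URI fragment can additionally name the symlink that tasks see in their working directory; a hedged sketch of that common pattern (the file path and link name are illustrative):

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;

public class CacheFileDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path cached = new Path("/cache/lookup.ser");
        // The fragment after '#' becomes the symlink name in the task's
        // working directory once symlinks are enabled.
        DistributedCache.addCacheFile(new URI(cached.toUri() + "#lookup"), conf);
        DistributedCache.createSymlink(conf);
    }
}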

From source file:com.cloudera.hadoop.hdfs.nfs.nfs4.handlers.WRITEHandler.java

License:Apache License

@Override
protected WRITEResponse doHandle(NFS4Handler server, Session session, WRITERequest request)
        throws NFS4Exception, IOException {
    if (session.getCurrentFileHandle() == null) {
        throw new NFS4Exception(NFS4ERR_NOFILEHANDLE);
    }

    FileHandle fileHandle = session.getCurrentFileHandle();
    Path path = server.getPath(fileHandle);
    String file = path.toUri().getPath();
    FSDataOutputStream out = server.forWrite(request.getStateID(), session.getFileSystem(), fileHandle, false);

    LOGGER.info(session.getSessionID() + " xid = " + session.getXID() + ", write accepted " + file + " "
            + request.getOffset());

    WriteOrderHandler writeOrderHandler = server.getWriteOrderHandler(file, out);
    boolean sync = request.getStable() != NFS4_COMMIT_UNSTABLE4;
    int count = writeOrderHandler.write(path.toUri().getPath(), session.getXID(), request.getOffset(), sync,
            request.getData(), request.getStart(), request.getLength());

    WRITEResponse response = createResponse();
    OpaqueData8 verifer = new OpaqueData8();
    verifer.setData(Bytes.toBytes(server.getStartTime()));
    response.setVerifer(verifer);
    server.incrementMetric("HDFS_BYTES_WRITE", count);
    response.setCount(count);
    response.setStatus(NFS4_OK);
    return response;
}

From source file:com.cloudera.hadoop.hdfs.nfs.PathUtils.java

License:Apache License

public static String realPath(Path path) {
    return path.toUri().getPath();
}
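
This helper exists because toString() and toUri().getPath() differ on qualified paths: the former keeps the scheme and authority, while the latter returns only the slash-separated path that a local or NFS-facing layer wants. A quick comparison (the host name is invented):

import org.apache.hadoop.fs.Path;

public class RealPathDemo {
    public static void main(String[] args) {
        Path p = new Path("hdfs://namenode:8020/export/home/file.txt");
        System.out.println(p);                   // hdfs://namenode:8020/export/home/file.txt
        System.out.println(p.toUri().getPath()); // /export/home/file.txt
    }
}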

From source file:com.cloudera.hoop.client.fs.TestHoopFileSystem.java

License:Open Source License

private void testOpen() throws Exception {
    FileSystem fs = FileSystem.get(getHadoopConf());
    Path path = new Path(getHadoopTestDir(), "foo.txt");
    OutputStream os = fs.create(path);
    os.write(1);
    os.close();
    fs.close();
    Configuration conf = new Configuration();
    conf.set("fs.http.impl", HoopFileSystem.class.getName());
    fs = FileSystem.get(getJettyURL().toURI(), conf);
    InputStream is = fs.open(new Path(path.toUri().getPath()));
    Assert.assertEquals(is.read(), 1);
    is.close();
    fs.close();
}
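
Here new Path(path.toUri().getPath()) re-roots the file onto a different FileSystem: the path was created against HDFS, and stripping its scheme and authority lets the same absolute path resolve against the HTTP-backed Hoop filesystem opened just above. The testCreate and testAppend examples below reuse the same trick. A compact sketch of the idea (the namenode address is an assumption):

import org.apache.hadoop.fs.Path;

public class ReRootDemo {
    public static void main(String[] args) {
        Path onHdfs = new Path("hdfs://namenode:8020/test/foo.txt");
        // Dropping the scheme and authority yields a path that any other
        // FileSystem can resolve against its own root.
        Path schemeless = new Path(onHdfs.toUri().getPath());
        System.out.println(schemeless); // /test/foo.txt
    }
}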

From source file:com.cloudera.hoop.client.fs.TestHoopFileSystem.java

License:Open Source License

private void testCreate(Path path, boolean override) throws Exception {
    Configuration conf = new Configuration();
    conf.set("fs.http.impl", HoopFileSystem.class.getName());
    FileSystem fs = FileSystem.get(getJettyURL().toURI(), conf);
    FsPermission permission = new FsPermission(FsAction.READ_WRITE, FsAction.NONE, FsAction.NONE);
    OutputStream os = fs.create(new Path(path.toUri().getPath()), permission, override, 1024, (short) 2,
            100 * 1024 * 1024, null);
    os.write(1);
    os.close();
    fs.close();

    fs = FileSystem.get(getHadoopConf());
    FileStatus status = fs.getFileStatus(path);
    Assert.assertEquals(status.getReplication(), 2);
    Assert.assertEquals(status.getBlockSize(), 100 * 1024 * 1024);
    Assert.assertEquals(status.getPermission(), permission);
    InputStream is = fs.open(path);
    Assert.assertEquals(is.read(), 1);
    is.close();
    fs.close();
}

From source file:com.cloudera.hoop.client.fs.TestHoopFileSystem.java

License:Open Source License

private void testAppend() throws Exception {
    FileSystem fs = FileSystem.get(getHadoopConf());
    Path path = new Path(getHadoopTestDir(), "foo.txt");
    OutputStream os = fs.create(path);
    os.write(1);
    os.close();
    fs.close();
    Configuration conf = new Configuration();
    conf.set("fs.http.impl", HoopFileSystem.class.getName());
    fs = FileSystem.get(getJettyURL().toURI(), conf);
    os = fs.append(new Path(path.toUri().getPath()));
    os.write(2);
    os.close();
    fs.close();
    fs = FileSystem.get(getHadoopConf());
    InputStream is = fs.open(path);
    Assert.assertEquals(is.read(), 1);
    Assert.assertEquals(is.read(), 2);
    Assert.assertEquals(is.read(), -1);
    is.close();
    fs.close();
}