List of usage examples for org.apache.hadoop.fs.Path#toUri()
public URI toUri()
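Path.toUri() converts an org.apache.hadoop.fs.Path into a java.net.URI, exposing its scheme, authority, and path components. A recurring idiom in the examples below is path.toUri().getPath(), which drops the scheme and authority and keeps only the slash-separated path. A minimal sketch of the behavior (the namenode address and file name are illustrative assumptions, not taken from any of the examples):

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class PathToUriExample {
  public static void main(String[] args) {
    // Hypothetical fully qualified HDFS path, for illustration only.
    Path path = new Path("hdfs://namenode:8020/user/alice/data.txt");
    URI uri = path.toUri();
    System.out.println(uri.getScheme());    // hdfs
    System.out.println(uri.getAuthority()); // namenode:8020
    System.out.println(uri.getPath());      // /user/alice/data.txt
  }
}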
From source file:com.chinamobile.bcbsp.client.BSPJobClient.java
License:Apache License
/**
 * Submit a new job to run.
 * @param job BSPJob
 * @return a handle to the running job, or null if the submission fails
 *
 * Review comments: (1) The content of submitJobDir is decided by the
 * client. I think it is dangerous because two different clients may
 * generate the same submitJobDir. Review time: 2011-11-30; Reviewer:
 * Hongxu Zhang.
 * Fix log: (1) In order to avoid the conflict, I use the jobId to
 * generate the submitJobDir. Because the jobId is unique, this problem
 * can be solved. Fix time: 2011-12-04; Programmer: Zhigang Wang.
 * Review comments: (2) Here, the client must submit relative
 * information about the job. There may be some exceptions during this
 * process. When exceptions occur, this job should not be executed and
 * the relative submitJobDir must be cleaned up. Review time:
 * 2011-12-04; Reviewer: Hongxu Zhang.
 * Fix log: (2) The process of submitting files has been surrounded by
 * try-catch. The submitJobDir will be cleaned up in the catch block.
 * Fix time: 2011-12-04; Programmer: Zhigang Wang.
 */
public RunningJob submitJobInternal(BSPJob job) {
  BSPJobID jobId = null;
  Path submitJobDir = null;
  try {
    jobId = jobSubmitClient.getNewJobId();
    submitJobDir = new Path(getSystemDir(), "submit_" + jobId.toString());
    Path submitJarFile = null;
    LOG.info("debug: job type is " + job.getJobType());
    if (Constants.USER_BC_BSP_JOB_TYPE_C.equals(job.getJobType())) {
      submitJarFile = new Path(submitJobDir, "jobC");
      LOG.info("debug:" + submitJarFile.toString());
    } else {
      LOG.info("debug: before submitJarFile = new Path(submitJobDir,job.jar);");
      submitJarFile = new Path(submitJobDir, "job.jar");
      LOG.info("debug:" + submitJarFile.toString());
    }
    Path submitJobFile = new Path(submitJobDir, "job.xml");
    Path submitSplitFile = new Path(submitJobDir, "job.split");
    // Set this user's id in the job configuration, so later job files
    // can be accessed using this user's id.
    UnixUserGroupInformation ugi = getUGI(job.getConf());
    // Create a number of filenames in the BSPController's fs namespace.
    FileSystem files = getFs();
    files.delete(submitJobDir, true);
    submitJobDir = files.makeQualified(submitJobDir);
    submitJobDir = new Path(submitJobDir.toUri().getPath());
    BSPFsPermission bspSysPerms = new BSPFspermissionImpl(2);
    FileSystem.mkdirs(files, submitJobDir, bspSysPerms.getFp());
    files.mkdirs(submitJobDir);
    short replication = (short) job.getInt("bsp.submit.replication", 10);
    String originalJarPath = null;
    LOG.info("debug: job type is " + job.getJobType());
    if (Constants.USER_BC_BSP_JOB_TYPE_C.equals(job.getJobType())) {
      LOG.info("debug: originalJarPath = job.getJobExe();" + job.getJobExe());
      originalJarPath = job.getJobExe();
      LOG.info("debug:" + submitJarFile.toString());
      job.setJobExe(submitJarFile.toString());
    } else {
      LOG.info("debug: jar");
      originalJarPath = job.getJar();
      job.setJar(submitJarFile.toString());
    }
    if (originalJarPath != null) {
      // Copy the jar to the BSPController's fs.
      // Use the jar name if the job is not named.
      if ("".equals(job.getJobName())) {
        job.setJobName(new Path(originalJarPath).getName());
      }
      // job.setJar(submitJarFile.toString());
      fs.copyFromLocalFile(new Path(originalJarPath), submitJarFile);
      fs.setReplication(submitJarFile, replication);
      fs.setPermission(submitJarFile, new BSPFspermissionImpl(0).getFp());
    } else {
      LOG.warn("No job jar file set. User classes may not be found. "
          + "See BSPJob#setJar(String) or check your jar file.");
    }
    // Set the user's name and working directory.
    job.setUser(ugi.getUserName());
    if (ugi.getGroupNames().length > 0) {
      job.set("group.name", ugi.getGroupNames()[0]);
    }
    if (new BSPHdfsImpl().getWorkingDirectory() == null) {
      job.setWorkingDirectory(fs.getWorkingDirectory());
    }
    int maxClusterStaffs = jobSubmitClient.getClusterStatus(false).getMaxClusterStaffs();
    if (job.getNumPartition() == 0) {
      job.setNumPartition(maxClusterStaffs);
    }
    if (job.getNumPartition() > maxClusterStaffs) {
      job.setNumPartition(maxClusterStaffs);
    }
    job.setNumBspStaff(job.getNumPartition());
    int splitNum = 0;
    splitNum = writeSplits(job, submitSplitFile);
    if (splitNum > job.getNumPartition() && splitNum <= maxClusterStaffs) {
      job.setNumPartition(splitNum);
      job.setNumBspStaff(job.getNumPartition());
    }
    if (splitNum > maxClusterStaffs) {
      LOG.error("Sorry, the number of files is more than maxClusterStaffs:" + maxClusterStaffs);
      throw new IOException("Could not launch job");
    }
    job.set(Constants.USER_BC_BSP_JOB_SPLIT_FILE, submitSplitFile.toString());
    LOG.info("[Max Staff Number] " + maxClusterStaffs);
    LOG.info("The number of splits for the job is: " + splitNum);
    LOG.info("The number of staffs for the job is: " + job.getNumBspStaff());
    BSPFSDataOutputStream bspout = new BSPFSDataOutputStreamImpl(fs, submitJobFile,
        new BSPFspermissionImpl(0).getFp());
    try {
      job.writeXml(bspout.getOut());
    } finally {
      bspout.close();
    }
    // Now, actually submit the job (using the submit name).
    JobStatus status = jobSubmitClient.submitJob(jobId, submitJobFile.toString());
    if (status != null) {
      return new NetworkedJob(status);
    } else {
      throw new IOException("Could not launch job");
    }
  } catch (FileNotFoundException fnfE) {
    LOG.error("Exception has been caught in BSPJobClient--submitJobInternal!", fnfE);
    Fault f = new Fault(Fault.Type.SYSTEMSERVICE, Fault.Level.INDETERMINATE, "null", fnfE.toString());
    jobSubmitClient.recordFault(f);
    jobSubmitClient.recovery(jobId);
    try {
      FileSystem files = getFs();
      files.delete(submitJobDir, true);
    } catch (IOException e) {
      // LOG.error("Failed to cleanup the submitJobDir:" + submitJobDir);
      throw new RuntimeException("Failed to cleanup the submitJobDir", e);
    }
    return null;
  } catch (ClassNotFoundException cnfE) {
    LOG.error("Exception has been caught in BSPJobClient--submitJobInternal!", cnfE);
    Fault f = new Fault(Fault.Type.SYSTEMSERVICE, Fault.Level.WARNING, "null", cnfE.toString());
    jobSubmitClient.recordFault(f);
    jobSubmitClient.recovery(jobId);
    try {
      FileSystem files = getFs();
      files.delete(submitJobDir, true);
    } catch (IOException e) {
      // LOG.error("Failed to cleanup the submitJobDir:" + submitJobDir);
      throw new RuntimeException("Failed to cleanup the submitJobDir", e);
    }
    return null;
  } catch (InterruptedException iE) {
    LOG.error("Exception has been caught in BSPJobClient--submitJobInternal!", iE);
    Fault f = new Fault(Fault.Type.SYSTEMSERVICE, Fault.Level.CRITICAL, "null", iE.toString());
    jobSubmitClient.recordFault(f);
    jobSubmitClient.recovery(jobId);
    try {
      FileSystem files = getFs();
      files.delete(submitJobDir, true);
    } catch (IOException e) {
      // LOG.error("Failed to cleanup the submitJobDir:" + submitJobDir);
      throw new RuntimeException("Failed to cleanup the submitJobDir", e);
    }
    return null;
  } catch (Exception ioE) {
    LOG.error("Exception has been caught in BSPJobClient--submitJobInternal!", ioE);
    Fault f = new Fault(Fault.Type.DISK, Fault.Level.CRITICAL, "null", ioE.toString());
    jobSubmitClient.recordFault(f);
    jobSubmitClient.recovery(jobId);
    try {
      FileSystem files = getFs();
      files.delete(submitJobDir, true);
    } catch (IOException e) {
      // LOG.error("Failed to cleanup the submitJobDir:" + submitJobDir);
      throw new RuntimeException("Failed to cleanup the submitJobDir", e);
    }
    return null;
  }
}
From source file:com.cloudera.cdk.data.TestDatasetDescriptor.java
License:Apache License
@Test
public void testSchemaFromHdfs() throws IOException {
  FileSystem fs = getDFS();
  // Copy a schema to HDFS.
  Path schemaPath = fs.makeQualified(new Path("schema.avsc"));
  FSDataOutputStream out = fs.create(schemaPath);
  IOUtils.copyBytes(USER_SCHEMA_URL.toURL().openStream(), out, fs.getConf());
  out.close();
  // Build a schema using the HDFS path and check that it is the same.
  Schema schema = new DatasetDescriptor.Builder().schemaUri(schemaPath.toUri()).build().getSchema();
  Assert.assertEquals(USER_SCHEMA, schema);
}
From source file:com.cloudera.cdk.tools.JobClasspathHelper.java
License:Apache License
/**
 * @param conf
 *            Configuration object for the job. Used to get the FileSystem associated with it.
 * @param libDir
 *            Destination directory in the FileSystem (usually HDFS) where the libs are uploaded and looked up.
 * @param classesToInclude
 *            Classes that are needed by the job. JarFinder will look for the jar containing these classes.
 * @throws Exception
 */
public void prepareClasspath(final Configuration conf, final Path libDir, Class<?>... classesToInclude)
    throws Exception {
  FileSystem fs = null;
  List<Class<?>> classList = new ArrayList<Class<?>>(Arrays.asList(classesToInclude));
  fs = FileSystem.get(conf);
  Map<String, String> jarMd5Map = new TreeMap<String, String>();
  // For each class, use JarFinder to locate the jar in the local classpath.
  for (Class<?> clz : classList) {
    if (clz != null) {
      String localJarPath = JarFinder.getJar(clz);
      // We don't want to upload the same jar twice.
      if (!jarMd5Map.containsKey(localJarPath)) {
        // We should not push core Hadoop classes with this tool.
        // Should it be the responsibility of the developer, or should we
        // keep this fence here?
        if (!clz.getName().startsWith("org.apache.hadoop.")) {
          // Compute the MD5 sum of the local jar.
          InputStream in = new FileInputStream(localJarPath);
          boolean threw = true;
          try {
            String md5sum = DigestUtils.md5Hex(in);
            jarMd5Map.put(localJarPath, md5sum);
            threw = false;
          } finally {
            Closeables.close(in, threw);
          }
        } else {
          logger.info("Ignoring {}, since it looks like it's from Hadoop's core libs", localJarPath);
        }
      }
    }
  }
  for (Entry<String, String> entry : jarMd5Map.entrySet()) {
    Path localJarPath = new Path(entry.getKey());
    String jarFilename = localJarPath.getName();
    String localMd5sum = entry.getValue();
    logger.info("Jar {}. MD5 : [{}]", localJarPath, localMd5sum);
    Path remoteJarPath = new Path(libDir, jarFilename);
    Path remoteMd5Path = new Path(libDir, jarFilename + ".md5");
    // If the jar file or its MD5 file does not exist in HDFS,
    // force the upload of the jar.
    if (!fs.exists(remoteJarPath) || !fs.exists(remoteMd5Path)) {
      copyJarToHDFS(fs, localJarPath, localMd5sum, remoteJarPath, remoteMd5Path);
    } else {
      // If the jar exists, validate the MD5 file.
      // If the MD5 sum is different, upload the jar.
      FSDataInputStream md5FileStream = null;
      String remoteMd5sum = "";
      try {
        md5FileStream = fs.open(remoteMd5Path);
        byte[] md5bytes = new byte[32];
        if (32 == md5FileStream.read(md5bytes)) {
          remoteMd5sum = new String(md5bytes, Charsets.UTF_8);
        }
      } finally {
        if (md5FileStream != null) {
          md5FileStream.close();
        }
      }
      if (localMd5sum.equals(remoteMd5sum)) {
        logger.info("Jar {} already exists [{}] and md5sums are equal", jarFilename,
            remoteJarPath.toUri().toASCIIString());
      } else {
        logger.info("Jar {} already exists [{}] and md5sums are different!", jarFilename,
            remoteJarPath.toUri().toASCIIString());
        copyJarToHDFS(fs, localJarPath, localMd5sum, remoteJarPath, remoteMd5Path);
      }
    }
    // In all cases we want to add the jar to the DistributedCache's classpath.
    DistributedCache.addFileToClassPath(remoteJarPath, conf, fs);
  }
  // Create the symlink (was necessary in earlier versions of Hadoop).
  DistributedCache.createSymlink(conf);
}
From source file:com.cloudera.cdk.tools.JobClasspathHelper.java
License:Apache License
/**
 * @param fs
 *            File system to upload the jar to.
 * @param localJarPath
 *            The local path where we find the jar.
 * @param md5sum
 *            The MD5 sum of the local jar.
 * @param remoteJarPath
 *            The remote path where the jar is uploaded.
 * @param remoteMd5Path
 *            The remote path where the MD5 file is created.
 * @throws IOException
 */
private void copyJarToHDFS(FileSystem fs, Path localJarPath, String md5sum, Path remoteJarPath,
    Path remoteMd5Path) throws IOException {
  logger.info("Copying {} to {}", localJarPath.toUri().toASCIIString(),
      remoteJarPath.toUri().toASCIIString());
  fs.copyFromLocalFile(localJarPath, remoteJarPath);
  // Create the MD5 file for this jar.
  createMd5SumFile(fs, md5sum, remoteMd5Path);
  // We need to clean the tmp files that are created by JarFinder after the JVM exits.
  if (remoteJarPath.getName().startsWith(JarFinder.TMP_HADOOP)) {
    fs.deleteOnExit(remoteJarPath);
  }
  // Same for the MD5 file.
  if (remoteMd5Path.getName().startsWith(JarFinder.TMP_HADOOP)) {
    fs.deleteOnExit(remoteMd5Path);
  }
}
From source file:com.cloudera.crunch.util.DistCache.java
License:Open Source License
public static void write(Configuration conf, Path path, Object value) throws IOException {
  ObjectOutputStream oos = new ObjectOutputStream(FileSystem.get(conf).create(path));
  oos.writeObject(value);
  oos.close();
  DistributedCache.addCacheFile(path.toUri(), conf);
}
From source file:com.cloudera.hadoop.hdfs.nfs.nfs4.handlers.WRITEHandler.java
License:Apache License
@Override
protected WRITEResponse doHandle(NFS4Handler server, Session session, WRITERequest request)
    throws NFS4Exception, IOException {
  if (session.getCurrentFileHandle() == null) {
    throw new NFS4Exception(NFS4ERR_NOFILEHANDLE);
  }
  FileHandle fileHandle = session.getCurrentFileHandle();
  Path path = server.getPath(fileHandle);
  String file = path.toUri().getPath();
  FSDataOutputStream out = server.forWrite(request.getStateID(), session.getFileSystem(), fileHandle, false);
  LOGGER.info(session.getSessionID() + " xid = " + session.getXID() + ", write accepted " + file + " "
      + request.getOffset());
  WriteOrderHandler writeOrderHandler = server.getWriteOrderHandler(file, out);
  boolean sync = request.getStable() != NFS4_COMMIT_UNSTABLE4;
  int count = writeOrderHandler.write(path.toUri().getPath(), session.getXID(), request.getOffset(), sync,
      request.getData(), request.getStart(), request.getLength());
  WRITEResponse response = createResponse();
  OpaqueData8 verifer = new OpaqueData8();
  verifer.setData(Bytes.toBytes(server.getStartTime()));
  response.setVerifer(verifer);
  server.incrementMetric("HDFS_BYTES_WRITE", count);
  response.setCount(count);
  response.setStatus(NFS4_OK);
  return response;
}
From source file:com.cloudera.hadoop.hdfs.nfs.PathUtils.java
License:Apache License
public static String realPath(Path path) {
  return path.toUri().getPath();
}
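For example, given a fully qualified path (the namenode address and directory below are hypothetical values), realPath returns only the path component:

// Hypothetical qualified HDFS path, for illustration only.
Path qualified = new Path("hdfs://namenode:8020/exports/data");
String real = PathUtils.realPath(qualified); // "/exports/data"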
From source file:com.cloudera.hoop.client.fs.TestHoopFileSystem.java
License:Open Source License
private void testOpen() throws Exception {
  FileSystem fs = FileSystem.get(getHadoopConf());
  Path path = new Path(getHadoopTestDir(), "foo.txt");
  OutputStream os = fs.create(path);
  os.write(1);
  os.close();
  fs.close();
  Configuration conf = new Configuration();
  conf.set("fs.http.impl", HoopFileSystem.class.getName());
  fs = FileSystem.get(getJettyURL().toURI(), conf);
  InputStream is = fs.open(new Path(path.toUri().getPath()));
  Assert.assertEquals(is.read(), 1);
  is.close();
  fs.close();
}
From source file:com.cloudera.hoop.client.fs.TestHoopFileSystem.java
License:Open Source License
private void testCreate(Path path, boolean override) throws Exception {
  Configuration conf = new Configuration();
  conf.set("fs.http.impl", HoopFileSystem.class.getName());
  FileSystem fs = FileSystem.get(getJettyURL().toURI(), conf);
  FsPermission permission = new FsPermission(FsAction.READ_WRITE, FsAction.NONE, FsAction.NONE);
  OutputStream os = fs.create(new Path(path.toUri().getPath()), permission, override, 1024, (short) 2,
      100 * 1024 * 1024, null);
  os.write(1);
  os.close();
  fs.close();
  fs = FileSystem.get(getHadoopConf());
  FileStatus status = fs.getFileStatus(path);
  Assert.assertEquals(status.getReplication(), 2);
  Assert.assertEquals(status.getBlockSize(), 100 * 1024 * 1024);
  Assert.assertEquals(status.getPermission(), permission);
  InputStream is = fs.open(path);
  Assert.assertEquals(is.read(), 1);
  is.close();
  fs.close();
}
From source file:com.cloudera.hoop.client.fs.TestHoopFileSystem.java
License:Open Source License
private void testAppend() throws Exception {
  FileSystem fs = FileSystem.get(getHadoopConf());
  Path path = new Path(getHadoopTestDir(), "foo.txt");
  OutputStream os = fs.create(path);
  os.write(1);
  os.close();
  fs.close();
  Configuration conf = new Configuration();
  conf.set("fs.http.impl", HoopFileSystem.class.getName());
  fs = FileSystem.get(getJettyURL().toURI(), conf);
  os = fs.append(new Path(path.toUri().getPath()));
  os.write(2);
  os.close();
  fs.close();
  fs = FileSystem.get(getHadoopConf());
  InputStream is = fs.open(path);
  Assert.assertEquals(is.read(), 1);
  Assert.assertEquals(is.read(), 2);
  Assert.assertEquals(is.read(), -1);
  is.close();
  fs.close();
}