List of usage examples for org.apache.hadoop.fs.Path.getParent()
public Path getParent()
From source file:com.pinterest.hdfsbackup.distcp.DistCp.java
License:Apache License
/** * Initialize DFSCopyFileMapper specific job-configuration. * @param conf : The dfs/mapred configuration. * @param jobConf : The handle to the jobConf object to be initialized. * @param args Arguments/*www .ja v a2s . co m*/ */ private static void setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException { jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString()); //set boolean values final boolean update = args.flags.contains(Options.UPDATE); final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE); jobConf.setBoolean(Options.UPDATE.propertyname, update); jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite); jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname, args.flags.contains(Options.IGNORE_READ_FAILURES)); jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname, args.flags.contains(Options.PRESERVE_STATUS)); final String randomId = getRandomId(); JobClient jClient = new JobClient(jobConf); Path jobDirectory = new Path(jClient.getSystemDir(), NAME + "_" + randomId); jobConf.set(JOB_DIR_LABEL, jobDirectory.toString()); FileSystem dstfs = args.dst.getFileSystem(conf); boolean dstExists = dstfs.exists(args.dst); boolean dstIsDir = false; if (dstExists) { dstIsDir = dstfs.getFileStatus(args.dst).isDir(); } // default logPath Path logPath = args.log; if (logPath == null) { String filename = "_distcp_logs_" + randomId; if (!dstExists || !dstIsDir) { Path parent = args.dst.getParent(); if (!dstfs.exists(parent)) { dstfs.mkdirs(parent); } logPath = new Path(parent, filename); } else { logPath = new Path(args.dst, filename); } } FileOutputFormat.setOutputPath(jobConf, logPath); // create src list, dst list FileSystem jobfs = jobDirectory.getFileSystem(jobConf); Path srcfilelist = new Path(jobDirectory, "_distcp_src_files"); jobConf.set(SRC_LIST_LABEL, srcfilelist.toString()); SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class, FilePair.class, 
SequenceFile.CompressionType.NONE); Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files"); SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class, Text.class, SequenceFile.CompressionType.NONE); Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs"); jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString()); SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class, FilePair.class, SequenceFile.CompressionType.NONE); // handle the case where the destination directory doesn't exist // and we've only a single src directory OR we're updating/overwriting // the contents of the destination directory. final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite; int srcCount = 0, cnsyncf = 0, dirsyn = 0; long fileCount = 0L, byteCount = 0L, cbsyncs = 0L; try { for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) { final Path src = srcItr.next(); FileSystem srcfs = src.getFileSystem(conf); FileStatus srcfilestat = srcfs.getFileStatus(src); Path root = special && srcfilestat.isDir() ? src : src.getParent(); if (srcfilestat.isDir()) { ++srcCount; } Stack<FileStatus> pathstack = new Stack<FileStatus>(); for (pathstack.push(srcfilestat); !pathstack.empty();) { FileStatus cur = pathstack.pop(); FileStatus[] children = srcfs.listStatus(cur.getPath()); for (int i = 0; i < children.length; i++) { boolean skipfile = false; final FileStatus child = children[i]; final String dst = makeRelative(root, child.getPath()); ++srcCount; if (child.isDir()) { pathstack.push(child); } else { //skip file if the src and the dst files are the same. 
skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst)); //skip file if it exceed file limit or size limit skipfile |= fileCount == args.filelimit || byteCount + child.getLen() > args.sizelimit; if (!skipfile) { ++fileCount; byteCount += child.getLen(); if (LOG.isTraceEnabled()) { LOG.trace("adding file " + child.getPath()); } ++cnsyncf; cbsyncs += child.getLen(); if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) { src_writer.sync(); dst_writer.sync(); cnsyncf = 0; cbsyncs = 0L; } } } if (!skipfile) { src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()), new FilePair(child, dst)); } dst_writer.append(new Text(dst), new Text(child.getPath().toString())); } if (cur.isDir()) { String dst = makeRelative(root, cur.getPath()); dir_writer.append(new Text(dst), new FilePair(cur, dst)); if (++dirsyn > SYNC_FILE_MAX) { dirsyn = 0; dir_writer.sync(); } } } } } finally { checkAndClose(src_writer); checkAndClose(dst_writer); checkAndClose(dir_writer); } FileStatus dststatus = null; try { dststatus = dstfs.getFileStatus(args.dst); } catch (FileNotFoundException fnfe) { LOG.info(args.dst + " does not exist."); } // create dest path dir if copying > 1 file if (dststatus == null) { if (srcCount > 1 && !dstfs.mkdirs(args.dst)) { throw new IOException("Failed to create" + args.dst); } } final Path sorted = new Path(jobDirectory, "_distcp_sorted"); checkDuplication(jobfs, dstfilelist, sorted, conf); if (dststatus != null && args.flags.contains(Options.DELETE)) { deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf); } Path tmpDir = new Path( (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst, "_distcp_tmp_" + randomId); jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString()); LOG.info("srcCount=" + srcCount); jobConf.setInt(SRC_COUNT_LABEL, srcCount); jobConf.setLong(TOTAL_SIZE_LABEL, byteCount); setMapCount(byteCount, jobConf); }
From source file:com.pivotal.hawq.mapreduce.TPCHClusterTester.java
License:Apache License
@Override protected void testTPCHTable(HAWQTPCHSpec tpchSpec, String tableName) throws Exception { String caseName = tpchSpec.toString(); System.out.println("Executing test case: " + caseName); final File caseFolder = new File(FT_TEST_FOLDER, caseName); final File answerFile = new File(caseFolder, tableName + ".ans"); final File outputFile = new File(caseFolder, tableName + ".out"); if (caseFolder.exists()) { FileUtils.deleteDirectory(caseFolder); }//from w w w . j a va 2 s . c o m caseFolder.mkdir(); Connection conn = null; List<String> answers; try { conn = MRFormatTestUtils.getTestDBConnection(); // 1. load TPCH data Map<String, String> rs = HAWQJdbcUtils.executeSafeQueryForSingleRow(conn, "SHOW default_hash_table_bucket_number;"); int segnum = Integer.parseInt(rs.get("default_hash_table_bucket_number")); MRFormatTestUtils.runShellCommand(tpchSpec.getLoadCmd(segnum)); // 2. generate answer answers = MRFormatTestUtils.dumpTable(conn, tableName); Collections.sort(answers); Files.write(Joiner.on('\n').join(answers).getBytes(), answerFile); } finally { HAWQJdbcUtils.closeConnection(conn); } // 3. run input format driver final Path hdfsOutput = new Path("/temp/hawqinputformat/part-r-00000"); int exitCode = MRFormatTestUtils.runMapReduceOnCluster(tableName, hdfsOutput.getParent(), null); Assert.assertEquals(0, exitCode); // 4. copy hdfs output to local MRFormatTestUtils.runShellCommand( String.format("hadoop fs -copyToLocal %s %s", hdfsOutput.toString(), outputFile.getPath())); // 5. compare result List<String> outputs = Files.readLines(outputFile, Charsets.UTF_8); if (!answers.equals(outputs)) Assert.fail(String.format("HAWQInputFormat output for table %s differs with DB output:\n%s\n%s", tableName, answerFile, outputFile)); System.out.println("Successfully finish test case: " + caseName); }
From source file:com.quantcast.qfs.hadoop.Qfs.java
License:Apache License
/**
 * Creates a file through {@code qfsImpl} and returns a stream for writing to it.
 *
 * @param path the file to create
 * @param createFlag creation flags; validated first, then {@code OVERWRITE}
 *        and {@code APPEND} are forwarded to {@code qfsImpl}
 * @param absolutePermission permission for the new file and for a parent
 *        directory created on the way
 * @param bufferSize buffer size forwarded to {@code qfsImpl}
 * @param replication replication factor forwarded to {@code qfsImpl}
 * @param blockSize not used by this implementation
 * @param progress not used by this implementation
 * @param checksumOpt not used by this implementation
 * @param createParent when {@code true}, the parent directory is created
 *        before the file
 * @return the output stream returned by {@code qfsImpl.create}
 * @throws IOException if the flags are invalid, the parent cannot be created
 *         or the file cannot be created
 */
@Override
public FSDataOutputStream createInternal(Path path, EnumSet<CreateFlag> createFlag,
    FsPermission absolutePermission, int bufferSize, short replication, long blockSize,
    Progressable progress, ChecksumOpt checksumOpt, boolean createParent) throws IOException {
  CreateFlag.validate(createFlag);
  checkPath(path);
  if (createParent) {
    // getParent() yields null for a root path; there is nothing to create then
    final Path parent = path.getParent();
    if (parent != null) {
      mkdir(parent, absolutePermission, createParent);
    }
  }
  return qfsImpl.create(getUriPath(path), replication, bufferSize,
      createFlag.contains(CreateFlag.OVERWRITE), absolutePermission.toShort(),
      createFlag.contains(CreateFlag.APPEND));
}
From source file:com.quantcast.qfs.hadoop.QuantcastFileSystem.java
License:Apache License
/**
 * Creates {@code file} through {@code qfsImpl}, making its parent directory first.
 *
 * @param file the file to create
 * @param permission permission of the new file, forwarded as a short
 * @param overwrite forwarded to {@code qfsImpl.create}
 * @param bufferSize forwarded to {@code qfsImpl.create}
 * @param replication forwarded to {@code qfsImpl.create}
 * @param blockSize not used by this implementation
 * @param progress not used by this implementation
 * @return the output stream returned by {@code qfsImpl.create}
 * @throws IOException if the parent directory cannot be created or the
 *         create call fails
 */
public FSDataOutputStream create(Path file, FsPermission permission, boolean overwrite,
    int bufferSize, short replication, long blockSize, Progressable progress) throws IOException {
  final Path parentDir = file.getParent();
  if (parentDir != null) {
    final boolean parentReady = mkdirs(parentDir);
    if (!parentReady) {
      throw new IOException("Mkdirs failed to create " + parentDir);
    }
  }

  final String absoluteName = makeAbsolute(file).toUri().getPath();
  final short mode = permission.toShort();
  return qfsImpl.create(absoluteName, replication, bufferSize, overwrite, mode);
}
From source file:com.quantcast.qfs.hadoop.QuantcastFileSystem.java
License:Apache License
public FSDataOutputStream create(Path file, boolean overwrite, String createParams) throws IOException { Path parent = file.getParent(); if (parent != null && !mkdirs(parent)) { throw new IOException("Mkdirs failed to create " + parent); }//from w w w .j a v a2 s.c o m return qfsImpl.create(makeAbsolute(file).toUri().getPath(), overwrite, createParams); }
From source file:com.quixey.hadoop.fs.oss.OSSFileSystem.java
License:Apache License
@Override public boolean rename(Path src, Path dst) throws IOException { src = checkNotNull(src);//w w w . j a v a 2s . co m dst = checkNotNull(dst); String srcKey = pathToKey(makeAbsolute(src)); final String debugPreamble = "Renaming '" + src + "' to '" + dst + "' - "; if (isRoot(srcKey)) { // Cannot rename root of file system LOG.debug("{} returning false as cannot rename the root of a filesystem", debugPreamble); return false; } // get status of source boolean srcIsFile; try { srcIsFile = getFileStatus(src).isFile(); } catch (FileNotFoundException e) { // bail out fast if the source does not exist LOG.debug("{} returning false as src does not exist", debugPreamble); return false; } // figure out the final destination String dstKey = pathToKey(makeAbsolute(dst)); try { boolean dstIsFile = getFileStatus(dst).isFile(); if (dstIsFile) { // destination is a file. // you can't copy a file or a directory onto an existing file // except for the special case of dest==src, which is a no-op LOG.debug("{} returning without rename as dst is an already existing file", debugPreamble); // exit, returning true iff the rename is onto self return srcKey.equals(dstKey); } else { // destination exists and is a directory LOG.debug("{} using dst as output directory", debugPreamble); // destination goes under the dst path, with the name of the // source entry dstKey = pathToKey(makeAbsolute(new Path(dst, src.getName()))); } } catch (FileNotFoundException e) { // destination does not exist => the source file or directory // is copied over with the name of the destination LOG.debug("{} using dst as output destination", debugPreamble); try { if (getFileStatus(dst.getParent()).isFile()) { LOG.debug("{} returning false as dst parent exists and is a file", debugPreamble); return false; } } catch (FileNotFoundException ex) { LOG.debug("{} returning false as dst parent does not exist", debugPreamble); return false; } } // rename to self behavior follows Posix rules and is different // for 
directories and files -the return code is driven by src type if (srcKey.equals(dstKey)) { // fully resolved destination key matches source: fail LOG.debug("{} renamingToSelf; returning true", debugPreamble); return true; } if (srcIsFile) { renameOneFile(srcKey, dstKey, debugPreamble); } else { // src is a directory LOG.debug("{} src is directory, so copying contents", debugPreamble); // verify dest is not a child of the parent if (isSubDir(dstKey, srcKey)) { LOG.debug("{} cannot rename a directory to a subdirectory of self", debugPreamble); return false; } renameDir(srcKey, dstKey, debugPreamble); LOG.debug("{} done", debugPreamble); } return true; }
From source file:com.quixey.hadoop.fs.oss.OSSFileSystem.java
License:Apache License
/**
 * Stores an empty folder marker for the parent of {@code path}.
 * rename() and delete() use this method to ensure that the parent directory
 * of the source does not vanish.
 *
 * <p>Nothing is stored when {@code path} has no parent or when the parent
 * maps to the empty key.
 *
 * @param path the path whose parent directory is to be kept
 * @throws IOException if the marker cannot be stored
 */
private void createParent(Path path) throws IOException {
  final Path parentDir = path.getParent();
  if (parentDir == null) {
    return;
  }

  final String parentKey = pathToKey(makeAbsolute(parentDir));
  if (parentKey.isEmpty()) {
    return;
  }

  store.storeEmptyFile(parentKey + FOLDER_SUFFIX);
}
From source file:com.quixey.hadoop.fs.oss.OSSFileSystem.java
License:Apache License
@Override public boolean mkdirs(Path path, FsPermission permission) throws IOException { path = checkNotNull(path);//from w ww. jav a2s . com Path absolutePath = makeAbsolute(path); List<Path> paths = new LinkedList<>(); // obtain a list of parent directories while (null != absolutePath) { paths.add(0, absolutePath); absolutePath = absolutePath.getParent(); } boolean result = true; for (Path p : paths) result &= mkdir(p); return result; }
From source file:com.rapleaf.hank.hadoop.DomainBuilderOutputCommitter.java
License:Apache License
public static void commitJob(String domainName, JobConf conf) throws IOException { Path outputPath = new Path(DomainBuilderProperties.getOutputPath(domainName, conf)); Path tmpOutputPath = new Path(DomainBuilderProperties.getTmpOutputPath(domainName, conf)); FileSystem fs = outputPath.getFileSystem(conf); // Create outputPath fs.mkdirs(outputPath);/*from ww w .j ava 2 s . com*/ // Move temporary output to final output LOG.info("Moving temporary output files from: " + tmpOutputPath + " to final output path: " + outputPath); FileStatus[] partitions = fs.listStatus(tmpOutputPath); for (FileStatus partition : partitions) { if (partition.isDir()) { FileStatus[] partitionFiles = fs.listStatus(partition.getPath()); for (FileStatus partitionFile : partitionFiles) { Path sourcePath = partitionFile.getPath(); Path targetPath = new Path(new Path(outputPath, partition.getPath().getName()), partitionFile.getPath().getName()); LOG.info("Moving: " + sourcePath + " to: " + targetPath); if (!fs.mkdirs(targetPath.getParent())) { throw new IOException("Failed at creating directory " + targetPath.getParent()); } if (!fs.rename(sourcePath, targetPath)) { throw new IOException("Failed at renaming " + sourcePath + " to " + targetPath); } } } } // Finally, cleanup cleanupJob(domainName, conf); }
From source file:com.rapleaf.ramhdfs.RamFileSystem.java
License:Apache License
/**
 * Creates the file {@code f} and returns a buffered stream for writing to it.
 *
 * @param f the file to create
 * @param permission permission set on the file once it has been created
 * @param overwrite when {@code false}, an already existing file is an error
 * @param bufferSize size of the buffer placed in front of the file's stream
 * @param replication not used by this implementation
 * @param blockSize not used by this implementation
 * @param progress not used by this implementation
 * @return a stream that writes to the newly created file
 * @throws IOException if the file exists and {@code overwrite} is
 *         {@code false}, or if the parent directory cannot be created
 */
@Override
public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite,
    int bufferSize, short replication, long blockSize, Progressable progress) throws IOException {
  if (exists(f)) {
    if (!overwrite) {
      throw new IOException("File already exists:" + f);
    }
  }

  final Path parentDir = f.getParent();
  if (parentDir != null) {
    final boolean parentReady = mkdirs(parentDir);
    if (!parentReady) {
      throw new IOException("Mkdirs failed to create " + parentDir.toString());
    }
  }

  final FileObject target = pathToFileObject(f);
  target.createFile();
  target.getContent().setLastModifiedTime(System.currentTimeMillis());
  setPermission(f, permission);

  final BufferedOutputStream buffered =
      new BufferedOutputStream(new RamFSOutputStream(target), bufferSize);
  return new FSDataOutputStream(buffered, statistics);
}