List of usage examples for org.apache.hadoop.fs.Path.toUri()
public URI toUri()
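Before the examples from real projects below, a minimal sketch of what toUri() returns for a fully qualified path may help; the host and path here are hypothetical. Several of the examples below recombine exactly these three components.

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class PathToUriBasics {
    public static void main(String[] args) {
        // Hypothetical cluster and file, for illustration only.
        Path p = new Path("hdfs://namenode:8020/user/alice/data.txt");
        URI uri = p.toUri();
        System.out.println(uri.getScheme());     // hdfs
        System.out.println(uri.getAuthority());  // namenode:8020
        System.out.println(uri.getPath());       // /user/alice/data.txt
    }
}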
From source file: com.pigai.hadoop.HttpFSFileSystem.java
License: Apache License
/**
 * Convenience method that creates a <code>HttpURLConnection</code> for the
 * HttpFSServer file system operations.
 * <p/>
 * This method performs and injects any needed authentication credentials
 * via the {@link #getConnection(URL, String)} method.
 *
 * @param method the HTTP method.
 * @param params the query string parameters.
 * @param path the file path.
 * @param makeQualified if the path should be 'makeQualified'.
 *
 * @return a <code>HttpURLConnection</code> for the HttpFSServer server,
 *         authenticated and ready to use for the specified path and file
 *         system operation.
 *
 * @throws IOException thrown if an IO error occurs.
 */
private HttpURLConnection getConnection(String method, Map<String, String> params, Path path,
        boolean makeQualified) throws IOException {
    // params.put(DO_AS_PARAM, doAs); //todo ?
    params.put("user.name", doAs);
    if (makeQualified) {
        path = makeQualified(path);
    }
    URI uri = path.toUri();
    StringBuilder sb = new StringBuilder();
    sb.append(uri.getScheme()).append("://").append(uri.getAuthority()).append(SERVICE_PREFIX)
            .append(uri.getPath());
    String separator = "?";
    for (Map.Entry<String, String> entry : params.entrySet()) {
        sb.append(separator).append(entry.getKey()).append("=")
                .append(URLEncoder.encode(entry.getValue(), "UTF8"));
        separator = "&";
    }
    URL url = new URL(sb.toString());
    return getConnection(url, method);
}
From source file: com.pinterest.hdfsbackup.distcp.DistCp.java
License: Apache License
/**
 * Initialize DFSCopyFileMapper specific job-configuration.
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 */
private static void setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

    // set boolean values
    final boolean update = args.flags.contains(Options.UPDATE);
    final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
    jobConf.setBoolean(Options.UPDATE.propertyname, update);
    jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
    jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
            args.flags.contains(Options.IGNORE_READ_FAILURES));
    jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname,
            args.flags.contains(Options.PRESERVE_STATUS));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path jobDirectory = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    FileSystem dstfs = args.dst.getFileSystem(conf);
    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_distcp_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist,
            LongWritable.class, FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist,
            Text.class, Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist,
            Text.class, FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory OR we're updating/overwriting
    // the contents of the destination directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<FileStatus>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {
                        // skip file if the src and the dst files are the same.
                        skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst));
                        // skip file if it exceeds the file limit or size limit
                        skipfile |= fileCount == args.filelimit
                                || byteCount + child.getLen() > args.sizelimit;

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            if (LOG.isTraceEnabled()) {
                                LOG.trace("adding file " + child.getPath());
                            }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        LOG.info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create " + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_distcp_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    if (dststatus != null && args.flags.contains(Options.DELETE)) {
        deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
    }

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_distcp_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());
    LOG.info("srcCount=" + srcCount);
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(byteCount, jobConf);
}
From source file: com.pinterest.secor.util.FileUtil.java
License: Apache License
public static String[] list(String path) throws IOException {
    FileSystem fs = getFileSystem(path);
    Path fsPath = new Path(path);
    ArrayList<String> paths = new ArrayList<String>();
    FileStatus[] statuses = fs.listStatus(fsPath);
    if (statuses != null) {
        for (FileStatus status : statuses) {
            Path statusPath = status.getPath();
            if (path.startsWith("s3://") || path.startsWith("s3n://")) {
                paths.add(statusPath.toUri().toString());
            } else {
                paths.add(statusPath.toUri().getPath());
            }
        }
    }
    return paths.toArray(new String[] {});
}
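The S3 branch above (and the same pattern in the next example) matters because toUri().toString() preserves the scheme and authority, i.e. the bucket, while toUri().getPath() returns only the path component. A minimal sketch of the difference, using a hypothetical bucket and key:

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class ToUriDemo {
    public static void main(String[] args) {
        // Hypothetical S3 location, for illustration only.
        Path s3Path = new Path("s3n://my-bucket/logs/part-0");
        URI uri = s3Path.toUri();
        System.out.println(uri.toString());  // s3n://my-bucket/logs/part-0
        System.out.println(uri.getPath());   // /logs/part-0
    }
}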
From source file: com.pinterest.secor.util.FileUtil.java
License: Apache License
public static long getModificationTimeMsRecursive(String path) throws IOException {
    FileSystem fs = getFileSystem(path);
    Path fsPath = new Path(path);
    FileStatus status = fs.getFileStatus(fsPath);
    long modificationTime = status.getModificationTime();
    FileStatus[] statuses = fs.listStatus(fsPath);
    if (statuses != null) {
        for (FileStatus fileStatus : statuses) {
            Path statusPath = fileStatus.getPath();
            String stringPath;
            if (path.startsWith("s3://") || path.startsWith("s3n://")) {
                stringPath = statusPath.toUri().toString();
            } else {
                stringPath = statusPath.toUri().getPath();
            }
            if (!stringPath.equals(path)) {
                modificationTime = Math.max(modificationTime, getModificationTimeMsRecursive(stringPath));
            }
        }
    }
    return modificationTime;
}
From source file: com.pinterest.terrapin.hadoop.BaseUploader.java
License: Apache License
/**
 * Validates that each non-empty partition hfile was written with the right partitioning
 * function. It reads the first key of each partition file, then calculates the partition
 * according to the partitioning function the client provides. If the calculated partition
 * number differs from the actual partition number, an exception is thrown. If all
 * partition hfiles are empty, an exception is thrown.
 *
 * @param parts full absolute path for all partitions
 * @param partitionerType type of partitioning function
 * @param numShards total number of partitions
 * @throws IOException if something goes wrong when reading the hfiles
 * @throws IllegalArgumentException if the partitioner type is wrong or all partitions are empty
 */
public void validate(List<Path> parts, PartitionerType partitionerType, int numShards) throws IOException {
    boolean hasNonEmptyPartition = false;
    HColumnDescriptor columnDescriptor = new HColumnDescriptor();
    // Disable block cache to ensure it reads the actual file content.
    columnDescriptor.setBlockCacheEnabled(false);
    for (int shardIndex = 0; shardIndex < parts.size(); shardIndex++) {
        Path fileToBeValidated = parts.get(shardIndex);
        HFile.Reader reader = null;
        try {
            FileSystem fs = FileSystem.newInstance(fileToBeValidated.toUri(), conf);
            CacheConfig cc = new CacheConfig(conf, columnDescriptor);
            reader = HFile.createReader(fs, fileToBeValidated, cc);
            Partitioner partitioner = PartitionerFactory.getPartitioner(partitionerType);
            byte[] rowKey = reader.getFirstRowKey();
            if (rowKey == null) {
                LOG.warn(String.format("empty partition %s", fileToBeValidated.toString()));
                reader.close();
                continue;
            }
            hasNonEmptyPartition = true;
            BytesWritable key = new BytesWritable(rowKey);
            int partition = partitioner.getPartition(key, null, numShards);
            if (partition != shardIndex) {
                throw new IllegalArgumentException(
                        String.format("wrong partition type %s for key %s in partition %d, expected %d",
                                partitionerType.toString(), new String(key.getBytes()), shardIndex, partition));
            }
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }
    if (!hasNonEmptyPartition) {
        throw new IllegalArgumentException("all partitions are empty");
    }
}
From source file: com.pinterest.terrapin.hadoop.HdfsUploader.java
License: Apache License
public HdfsUploader(TerrapinUploaderOptions uploaderOptions, String absoluteHdfsDir)
        throws IOException, URISyntaxException {
    super(uploaderOptions);
    Path hdfsPathTmp = new Path(absoluteHdfsDir);
    // Build the namenode URI from the scheme and authority, dropping the path component.
    URI namenodeUri = new URI(hdfsPathTmp.toUri().getScheme(), hdfsPathTmp.toUri().getAuthority(), null, null);
    this.dfsClient = new DFSClient(namenodeUri, new Configuration());
    this.hdfsDir = new Path(hdfsPathTmp.toUri().getPath());
}
From source file: com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java
License: Apache License
/**
 * Find the final name of a given output file, given the job output directory
 * and the work directory.
 * @param jobOutputDir the job's output directory
 * @param taskOutput the specific task output file
 * @param taskOutputPath the job's work directory
 * @return the final path for the specific output file
 * @throws IOException
 */
private Path getFinalPath(Path jobOutputDir, Path taskOutput, Path taskOutputPath) throws IOException {
    URI taskOutputUri = taskOutput.toUri();
    URI relativePath = taskOutputPath.toUri().relativize(taskOutputUri);
    if (taskOutputUri == relativePath) {
        throw new IOException(
                "Can not get the relative path: base = " + taskOutputPath + " child = " + taskOutput);
    }
    if (relativePath.getPath().length() > 0) {
        return new Path(jobOutputDir, relativePath.getPath());
    } else {
        return jobOutputDir;
    }
}
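The identity check above works because URI.relativize returns the given URI itself when it cannot be relativized against the base, so reference equality signals failure. A minimal sketch of that behavior, with hypothetical paths:

import java.net.URI;

public class RelativizeDemo {
    public static void main(String[] args) {
        // Hypothetical job directories, for illustration only.
        URI base = URI.create("hdfs://nn:8020/out/_temporary/task_0/");
        URI child = URI.create("hdfs://nn:8020/out/_temporary/task_0/part-00000");
        URI other = URI.create("hdfs://nn:8020/elsewhere/part-00000");

        // Relativizable: yields the relative URI "part-00000".
        System.out.println(base.relativize(child));

        // Not relativizable: relativize returns the given URI unchanged,
        // so an identity comparison detects the failure.
        System.out.println(base.relativize(other) == other);  // true
    }
}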
From source file: com.quantcast.qfs.hadoop.Qfs.java
License: Apache License
@Override
public FileStatus getFileStatus(Path path) throws IOException, UnresolvedLinkException {
    final Path qp = makeQualified(path);
    final FileStatus fi = qfs.getFileStatus(qp);
    if (fi == null) {
        throw new FileNotFoundException(qp.toUri() + ": No such file or directory");
    }
    return fi;
}
From source file: com.quantcast.qfs.hadoop.Qfs.java
License: Apache License
@Override
public FileStatus getFileLinkStatus(Path path) throws IOException, UnresolvedLinkException {
    final Path qp = makeQualified(path);
    final FileStatus fi = qfs.getFileLinkStatus(qp);
    if (fi == null) {
        throw new FileNotFoundException(qp.toUri() + ": No such file or directory");
    }
    return fi;
}
From source file: com.quantcast.qfs.hadoop.Qfs.java
License: Apache License
@Override
public FileStatus[] listStatus(Path path) throws IOException, UnresolvedLinkException {
    final Path qp = makeQualified(path);
    final FileStatus[] ls = qfs.listStatus(qp);
    if (ls == null) {
        throw new FileNotFoundException(qp.toUri() + ": No such file or directory");
    }
    return ls;
}