Example usage for org.apache.hadoop.fs Path toUri

List of usage examples for org.apache.hadoop.fs Path toUri

Introduction

On this page you can find example usages of org.apache.hadoop.fs.Path#toUri.

Prototype

public URI toUri() 

Document

Convert this Path to a URI.
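
Before the longer examples below, here is a minimal sketch of what toUri() exposes; the PathToUriDemo class and the paths in it are made up for illustration:

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class PathToUriDemo {
    public static void main(String[] args) {
        // A fully qualified path carries its scheme and authority into the URI.
        Path qualified = new Path("hdfs://namenode:8020/user/alice/data.txt");
        URI uri = qualified.toUri();
        System.out.println(uri.getScheme());    // hdfs
        System.out.println(uri.getAuthority()); // namenode:8020
        System.out.println(uri.getPath());      // /user/alice/data.txt

        // A relative Path yields a URI with no scheme or authority.
        Path relative = new Path("logs/part-00000");
        System.out.println(relative.toUri().getScheme()); // null
    }
}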

Usage

From source file:com.pigai.hadoop.HttpFSFileSystem.java

License:Apache License

/**
 * Convenience method that creates a <code>HttpURLConnection</code> for the
 * HttpFSServer file system operations.
 * <p/>
 * This method injects any needed authentication credentials
 * via the {@link #getConnection(URL, String)} method.
 * 
 * @param method
 *            the HTTP method.
 * @param params
 *            the query string parameters.
 * @param path
 *            the file path
 * @param makeQualified
 *            if the path should be 'makeQualified'
 * 
 * @return a <code>HttpURLConnection</code> for the HttpFSServer server,
 *         authenticated and ready to use for the specified path and file
 *         system operation.
 * 
 * @throws IOException
 *             thrown if an IO error occurs.
 */
private HttpURLConnection getConnection(String method, Map<String, String> params, Path path,
        boolean makeQualified) throws IOException {
    // Pass the effective user as the "user.name" query string parameter.
    params.put("user.name", doAs);
    if (makeQualified) {
        path = makeQualified(path);
    }
    URI uri = path.toUri();
    StringBuilder sb = new StringBuilder();
    sb.append(uri.getScheme()).append("://").append(uri.getAuthority()).append(SERVICE_PREFIX)
            .append(uri.getPath());

    String separator = "?";
    for (Map.Entry<String, String> entry : params.entrySet()) {
        sb.append(separator).append(entry.getKey()).append("=")
                .append(URLEncoder.encode(entry.getValue(), "UTF8"));
        separator = "&";
    }
    URL url = new URL(sb.toString());
    return getConnection(url, method);
}

From source file:com.pinterest.hdfsbackup.distcp.DistCp.java

License:Apache License

/**
 * Initialize DFSCopyFileMapper specific job-configuration.
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 */
private static void setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

    //set boolean values
    final boolean update = args.flags.contains(Options.UPDATE);
    final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
    jobConf.setBoolean(Options.UPDATE.propertyname, update);
    jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
    jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
            args.flags.contains(Options.IGNORE_READ_FAILURES));
    jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname, args.flags.contains(Options.PRESERVE_STATUS));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path jobDirectory = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    FileSystem dstfs = args.dst.getFileSystem(conf);
    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_distcp_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class,
            Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory OR we're updating/overwriting
    // the contents of the destination directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<FileStatus>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {
                        // skip the file if the src and dst files are the same
                        skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst));
                        // skip the file if it exceeds the file count limit or the size limit
                        skipfile |= fileCount == args.filelimit || byteCount + child.getLen() > args.sizelimit;

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            if (LOG.isTraceEnabled()) {
                                LOG.trace("adding file " + child.getPath());
                            }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
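                            // Write a sync marker so the sequence file can be
                            // split here, capping the number of files and bytes
                            // that land in a single map task's split.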
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        LOG.info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create" + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_distcp_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    if (dststatus != null && args.flags.contains(Options.DELETE)) {
        deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
    }

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_distcp_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());
    LOG.info("srcCount=" + srcCount);
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(byteCount, jobConf);
}

From source file:com.pinterest.secor.util.FileUtil.java

License:Apache License

public static String[] list(String path) throws IOException {
    FileSystem fs = getFileSystem(path);
    Path fsPath = new Path(path);
    ArrayList<String> paths = new ArrayList<String>();
    FileStatus[] statuses = fs.listStatus(fsPath);
    if (statuses != null) {
        for (FileStatus status : statuses) {
            Path statusPath = status.getPath();
            if (path.startsWith("s3://") || path.startsWith("s3n://")) {
                paths.add(statusPath.toUri().toString());
            } else {
                paths.add(statusPath.toUri().getPath());
            }
        }
    }
    return paths.toArray(new String[] {});
}

From source file:com.pinterest.secor.util.FileUtil.java

License:Apache License

public static long getModificationTimeMsRecursive(String path) throws IOException {
    FileSystem fs = getFileSystem(path);
    Path fsPath = new Path(path);
    FileStatus status = fs.getFileStatus(fsPath);
    long modificationTime = status.getModificationTime();
    FileStatus[] statuses = fs.listStatus(fsPath);
    if (statuses != null) {
        for (FileStatus fileStatus : statuses) {
            Path statusPath = fileStatus.getPath();
            String stringPath;
            if (path.startsWith("s3://") || path.startsWith("s3n://")) {
                stringPath = statusPath.toUri().toString();
            } else {
                stringPath = statusPath.toUri().getPath();
            }
            if (!stringPath.equals(path)) {
                modificationTime = Math.max(modificationTime, getModificationTimeMsRecursive(stringPath));
            }
        }
    }
    return modificationTime;
}

From source file:com.pinterest.terrapin.hadoop.BaseUploader.java

License:Apache License

/**
 * Validates that the partition hfiles were written with the right partitioning
 * function. For each partition file, it reads the first key and computes its
 * partition with the partitioning function the client supplied; if the computed
 * partition number differs from the file's actual partition index, an exception
 * is thrown. If all partition hfiles are empty, an exception is thrown.
 *
 * @param parts full absolute paths for all partitions
 * @param partitionerType type of partitioning function
 * @param numShards total number of partitions
 * @throws IOException if something goes wrong when reading the hfiles
 * @throws IllegalArgumentException if the partitioner type is wrong or all partitions are empty
 */
public void validate(List<Path> parts, PartitionerType partitionerType, int numShards) throws IOException {
    boolean hasNonEmptyPartition = false;
    HColumnDescriptor columnDescriptor = new HColumnDescriptor();
    // Disable block cache to ensure it reads the actual file content.
    columnDescriptor.setBlockCacheEnabled(false);
    for (int shardIndex = 0; shardIndex < parts.size(); shardIndex++) {
        Path fileToBeValidated = parts.get(shardIndex);
        HFile.Reader reader = null;
        try {
            FileSystem fs = FileSystem.newInstance(fileToBeValidated.toUri(), conf);
            CacheConfig cc = new CacheConfig(conf, columnDescriptor);
            reader = HFile.createReader(fs, fileToBeValidated, cc);
            Partitioner partitioner = PartitionerFactory.getPartitioner(partitionerType);
            byte[] rowKey = reader.getFirstRowKey();
            if (rowKey == null) {
                LOG.warn(String.format("empty partition %s", fileToBeValidated.toString()));
                reader.close();
                continue;
            }
            hasNonEmptyPartition = true;
            BytesWritable key = new BytesWritable(rowKey);
            int partition = partitioner.getPartition(key, null, numShards);
            if (partition != shardIndex) {
                throw new IllegalArgumentException(
                        String.format("wrong partition type %s for key %s in partition %d, expected %d",
                                partitionerType.toString(), new String(key.getBytes()), shardIndex, partition));
            }
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }
    if (!hasNonEmptyPartition) {
        throw new IllegalArgumentException("all partitions are empty");
    }
}

From source file:com.pinterest.terrapin.hadoop.HdfsUploader.java

License:Apache License

public HdfsUploader(TerrapinUploaderOptions uploaderOptions, String absoluteHdfsDir)
        throws IOException, URISyntaxException {
    super(uploaderOptions);
    Path hdfsPathTmp = new Path(absoluteHdfsDir);
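    // Split the qualified path into a namenode URI (scheme + authority) for the
    // DFSClient and a bare path used as the upload directory.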
    URI namenodeUri = new URI(hdfsPathTmp.toUri().getScheme(), hdfsPathTmp.toUri().getAuthority(), null, null);
    this.dfsClient = new DFSClient(namenodeUri, new Configuration());
    this.hdfsDir = new Path(hdfsPathTmp.toUri().getPath());
}

From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java

License:Apache License

/**
 * Find the final name of a given output file, given the job output directory
 * and the work directory.
 * @param jobOutputDir the job's output directory
 * @param taskOutput the specific task output file
 * @param taskOutputPath the job's work directory
 * @return the final path for the specific output file
 * @throws IOException
 */
private Path getFinalPath(Path jobOutputDir, Path taskOutput, Path taskOutputPath) throws IOException {
    URI taskOutputUri = taskOutput.toUri();
    URI relativePath = taskOutputPath.toUri().relativize(taskOutputUri);
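    // URI.relativize() returns its argument unchanged when taskOutput is not
    // under taskOutputPath, hence the reference comparison below.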
    if (taskOutputUri == relativePath) {
        throw new IOException(
                "Can not get the relative path: base = " + taskOutputPath + " child = " + taskOutput);
    }
    if (relativePath.getPath().length() > 0) {
        return new Path(jobOutputDir, relativePath.getPath());
    } else {
        return jobOutputDir;
    }
}

From source file:com.quantcast.qfs.hadoop.Qfs.java

License:Apache License

@Override
public FileStatus getFileStatus(Path path) throws IOException, UnresolvedLinkException {
    final Path qp = makeQualified(path);
    final FileStatus fi = qfs.getFileStatus(qp);
    if (fi == null) {
        throw new FileNotFoundException(qp.toUri() + ": No such file or directory");
    }
    return fi;
}

From source file:com.quantcast.qfs.hadoop.Qfs.java

License:Apache License

@Override
public FileStatus getFileLinkStatus(Path path) throws IOException, UnresolvedLinkException {
    final Path qp = makeQualified(path);
    final FileStatus fi = qfs.getFileLinkStatus(qp);
    if (fi == null) {
        throw new FileNotFoundException(qp.toUri() + ": No such file or directory");
    }
    return fi;
}

From source file:com.quantcast.qfs.hadoop.Qfs.java

License:Apache License

@Override
public FileStatus[] listStatus(Path path) throws IOException, UnresolvedLinkException {
    final Path qp = makeQualified(path);
    final FileStatus[] ls = qfs.listStatus(qp);
    if (ls == null) {
        throw new FileNotFoundException(qp.toUri() + ": No such file or directory");
    }
    return ls;
}