Example usage for org.apache.hadoop.fs Path toUri

List of usage examples for org.apache.hadoop.fs Path toUri

Introduction

On this page you can find example usage for org.apache.hadoop.fs Path toUri.

Prototype

public URI toUri() 

Document

Convert this Path to a URI.
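As a minimal illustration of the call itself, independent of the examples below, a Path can be converted to a java.net.URI and its scheme, authority, and path components inspected. The paths used here are arbitrary placeholders.

import java.net.URI;

import org.apache.hadoop.fs.Path;

public class PathToUriDemo {
    public static void main(String[] args) {
        // A fully qualified path keeps its scheme and authority in the URI.
        Path qualified = new Path("hdfs://namenode:8020/user/demo/data.txt");
        URI uri = qualified.toUri();
        System.out.println(uri.getScheme());    // hdfs
        System.out.println(uri.getAuthority()); // namenode:8020
        System.out.println(uri.getPath());      // /user/demo/data.txt

        // A relative, scheme-less path yields a URI with null scheme and authority.
        Path relative = new Path("logs/2024/01");
        System.out.println(relative.toUri().getScheme()); // null
        System.out.println(relative.toUri().getPath());   // logs/2024/01
    }
}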

Usage

From source file:com.inmobi.conduit.distcp.DistcpBaseService.java

License:Apache License

/**
 * Method to qualify the checkpoint path based on the readurl configured
 * for the source cluster. The readurl of the cluster can change and the
 * checkpoint paths should be re-qualified to the new source cluster read
 * path.
 *
 * @param lastCheckPointPath path which can be null read from checkpoint
 *                           file.
 * @param srcCluster the cluster for which checkpoint file which should be
 *                   re-qualified.
 * @return path which is re-qualified.
 */
protected Path fullyQualifyCheckPointWithReadURL(Path lastCheckPointPath, Cluster srcCluster) {
    // if the checkpoint value was empty or null, just fall through and let
    // the service determine the new path.
    if (lastCheckPointPath == null) {
        return null;
    }
    String readUrl = srcCluster.getReadUrl();
    URI checkpointURI = lastCheckPointPath.toUri();
    String unQualifiedPathStr = checkpointURI.getPath();
    Path newCheckPointPath = new Path(readUrl, unQualifiedPathStr);
    return newCheckPointPath;
}
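The pattern above, keeping only the path component via toUri().getPath() and prepending a new filesystem root, is a common way to move a path from one cluster to another. Below is a standalone sketch of the same idea with invented URLs; it is not the service code itself.

import org.apache.hadoop.fs.Path;

public class RequalifyDemo {
    public static void main(String[] args) {
        // Checkpoint recorded against the old read URL (hypothetical values).
        Path oldCheckpoint = new Path("hdfs://old-nn:8020/conduit/checkpoints/stream1");
        String newReadUrl = "hdfs://new-nn:8020";

        // Strip scheme and authority, keeping only the path component...
        String unqualified = oldCheckpoint.toUri().getPath(); // /conduit/checkpoints/stream1

        // ...and re-qualify it against the new cluster root.
        Path requalified = new Path(newReadUrl, unqualified);
        System.out.println(requalified); // hdfs://new-nn:8020/conduit/checkpoints/stream1
    }
}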

From source file:com.inmobi.conduit.distcp.tools.DistCp.java

License:Apache License

/**
 * Get default name of the copy listing file. Use the meta folder
 * to create the copy listing file.
 *
 * @return - Path where the copy listing file has to be saved
 * @throws IOException - Exception if any
 */
protected Path getFileListingPath() throws IOException {
    String fileListPathStr = metaFolder + "/fileList.seq";
    Path path = new Path(fileListPathStr);
    return new Path(path.toUri().normalize().toString());
}
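Calling toUri().normalize() removes redundant segments such as "." before the string is turned back into a Path. A small sketch with a made-up path string:

import org.apache.hadoop.fs.Path;

public class NormalizeDemo {
    public static void main(String[] args) {
        // String concatenation of folder names can leave "." segments behind.
        String fileListPathStr = "/tmp/meta/./distcp/fileList.seq";
        Path raw = new Path(fileListPathStr);

        // URI.normalize() drops the redundant "." segment before the Path is rebuilt.
        Path cleaned = new Path(raw.toUri().normalize().toString());
        System.out.println(cleaned); // /tmp/meta/distcp/fileList.seq
    }
}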

From source file:com.inmobi.conduit.distcp.tools.util.DistCpUtils.java

License:Apache License

/**
 * Gets relative path of child path with respect to a root path
 * For ex. If childPath = /tmp/abc/xyz/file and
 *            sourceRootPath = /tmp/abc
 * Relative path would be /xyz/file
 *         If childPath = /file and
 *            sourceRootPath = /
 * Relative path would be /file
 * @param sourceRootPath - Source root path
 * @param childPath - Path for which relative path is required
 * @return - Relative portion of the child path (always prefixed with /
 *           unless it is empty)
 */
public static String getRelativePath(Path sourceRootPath, Path childPath) {
    String childPathString = childPath.toUri().getPath();
    String sourceRootPathString = sourceRootPath.toUri().getPath();
    return sourceRootPathString.equals("/") ? childPathString
            : childPathString.substring(sourceRootPathString.length());
}
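Because toUri().getPath() strips scheme and authority from both arguments, the relative portion is computed on bare path strings even when the inputs are fully qualified. A brief sketch of the same computation, with the method body inlined so it runs standalone and placeholder paths:

import org.apache.hadoop.fs.Path;

public class RelativePathDemo {
    // Same logic as DistCpUtils.getRelativePath above, inlined for a standalone run.
    static String getRelativePath(Path sourceRootPath, Path childPath) {
        String childPathString = childPath.toUri().getPath();
        String sourceRootPathString = sourceRootPath.toUri().getPath();
        return sourceRootPathString.equals("/") ? childPathString
                : childPathString.substring(sourceRootPathString.length());
    }

    public static void main(String[] args) {
        Path root = new Path("hdfs://nn:8020/tmp/abc");
        Path child = new Path("hdfs://nn:8020/tmp/abc/xyz/file");
        System.out.println(getRelativePath(root, child)); // /xyz/file

        System.out.println(getRelativePath(new Path("/"), new Path("/file"))); // /file
    }
}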

From source file:com.inmobi.messaging.consumer.util.ConsumerUtil.java

License:Apache License

public static void testConsumerStartUp(ClientConfig config, String streamName, String consumerName,
        boolean hadoop, Date absoluteStartTime, Path rootDir, String chkpointPathPrefix) throws Exception {
    AbstractMessagingDatabusConsumer consumer = createConsumer(hadoop);
    // consumer config has both relative start time and absolute start time
    consumer.init(streamName, consumerName, absoluteStartTime, config);
    Assert.assertEquals(consumer.getTopicName(), streamName);
    Assert.assertEquals(consumer.getConsumerName(), consumerName);
    // consumer is starting from relative start time
    int i;
    for (i = 0; i < 120; i++) {
        Message msg = consumer.next();
        Assert.assertEquals(getMessage(msg.getData().array(), hadoop), MessageUtil.constructMessage(i));
    }
    consumer.mark();
    for (i = 120; i < 130; i++) {
        Message msg = consumer.next();
        Assert.assertEquals(getMessage(msg.getData().array(), hadoop), MessageUtil.constructMessage(i));
    }
    consumer.reset();
    // consumer starts consuming messages from the checkpoint
    for (i = 120; i < 240; i++) {
        Message msg = consumer.next();
        Assert.assertEquals(getMessage(msg.getData().array(), hadoop), MessageUtil.constructMessage(i));
    }
    consumer.close();
    Assert.assertEquals(((BaseMessageConsumerStatsExposer) (consumer.getMetrics())).getNumMessagesConsumed(),
            250);
    consumer = createConsumer(hadoop);
    config.set(MessagingConsumerConfig.clustersNameConfig, "testCluster");
    consumer.init(streamName, consumerName, absoluteStartTime, config);
    // consumer starts consuming messages from the checkpoint
    for (i = 120; i < 240; i++) {
        Message msg = consumer.next();
        Assert.assertEquals(getMessage(msg.getData().array(), hadoop), MessageUtil.constructMessage(i));
    }
    consumer.mark();
    ConsumerCheckpoint temp = consumer.getCurrentCheckpoint();
    Map<PartitionId, PartitionCheckpoint> lastCheckpoint = new HashMap<PartitionId, PartitionCheckpoint>();
    Map<Integer, Checkpoint> checkpointMap = new HashMap<Integer, Checkpoint>();
    //create consumer checkpoint
    createCheckpointList(temp, checkpointMap, lastCheckpoint, consumer);
    for (i = 240; i < 260; i++) {
        Message msg = consumer.next();
        Assert.assertEquals(getMessage(msg.getData().array(), hadoop), MessageUtil.constructMessage(i));
    }
    consumer.close();
    Assert.assertEquals(((BaseMessageConsumerStatsExposer) (consumer.getMetrics())).getNumMessagesConsumed(),
            140);
    consumer = createConsumer(hadoop);
    if (!hadoop) {
        config = ClientConfig.loadFromClasspath(MessageConsumerFactory.MESSAGE_CLIENT_CONF_FILE);
        config.set(DatabusConsumer.checkpointDirConfig,
                new Path(chkpointPathPrefix, "random-databus").toString());
        config.set(DatabusConsumerConfig.databusRootDirsConfig, rootDir.toUri().toString());
        config.set(MessagingConsumerConfig.clustersNameConfig, "testCluster");
    } else {
        config = ClientConfig.loadFromClasspath("messaging-consumer-hadoop-conf.properties");
        config.set(HadoopConsumer.checkpointDirConfig,
                new Path(chkpointPathPrefix, "random-hadoop").toString());
        config.set(HadoopConsumerConfig.rootDirsConfig, rootDir.toString());
        config.set(MessagingConsumerConfig.clustersNameConfig, "testCluster");
    }

    // consumer starts from absolute start time
    consumer.init(streamName, consumerName, absoluteStartTime, config);
    for (i = 100; i < 300; i++) {
        Message msg = consumer.next();
        Assert.assertEquals(getMessage(msg.getData().array(), hadoop), MessageUtil.constructMessage(i));
    }
    consumer.mark();
    consumer.close();
    Assert.assertEquals(((BaseMessageConsumerStatsExposer) (consumer.getMetrics())).getNumMessagesConsumed(),
            200);
}
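In the test above, rootDir.toUri().toString() is what gets written into the consumer configuration; a fully qualified URI string round-trips back to an equivalent Path. A minimal sketch of that round trip, using Hadoop's own Configuration as a stand-in for the consumer config and an invented root directory:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class UriRoundTripDemo {
    public static void main(String[] args) {
        Path rootDir = new Path("hdfs://nn:8020/databus/streams");

        // Store the path as a fully qualified URI string, then read it back.
        Configuration conf = new Configuration();
        conf.set("demo.root.dirs", rootDir.toUri().toString());

        Path restored = new Path(conf.get("demo.root.dirs"));
        System.out.println(restored);                 // hdfs://nn:8020/databus/streams
        System.out.println(restored.equals(rootDir)); // true
    }
}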

From source file:com.kadwa.hadoop.DistExec.java

License:Open Source License

/**
 * Make a path relative with respect to a root path.
 * absPath is always assumed to descend from root.
 * Otherwise returned path is null.
 */
static String makeRelative(Path root, Path absPath) {
    if (!absPath.isAbsolute()) {
        throw new IllegalArgumentException("!absPath.isAbsolute(), absPath=" + absPath);
    }
    String p = absPath.toUri().getPath();

    StringTokenizer pathTokens = new StringTokenizer(p, "/");
    for (StringTokenizer rootTokens = new StringTokenizer(root.toUri().getPath(), "/"); rootTokens
            .hasMoreTokens();) {
        if (!rootTokens.nextToken().equals(pathTokens.nextToken())) {
            return null;
        }
    }
    StringBuilder sb = new StringBuilder();
    for (; pathTokens.hasMoreTokens();) {
        sb.append(pathTokens.nextToken());
        if (pathTokens.hasMoreTokens()) {
            sb.append(Path.SEPARATOR);
        }
    }
    return sb.length() == 0 ? "." : sb.toString();
}
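Unlike the substring-based getRelativePath shown earlier, this version walks both paths token by token, so the result has no leading separator, an absPath equal to root yields ".", and a path outside root yields null. A short sketch of that behavior, with a trimmed copy of the method (the isAbsolute check dropped) inlined so it runs standalone:

import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;

public class MakeRelativeDemo {
    // Trimmed copy of makeRelative above, inlined for a standalone run.
    static String makeRelative(Path root, Path absPath) {
        String p = absPath.toUri().getPath();
        StringTokenizer pathTokens = new StringTokenizer(p, "/");
        for (StringTokenizer rootTokens = new StringTokenizer(root.toUri().getPath(), "/");
                rootTokens.hasMoreTokens();) {
            if (!rootTokens.nextToken().equals(pathTokens.nextToken())) {
                return null; // absPath does not descend from root
            }
        }
        StringBuilder sb = new StringBuilder();
        while (pathTokens.hasMoreTokens()) {
            sb.append(pathTokens.nextToken());
            if (pathTokens.hasMoreTokens()) {
                sb.append(Path.SEPARATOR);
            }
        }
        return sb.length() == 0 ? "." : sb.toString();
    }

    public static void main(String[] args) {
        Path root = new Path("/data/input");
        System.out.println(makeRelative(root, new Path("/data/input/a/b.txt"))); // a/b.txt
        System.out.println(makeRelative(root, new Path("/data/input")));         // .
        System.out.println(makeRelative(root, new Path("/other/x.txt")));        // null
    }
}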

From source file:com.kadwa.hadoop.DistExec.java

License:Open Source License

/**
 * Initialize ExecFilesMapper specific job-configuration.
 *
 * @param conf    : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args    Arguments
 * @return true if it is necessary to launch a job.
 */
private static boolean setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());
    jobConf.set(EXEC_CMD_LABEL, args.execCmd);

    //set boolean values
    jobConf.setBoolean(Options.REDIRECT_ERROR_TO_OUT.propertyname,
            args.flags.contains(Options.REDIRECT_ERROR_TO_OUT));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path stagingArea;
    try {
        stagingArea = JobSubmissionFiles.getStagingDir(jClient, conf);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }

    Path jobDirectory = new Path(stagingArea + NAME + "_" + randomId);
    FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
    FileSystem.mkdirs(FileSystem.get(jobDirectory.toUri(), conf), jobDirectory, mapredSysPerms);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    FileSystem dstfs = args.dst.getFileSystem(conf);

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), new Path[] { args.dst }, conf);

    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_" + NAME + "_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_" + NAME + "_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_" + NAME + "_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class,
            Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_" + NAME + "_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists);
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<FileStatus>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            if (LOG.isTraceEnabled()) {
                                LOG.trace("adding file " + child.getPath());
                            }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        LOG.info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create " + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_" + NAME + "_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_" + NAME + "_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());
    LOG.info("sourcePathsCount=" + srcCount);
    LOG.info("filesToExecCount=" + fileCount);
    LOG.info("bytesToExecCount=" + StringUtils.humanReadableInt(byteCount));
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(fileCount, jobConf);
    return fileCount > 0;
}
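Two of the toUri() calls above serve different purposes: args.dst.toUri().toString() serializes the destination into the job configuration, while FileSystem.get(jobDirectory.toUri(), conf) selects the FileSystem implementation that owns that path. A compact sketch of the latter, using an invented local path so it runs anywhere:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FsFromPathDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // The scheme and authority of the Path's URI decide which FileSystem is returned.
        Path jobDirectory = new Path("file:///tmp/demo_job_dir");
        FileSystem fs = FileSystem.get(jobDirectory.toUri(), conf);
        System.out.println(fs.getUri()); // file:///

        // Equivalent convenience call offered by Path itself.
        FileSystem same = jobDirectory.getFileSystem(conf);
        System.out.println(same.getUri()); // file:///
    }
}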

From source file:com.knewton.mapreduce.SSTableColumnRecordReaderTest.java

License:Apache License

@Test
public void testNextKeyValue() throws Exception {
    Path inputPath = inputSplit.getPath();
    FileSystem remoteFS = FileSystem.get(inputPath.toUri(), conf);
    FileSystem localFS = FileSystem.getLocal(conf);
    TaskAttemptContext context = getTaskAttemptContext();
    ssTableColumnRecordReader.initialize(inputSplit, context);
    verify(ssTableColumnRecordReader).copyTablesToLocal(remoteFS, localFS, inputPath, context);

    assertEquals(0, ssTableColumnRecordReader.getProgress(), 0);
    assertTrue(ssTableColumnRecordReader.nextKeyValue());
    assertEquals(key.getKey(), ssTableColumnRecordReader.getCurrentKey());
    assertEquals(value, ssTableColumnRecordReader.getCurrentValue());

    assertEquals(0.5, ssTableColumnRecordReader.getProgress(), 0);
    assertTrue(ssTableColumnRecordReader.nextKeyValue());
    assertEquals(key.getKey(), ssTableColumnRecordReader.getCurrentKey());
    assertEquals(value, ssTableColumnRecordReader.getCurrentValue());

    assertEquals(1, ssTableColumnRecordReader.getProgress(), 0);
    assertFalse(ssTableColumnRecordReader.nextKeyValue());
    assertNull(ssTableColumnRecordReader.getCurrentKey());
    assertNull(ssTableColumnRecordReader.getCurrentValue());
}

From source file:com.knewton.mapreduce.SSTableRecordReader.java

License:Apache License

/**
 * Performs all the necessary actions to initialize and prepare this record reader.
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    this.ctx = context;
    conf = context.getConfiguration();
    keysRead = 0;
    components = Sets.newHashSetWithExpectedSize(3);
    FileSplit split = (FileSplit) inputSplit;
    validateConfiguration(conf);

    // Get comparator. Subcomparator can be null.
    AbstractType<?> comparator = getConfComparator(conf);
    AbstractType<?> subcomparator = getConfSubComparator(conf);

    // Get partitioner for keys
    IPartitioner partitioner = getConfPartitioner(conf);

    // Move minimum required db tables to local disk.
    Path dataTablePath = split.getPath();
    FileSystem remoteFS = FileSystem.get(dataTablePath.toUri(), conf);
    FileSystem localFS = FileSystem.getLocal(conf);
    copyTablesToLocal(remoteFS, localFS, dataTablePath, context);
    CFMetaData cfMetaData;
    if (getConfIsSparse(conf)) {
        cfMetaData = CFMetaData.sparseCFMetaData(getDescriptor().ksname, getDescriptor().cfname, comparator);
    } else {
        cfMetaData = CFMetaData.denseCFMetaData(getDescriptor().ksname, getDescriptor().cfname, comparator,
                subcomparator);
    }
    // Open table and get scanner
    SSTableReader tableReader = openSSTableReader(partitioner, cfMetaData);
    setTableScanner(tableReader);
}

From source file:com.knewton.mapreduce.SSTableRecordReader.java

License:Apache License

/**
 * Moves all the minimum required tables for the table reader to work to local disk.
 *
 * @param dataTablePath The data table to work on.
 */
@VisibleForTesting
void copyTablesToLocal(FileSystem remoteFS, FileSystem localFS, Path dataTablePath, TaskAttemptContext context)
        throws IOException {
    Configuration conf = context.getConfiguration();
    String hdfsDataTablePathStr = dataTablePath.toUri().getPath();
    String localDataTablePathStr = dataTablePath.toUri().getHost() + File.separator
            + dataTablePath.toUri().getPath();
    // Make path relative due to EMR permissions
    if (localDataTablePathStr.startsWith("/")) {
        String mapTaskId = conf.get("mapreduce.task.attempt.id");
        String mapTempDir = conf.get("mapreduce.cluster.temp.dir");
        String taskWorkDir = mapTempDir + File.separator + mapTaskId;
        LOG.info("Appending {} to {}", taskWorkDir, localDataTablePathStr);
        localDataTablePathStr = taskWorkDir + localDataTablePathStr;
    }
    Path localDataTablePath = new Path(localDataTablePathStr);
    LOG.info("Copying hdfs file from {} to local disk at {}.", dataTablePath.toUri(),
            localDataTablePath.toUri());
    copyToLocalFile(remoteFS, localFS, dataTablePath, localDataTablePath);
    boolean isCompressed = conf.getBoolean(PropertyConstants.COMPRESSION_ENABLED.txt, false);
    if (isCompressed) {
        decompress(localDataTablePath, context);
    }
    components.add(Component.DATA);
    desc = Descriptor.fromFilename(localDataTablePathStr);
    Descriptor hdfsDesc = Descriptor.fromFilename(hdfsDataTablePathStr);
    String indexPathStr = hdfsDesc.filenameFor(Component.PRIMARY_INDEX);
    components.add(Component.PRIMARY_INDEX);
    Path localIdxPath = new Path(desc.filenameFor(Component.PRIMARY_INDEX));
    LOG.info("Copying hdfs file from {} to local disk at {}.", indexPathStr, localIdxPath);
    copyToLocalFile(remoteFS, localFS, new Path(indexPathStr), localIdxPath);
    if (isCompressed) {
        decompress(localIdxPath, context);
    }
    String compressionTablePathStr = hdfsDesc.filenameFor(Component.COMPRESSION_INFO.name());
    Path compressionTablePath = new Path(compressionTablePathStr);
    if (remoteFS.exists(compressionTablePath)) {
        Path localCompressionPath = new Path(desc.filenameFor(Component.COMPRESSION_INFO.name()));
        LOG.info("Copying hdfs file from {} to local disk at {}.", compressionTablePath.toUri(),
                localCompressionPath);
        copyToLocalFile(remoteFS, localFS, compressionTablePath, localCompressionPath);
        if (isCompressed) {
            decompress(localCompressionPath, context);
        }
        components.add(Component.COMPRESSION_INFO);
    }
}
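The local path here is derived by recombining two pieces of the HDFS path's URI: the host (toUri().getHost()) and the path (toUri().getPath()). A self-contained sketch of that decomposition, with a made-up sstable path:

import java.io.File;

import org.apache.hadoop.fs.Path;

public class UriComponentsDemo {
    public static void main(String[] args) {
        Path dataTablePath = new Path("hdfs://nn.example.com:8020/cassandra/ks/cf/ks-cf-hc-1-Data.db");

        String host = dataTablePath.toUri().getHost(); // nn.example.com
        String path = dataTablePath.toUri().getPath(); // /cassandra/ks/cf/ks-cf-hc-1-Data.db

        // Same recombination as copyTablesToLocal: host + separator + path,
        // giving a distinct local location per source namenode.
        String localDataTablePathStr = host + File.separator + path;
        // On Unix: nn.example.com//cassandra/ks/cf/ks-cf-hc-1-Data.db
        System.out.println(localDataTablePathStr);
    }
}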

From source file:com.knewton.mapreduce.SSTableRecordReader.java

License:Apache License

/**
 * Decompresses input files that were snappy compressed before opening them with the sstable
 * reader. It writes a new decompressed file with the same name as the compressed one. The old
 * one gets deleted.
 */
private void decompress(Path localTablePath, TaskAttemptContext context) throws IOException {
    context.setStatus(String.format("Decompressing %s", localTablePath.toUri()));
    int compressionBufSize = context.getConfiguration().getInt(PropertyConstants.DECOMPRESS_BUFFER.txt,
            DEFAULT_DECOMPRESS_BUFFER_SIZE);
    compressionBufSize *= 1024;
    LOG.info("Decompressing {} with buffer size {}.", localTablePath, compressionBufSize);
    File compressedFile = new File(localTablePath.toString());
    InputStream fis = new FileInputStream(compressedFile);
    InputStream bis = new BufferedInputStream(fis, compressionBufSize);
    InputStream sip = new SnappyInputStream(bis);
    File decompressedFile = new File(localTablePath.toString() + ".tmp");

    OutputStream os = new FileOutputStream(decompressedFile);
    OutputStream bos = new BufferedOutputStream(os, compressionBufSize);
    byte[] inByteArr = new byte[compressionBufSize];
    int bytesRead = 0;
    int bytesSinceLastReport = 0;
    while ((bytesRead = sip.read(inByteArr)) > 0) {
        bos.write(inByteArr, 0, bytesRead);
        bytesSinceLastReport += bytesRead;
        // Avoid timeouts. Report progress to the jobtracker.
        if (bytesSinceLastReport % REPORT_DECOMPRESS_PROGRESS_EVERY_GBS > 0) {
            context.setStatus(String.format("Decompressed %d bytes.", bytesSinceLastReport));
            bytesSinceLastReport -= REPORT_DECOMPRESS_PROGRESS_EVERY_GBS;
        }
    }
    sip.close();
    bos.close();
    compressedFile.delete();
    decompressedFile.renameTo(compressedFile);
}
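One design note on the block above: the streams are closed manually and the File.delete()/renameTo() results are ignored, so a failure mid-decompression can leak handles or leave a stale .tmp file behind. Below is a hedged alternative sketch, not taken from the original source, that uses try-with-resources and java.nio; it assumes the SnappyInputStream in use is snappy-java's org.xerial.snappy.SnappyInputStream.

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;

import org.xerial.snappy.SnappyInputStream;

public class DecompressSketch {
    static void decompress(String compressedPath, int bufSize) throws IOException {
        String tmpPath = compressedPath + ".tmp";
        // Streams are closed automatically, even if the copy loop throws.
        try (InputStream in = new SnappyInputStream(
                     new BufferedInputStream(new FileInputStream(compressedPath), bufSize));
             OutputStream out = new BufferedOutputStream(new FileOutputStream(tmpPath), bufSize)) {
            byte[] buf = new byte[bufSize];
            int n;
            while ((n = in.read(buf)) > 0) {
                out.write(buf, 0, n);
            }
        }
        // Replace the compressed file; throws instead of silently failing like renameTo().
        Files.move(Paths.get(tmpPath), Paths.get(compressedPath), StandardCopyOption.REPLACE_EXISTING);
    }
}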