Example usage for org.apache.hadoop.fs Path toUri

List of usage examples for org.apache.hadoop.fs Path toUri

Introduction

On this page you can find example usage of org.apache.hadoop.fs.Path.toUri().

Prototype

public URI toUri() 

Document

Convert this Path to a URI.
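
A fully qualified Path exposes its scheme, authority, and path components through the returned URI. As a quick orientation before the examples, here is a minimal, self-contained sketch; the HDFS authority and file name below are illustrative placeholders, not values from the examples that follow.

import java.net.URI;

import org.apache.hadoop.fs.Path;

public class PathToUriDemo {
    public static void main(String[] args) {
        // A fully qualified HDFS path: scheme, authority, and path.
        Path path = new Path("hdfs://namenode:8020/user/demo/data.txt");
        URI uri = path.toUri();
        System.out.println(uri.getScheme());    // hdfs
        System.out.println(uri.getAuthority()); // namenode:8020
        System.out.println(uri.getPath());      // /user/demo/data.txt
    }
}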

Usage

From source file:cascading.tap.Hfs.java

License:Open Source License

private void makeLocal(JobConf conf, Path qualifiedPath, String infoMessage) {
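    // Assumes qualifiedPath is fully qualified, so toUri().getScheme() is non-null.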
    if (!conf.get("mapred.job.tracker", "").equalsIgnoreCase("local")
            && qualifiedPath.toUri().getScheme().equalsIgnoreCase("file")) {
        if (LOG.isInfoEnabled())
            LOG.info(infoMessage + toString());

        conf.set("mapred.job.tracker", "local"); // force job to run locally
    }
}

From source file:cascading.util.S3Util.java

License:Open Source License

@Deprecated
public static String getKeyFrom(Path path) {
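    // The URI path starts with '/', which is not part of the S3 object key, so drop it.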
    return path.toUri().getPath().substring(1);
}

From source file:cc.slda.AnnotateDocuments.java

License:Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));
    options.addOption(OptionBuilder.withArgName(PCUTOFF).hasArg()
            .withDescription("probability of topic assignment").create(PCUTOFF));
    options.addOption(OptionBuilder.withArgName(INDEX).hasArg()
            .withDescription("path to data directory containing term and title indices").create(INDEX));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(INDEX)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    float cutoff = 0.9f;
    if (cmdline.hasOption(PCUTOFF)) {
        cutoff = Float.parseFloat(cmdline.getOptionValue(PCUTOFF));
    }
    LOG.info("Tool: " + AnnotateDocuments.class.getSimpleName());
    LOG.info(" - indices path: " + indexPath);
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);
    LOG.info(" - log(probCutoff): " + Math.log(cutoff));

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    Job job = Job.getInstance(conf);
    job.setJobName(AnnotateDocuments.class.getSimpleName());
    job.setJarByClass(AnnotateDocuments.class);

    String termIndex = indexPath + Path.SEPARATOR + TERM;
    String titleIndex = indexPath + Path.SEPARATOR + TITLE;

    Path termIndexPath = new Path(termIndex);
    Path titleIndexPath = new Path(titleIndex);

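    // Ship both index files to every task via the distributed cache.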
    Preconditions.checkArgument(fs.exists(termIndexPath), "Missing term index files... " + termIndexPath);
    DistributedCache.addCacheFile(termIndexPath.toUri(), job.getConfiguration());
    Preconditions.checkArgument(fs.exists(titleIndexPath), "Missing title index files... " + titleIndexPath);
    DistributedCache.addCacheFile(titleIndexPath.toUri(), job.getConfiguration());

    job.setNumReduceTasks(reduceTasks);
    job.getConfiguration().setFloat(PCUTOFF, cutoff); // Job.getInstance copies conf, so set the cutoff on the job's own configuration

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(HMapSIW.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(HMapSIW.class);

    job.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file:cc.solr.lucene.store.hdfs.ChangeFileExt.java

License:Apache License

public static void main(String[] args) throws IOException {
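    // Rename every file directly under the given path, appending a ".lf" extension.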
    Path p = new Path(args[0]);
    FileSystem fileSystem = FileSystem.get(p.toUri(), new Configuration());
    FileStatus[] listStatus = fileSystem.listStatus(p);
    for (FileStatus fileStatus : listStatus) {
        Path path = fileStatus.getPath();
        fileSystem.rename(path, new Path(path.toString() + ".lf"));
    }
}

From source file:cc.solr.lucene.store.hdfs.ConvertDirectory.java

License:Apache License

public static void convert(Path path) throws IOException {
    FileSystem fileSystem = FileSystem.get(path.toUri(), new Configuration());
    if (!fileSystem.exists(path)) {
        System.out.println(path + " does not exist.");
        return;
    }
    FileStatus fileStatus = fileSystem.getFileStatus(path);
    if (fileStatus.isDir()) {
        FileStatus[] listStatus = fileSystem.listStatus(path);
        for (FileStatus status : listStatus) {
            convert(status.getPath());
        }
    } else {
        System.out.println("Converting file [" + path + "]");
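        // Append the metadata block, then an int block count (1), the original file length, and the writer version.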
        HdfsMetaBlock block = new HdfsMetaBlock();
        block.realPosition = 0;
        block.logicalPosition = 0;
        block.length = fileStatus.getLen();
        FSDataOutputStream outputStream = fileSystem.append(path);
        block.write(outputStream);
        outputStream.writeInt(1);
        outputStream.writeLong(fileStatus.getLen());
        outputStream.writeInt(HdfsFileWriter.VERSION);
        outputStream.close();
    }
}

From source file:cmd.download.java

License:Apache License

private void mergeToLocalFile(FileSystem fs, Path src, String outPath, Configuration configuration)
        throws FileNotFoundException, IOException {
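    // Collect the sub-paths whose names start with NAME_SECOND; the TreeMap keeps them sorted by name.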
    FileStatus[] status = fs.listStatus(src);
    Map<String, Path> paths = new TreeMap<String, Path>();
    for (FileStatus fileStatus : status) {
        Path path = fileStatus.getPath();
        String pathName = path.getName();
        if (pathName.startsWith(Constants.NAME_SECOND)) {
            paths.put(pathName, path);
        }
    }

    File outFile = new File(outPath, Names.indexId2Node + ".dat");
    OutputStream out = new FileOutputStream(outFile);
    for (String pathName : paths.keySet()) {
        Path path = new Path(src, paths.get(pathName));
        log.debug("Concatenating {} into {}...", path.toUri(), outFile.getAbsoluteFile());
        InputStream in = fs.open(new Path(path, Names.indexId2Node + ".dat"));
        IOUtils.copyBytes(in, out, configuration, false);
        in.close();
    }
    out.close();
}

From source file:cmd.download.java

License:Apache License

private void mergeToLocalFile2(FileSystem fs, Path src, String outPath, Configuration configuration)
        throws FileNotFoundException, IOException {
    // Find all the right paths and copy .gz files locally
    FileStatus[] status = fs.listStatus(src);
    Map<String, Path> paths = new TreeMap<String, Path>();
    for (FileStatus fileStatus : status) {
        Path path = fileStatus.getPath();
        String pathName = path.getName();
        if (pathName.startsWith(Constants.NAME_FOURTH)) {
            paths.put(pathName, path);
        }
    }

    for (String pathName : paths.keySet()) {
        Path path = new Path(src, paths.get(pathName));
        status = fs.listStatus(path);
        for (FileStatus fileStatus : status) {
            Path p = fileStatus.getPath();
            log.debug("Copying {} to {}...", p.toUri(), outPath);
            fs.copyToLocalFile(p, new Path(outPath, p.getName()));
        }
    }

    // Merge .gz files into indexName.gz
    File fileOutputPath = new File(outPath);
    File[] files = fileOutputPath.listFiles(new FileFilter() {
        @Override
        public boolean accept(File pathname) {
            return pathname.getName().endsWith(".gz");
        }
    });
    Arrays.sort(files);
    String prevIndexName = null;
    OutputStream out = null;
    for (File file : files) {
        log.debug("Processing {}... ", file.getName());
        String indexName = file.getName().substring(0, file.getName().indexOf("_"));
        if (prevIndexName == null)
            prevIndexName = indexName;
        if (out == null)
            out = new GZIPOutputStream(new FileOutputStream(new File(outPath, indexName + ".gz")));
        if (!prevIndexName.equals(indexName)) {
            if (out != null)
                out.close();
            log.debug("Index name set to {}", indexName);
            out = new GZIPOutputStream(new FileOutputStream(new File(outPath, indexName + ".gz")));
        }
        InputStream in = new GZIPInputStream(new FileInputStream(file));
        log.debug("Copying {} into {}.gz ...", file.getName(), indexName);
        IOUtils.copyBytes(in, out, 8192, false);
        in.close();
        file.delete();
        prevIndexName = indexName;
    }
    if (out != null)
        out.close();

    // build B+Tree indexes
    Location location = new Location(outPath);
    for (String idxName : Constants.indexNames) {
        log.debug("Creating {} index...", idxName);
        String indexFilename = location.absolute(idxName, "gz");
        if (new File(indexFilename).exists()) {
            new File(outPath, idxName + ".dat").delete();
            new File(outPath, idxName + ".idn").delete();
            CmdIndexBuild.main(location.getDirectoryPath(), idxName, indexFilename);
            // To save some disk space
            new File(indexFilename).delete();
        }
    }
}

From source file:cmd.tdbloader4.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Configuration configuration = getConf();
    configuration.set(Constants.RUN_ID, String.valueOf(System.currentTimeMillis()));
    boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERRIDE_OUTPUT,
            Constants.OPTION_OVERRIDE_OUTPUT_DEFAULT);
    boolean copyToLocal = configuration.getBoolean(Constants.OPTION_COPY_TO_LOCAL,
            Constants.OPTION_COPY_TO_LOCAL_DEFAULT);
    boolean verify = configuration.getBoolean(Constants.OPTION_VERIFY, Constants.OPTION_VERIFY_DEFAULT);
    boolean runLocal = configuration.getBoolean(Constants.OPTION_RUN_LOCAL, Constants.OPTION_RUN_LOCAL_DEFAULT);

    FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration);
    if (overrideOutput) {
        fs.delete(new Path(args[1]), true);
        fs.delete(new Path(args[1] + OUTPUT_PATH_POSTFIX_1), true);
        fs.delete(new Path(args[1] + OUTPUT_PATH_POSTFIX_2), true);
        fs.delete(new Path(args[1] + OUTPUT_PATH_POSTFIX_3), true);
        fs.delete(new Path(args[1] + OUTPUT_PATH_POSTFIX_4), true);
    }

    if ((copyToLocal) || (runLocal)) {
        File path = new File(args[1]);
        path.mkdirs();
    }

    Tool first = new FirstDriver(configuration);
    int status = first.run(new String[] { args[0], args[1] + OUTPUT_PATH_POSTFIX_1 });
    if (status != 0) {
        return status;
    }

    createOffsetsFile(fs, args[1] + OUTPUT_PATH_POSTFIX_1, args[1] + OUTPUT_PATH_POSTFIX_1);
    Path offsets = new Path(args[1] + OUTPUT_PATH_POSTFIX_1, Constants.OFFSETS_FILENAME);
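    // Make the offsets file available to every task via the distributed cache.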
    DistributedCache.addCacheFile(offsets.toUri(), configuration);

    Tool second = new SecondDriver(configuration);
    status = second.run(new String[] { args[0], args[1] + OUTPUT_PATH_POSTFIX_2 });
    if (status != 0) {
        return status;
    }

    Tool third = new ThirdDriver(configuration);
    status = third.run(new String[] { args[1] + OUTPUT_PATH_POSTFIX_2, args[1] + OUTPUT_PATH_POSTFIX_3 });
    if (status != 0) {
        return status;
    }

    Tool fourth = new FourthDriver(configuration);
    status = fourth.run(new String[] { args[1] + OUTPUT_PATH_POSTFIX_3, args[1] + OUTPUT_PATH_POSTFIX_4 });
    if (status != 0) {
        return status;
    }

    if (copyToLocal) {
        Tool download = new download(configuration);
        download.run(
                new String[] { args[1] + OUTPUT_PATH_POSTFIX_2, args[1] + OUTPUT_PATH_POSTFIX_4, args[1] });
    }

    if (verify) {
        DatasetGraphTDB dsgMem = load(args[0]);
        Location location = new Location(args[1]);

        if (!copyToLocal) {
            // TODO: this is a sort of a cheat and it could go away (if it turns out to be too slow)!
            download.fixNodeTable2(location);
        }

        DatasetGraphTDB dsgDisk = SetupTDB.buildDataset(location);
        boolean isomorphic = isomorphic(dsgMem, dsgDisk);
        System.out.println("> " + isomorphic);
    }

    return status;
}

From source file:cn.uway.util.apache.parquet.hadoop.ParquetFileWriter.java

License:Apache License

static ParquetMetadata mergeFooters(Path root, List<Footer> footers) {
    String rootPath = root.toUri().getPath();
    GlobalMetaData fileMetaData = null;
    List<BlockMetaData> blocks = new ArrayList<BlockMetaData>();
    for (Footer footer : footers) {
        String footerPath = footer.getFile().toUri().getPath();
        if (!footerPath.startsWith(rootPath)) {
            throw new ParquetEncodingException(
                    footerPath + " invalid: all the files must be contained in the root " + root);
        }
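        // Relativize the footer path against the root, stripping any leading slashes.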
        footerPath = footerPath.substring(rootPath.length());
        while (footerPath.startsWith("/")) {
            footerPath = footerPath.substring(1);
        }
        fileMetaData = mergeInto(footer.getParquetMetadata().getFileMetaData(), fileMetaData);
        for (BlockMetaData block : footer.getParquetMetadata().getBlocks()) {
            block.setPath(footerPath);
            blocks.add(block);
        }
    }
    return new ParquetMetadata(fileMetaData.merge(), blocks);
}

From source file:co.cask.cdap.data.stream.StreamDataFileSplitter.java

License:Apache License

private Path getIndexFile(Path eventFile) {
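    // Replace the event-file suffix with the index-file suffix to locate the companion index file.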
    String eventPath = eventFile.toUri().toString();
    int extLength = StreamFileType.EVENT.getSuffix().length();
    return new Path(URI.create(String.format("%s%s", eventPath.substring(0, eventPath.length() - extLength),
            StreamFileType.INDEX.getSuffix())));
}