Example usage for org.apache.hadoop.fs FileSystem get

List of usage examples for org.apache.hadoop.fs.FileSystem.get

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.FileSystem.get.

Prototype

public static FileSystem get(Configuration conf) throws IOException 

Source Link

Document

Returns the configured FileSystem implementation.

Usage

From source file:PartitionGraph.java

License:Apache License

/**
 * Runs this tool./*from w  ww.  jav a 2  s.c  o  m*/
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(new Option(RANGE, "use range partitioner"));

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of partitions")
            .create(NUM_PARTITIONS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)
            || !cmdline.hasOption(NUM_PARTITIONS)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inPath = cmdline.getOptionValue(INPUT);
    String outPath = cmdline.getOptionValue(OUTPUT);
    int nodeCount = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    int numParts = Integer.parseInt(cmdline.getOptionValue(NUM_PARTITIONS));
    boolean useRange = cmdline.hasOption(RANGE);

    LOG.info("Tool name: " + PartitionGraph.class.getSimpleName());
    LOG.info(" - input dir: " + inPath);
    LOG.info(" - output dir: " + outPath);
    LOG.info(" - num partitions: " + numParts);
    LOG.info(" - node cnt: " + nodeCount);
    LOG.info(" - use range partitioner: " + useRange);

    Configuration conf = getConf();
    conf.setInt("NodeCount", nodeCount);

    Job job = Job.getInstance(conf);
    job.setJobName(PartitionGraph.class.getSimpleName() + ":" + inPath);
    job.setJarByClass(PartitionGraph.class);

    job.setNumReduceTasks(numParts);

    FileInputFormat.setInputPaths(job, new Path(inPath));
    FileOutputFormat.setOutputPath(job, new Path(outPath));

    job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNodeMultiSrc.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNodeMultiSrc.class);

    if (useRange) {
        job.setPartitionerClass(RangePartitioner.class);
    }

    FileSystem.get(conf).delete(new Path(outPath), true);

    job.waitForCompletion(true);

    return 0;
}

From source file:HadoopUtilsTest.java

License:Apache License

/**
 * Copies to standard output the contents of every file directly under
 * {@code /icntv/grade/correlate-result/2013-12-12} whose name matches {@code part-r-\d*}.
 * Any exception raised while listing or copying is printed and swallowed; the file system is
 * always closed afterwards.
 *
 * @param args ignored
 * @throws IOException if closing the file system fails
 */
public static void main(String[] args) throws IOException {
    Configuration configuration = HBaseConfiguration.create();
    FileSystem fileSystem = null;
    try {
        fileSystem = FileSystem.get(configuration);
        FileStatus[] fileStatuses = fileSystem.listStatus(new Path("/icntv/grade/correlate-result/2013-12-12"),
                new PathFilter() {
                    @Override
                    public boolean accept(Path path) {
                        return path.getName().matches("part-r-\\d*");
                    }
                });
        for (FileStatus f : fileStatuses) {
            // copyBytes is told not to close its streams (System.out must stay open), so the
            // input stream has to be closed here or one handle leaks per file.
            java.io.InputStream in = fileSystem.open(f.getPath());
            try {
                IOUtils.copyBytes(in, System.out, 4096, false);
            } finally {
                in.close();
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (null != fileSystem) {
            fileSystem.close();
        }
    }
}

From source file:CountJob.java

License:Apache License

/**
 * Runs a "word count" job over {@code args[0]} followed by a "grep-sort" job that orders the
 * counts by decreasing frequency into a single output file.
 *
 * <p>Intermediate counts are written to {@code /data/output/temp/<param>1} and the sorted result
 * to {@code /data/output/temp/<param>2}; the intermediate directory is deleted before returning.
 *
 * @param param suffix used to build the two temporary output directory names
 * @param args  program arguments: {@code args[0]} is the input path and {@code args[2]} is stored
 *              in the configuration under {@code TokenizerMapper.PATTERN}
 * @param msgs  unused; kept for interface compatibility
 * @throws IOException            if a file system operation fails or either job does not succeed
 * @throws ClassNotFoundException if a job class cannot be resolved
 * @throws InterruptedException   if waiting for a job is interrupted
 */
public static void doJob(String param, String args[], String msgs)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    conf.set(TokenizerMapper.PATTERN, args[2]);
    FileSystem hdfs = FileSystem.get(conf);
    Path tempOutput1 = new Path("/data/output/temp/" + param + "1");
    Path tempOutput2 = new Path("/data/output/temp/" + param + "2");
    // Clear leftovers from an earlier run so the jobs can create their output directories.
    if (hdfs.exists(tempOutput1) || hdfs.exists(tempOutput2)) {
        hdfs.delete(tempOutput1, true);
        hdfs.delete(tempOutput2, true);
    }

    Job job = new Job(conf, "word count");
    job.setJarByClass(CountJob.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, tempOutput1);
    // Do not feed the sort job with the output of a failed count job.
    if (!job.waitForCompletion(true)) {
        throw new IOException("word count job failed for input " + args[0]);
    }

    Job sortJob1 = new Job(conf);
    sortJob1.setJobName("grep-sort");
    FileInputFormat.setInputPaths(sortJob1, tempOutput1);
    sortJob1.setInputFormatClass(SequenceFileInputFormat.class);
    sortJob1.setMapperClass(InverseMapper.class);
    sortJob1.setNumReduceTasks(1); // write a single file
    FileOutputFormat.setOutputPath(sortJob1, tempOutput2);
    sortJob1.setSortComparatorClass( // sort by decreasing freq
            LongWritable.DecreasingComparator.class);
    try {
        if (!sortJob1.waitForCompletion(true)) {
            throw new IOException("grep-sort job failed for " + tempOutput1);
        }
    } finally {
        // The intermediate counts are no longer needed whether or not the sort succeeded.
        hdfs.delete(tempOutput1, true);
    }
}

From source file:CountJob.java

License:Apache License

/**
 * Runs the counting pipeline twice and compares the two sorted results line by line. If they
 * are identical the first result is published; otherwise the pipeline is run a third time and
 * that result is published instead.
 *
 * <p>Publishing copies the chosen part file via {@code /tmp/output.txt} to
 * {@code args[1]/part-r-00000} and creates {@code args[1]/_SUCCESS}. The part files of the first
 * two runs are deleted at the end.
 *
 * @param args program arguments, forwarded to {@code doJob}; {@code args[1]} is the final
 *             output directory
 * @throws Exception if any job or file system operation fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String msgs = "";
    doJob("1", args, msgs);
    doJob("2", args, msgs);
    FileSystem hdfs = FileSystem.get(conf);

    Path firstResult = new Path("/data/output/temp/12/part-r-00000");
    Path secondResult = new Path("/data/output/temp/22/part-r-00000");

    boolean same;
    BufferedReader bfr = null;
    BufferedReader bfr2 = null;
    try {
        bfr = new BufferedReader(new InputStreamReader(hdfs.open(firstResult)));
        bfr2 = new BufferedReader(new InputStreamReader(hdfs.open(secondResult)));
        String line1 = bfr.readLine();
        String line2 = bfr2.readLine();
        // Advance both readers in lockstep while the current lines agree.
        while (line1 != null && line1.equals(line2)) {
            line1 = bfr.readLine();
            line2 = bfr2.readLine();
        }
        // The files match only if both were exhausted at the same time.
        same = line1 == null && line2 == null;
    } finally {
        // Release the HDFS streams even when reading fails.
        if (bfr != null) {
            bfr.close();
        }
        if (bfr2 != null) {
            bfr2.close();
        }
    }

    Path chosenResult;
    if (same) {
        System.out.print("same " + same + "\n");
        chosenResult = firstResult;
    } else {
        System.out.print("Different");
        doJob("3", args, msgs);
        chosenResult = new Path("/data/output/temp/32/part-r-00000");
    }

    Path localP = new Path("/tmp/output.txt");
    hdfs.copyToLocalFile(chosenResult, localP);
    hdfs.copyFromLocalFile(localP, new Path(args[1] + "/part-r-00000"));
    hdfs.createNewFile(new Path(args[1] + "/_SUCCESS"));
    System.out.print("created result");

    hdfs.delete(firstResult, true);
    hdfs.delete(secondResult, true);
}

From source file:FileAnalyzerTest.java

License:Open Source License

/**
 * Initializes the {@code fs} field with the file system obtained from a freshly constructed
 * {@link Configuration}.
 *
 * @throws IOException if the file system cannot be obtained
 */
@BeforeTest
public void setUp() throws IOException {
    //TODO mockito?
    Configuration defaults = new Configuration();
    // Intended to resolve to the local file system (per the original author's note).
    fs = FileSystem.get(defaults);
}

From source file:lab2_3.java

/**
 * Chains two jobs: the first reads {@code args[0]} and writes to {@code TMPDIR}; the second,
 * run only if the first succeeds, reads {@code TMPDIR} and writes to {@code args[1]}. Both
 * output locations are deleted up front, and the JVM exits with 0 on success or 1 on failure.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the final output path
 * @throws Exception if a file system operation or job submission fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Start from a clean slate: drop the final and the intermediate output.
    FileSystem fileSystem = FileSystem.get(conf);
    fileSystem.delete(new Path(args[1]), true);
    fileSystem.delete(TMPDIR, true);

    Job partitionJob = Job.getInstance(conf, "drive time lab 2.2");
    partitionJob.setJarByClass(lab2_3.class);

    partitionJob.setMapperClass(lab2_2.PartitioningMapper.class);
    partitionJob.setPartitionerClass(lab2_2.TypePartitioner.class);
    partitionJob.setReducerClass(lab2_2.IdentityReducer.class);
    partitionJob.setNumReduceTasks(6);

    partitionJob.setOutputKeyClass(IntWritable.class);
    partitionJob.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(partitionJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(partitionJob, TMPDIR);

    boolean succeeded = partitionJob.waitForCompletion(true);
    if (succeeded) {
        Job statsJob = Job.getInstance(conf, "drive time lab 2.3");
        statsJob.setJarByClass(lab2_3.class);

        statsJob.setMapperClass(MMMaper.class);
        statsJob.setReducerClass(Reeeducer.class);
        statsJob.setPartitionerClass(Partitioneeeer.class);
        statsJob.setNumReduceTasks(6);

        statsJob.setOutputKeyClass(Text.class);
        statsJob.setOutputValueClass(lab2_1.Statistics.class);

        FileInputFormat.addInputPath(statsJob, TMPDIR);
        FileOutputFormat.setOutputPath(statsJob, new Path(args[1]));

        succeeded = statsJob.waitForCompletion(true);
    }

    // The intermediate directory is intentionally left in place after the run.
    System.exit(succeeded ? 0 : 1);
}

From source file:Script.java

License:Open Source License

/** Serialize the Javascript object into a file on HDFS and then add
 *  the file to the distributed cache./*from   w  ww.ja  v a 2 s  .  c o m*/
 *  @param conf       The Hadoop configuration object
 *  @param o          The Javascript object to serialize
 *  @param name       The name of file to save the serialized object to
 */
public void serialize(Configuration conf, Object o, String name) throws IOException {
    FileSystem hdfs = FileSystem.get(conf);
    Path path = new Path(Eggshell.SCRIPT_DIR + "/" + name);
    FSDataOutputStream out = hdfs.create(path); // create the file
    String buf;
    if (!(o instanceof NativeObject)) {
        buf = cx.toString(o); // serialize
        if (o instanceof NativeArray)
            buf = "[" + buf + "]"; // if array
    } else {
        buf = "{";
        NativeObject obj = (NativeObject) o;
        Object[] propIds = obj.getPropertyIds(obj);
        for (Object propId : propIds) {
            String key = propId.toString();
            Object value = obj.getProperty(obj, key);
            buf += key + ":" + cx.toString(value) + ",";
        }
        buf += "}";
    }
    buf = "(" + buf + ")"; // force evaluation
    out.writeUTF(buf);
    out.close();
    DistributedCache.addCacheFile(path.toUri(), conf);
}

From source file:BuildPersonalizedPageRankRecords.java

License:Apache License

/**
 * Runs this tool: parses the command line, configures a map-only job that reads text input and
 * writes sequence-file records, and executes it.
 *
 * @param args command-line arguments; the INPUT, OUTPUT, NUM_NODES and SOURCES options are all
 *             required
 * @return 0 if the job completed successfully, 1 if the job ran but failed, or -1 if the
 *         command line could not be parsed or a required option is missing
 * @throws Exception if configuring or running the job fails
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));
    //parsing more than 1 integer later;
    options.addOption(
            OptionBuilder.withArgName("src").hasArg().withDescription("source of pagerank").create(SOURCES));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    // Every option is mandatory; print usage and bail out if any is missing.
    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)
            || !cmdline.hasOption(SOURCES)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    //Change to array later
    String src = cmdline.getOptionValue(SOURCES);

    LOG.info("Tool name: " + BuildPersonalizedPageRankRecords.class.getSimpleName());
    LOG.info(" - inputDir: " + inputPath);
    LOG.info(" - outputDir: " + outputPath);
    LOG.info(" - numNodes: " + n);

    Configuration conf = getConf();
    conf.setInt(NODE_CNT_FIELD, n);
    //more to be set later;
    conf.set(NODE_SRC, src);
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);

    Job job = Job.getInstance(conf);
    job.setJobName(BuildPersonalizedPageRankRecords.class.getSimpleName() + ":" + inputPath);
    job.setJarByClass(BuildPersonalizedPageRankRecords.class);

    // Map-only job: no reducers.
    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNodeMultiSrc.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNodeMultiSrc.class);

    job.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    // Propagate the job outcome instead of unconditionally reporting success.
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:LookupPostingsCompressed.java

License:Apache License

/**
 * Runs this tool: opens the index under {@code <index>/part-r-00000} and the collection file,
 * then prints
 * <ul>
 *   <li>the postings of "starcross'd", each followed by the collection line at that offset,</li>
 *   <li>the complete postings list and the histogram of tf values for "gold",</li>
 *   <li>the same for "silver",</li>
 *   <li>a message if "bronze" is not present in the index.</li>
 * </ul>
 * Each postings payload is decoded as a sequence of VInt pairs (offset gap, count); gaps are
 * summed to recover absolute offsets.
 *
 * @param args command-line arguments; the INDEX and COLLECTION options are required
 * @return 0 on completion (argument errors terminate the JVM with status -1)
 * @throws Exception if reading the index or the collection fails
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(LookupPostingsCompressed.class.getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.exit(-1);
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String collectionPath = cmdline.getOptionValue(COLLECTION);

    if (collectionPath.endsWith(".gz")) {
        System.out.println("gzipped collection is not seekable: use compressed version!");
        System.exit(-1);
    }

    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(config);
    MapFile.Reader reader = new MapFile.Reader(new Path(indexPath + "/part-r-00000"), config);
    FSDataInputStream collection = null;

    try {
        collection = fs.open(new Path(collectionPath));

        Text key = new Text();
        PairOfWritables<VIntWritable, BytesWritable> value = new PairOfWritables<VIntWritable, BytesWritable>();

        System.out.println("Looking up postings for the term \"starcross'd\"");
        key.set("starcross'd");

        reader.get(key, value);

        BytesWritable postings = value.getRightElement();
        ByteArrayInputStream buffer = new ByteArrayInputStream(postings.copyBytes());
        DataInputStream in = new DataInputStream(buffer);
        int offset = 0;
        int count;
        while (in.available() != 0) {
            offset = offset + WritableUtils.readVInt(in);
            count = WritableUtils.readVInt(in);
            System.out.print("(" + offset + ", " + count + ")");
            collection.seek(offset);
            // A BufferedReader keeps its own read-ahead buffer, so one created before the seek
            // would keep returning stale data. Wrap the stream anew after every seek (and do
            // not close the wrapper, which would close the collection stream).
            BufferedReader lineReader = new BufferedReader(new InputStreamReader(collection));
            System.out.println(lineReader.readLine());
        }

        // Gold
        offset = 0;
        key.set("gold");
        reader.get(key, value);
        postings = value.getRightElement();
        buffer = new ByteArrayInputStream(postings.copyBytes());
        in = new DataInputStream(buffer);
        System.out.println("Complete postings list for 'gold': (" + value.getLeftElement() + ", [");
        while (in.available() != 0) {
            offset = offset + WritableUtils.readVInt(in);
            count = WritableUtils.readVInt(in);
            System.out.print("(" + offset + ", " + count + ")");
            System.out.print(", ");
        }
        System.out.print("])\n");

        Int2IntFrequencyDistribution goldHist = new Int2IntFrequencyDistributionEntry();
        // Rewind to the start of the postings bytes for a second pass.
        buffer.reset();

        while (in.available() != 0) {
            WritableUtils.readVInt(in); // skip the offset gap; only the count is needed here
            count = WritableUtils.readVInt(in);
            goldHist.increment(count);
        }

        System.out.println("histogram of tf values for gold");
        for (PairOfInts pair : goldHist) {
            System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());
        }

        // Silver
        // Restart the running offset so silver's gaps are not added onto gold's total.
        offset = 0;
        key.set("silver");
        reader.get(key, value);
        postings = value.getRightElement();
        buffer = new ByteArrayInputStream(postings.copyBytes());
        in = new DataInputStream(buffer);
        System.out.println("Complete postings list for 'silver': (" + value.getLeftElement() + ", [");
        while (in.available() != 0) {
            offset = offset + WritableUtils.readVInt(in);
            count = WritableUtils.readVInt(in);
            System.out.print("(" + offset + ", " + count + ")");
            System.out.print(", ");
        }
        System.out.print("])\n");

        Int2IntFrequencyDistribution silverHist = new Int2IntFrequencyDistributionEntry();
        buffer.reset();

        while (in.available() != 0) {
            WritableUtils.readVInt(in); // skip the offset gap; only the count is needed here
            count = WritableUtils.readVInt(in);
            silverHist.increment(count);
        }

        System.out.println("histogram of tf values for silver");
        // Iterate the silver histogram (the original mistakenly re-printed gold's).
        for (PairOfInts pair : silverHist) {
            System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());
        }

        // Bronze
        key.set("bronze");
        Writable w = reader.get(key, value);

        if (w == null) {
            System.out.println("the term bronze does not appear in the collection");
        }
    } finally {
        // Release the collection stream and the index reader even when a lookup fails.
        if (collection != null) {
            collection.close();
        }
        reader.close();
    }

    return 0;
}

From source file:FormatStoragePerformanceTest.java

License:Open Source License

/**
 * Writes {@code count} copies of a fixed comma-separated record to the file
 * {@code MR_input_text/testPerformanceReadText} (with a {@code _var} suffix when {@code var} is
 * set) and reports progress and total elapsed seconds through {@code output}.
 *
 * <p>Any exception is printed and swallowed rather than propagated.
 *
 * @param count number of records to write
 * @param var   when true, each record carries an extra trailing text field and the file name
 *              gets the {@code _var} suffix
 */
static void doInitFile(int count, boolean var) {
    try {
        String textFile = "MR_input_text/testPerformanceReadText";
        if (var) {
            textFile += "_var";
        }
        Path path = new Path(textFile);
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataOutputStream out = fs.create(path);

        OutputStream stream = new BufferedOutputStream(out);
        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(stream));

        String value = null;
        if (var) {
            value = "111,22222,33333333,444444444444,5555555.5555,6666666666.666666,hello konten\n";
        } else {
            value = "111,22222,33333333,444444444444,5555555.5555,6666666666.666666\n";
        }

        long begin = System.currentTimeMillis();

        try {
            for (int i = 0; i < count; i++) {
                writer.write(value);

                // Progress line every ten million records (including the very first one).
                if (i % 10000000 == 0) {
                    String string = "write " + i + " record, delay: "
                            + ((System.currentTimeMillis() - begin) / 1000) + " s \n";
                    output.write(string.getBytes());
                }
            }
        } finally {
            // Close (and thereby flush) the writer even if a write fails, so the underlying
            // stream is not leaked. The close stays inside the timed region, as before.
            writer.close();
        }

        long end = System.currentTimeMillis();

        String string = "write " + count + " record over, delay: " + ((end - begin) / 1000) + " s \n";
        output.write(string.getBytes());
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println(e.getMessage());
    }
}