List of usage examples for org.apache.hadoop.fs.FileSystem.get
public static FileSystem get(Configuration conf) throws IOException
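All of the examples below start from the same idiom: build a Configuration, ask FileSystem.get for the filesystem backing fs.defaultFS (HDFS on a typical cluster, the local filesystem otherwise), then operate on Paths. A minimal, self-contained sketch of that shared pattern; the class name and path here are illustrative, not taken from any example below:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemGetExample {
    public static void main(String[] args) throws IOException {
        // Resolve the default filesystem from the configuration.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Illustrative path; recursive delete-then-recreate is the
        // cleanup pattern several of the examples below rely on.
        Path dir = new Path("/tmp/filesystem-get-example");
        if (fs.exists(dir)) {
            fs.delete(dir, true);
        }
        fs.mkdirs(dir);
    }
}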
From source file: BigramRelativeFrequency.java
License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS)
            ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1;

    LOG.info("Tool name: " + BigramRelativeFrequency.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - num reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(BigramRelativeFrequency.class.getSimpleName());
    job.setJarByClass(BigramRelativeFrequency.class);
    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(PairOfStrings.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setOutputKeyClass(PairOfStrings.class);
    job.setOutputValueClass(FloatWritable.class);
    //job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file: TestIndexMergeMR.java
License: Open Source License

public void testIndexMergeMR() throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    String indexdir = "indexdir";
    String indexdir1 = "indexdir1";
    int filenum = 10;
    int recnum = 1000;
    short idx = 0;
    TestUtil.genifdfindex(indexdir, filenum, recnum, idx, true);

    StringBuffer sb = new StringBuffer();
    FileStatus[] ss = fs.listStatus(new Path(indexdir));
    for (FileStatus fileStatus : ss) {
        sb.append(fileStatus.getPath().toString()).append(",");
    }
    IndexMergeMR.running(sb.substring(0, sb.length() - 1), indexdir1, conf);

    IFormatDataFile ifdf = new IFormatDataFile(conf);
    ifdf.open(indexdir1 + "/part-00000");
    for (int i = 0; i < 100; i++) {
        ifdf.next().show();
    }
    ifdf.close();

    fs.delete(new Path(indexdir), true);
    fs.delete(new Path(indexdir1), true);
}
From source file: TestColumnStorageOutputFormat.java
License: Open Source License

public static void main(String[] argv) throws IOException {
    try {
        if (argv.length != 2) {
            System.out.println("TestColumnStorageOutputFormat <output> <count>");
            System.exit(-1);
        }

        JobConf conf = new JobConf(TestColumnStorageOutputFormat.class);
        conf.setJobName("TestColumnStorageOutputFormat");
        conf.setNumMapTasks(1);
        conf.setNumReduceTasks(1);
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Unit.Record.class);
        conf.setOutputFormat(ColumnStorageOutputFormat.class);
        conf.set("mapred.output.compress", "false");
        conf.set("mapred.output.dir", argv[0]);

        Head head = new Head();
        initHead(head);
        head.toJobConf(conf);

        Path outputPath = new Path(argv[0]);
        FileOutputFormat.setOutputPath(conf, outputPath);
        FileSystem fs = FileSystem.get(conf);
        MyColumnOutputFormat output = new MyColumnOutputFormat(head, conf, outputPath);

        long begin = System.currentTimeMillis();
        int count = Integer.valueOf(argv[1]);
        for (int i = 0; i < count; i++) {
            Record record = new Record((short) 210);
            for (short j = 0; j < 30; j++) {
                record.addValue(new FieldValue((byte) 1, (short) (j * 7 + 0)));
                record.addValue(new FieldValue((short) 2, (short) (j * 7 + 1)));
                record.addValue(new FieldValue((int) 3, (short) (j * 7 + 2)));
                record.addValue(new FieldValue((long) 4, (short) (j * 7 + 3)));
                record.addValue(new FieldValue((float) 5.5, (short) (j * 7 + 4)));
                record.addValue(new FieldValue((double) 6.6, (short) (j * 7 + 5)));
                record.addValue(new FieldValue((double) 7.7, (short) (j * 7 + 6)));
            }
            output.doWrite(record);
            if (i % 100000 == 0) {
                long end = System.currentTimeMillis();
                System.out.println(i + " records written, delay: " + (end - begin) / 1000 + "s");
            }
        }
        long end = System.currentTimeMillis();
        System.out.println(count + " records written, delay: " + (end - begin) / 1000 + "s");
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println("got exception: " + e.getMessage());
    }
}
From source file: CouchbaseExportStressTest.java
License: Apache License

public void createFile(int fileId) throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path dirPath = new Path("CouchbaseExportStressTest");
    fs.mkdirs(dirPath);
    Path filePath = new Path(dirPath, "input-" + fileId);

    OutputStream os = fs.create(filePath);
    Writer w = new BufferedWriter(new OutputStreamWriter(os));
    for (int i = 0; i < RECORDS_PER_FILE; i++) {
        long v = (long) i + ((long) RECORDS_PER_FILE * (long) fileId);
        w.write("" + v + "," + ALPHABET + ALPHABET + ALPHABET + ALPHABET + "\n");
    }
    w.close();
    os.close();
}
From source file: CouchbaseExportStressTest.java
License: Apache License

/** Create a set of data files to export. */
public void createData() throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path dirPath = new Path("CouchbaseExportStressTest");
    if (fs.exists(dirPath)) {
        System.out.println("Export directory appears to already exist. Skipping data-gen.");
        return;
    }

    for (int i = 0; i < NUM_FILES; i++) {
        createFile(i);
    }
}
From source file: PerformanceEvaluation.java
License: Apache License

private Path writeInputFile(final Configuration c) throws IOException {
    FileSystem fs = FileSystem.get(c);
    if (!fs.exists(PERF_EVAL_DIR)) {
        fs.mkdirs(PERF_EVAL_DIR);
    }
    SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmmss");
    Path subdir = new Path(PERF_EVAL_DIR, formatter.format(new Date()));
    fs.mkdirs(subdir);
    Path inputFile = new Path(subdir, "input.txt");
    PrintStream out = new PrintStream(fs.create(inputFile));

    // Make input random.
    Map<Integer, String> m = new TreeMap<Integer, String>();
    Hash h = MurmurHash.getInstance();
    int perClientRows = (this.R / this.N);
    try {
        for (int i = 0; i < 10; i++) {
            for (int j = 0; j < N; j++) {
                String s = "startRow=" + ((j * perClientRows) + (i * (perClientRows / 10)))
                        + ", perClientRunRows=" + (perClientRows / 10)
                        + ", totalRows=" + this.R
                        + ", clients=" + this.N
                        + ", flushCommits=" + this.flushCommits
                        + ", writeToWAL=" + this.writeToWAL
                        + ", scanCache=" + this.S;
                int hash = h.hash(Bytes.toBytes(s));
                m.put(hash, s);
            }
        }
        for (Map.Entry<Integer, String> e : m.entrySet()) {
            out.println(e.getValue());
        }
    } finally {
        out.close();
    }
    return subdir;
}
From source file: Eggshell.java
License: Open Source License

/**
 * In charge of setting up and submitting the Hadoop job.
 * The method receives the remaining command-line arguments
 * after first being processed by Hadoop.
 *
 * @param args Hadoop-processed command-line arguments
 * @return Returns 0 for success
 */
public int run(String[] args) throws Exception {
    String name = args[0];
    String[] params = Arrays.copyOfRange(args, 1, args.length);
    Object[] arguments = Arrays.copyOf(params, params.length, Object[].class);

    script = new Script(); // start the Javascript interpreter
    script.putProperty("arguments", script.newArray(arguments));
    EggGlobal.script = script;
    Egg.script = script;
    Egg.name = name;
    Egg.conf = getConf();

    Scriptable global = script.newObject("EggGlobal", null);
    script.setGlobalScope(global);

    script.evalLibrary(); // load the Eggshell Javascript library
    script.evalFile(name); // load the javascript job file

    /* create a temporary directory in hdfs to hold the serialized functions */
    FileSystem fs = FileSystem.get(getConf());
    Path dir = new Path(SCRIPT_DIR);
    if (fs.exists(dir))
        fs.delete(dir, true);
    fs.mkdirs(dir);

    /* call the 'eggshell' function */
    Object o = script.getProperty("eggshell");
    if (o instanceof Function) {
        Scriptable thisObj = script.newObject("Egg", null);
        Function f = (Function) o;
        o = script.callFunction(f, thisObj, params);
        script.exit();

        /* return the result of the 'eggshell' function */
        if (o instanceof NativeJavaObject)
            o = ((NativeJavaObject) o).unwrap();
        if (o instanceof Boolean)
            return (Boolean) o ? 0 : 1;
        if (o instanceof Integer)
            return (Integer) o;
        if (o instanceof Double)
            return ((Double) o).intValue();
    }
    return 0;
}
From source file: LungDriver.java
License: Creative Commons License

private void cleanOutputPath(Configuration conf, String outputPath) {
    try {
        FileSystem fs = FileSystem.get(conf);
        Path output = new Path(outputPath);
        fs.delete(output, true);
    } catch (IOException e) {
        System.err.println("Failed to delete temporary path");
        e.printStackTrace();
    }

    System.out.println("[DONE]\n");
}
From source file: TestDistinct.java
License: Apache License

public void testDistinct() throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    fs.delete(new Path("/tmp/test_distinct_file"), true);
    fs.delete(new Path("/tmp/test_distinct_file_results"), true);

    FSDataOutputStream out = fs.create(new Path("/tmp/test_distinct_file"));
    PrintWriter pw = new PrintWriter(out);
    pw.println("distinct1");
    pw.println("distinct2");
    pw.println("distinct2");
    pw.println("distinct3");
    pw.println("distinct2");
    pw.flush();
    out.close();

    Map<String, Tap> sources = new HashMap<String, Tap>();
    Map<String, Tap> sinks = new HashMap<String, Tap>();

    Tap inTap = new Hfs(new TextLine(new Fields("line")), "/tmp/test_distinct_file");
    Pipe inPipe = new Pipe("inPipe");
    sources.put("inPipe", inTap);

    Distinct distinct = new Distinct(inPipe);

    Tap outTap = new Hfs(new TextLine(new Fields("line")), "/tmp/test_distinct_file_results");
    Pipe outPipe = new Pipe("outPipe", distinct);
    sinks.put("outPipe", outTap);

    Flow flow = new FlowConnector().connect(sources, sinks, inPipe, outPipe);
    flow.complete();

    FSDataInputStream in = fs.open(new Path("/tmp/test_distinct_file_results/part-00000"));
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));

    ArrayList<String> results = new ArrayList<String>();
    results.add("distinct1");
    results.add("distinct2");
    results.add("distinct3");

    try {
        while (true) {
            String s = reader.readLine();
            if (s == null) {
                break;
            }
            assertEquals(results.remove(0), s);
        }
    } catch (Exception e) {
        fail("Got an exception while trying to verify the results: " + e.toString());
    }
    assertEquals("All results must be consumed!", 0, results.size());
}
From source file: FindMaxPageRankNodes.java
License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("top n").create(TOP));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(TOP)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int n = Integer.parseInt(cmdline.getOptionValue(TOP));

    LOG.info("Tool name: " + FindMaxPageRankNodes.class.getSimpleName());
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output: " + outputPath);
    LOG.info(" - top: " + n);

    Configuration conf = getConf();
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
    conf.setInt("n", n);

    Job job = Job.getInstance(conf);
    job.setJobName(FindMaxPageRankNodes.class.getName() + ":" + inputPath);
    job.setJarByClass(FindMaxPageRankNodes.class);
    job.setNumReduceTasks(1);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(FloatWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    job.waitForCompletion(true);

    return 0;
}