Example usage for org.apache.hadoop.fs Path Path

List of usage examples for org.apache.hadoop.fs Path Path

Introduction

On this page you can find example usage of the org.apache.hadoop.fs.Path constructor.

Prototype

public Path(URI aUri) 

Document

Construct a path from a URI
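
The usage examples below construct a Path from a String, or from a parent Path and a child name; the prototype above takes a java.net.URI instead. Here is a minimal sketch of the URI variant; the hdfs://namenode:8020 authority and the /user/demo path are illustrative placeholders, not values taken from the examples.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PathFromUriExample {
    public static void main(String[] args) throws Exception {
        // Construct a Path directly from a URI (scheme + authority + path).
        Path p = new Path(URI.create("hdfs://namenode:8020/user/demo/input.txt"));

        // The filesystem can be resolved from the Path's own URI.
        Configuration conf = new Configuration();
        FileSystem fs = p.getFileSystem(conf);
        System.out.println(p + " exists: " + fs.exists(p));
    }
}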

Usage

From source file: CouchbaseExportStressTest.java

License: Apache License

public void createFile(int fileId) throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path dirPath = new Path("CouchbaseExportStressTest");
    fs.mkdirs(dirPath);
    Path filePath = new Path(dirPath, "input-" + fileId);

    OutputStream os = fs.create(filePath);
    Writer w = new BufferedWriter(new OutputStreamWriter(os));
    for (int i = 0; i < RECORDS_PER_FILE; i++) {
        long v = (long) i + ((long) RECORDS_PER_FILE * (long) fileId);
        w.write("" + v + "," + ALPHABET + ALPHABET + ALPHABET + ALPHABET + "\n");

    }
    w.close();
    os.close();
}

From source file: CouchbaseExportStressTest.java

License: Apache License

/** Create a set of data files to export. */
public void createData() throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path dirPath = new Path("CouchbaseExportStressTest");
    if (fs.exists(dirPath)) {
        System.out.println("Export directory appears to already exist. Skipping data-gen.");
        return;
    }

    for (int i = 0; i < NUM_FILES; i++) {
        createFile(i);
    }
}

From source file: BMTColumnLoader.java

License: Apache License

public int run(String[] args) {
    JobConf conf = new JobConf(getConf(), BMTColumnLoader.class);
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);

    conf.setJobName("BMTColumnLoader");
    conf.setMapperClass(Map.class);
    conf.setNumReduceTasks(0);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        other_args.add(args[i]);
    }

    FileInputFormat.setInputPaths(conf, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));
    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    return 0;
}

From source file: Eggshell.java

License: Open Source License

/** In charge of setting up and submitting the Hadoop job
 *  The method receives the remaining command-line arguments
 *  after first being processed by Hadoop.
 *
 * @param args        Hadoop processed command-line arguments
 * @return            Returns 0 for success
 */
public int run(String[] args) throws Exception {
    String name = args[0];
    String[] params = Arrays.copyOfRange(args, 1, args.length);
    Object[] arguments = Arrays.copyOf(params, params.length, Object[].class);

    script = new Script(); // start the Javascript interpreter
    script.putProperty("arguments", script.newArray(arguments));

    EggGlobal.script = script;
    Egg.script = script;
    Egg.name = name;
    Egg.conf = getConf();

    Scriptable global = script.newObject("EggGlobal", null);
    script.setGlobalScope(global);

    script.evalLibrary(); // load the Eggshell Javascript library
    script.evalFile(name); // load the javascript job file

    /* create a temporary directory in HDFS to hold the serialized functions */
    FileSystem fs = FileSystem.get(getConf());
    Path dir = new Path(SCRIPT_DIR);
    if (fs.exists(dir))
        fs.delete(dir, true);
    fs.mkdirs(dir);

    /* call the 'eggshell' function */
    Object o = script.getProperty("eggshell");
    if (o instanceof Function) {
        Scriptable thisObj = script.newObject("Egg", null);
        Function f = (Function) o;
        o = script.callFunction(f, thisObj, params);
        script.exit();

        /* return the result of the 'eggshell' function */
        if (o instanceof NativeJavaObject)
            o = ((NativeJavaObject) o).unwrap();
        if (o instanceof Boolean)
            return (Boolean) o ? 0 : 1;
        if (o instanceof Integer)
            return (Integer) o;
        if (o instanceof Double)
            return ((Double) o).intValue();
    }
    return 0;
}

From source file: LungDriver.java

License: Creative Commons License

private void cleanOutputPath(Configuration conf, String outputPath) {
    try {
        FileSystem fs = FileSystem.get(conf);
        Path output = new Path(outputPath);
        fs.delete(output, true);
    } catch (IOException e) {
        System.err.println("Failed to delete temporary path");
        e.printStackTrace();
    }

    System.out.println("[DONE]\n");
}

From source file: DFSBenchmark.java

License: Apache License

public static void main(String[] args) throws Exception {
    Preconditions.checkArgument(args.length == 2, "Please provide the threads and base directory path details");
    DFSBenchmark benchmark = new DFSBenchmark((Integer.parseInt(args[0])), new Path(args[1]));
    benchmark.runBenchmark();
}

From source file: TestDistinct.java

License: Apache License

public void testDistinct() throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    fs.delete(new Path("/tmp/test_distinct_file"), true);
    fs.delete(new Path("/tmp/test_distinct_file_results"), true);

    FSDataOutputStream out = fs.create(new Path("/tmp/test_distinct_file"));
    PrintWriter pw = new PrintWriter(out);
    pw.println("distinct1");
    pw.println("distinct2");
    pw.println("distinct2");
    pw.println("distinct3");
    pw.println("distinct2");
    pw.flush();
    out.close();

    Map<String, Tap> sources = new HashMap<String, Tap>();
    Map<String, Tap> sinks = new HashMap<String, Tap>();

    Tap inTap = new Hfs(new TextLine(new Fields("line")), "/tmp/test_distinct_file");
    Pipe inPipe = new Pipe("inPipe");
    sources.put("inPipe", inTap);

    Distinct distinct = new Distinct(inPipe);

    Tap outTap = new Hfs(new TextLine(new Fields("line")), "/tmp/test_distinct_file_results");
    Pipe outPipe = new Pipe("outPipe", distinct);
    sinks.put("outPipe", outTap);

    Flow flow = new FlowConnector().connect(sources, sinks, inPipe, outPipe);
    flow.complete();

    FSDataInputStream in = fs.open(new Path("/tmp/test_distinct_file_results/part-00000"));
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));

    ArrayList<String> results = new ArrayList<String>();
    results.add("distinct1");
    results.add("distinct2");
    results.add("distinct3");

    try {
        while (true) {
            String s = reader.readLine();
            if (s == null) {
                break;
            }

            assertEquals(results.remove(0), s);
        }
    } catch (Exception e) {
        fail("Got an exception while trying to verify the results: " + e.toString());
    }

    assertEquals("All results must be consumed!", 0, results.size());
}

From source file: DateExample_Month.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count fs");
    job.setJarByClass(DateExample_Month.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(IsValidKeyFormat.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file: WordCount.java

License: Apache License

public int run(String[] args) throws Exception {
    final long startTime = System.currentTimeMillis();
    String outputReducerType = "filesystem";
    if (args != null && args[0].startsWith(OUTPUT_REDUCER_VAR)) {
        String[] s = args[0].split("=");
        if (s != null && s.length == 2)
            outputReducerType = s[1];
    }
    logger.info("output reducer type: " + outputReducerType);

    // use a smaller page size that doesn't divide the row count evenly to exercise the paging logic better
    ConfigHelper.setRangeBatchSize(getConf(), 99);

    for (int i = 0; i < WordCountSetup.TEST_COUNT; i++) {
        String columnName = "userId";
        Job job = new Job(getConf(), "wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        if (outputReducerType.equalsIgnoreCase("filesystem")) {
            job.setReducerClass(ReducerToFilesystem.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i));
        } else {
            job.setReducerClass(ReducerToCassandra.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(ByteBuffer.class);
            job.setOutputValueClass(List.class);

            job.setOutputFormatClass(ColumnFamilyOutputFormat.class);

            ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);
            job.getConfiguration().set(CONF_COLUMN_NAME, "sum");
        }
        job.setInputFormatClass(ColumnFamilyInputFormat.class);
        ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
        ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");

        //Change partitioner here
        ConfigHelper.setInputPartitioner(job.getConfiguration(), "RandomPartitioner");
        ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY);

        SlicePredicate predicate = new SlicePredicate()
                .setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName)));
        ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

        // this will cause the predicate to be ignored in favor of scanning everything as a wide row
        // Last change: Super Column support?
        // ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY, true);

        ConfigHelper.setOutputInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setOutputPartitioner(job.getConfiguration(), "RandomPartitioner");
        job.waitForCompletion(true);
    }

    final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
    System.out.println();
    System.out.println("Job Finished in " + duration + " seconds");
    System.out.println();

    return 0;
}

From source file: TestPerformance.java

License: Open Source License

private static void genfdf(String filedir, int filenum, int recordnum) throws Exception {

    int size = 0;
    FileStatus[] fss = fs.listStatus(new Path(filedir));
    if (fss != null) {
        size += fss.length;
    }

    for (int i = 0; i < filenum; i++) {
        System.err.println("generate a fdf");
        FormatDataFile fdf = createfdf(filedir, false, (short) -1);
        for (int j = 0; j < recordnum; j++) {
            Record record = new Record((short) 6);
            record.addValue(new FieldValue((byte) r.nextInt(), (short) 0));
            record.addValue(new FieldValue((short) r.nextInt(), (short) 1));
            record.addValue(new FieldValue(r.nextInt(), (short) 2));
            record.addValue(new FieldValue(r.nextLong(), (short) 3));
            record.addValue(new FieldValue(r.nextFloat() * 10000, (short) 4));
            record.addValue(new FieldValue(r.nextDouble() * 100000000, (short) 5));
            if ((j + 1) % 1000000 == 0)
                System.err.println((j + 1) + "records written");
            fdf.addRecord(record);
        }
        fdf.close();
    }
}