Example usage for org.apache.hadoop.fs Path Path

List of usage examples for org.apache.hadoop.fs Path Path

Introduction

On this page you can find example usage of the org.apache.hadoop.fs.Path constructor.

Prototype

public Path(URI aUri) 

Document

Construct a path from a URI
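
The usage examples below construct a Path from a String, or from a parent Path and a child name; the prototype above takes a java.net.URI instead. Here is a minimal sketch of the URI variant; the hdfs://namenode:8020 authority and the /user/demo path are illustrative placeholders, not values taken from the examples.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PathFromUriExample {
    public static void main(String[] args) throws Exception {
        // Construct a Path directly from a URI (scheme + authority + path).
        Path p = new Path(URI.create("hdfs://namenode:8020/user/demo/input.txt"));

        // The filesystem can be resolved from the Path's own URI.
        Configuration conf = new Configuration();
        FileSystem fs = p.getFileSystem(conf);
        System.out.println(p + " exists: " + fs.exists(p));
    }
}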

Usage

From source file: CouchbaseExportStressTest.java

License: Apache License

public void createFile(int fileId) throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path dirPath = new Path("CouchbaseExportStressTest");
    fs.mkdirs(dirPath);
    Path filePath = new Path(dirPath, "input-" + fileId);

    OutputStream os = fs.create(filePath);
    Writer w = new BufferedWriter(new OutputStreamWriter(os));
    for (int i = 0; i < RECORDS_PER_FILE; i++) {
        long v = (long) i + ((long) RECORDS_PER_FILE * (long) fileId);
        w.write("" + v + "," + ALPHABET + ALPHABET + ALPHABET + ALPHABET + "\n");

    }
    w.close();
    os.close();
}

From source file: CouchbaseExportStressTest.java

License: Apache License

/** Create a set of data files to export. */
public void createData() throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path dirPath = new Path("CouchbaseExportStressTest");
    if (fs.exists(dirPath)) {
        System.out.println("Export directory appears to already exist. Skipping data-gen.");
        return;
    }

    for (int i = 0; i < NUM_FILES; i++) {
        createFile(i);
    }
}

From source file: BMTColumnLoader.java

License: Apache License

public int run(String[] args) {
    JobConf conf = new JobConf(getConf(), BMTColumnLoader.class);
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);

    conf.setJobName("BMTColumnLoader");
    conf.setMapperClass(Map.class);
    conf.setNumReduceTasks(0);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        other_args.add(args[i]);
    }

    FileInputFormat.setInputPaths(conf, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));
    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    return 0;
}

From source file: Eggshell.java

License: Open Source License

/** In charge of setting up and submitting the Hadoop job
 *  The method receives the remaining command-line arguments
 *  after first being processed by Hadoop.
 *
 * @param args        Hadoop processed command-line arguments
 * @return            Returns 0 for success
 */
public int run(String[] args) throws Exception {
    String name = args[0];
    String[] params = Arrays.copyOfRange(args, 1, args.length);
    Object[] arguments = Arrays.copyOf(params, params.length, Object[].class);

    script = new Script(); // start the Javascript interpreter
    script.putProperty("arguments", script.newArray(arguments));

    EggGlobal.script = script;
    Egg.script = script;
    Egg.name = name;
    Egg.conf = getConf();

    Scriptable global = script.newObject("EggGlobal", null);
    script.setGlobalScope(global);

    script.evalLibrary(); // load the Eggshell Javascript library
    script.evalFile(name); // load the javascript job file

    /* create a temporary directory in HDFS to hold the serialized functions */
    FileSystem fs = FileSystem.get(getConf());
    Path dir = new Path(SCRIPT_DIR);
    if (fs.exists(dir))
        fs.delete(dir, true);
    fs.mkdirs(dir);

    /* call the 'eggshell' function */
    Object o = script.getProperty("eggshell");
    if (o instanceof Function) {
        Scriptable thisObj = script.newObject("Egg", null);
        Function f = (Function) o;
        o = script.callFunction(f, thisObj, params);
        script.exit();

        /* return the result of the 'eggshell' function */
        if (o instanceof NativeJavaObject)
            o = ((NativeJavaObject) o).unwrap();
        if (o instanceof Boolean)
            return (Boolean) o ? 0 : 1;
        if (o instanceof Integer)
            return (Integer) o;
        if (o instanceof Double)
            return ((Double) o).intValue();
    }
    return 0;
}

From source file: LungDriver.java

License: Creative Commons License

private void cleanOutputPath(Configuration conf, String outputPath) {
    try {
        FileSystem fs = FileSystem.get(conf);
        Path output = new Path(outputPath);
        fs.delete(output, true);
    } catch (IOException e) {
        System.err.println("Failed to delete temporary path");
        e.printStackTrace();
    }

    System.out.println("[DONE]\n");
}

From source file: DFSBenchmark.java

License: Apache License

public static void main(String[] args) throws Exception {
    Preconditions.checkArgument(args.length == 2, "Please provide the threads and base directory path details");
    DFSBenchmark benchmark = new DFSBenchmark((Integer.parseInt(args[0])), new Path(args[1]));
    benchmark.runBenchmark();
}

From source file: TestDistinct.java

License: Apache License

public void testDistinct() throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    fs.delete(new Path("/tmp/test_distinct_file"), true);
    fs.delete(new Path("/tmp/test_distinct_file_results"), true);

    FSDataOutputStream out = fs.create(new Path("/tmp/test_distinct_file"));
    PrintWriter pw = new PrintWriter(out);
    pw.println("distinct1");
    pw.println("distinct2");
    pw.println("distinct2");
    pw.println("distinct3");
    pw.println("distinct2");
    pw.flush();
    out.close();

    Map<String, Tap> sources = new HashMap<String, Tap>();
    Map<String, Tap> sinks = new HashMap<String, Tap>();

    Tap inTap = new Hfs(new TextLine(new Fields("line")), "/tmp/test_distinct_file");
    Pipe inPipe = new Pipe("inPipe");
    sources.put("inPipe", inTap);

    Distinct distinct = new Distinct(inPipe);

    Tap outTap = new Hfs(new TextLine(new Fields("line")), "/tmp/test_distinct_file_results");
    Pipe outPipe = new Pipe("outPipe", distinct);
    sinks.put("outPipe", outTap);

    Flow flow = new FlowConnector().connect(sources, sinks, inPipe, outPipe);
    flow.complete();

    FSDataInputStream in = fs.open(new Path("/tmp/test_distinct_file_results/part-00000"));
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));

    ArrayList<String> results = new ArrayList<String>();
    results.add("distinct1");
    results.add("distinct2");
    results.add("distinct3");

    try {
        while (true) {
            String s = reader.readLine();
            if (s == null) {
                break;
            }

            assertEquals(results.remove(0), s);
        }
    } catch (Exception e) {
        fail("Got an exception while trying to verify the results: " + e.toString());
    }

    assertEquals("All results must be consumed!", 0, results.size());
}

From source file: DateExample_Month.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count fs");
    job.setJarByClass(DateExample_Month.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(IsValidKeyFormat.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file: WordCount.java

License: Apache License

public int run(String[] args) throws Exception {
    final long startTime = System.currentTimeMillis();
    String outputReducerType = "filesystem";
    if (args != null && args[0].startsWith(OUTPUT_REDUCER_VAR)) {
        String[] s = args[0].split("=");
        if (s != null && s.length == 2)
            outputReducerType = s[1];
    }
    logger.info("output reducer type: " + outputReducerType);

    // use a smaller page size that doesn't divide the row count evenly to exercise the paging logic better
    ConfigHelper.setRangeBatchSize(getConf(), 99);

    for (int i = 0; i < WordCountSetup.TEST_COUNT; i++) {
        String columnName = "userId";
        Job job = new Job(getConf(), "wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        if (outputReducerType.equalsIgnoreCase("filesystem")) {
            job.setReducerClass(ReducerToFilesystem.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i));
        } else {
            job.setReducerClass(ReducerToCassandra.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(ByteBuffer.class);
            job.setOutputValueClass(List.class);

            job.setOutputFormatClass(ColumnFamilyOutputFormat.class);

            ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);
            job.getConfiguration().set(CONF_COLUMN_NAME, "sum");
        }
        job.setInputFormatClass(ColumnFamilyInputFormat.class);
        ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
        ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");

        //Change partitioner here
        ConfigHelper.setInputPartitioner(job.getConfiguration(), "RandomPartitioner");
        ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY);

        SlicePredicate predicate = new SlicePredicate()
                .setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName)));
        ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

        // this will cause the predicate to be ignored in favor of scanning everything as a wide row
        // Last change: Super Column support?
        // ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY, true);

        ConfigHelper.setOutputInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setOutputPartitioner(job.getConfiguration(), "RandomPartitioner");
        job.waitForCompletion(true);
    }

    final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
    System.out.println();
    System.out.println("Job Finished in " + duration + " seconds");
    System.out.println();

    return 0;
}

From source file: TestPerformance.java

License: Open Source License

private static void genfdf(String filedir, int filenum, int recordnum) throws Exception {

    int size = 0;
    FileStatus[] fss = fs.listStatus(new Path(filedir));
    if (fss != null) {
        size += fss.length;
    }

    for (int i = 0; i < filenum; i++) {
        System.err.println("generate a fdf");
        FormatDataFile fdf = createfdf(filedir, false, (short) -1);
        for (int j = 0; j < recordnum; j++) {
            Record record = new Record((short) 6);
            record.addValue(new FieldValue((byte) r.nextInt(), (short) 0));
            record.addValue(new FieldValue((short) r.nextInt(), (short) 1));
            record.addValue(new FieldValue(r.nextInt(), (short) 2));
            record.addValue(new FieldValue(r.nextLong(), (short) 3));
            record.addValue(new FieldValue(r.nextFloat() * 10000, (short) 4));
            record.addValue(new FieldValue(r.nextDouble() * 100000000, (short) 5));
            if ((j + 1) % 1000000 == 0)
                System.err.println((j + 1) + "records written");
            fdf.addRecord(record);
        }
        fdf.close();
    }
}