Example usage for org.apache.hadoop.fs.Path: the Path(URI) constructor

List of usage examples for the org.apache.hadoop.fs.Path constructor Path(URI)

Introduction

On this page you can find example usages of the org.apache.hadoop.fs.Path constructor Path(URI).

Prototype

public Path(URI aUri) 

Document

Construct a path from a URI
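
As a quick illustration of the prototype above, here is a minimal, self-contained sketch that builds a Path from a URI and from a plain String; the hdfs://localhost:9000 authority and the /user/demo paths are illustrative placeholders, not taken from the examples below.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PathConstructionSketch {
    public static void main(String[] args) throws Exception {
        // Path(URI): the constructor documented on this page.
        Path fromUri = new Path(URI.create("hdfs://localhost:9000/user/demo/input"));

        // Path(String): the overload most of the examples below actually call,
        // e.g. new Path(args[0]).
        Path fromString = new Path("/user/demo/output");

        // A Path is only a name; talking to storage requires a FileSystem.
        FileSystem fs = FileSystem.get(fromUri.toUri(), new Configuration());
        System.out.println("exists? " + fs.exists(fromUri));
        System.out.println("qualified: "
                + fromString.makeQualified(fs.getUri(), fs.getWorkingDirectory()));
    }
}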

Usage

From source file:Top20AZRestaurants.java

@Override
public int run(String[] args) throws Exception {
    Job job1 = new Job(getConf());
    job1.setSortComparatorClass(MyDecreasingDoubleComparator.class);
    job1.setJobName("Top20 AZ Restaurants ChainJob");
    job1.setJarByClass(Top20AZRestaurants.class);

    JobConf map1Conf = new JobConf(false);
    ChainMapper.addMapper(job1, Top20Mapper.class, LongWritable.class, Text.class, Text.class, Text.class,
            map1Conf);
    JobConf map2Conf = new JobConf(false);
    ChainMapper.addMapper(job1, Top20MapperRedo.class, Text.class, Text.class, DoubleWritable.class, Text.class,
            map2Conf);
    JobConf reduceConf = new JobConf(false);
    ChainReducer.setReducer(job1, Top20ReducerRedo.class, DoubleWritable.class, Text.class, Text.class,
            DoubleWritable.class, reduceConf);
    FileInputFormat.setInputPaths(job1, new Path(args[0]));
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));

    boolean success = job1.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file:WordCountB.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCountB.class);
    job.setMapperClass(TokenizerMapper.class);
    // Setup the Combiner
    job.setCombinerClass(IntSumReducer.class);
    // Setup the Partitioner
    job.setPartitionerClass(Letterpartitioner.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:BytesRelevance.java

License:Apache License

@Override
protected void writeOutBloomFilter(Tap keysTap, String keyField, FileSystem fs, String path, int bloom_bits,
        int bloom_hashes) throws IOException {
    BytesBloomFilter filter = new BytesBloomFilter(bloom_bits, bloom_hashes);
    TupleEntryIterator it = new TupleEntryIterator(keysTap.getSourceFields(),
            new TapIterator(keysTap, new JobConf()));
    while (it.hasNext()) {
        TupleEntry t = it.next();
        byte[] b = getBytes((BytesWritable) t.get(keyField));
        filter.add(b);
    }
    it.close();
    filter.writeToFileSystem(fs, new Path(path));

}

From source file:TestColumnStorageInputFormat.java

License:Open Source License

public static void main(String[] argv) throws IOException, SerDeException {
    try {
        if (argv.length != 2) {
            System.out.println("TestColumnStorageInputFormat <input> idx");
            System.exit(-1);
        }

        JobConf conf = new JobConf(TestColumnStorageInputFormat.class);

        conf.setJobName("TestColumnStorageInputFormat");

        conf.setNumMapTasks(1);
        conf.setNumReduceTasks(1);

        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Unit.Record.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.set("mapred.output.compress", "flase");

        conf.set("mapred.input.dir", argv[0]);

        conf.set("hive.io.file.readcolumn.ids", argv[1]);

        FormatStorageSerDe serDe = initSerDe(conf);
        StandardStructObjectInspector oi = (StandardStructObjectInspector) serDe.getObjectInspector();
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();

        FileInputFormat.setInputPaths(conf, argv[0]);
        Path outputPath = new Path(argv[1]);
        FileOutputFormat.setOutputPath(conf, outputPath);

        InputFormat inputFormat = new ColumnStorageInputFormat();
        long begin = System.currentTimeMillis();
        InputSplit[] inputSplits = inputFormat.getSplits(conf, 1);
        long end = System.currentTimeMillis();
        System.out.println("getsplit delay " + (end - begin) + " ms");

        if (inputSplits.length == 0) {
            System.out.println("inputSplits is empty");
            return;
        } else {
            System.out.println("get Splits:" + inputSplits.length);
        }

        int size = inputSplits.length;
        System.out.println("getSplits return size:" + size);
        for (int i = 0; i < size; i++) {
            ColumnStorageSplit split = (ColumnStorageSplit) inputSplits[i];
            System.out.printf("split:" + i + " offset:" + split.getStart() + "len:" + split.getLength()
                    + "path:" + split.getPath().toString() + "beginLine:" + split.getBeginLine() + "endLine:"
                    + split.getEndLine());
            if (split.getFileName() != null) {
                System.out.println("fileName:" + split.getFileName());
            } else {
                System.out.println("fileName null");
            }
            if (split.fileList() != null) {
                System.out.println("fileList.num:" + split.fileList().size());
                for (int j = 0; j < split.fileList().size(); j++) {
                    System.out.println("filelist " + j + ":" + split.fileList().get(j));
                }
            }
        }

        while (true) {
            int totalDelay = 0;
            RecordReader<WritableComparable, Writable> currRecReader = null;
            for (int i = 0; i < inputSplits.length; i++) {
                currRecReader = inputFormat.getRecordReader(inputSplits[i], conf, Reporter.NULL);

                WritableComparable key;
                Writable value;

                key = currRecReader.createKey();
                value = currRecReader.createValue();

                begin = System.currentTimeMillis();
                int count = 0;
                while (currRecReader.next(key, value)) {

                    Record record = (Record) value;

                    Object row = serDe.deserialize(record);
                    count++;

                }
                end = System.currentTimeMillis();

                long delay = (end - begin) / 1000;
                totalDelay += delay;
                System.out.println(count + " record read over, delay " + delay + " s");
            }

            System.out.println("total delay:" + totalDelay + "\n");
        }

    } catch (Exception e) {
        e.printStackTrace();
        System.out.println("get exception:" + e.getMessage());
    }
}

From source file:HoodieClientExample.java

License:Apache License

public void run() throws Exception {

    SparkConf sparkConf = new SparkConf().setAppName("hoodie-client-example");
    sparkConf.setMaster("local[1]");
    sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
    sparkConf.set("spark.kryoserializer.buffer.max", "512m");
    JavaSparkContext jsc = new JavaSparkContext(sparkConf);

    // Generator of some records to be loaded in.
    HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator();

    // initialize the table, if not done already
    Path path = new Path(tablePath);
    FileSystem fs = FSUtils.getFs(tablePath, jsc.hadoopConfiguration());
    if (!fs.exists(path)) {
        HoodieTableMetaClient.initTableType(jsc.hadoopConfiguration(), tablePath,
                HoodieTableType.valueOf(tableType), tableName, HoodieAvroPayload.class.getName());
    }

    // Create the write client to write some records in
    HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(tablePath)
            .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2).forTable(tableName)
            .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(IndexType.BLOOM).build())
            .withCompactionConfig(HoodieCompactionConfig.newBuilder().archiveCommitsWith(2, 3).build()).build();
    HoodieWriteClient client = new HoodieWriteClient(jsc, cfg);

    /**
     * Write 1 (only inserts)
     */
    String newCommitTime = client.startCommit();
    logger.info("Starting commit " + newCommitTime);

    List<HoodieRecord> records = dataGen.generateInserts(newCommitTime, 100);
    JavaRDD<HoodieRecord> writeRecords = jsc.<HoodieRecord>parallelize(records, 1);
    client.upsert(writeRecords, newCommitTime);

    /**
     * Write 2 (updates)
     */
    newCommitTime = client.startCommit();
    logger.info("Starting commit " + newCommitTime);
    records.addAll(dataGen.generateUpdates(newCommitTime, 100));
    writeRecords = jsc.<HoodieRecord>parallelize(records, 1);
    client.upsert(writeRecords, newCommitTime);

    /**
     * Schedule a compaction and also perform compaction on a MOR dataset
     */
    if (HoodieTableType.valueOf(tableType) == HoodieTableType.MERGE_ON_READ) {
        Optional<String> instant = client.scheduleCompaction(Optional.empty());
        JavaRDD<WriteStatus> writeStatues = client.compact(instant.get());
        client.commitCompaction(instant.get(), writeStatues, Optional.empty());
    }
}

From source file:PT1.java

License:Open Source License

static void testgenrawfile(String filename, int recordnum) throws IOException {
    Random r = new Random();
    FSDataOutputStream fos = fs.create(new Path(filename));
    StringBuffer sb = new StringBuffer();
    for (int i = 0; i < recordnum; i++) {
        fos.writeByte(i / 1000);
        fos.writeShort(i / 1000);
        fos.writeInt(i / 1000);
        fos.writeLong(i / 1000);
        fos.writeFloat(i / 1000);
        fos.writeDouble(i / 1000);
        int strnum = r.nextInt(12) + 7;
        sb.setLength(0);
        for (int j = 0; j < strnum; j++) {
            sb.append((char) ('a' + j));
        }
        fos.writeUTF(sb.toString());
        if (i % 1000000 == 0) {
        }
    }
    fos.close();
}

From source file:PT1.java

License:Open Source License

static void testgenrawfiler(String filename, int recordnum) throws IOException {
    Random r = new Random();
    FSDataOutputStream fos = fs.create(new Path(filename));
    StringBuffer sb = new StringBuffer();
    for (int i = 0; i < recordnum; i++) {
        fos.writeByte(r.nextInt());
        fos.writeShort(r.nextInt());
        fos.writeInt(r.nextInt());
        fos.writeLong(r.nextLong());
        fos.writeFloat(r.nextFloat());
        fos.writeDouble(r.nextDouble());
        int strnum = r.nextInt(12) + 7;
        sb.setLength(0);
        for (int j = 0; j < strnum; j++) {
            sb.append((char) ('a' + r.nextInt(26)));
        }
        fos.writeUTF(sb.toString());
        if (i % 1000000 == 0) {
        }
    }
    fos.close();

}

From source file:PT1.java

License:Open Source License

static void testwritefdf(String filename, String fromfile, boolean compress) throws Exception {
    FSDataInputStream fis = fs.open(new Path(fromfile));
    FormatDataFile fdf = createfdf(filename, compress);
    for (int i = 0; i < 10000 * 10000; i++) {
        Record record = readrecord(fis);
        if (record == null) {
            break;
        }
        fdf.addRecord(record);
        if (i % 1000000 == 0) {
        }
    }
    fdf.close();
    fis.close();
}

From source file:PT1.java

License:Open Source License

static void testwritecolumn(String filename, String fromfile, boolean compress) throws Exception {

    FormatDataFile[] fdfs = createfdfs(filename, compress);
    FSDataInputStream fis = fs.open(new Path(fromfile));
    for (int i = 0; i < 10000 * 10000; i++) {

        Record record = readrecord(fis);
        if (record == null) {
            break;
        }
        ArrayList<FieldValue> fvs = record.fieldValues();
        for (int j = 0; j < fvs.size(); j++) {
            Record re = new Record(1);
            re.addValue(fvs.get(j));
            fdfs[j].addRecord(re);
        }
        if (i % 1000000 == 0) {
        }

    }
    for (int i = 0; i < fdfs.length; i++) {
        fdfs[i].close();
    }
    fis.close();

}

From source file:PT1.java

License:Open Source License

static void testreadcolumnseq(String filename, int num, boolean compress, String mode) throws Exception {

    Path path = new Path(filename);
    ArrayList<Short> vector = new ArrayList<Short>(10);

    if (mode == null || mode.equals("-1")) {
        for (short i = 0; i < 7; i++) {
            vector.add(i);
        }
    } else if (mode.equals("half")) {
        short x = 0;
        vector.add(x);
        x = 1;
        vector.add(x);
        x = 2;
        vector.add(x);
        x = 3;
        vector.add(x);
    } else {
        vector.add(Short.valueOf(mode));
    }

    Configuration conf = new Configuration();
    ColumnStorageClient client = new ColumnStorageClient(path, vector, conf);

    if (compress) {
        for (int i = 0; i < num; i++) {
            client.getNextRecord();
        }

    } else {

        for (int i = 0; i < num; i++) {
            client.getRecordByLine(i);
        }
    }

    client.close();

}