List of usage examples for the org.apache.hadoop.fs.Path constructor
public Path(URI aUri)
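The examples below all build a Path from a String; the URI-based constructor behaves the same way once the string is parsed. A minimal, self-contained sketch of Path(URI) itself, using a placeholder HDFS authority and directory (hdfs://namenode:8020/user/data/input is illustrative, not taken from the examples; the exists() call needs a reachable cluster, the rest runs offline):

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PathFromUriExample {
    public static void main(String[] args) throws Exception {
        // Placeholder HDFS location; substitute a real namenode and path.
        URI uri = URI.create("hdfs://namenode:8020/user/data/input");
        Path fromUri = new Path(uri); // public Path(URI aUri)
        Path fromString = new Path("hdfs://namenode:8020/user/data/input");
        System.out.println(fromUri.equals(fromString)); // typically true: both wrap the same URI
        // The URI scheme and authority also select the FileSystem implementation:
        FileSystem fs = FileSystem.get(uri, new Configuration());
        System.out.println("exists: " + fs.exists(fromUri));
    }
}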
From source file:Top20AZRestaurants.java
@Override
public int run(String[] args) throws Exception {
    Job job1 = new Job(getConf());
    job1.setSortComparatorClass(MyDecreasingDoubleComparator.class);
    job1.setJobName("Top20 AZ Restaurants ChainJob");
    job1.setJarByClass(Top20AZRestaurants.class);

    JobConf map1Conf = new JobConf(false);
    ChainMapper.addMapper(job1, Top20Mapper.class, LongWritable.class, Text.class, Text.class, Text.class,
            map1Conf);
    JobConf map2Conf = new JobConf(false);
    ChainMapper.addMapper(job1, Top20MapperRedo.class, Text.class, Text.class, DoubleWritable.class,
            Text.class, map2Conf);
    JobConf reduceConf = new JobConf(false);
    ChainReducer.setReducer(job1, Top20ReducerRedo.class, DoubleWritable.class, Text.class, Text.class,
            DoubleWritable.class, reduceConf);

    FileInputFormat.setInputPaths(job1, new Path(args[0]));
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));

    boolean success = job1.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:WordCountB.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCountB.class);
    job.setMapperClass(TokenizerMapper.class);
    // Setup the Combiner
    job.setCombinerClass(IntSumReducer.class);
    // Setup the Partitioner
    job.setPartitionerClass(Letterpartitioner.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // All arguments except the last are input paths; the last is the output path.
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:BytesRelevance.java
License:Apache License
@Override
protected void writeOutBloomFilter(Tap keysTap, String keyField, FileSystem fs, String path, int bloom_bits,
        int bloom_hashes) throws IOException {
    BytesBloomFilter filter = new BytesBloomFilter(bloom_bits, bloom_hashes);
    TupleEntryIterator it = new TupleEntryIterator(keysTap.getSourceFields(),
            new TapIterator(keysTap, new JobConf()));
    while (it.hasNext()) {
        TupleEntry t = it.next();
        // Look up the key column named by the keyField parameter
        // (the original passed the literal string "keyField" here).
        byte[] b = getBytes((BytesWritable) t.get(keyField));
        filter.add(b);
    }
    it.close();
    filter.writeToFileSystem(fs, new Path(path));
}
From source file:TestColumnStorageInputFormat.java
License:Open Source License
public static void main(String[] argv) throws IOException, SerDeException {
    try {
        if (argv.length != 2) {
            System.out.println("TestColumnStorageInputFormat <input> idx");
            System.exit(-1);
        }

        JobConf conf = new JobConf(TestColumnStorageInputFormat.class);
        conf.setJobName("TestColumnStorageInputFormat");
        conf.setNumMapTasks(1);
        conf.setNumReduceTasks(1);
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Unit.Record.class);
        conf.setInputFormat(TextInputFormat.class);
        conf.set("mapred.output.compress", "false"); // the original had the typo "flase"
        conf.set("mapred.input.dir", argv[0]);
        conf.set("hive.io.file.readcolumn.ids", argv[1]);

        FormatStorageSerDe serDe = initSerDe(conf);
        StandardStructObjectInspector oi = (StandardStructObjectInspector) serDe.getObjectInspector();
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();

        FileInputFormat.setInputPaths(conf, argv[0]);
        Path outputPath = new Path(argv[1]);
        FileOutputFormat.setOutputPath(conf, outputPath);

        InputFormat inputFormat = new ColumnStorageInputFormat();
        long begin = System.currentTimeMillis();
        InputSplit[] inputSplits = inputFormat.getSplits(conf, 1);
        long end = System.currentTimeMillis();
        System.out.println("getsplit delay " + (end - begin) + " ms");

        if (inputSplits.length == 0) {
            System.out.println("inputSplits is empty");
            return;
        } else {
            System.out.println("get Splits:" + inputSplits.length);
        }

        int size = inputSplits.length;
        System.out.println("getSplits return size:" + size);
        for (int i = 0; i < size; i++) {
            ColumnStorageSplit split = (ColumnStorageSplit) inputSplits[i];
            System.out.println("split:" + i + " offset:" + split.getStart() + " len:" + split.getLength()
                    + " path:" + split.getPath().toString() + " beginLine:" + split.getBeginLine()
                    + " endLine:" + split.getEndLine());
            if (split.getFileName() != null) {
                System.out.println("fileName:" + split.getFileName());
            } else {
                System.out.println("fileName null");
            }
            if (split.fileList() != null) {
                System.out.println("fileList.num:" + split.fileList().size());
                for (int j = 0; j < split.fileList().size(); j++) {
                    System.out.println("filelist " + j + ":" + split.fileList().get(j));
                }
            }
        }

        // Read every split repeatedly, forever, to measure read throughput.
        while (true) {
            int totalDelay = 0;
            RecordReader<WritableComparable, Writable> currRecReader = null;
            for (int i = 0; i < inputSplits.length; i++) {
                currRecReader = inputFormat.getRecordReader(inputSplits[i], conf, Reporter.NULL);
                WritableComparable key = currRecReader.createKey();
                Writable value = currRecReader.createValue();
                begin = System.currentTimeMillis();
                int count = 0;
                while (currRecReader.next(key, value)) {
                    Record record = (Record) value;
                    Object row = serDe.deserialize(record);
                    count++;
                }
                end = System.currentTimeMillis();
                long delay = (end - begin) / 1000;
                totalDelay += delay;
                System.out.println(count + " record read over, delay " + delay + " s");
            }
            System.out.println("total delay:" + totalDelay + "\n");
        }
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println("get exception:" + e.getMessage());
    }
}
From source file:HoodieClientExample.java
License:Apache License
public void run() throws Exception {
    SparkConf sparkConf = new SparkConf().setAppName("hoodie-client-example");
    sparkConf.setMaster("local[1]");
    sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
    sparkConf.set("spark.kryoserializer.buffer.max", "512m");
    JavaSparkContext jsc = new JavaSparkContext(sparkConf);

    // Generator of some records to be loaded in.
    HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator();

    // Initialize the table, if not done already.
    Path path = new Path(tablePath);
    FileSystem fs = FSUtils.getFs(tablePath, jsc.hadoopConfiguration());
    if (!fs.exists(path)) {
        HoodieTableMetaClient.initTableType(jsc.hadoopConfiguration(), tablePath,
                HoodieTableType.valueOf(tableType), tableName, HoodieAvroPayload.class.getName());
    }

    // Create the write client to write some records in.
    HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(tablePath)
            .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2).forTable(tableName)
            .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(IndexType.BLOOM).build())
            .withCompactionConfig(HoodieCompactionConfig.newBuilder().archiveCommitsWith(2, 3).build())
            .build();
    HoodieWriteClient client = new HoodieWriteClient(jsc, cfg);

    /**
     * Write 1 (only inserts)
     */
    String newCommitTime = client.startCommit();
    logger.info("Starting commit " + newCommitTime);
    List<HoodieRecord> records = dataGen.generateInserts(newCommitTime, 100);
    JavaRDD<HoodieRecord> writeRecords = jsc.<HoodieRecord>parallelize(records, 1);
    client.upsert(writeRecords, newCommitTime);

    /**
     * Write 2 (updates)
     */
    newCommitTime = client.startCommit();
    logger.info("Starting commit " + newCommitTime);
    records.addAll(dataGen.generateUpdates(newCommitTime, 100));
    writeRecords = jsc.<HoodieRecord>parallelize(records, 1);
    client.upsert(writeRecords, newCommitTime);

    /**
     * Schedule a compaction and also perform it, on a MERGE_ON_READ dataset.
     */
    if (HoodieTableType.valueOf(tableType) == HoodieTableType.MERGE_ON_READ) {
        Optional<String> instant = client.scheduleCompaction(Optional.empty());
        JavaRDD<WriteStatus> writeStatuses = client.compact(instant.get());
        client.commitCompaction(instant.get(), writeStatuses, Optional.empty());
    }
}
From source file:PT1.java
License:Open Source License
static void testgenrawfile(String filename, int recordnum) throws IOException {
    Random r = new Random();
    FSDataOutputStream fos = fs.create(new Path(filename));
    StringBuffer sb = new StringBuffer();
    for (int i = 0; i < recordnum; i++) {
        fos.writeByte(i / 1000);
        fos.writeShort(i / 1000);
        fos.writeInt(i / 1000);
        fos.writeLong(i / 1000);
        fos.writeFloat(i / 1000);
        fos.writeDouble(i / 1000);
        int strnum = r.nextInt(12) + 7;
        sb.setLength(0);
        for (int j = 0; j < strnum; j++) {
            sb.append((char) ('a' + j));
        }
        fos.writeUTF(sb.toString());
        if (i % 1000000 == 0) {
            // empty in the original source
        }
    }
    fos.close();
}
From source file:PT1.java
License:Open Source License
static void testgenrawfiler(String filename, int recordnum) throws IOException {
    Random r = new Random();
    FSDataOutputStream fos = fs.create(new Path(filename));
    StringBuffer sb = new StringBuffer();
    for (int i = 0; i < recordnum; i++) {
        fos.writeByte(r.nextInt());
        fos.writeShort(r.nextInt());
        fos.writeInt(r.nextInt());
        fos.writeLong(r.nextLong());
        fos.writeFloat(r.nextFloat());
        fos.writeDouble(r.nextDouble());
        int strnum = r.nextInt(12) + 7;
        sb.setLength(0);
        for (int j = 0; j < strnum; j++) {
            sb.append((char) ('a' + r.nextInt(26)));
        }
        fos.writeUTF(sb.toString());
        if (i % 1000000 == 0) {
            // empty in the original source
        }
    }
    fos.close();
}
From source file:PT1.java
License:Open Source License
static void testwritefdf(String filename, String fromfile, boolean compress) throws Exception {
    FSDataInputStream fis = fs.open(new Path(fromfile));
    FormatDataFile fdf = createfdf(filename, compress);
    for (int i = 0; i < 10000 * 10000; i++) {
        Record record = readrecord(fis);
        if (record == null) {
            break;
        }
        fdf.addRecord(record);
        if (i % 1000000 == 0) {
            // empty in the original source
        }
    }
    fdf.close();
    fis.close();
}
From source file:PT1.java
License:Open Source License
static void testwritecolumn(String filename, String fromfile, boolean compress) throws Exception {
    FormatDataFile[] fdfs = createfdfs(filename, compress);
    FSDataInputStream fis = fs.open(new Path(fromfile));
    for (int i = 0; i < 10000 * 10000; i++) {
        Record record = readrecord(fis);
        if (record == null) {
            break;
        }
        // Split each record field-by-field, writing every field to its own column file.
        ArrayList<FieldValue> fvs = record.fieldValues();
        for (int j = 0; j < fvs.size(); j++) {
            Record re = new Record(1);
            re.addValue(fvs.get(j));
            fdfs[j].addRecord(re);
        }
        if (i % 1000000 == 0) {
            // empty in the original source
        }
    }
    for (int i = 0; i < fdfs.length; i++) {
        fdfs[i].close();
    }
    fis.close();
}
From source file:PT1.java
License:Open Source License
static void testreadcolumnseq(String filename, int num, boolean compress, String mode) throws Exception {
    Path path = new Path(filename);
    ArrayList<Short> vector = new ArrayList<Short>(10);
    if (mode == null || mode.equals("-1")) {
        // Read all seven columns.
        for (short i = 0; i < 7; i++) {
            vector.add(i);
        }
    } else if (mode.equals("half")) {
        // Read only the first four columns.
        for (short i = 0; i < 4; i++) {
            vector.add(i);
        }
    } else {
        // Read the single column whose index is given by mode.
        vector.add(Short.valueOf(mode));
    }

    Configuration conf = new Configuration();
    ColumnStorageClient client = new ColumnStorageClient(path, vector, conf);
    if (compress) {
        for (int i = 0; i < num; i++) {
            client.getNextRecord();
        }
    } else {
        for (int i = 0; i < num; i++) {
            client.getRecordByLine(i);
        }
    }
    client.close();
}