Example usage for org.apache.hadoop.fs Path Path

List of usage examples for org.apache.hadoop.fs Path Path

Introduction

On this page you can find example usage for org.apache.hadoop.fs Path Path.

Prototype

public Path(URI aUri) 

Document

Construct a path from a URI
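
A minimal, self-contained sketch of this constructor in isolation; the hdfs://namenode:8020 authority, the file path, and the class name are placeholders rather than anything taken from the examples below:

import java.net.URI;

import org.apache.hadoop.fs.Path;

public class PathFromUriExample {
    public static void main(String[] args) {
        // The URI's scheme and authority are preserved in the resulting Path.
        Path path = new Path(URI.create("hdfs://namenode:8020/user/data/input.txt"));

        System.out.println(path);           // hdfs://namenode:8020/user/data/input.txt
        System.out.println(path.getName()); // input.txt
    }
}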

Usage

From source file:acromusashi.stream.bolt.hdfs.HdfsStoreBolt.java

License:Open Source License

/**
 * {@inheritDoc}
 */
@SuppressWarnings("rawtypes")
@Override
public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
    super.prepare(stormConf, context, collector);

    String componentId = context.getThisComponentId();
    int taskId = context.getThisTaskId();

    HdfsStoreConfig config = new HdfsStoreConfig();

    config.setOutputUri((String) stormConf.get("hdfsstorebolt.outputuri"));
    config.setFileNameHeader((String) stormConf.get("hdfsstorebolt.filenameheader"));
    config.setFileSwitchIntarval(((Long) stormConf.get("hdfsstorebolt.interval")).intValue());
    config.setFileNameBody("_" + componentId + "_" + taskId + "_");

    boolean isPreprocess = true;
    Object isPreprocessObj = stormConf.get("hdfsstorebolt.executepreprocess");
    if (isPreprocessObj instanceof Boolean) {
        isPreprocess = ((Boolean) isPreprocessObj).booleanValue();
    }

    try {
        // Obtain the FileSystem that serves the configured HDFS output URI.
        Configuration conf = new Configuration();
        Path dstPath = new Path(config.getOutputUri());
        FileSystem fileSystem = dstPath.getFileSystem(conf);

        // If preprocessing is enabled, clean up leftover temporary files on HDFS.
        if (isPreprocess) {
            HdfsPreProcessor.execute(fileSystem, config.getOutputUri(),
                    config.getFileNameHeader() + config.getFileNameBody(), config.getTmpFileSuffix());
        }

        this.delegate = new HdfsOutputSwitcher();
        this.delegate.initialize(fileSystem, config, System.currentTimeMillis());
    } catch (Exception ex) {
        logger.warn("Failed to HDFS write initialize.", ex);
        throw new InitFailException(ex);
    }
}

From source file:acromusashi.stream.bolt.hdfs.HdfsStreamWriter.java

License:Open Source License

/**
 * Opens the specified file path on HDFS, appending if the file already exists.
 *
 * @param filePath path of the HDFS file to open
 * @param fs file system used to open the file
 * @param isFileSyncEachTime whether to sync to HDFS after each write
 * @throws IOException if the file cannot be opened
 */
public void open(String filePath, FileSystem fs, boolean isFileSyncEachTime) throws IOException {
    Path dstPath = new Path(filePath);

    if (fs.exists(dstPath)) {
        this.delegateStream = fs.append(dstPath);
    } else {
        this.delegateStream = fs.create(dstPath);
    }

    this.isFileSyncEachTime = isFileSyncEachTime;
}

From source file:adept.mapreduce.MapReduce.java

License:Apache License

public JobConf getConfiguration(String inputPath, String outputPath, String mapClass) throws Exception {
    //Configuration conf = getConf();
    Class<?> thisclass = getClass();
    JobConf job = new JobConf(new Configuration(), thisclass);

    try {
        Path in = new Path(inputPath);

        Path out = new Path(outputPath);
        FileInputFormat.setInputPaths(job, in);
        FileOutputFormat.setOutputPath(job, out);

        job.setJobName("Algorithm Map-Reduce");
        job.setMapperClass((Class<? extends Mapper>) Class.forName(mapClass));

    } catch (Exception e) {
        throw new RuntimeException("Exception occurred: " + e.getMessage(), e);
    }

    job.setReducerClass(AdeptReducer.class);
    job.setInputFormat(KeyValueTextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.set("key.value.separator.in.input.line", "\t");

    return job;
}

From source file:adept.mapreduce.MapReduceExample.java

License:Apache License

public int run(String[] args) throws Exception {

    Configuration conf = getConf();

    JobConf job = new JobConf(conf, MapReduceExample.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("MapReduceExample");
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormat(KeyValueTextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.set("key.value.separator.in.input.line", ",");

    JobClient.runJob(job);

    return 0;
}

From source file:adts.ContainingArticle.java

License:Open Source License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = new Job(conf, "ContainingArticle");
    job.setJarByClass(ContainingArticle.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    Path queriesInputPath = new Path(args[0]);
    Path articlesInputPath = new Path(args[1]);
    MultipleInputs.addInputPath(job, queriesInputPath, TextInputFormat.class, QueriesMap.class);
    MultipleInputs.addInputPath(job, articlesInputPath, TextInputFormat.class, ArticlesMap.class);

    FileOutputFormat.setOutputPath(job, new Path("/root/temporary"));
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.waitForCompletion(true);

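    // Second job: collect the intermediate results from /root/temporary.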
    Job collectingJob = new Job(conf, "ContainingArticle");
    collectingJob.setJarByClass(ContainingArticle.class);

    collectingJob.setOutputKeyClass(IntWritable.class);
    collectingJob.setOutputValueClass(Text.class);

    collectingJob.setMapperClass(CollectMap.class);
    collectingJob.setReducerClass(CollectReduce.class);

    collectingJob.setInputFormatClass(TextInputFormat.class);
    collectingJob.setOutputFormatClass(TextOutputFormat.class);
    collectingJob.setMapOutputKeyClass(Text.class);
    collectingJob.setMapOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(collectingJob, new Path("/root/temporary"));
    FileOutputFormat.setOutputPath(collectingJob, new Path("/root/temporary2"));

    collectingJob.waitForCompletion(true);

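    // Third job: count the collected results and write the final output to args[2].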
    Job countingJob = new Job(conf, "ContainingArticle");
    countingJob.setJarByClass(ContainingArticle.class);

    countingJob.setOutputKeyClass(Text.class);
    countingJob.setOutputValueClass(IntWritable.class);

    countingJob.setMapperClass(CountMap.class);
    countingJob.setReducerClass(CountReduce.class);

    countingJob.setInputFormatClass(TextInputFormat.class);
    countingJob.setOutputFormatClass(TextOutputFormat.class);
    countingJob.setMapOutputKeyClass(IntWritable.class);
    countingJob.setMapOutputValueClass(Text.class);

    FileInputFormat.addInputPath(countingJob, new Path("/root/temporary2"));
    FileOutputFormat.setOutputPath(countingJob, new Path(args[2]));

    countingJob.waitForCompletion(true);
}

From source file:adts.CreateCorrespondences.java

License:Open Source License

public static void main(String[] args) throws Exception {
    Configuration config = HBaseConfiguration.create();
    Job job = new Job(config, "CreateCorrespondences");
    job.setJarByClass(CreateCorrespondences.class);

    Scan scan = new Scan();
    scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
    scan.setCacheBlocks(false); // don't set to true for MR jobs

    TableMapReduceUtil.initTableMapperJob("content", // input HBase table name
            scan, // Scan instance to control CF and attribute selection
            Map.class, // mapper
            Text.class, // mapper output key
            Text.class, // mapper output value
            job);
    job.setReducerClass(Reduce.class);
    job.setNumReduceTasks(1);
    FileOutputFormat.setOutputPath(job, new Path(args[0]));

    boolean b = job.waitForCompletion(true);
    if (!b) {
        throw new IOException("error with job!");
    }
}

From source file:adts.HbaseClient.java

License:Open Source License

public static void main(String[] args) throws IOException {
    String[] keys = new String[5];
    int keywords_counter = 0;
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path inFile = new Path(args[0]);
    if (!fs.exists(inFile)) {
        System.out.println("Input file not found");
    } else if (!fs.isFile(inFile)) {
        System.out.println("Input should be a file");
    } else {
        FSDataInputStream fsDataInputStream = fs.open(inFile);
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(fsDataInputStream));
        String line;
        while (((line = bufferedReader.readLine()) != null) && (keywords_counter < 5)) {
            String[] array = line.split("\t");
            String keyword = array[0];
            System.out.println("Record :   " + keyword);
            keys[keywords_counter] = keyword;
            keywords_counter++;
        }
        bufferedReader.close();
        fs.close();

        Configuration config = HBaseConfiguration.create();
        HTable table = new HTable(config, "index");

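        // Scan the "index" table ten times, filtering on a randomly chosen keyword each time.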
        Random randomGenerator = new Random();
        for (int i = 0; i < 10; i++) {
            int randomInt = randomGenerator.nextInt(5);
            System.out.println("Random chosen keyword : " + keys[randomInt]);

            FilterList list = new FilterList(FilterList.Operator.MUST_PASS_ALL);
            SingleColumnValueFilter filter_by_name = new SingleColumnValueFilter(Bytes.toBytes("keyword"),
                    Bytes.toBytes(""), CompareOp.EQUAL, Bytes.toBytes(keys[randomInt]));
            //filter_by_name.setFilterIfMissing(true);
            list.addFilter(filter_by_name);

            Scan scan = new Scan();
            scan.setFilter(list);
            //scan.addFamily(Bytes.toBytes("keyword"));
            ResultScanner scanner = table.getScanner(scan);
            try {

                for (Result rr = scanner.next(); rr != null; rr = scanner.next()) {
                    // print out the row we found and the columns we were looking for
                    byte[] cells = rr.getValue(Bytes.toBytes("article"), Bytes.toBytes(""));
                    System.out.println("Keyword " + keys[randomInt] + "belonging to article with md5 : "
                            + Bytes.toString(cells));
                }
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                scanner.close();
            }

        }
        table.close();

    }

}

From source file:adts.PopularKeywords.java

License:Open Source License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = new Job(conf, "PopularKeywords");
    job.setJarByClass(PopularKeywords.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    Path queriesInputPath = new Path(args[0]);
    Path stopWordsInputPath = new Path(args[1]);
    MultipleInputs.addInputPath(job, queriesInputPath, TextInputFormat.class, Map.class);
    MultipleInputs.addInputPath(job, stopWordsInputPath, TextInputFormat.class, StopwordsMap.class);

    FileOutputFormat.setOutputPath(job, new Path("/root/temporary"));

    job.waitForCompletion(true);

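    // Second job: sort the first job's output by count in descending order.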
    Job sortingJob = new Job(conf, "PopularKeywords");
    sortingJob.setJarByClass(PopularKeywords.class);

    sortingJob.setOutputKeyClass(Text.class);
    sortingJob.setOutputValueClass(LongWritable.class);

    sortingJob.setMapperClass(ReverseMap.class);
    sortingJob.setReducerClass(ReverseReduce.class);

    sortingJob.setInputFormatClass(TextInputFormat.class);
    sortingJob.setOutputFormatClass(TextOutputFormat.class);
    sortingJob.setSortComparatorClass(LongWritable.DecreasingComparator.class);
    sortingJob.setMapOutputKeyClass(LongWritable.class);
    sortingJob.setMapOutputValueClass(Text.class);

    FileInputFormat.addInputPath(sortingJob, new Path("/root/temporary"));
    FileOutputFormat.setOutputPath(sortingJob, new Path(args[2]));

    sortingJob.setNumReduceTasks(1);
    sortingJob.waitForCompletion(true);
}

From source file:adts.PrepareInput.java

License:Open Source License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = new Job(conf, "PrepareInput");
    job.setJarByClass(PrepareInput.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setNumReduceTasks(1);
    job.waitForCompletion(true);
}

From source file:adts.SuccessfullQueries.java

License:Open Source License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = new Job(conf, "SuccessfullQueries");
    job.setJarByClass(SuccessfullQueries.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);
}