Example usage for org.apache.hadoop.fs Path Path

Introduction

This page collects example usages of the org.apache.hadoop.fs.Path(URI) constructor, drawn from open-source projects.

Prototype

public Path(URI aUri) 

Document

Construct a path from a URI.
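
A minimal, self-contained sketch of the constructor in isolation (the scheme, host, and paths below are illustrative, not drawn from the examples on this page):

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class PathFromUriExample {
    public static void main(String[] args) {
        // Construct a Path directly from a URI; the scheme and authority are preserved.
        Path p = new Path(URI.create("hdfs://namenode:8020/user/data/input.txt"));
        System.out.println(p);             // hdfs://namenode:8020/user/data/input.txt
        System.out.println(p.getName());   // input.txt
        System.out.println(p.getParent()); // hdfs://namenode:8020/user/data
    }
}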

Usage

From source file:HDFSRandomAccessFile.java

License:Apache License

public HDFSRandomAccessFile(String fileSystemURI, String location, int bufferSize) throws IOException {
    super(bufferSize);
    fsURI = URI.create(fileSystemURI);
    filePath = new Path(location);
    this.location = location;
    if (debugLeaks) {
        openFiles.add(location);
    }

    FileSystem fs = FileSystem.get(fsURI, new Configuration());
    hfile = fs.open(filePath);

    fileStatus = fs.getFileStatus(filePath);
}

From source file:LookupPostingsCompressed1.java

License:Apache License

@SuppressWarnings({ "static-access" })
public static void main(String[] args) throws IOException {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(LookupPostingsCompressed1.class.getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.exit(-1);
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String collectionPath = cmdline.getOptionValue(COLLECTION);

    if (collectionPath.endsWith(".gz")) {
        System.out.println("gzipped collection is not seekable: use compressed version!");
        System.exit(-1);
    }

    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(config);
    MapFile.Reader reader = new MapFile.Reader(new Path(indexPath + "/part-r-00000"), config);

    FSDataInputStream collection = fs.open(new Path(collectionPath));
    BufferedReader d = new BufferedReader(new InputStreamReader(collection));

    Text key = new Text();
    PairOfWritables<VIntWritable, ArrayListWritable<PairOfVInts>> value = new PairOfWritables<VIntWritable, ArrayListWritable<PairOfVInts>>();

    System.out.println("Looking up postings for the term \"starcross'd\"");
    key.set("starcross'd");

    reader.get(key, value);

    ArrayListWritable<PairOfVInts> postings = value.getRightElement();
    for (PairOfVInts pair : postings) {
        System.out.println(pair);
        collection.seek(pair.getLeftElement());
        System.out.println(d.readLine());
    }

    key.set("gold");
    reader.get(key, value);
    System.out.println("Complete postings list for 'gold': " + value);

    Int2IntFrequencyDistribution goldHist = new Int2IntFrequencyDistributionEntry();
    postings = value.getRightElement();
    for (PairOfVInts pair : postings) {
        goldHist.increment(pair.getRightElement());
    }

    System.out.println("histogram of tf values for gold");
    for (PairOfInts pair : goldHist) {
        System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());
    }

    key.set("silver");
    reader.get(key, value);
    System.out.println("Complete postings list for 'silver': " + value);

    Int2IntFrequencyDistribution silverHist = new Int2IntFrequencyDistributionEntry();
    postings = value.getRightElement();
    for (PairOfVInts pair : postings) {
        silverHist.increment(pair.getRightElement());
    }

    System.out.println("histogram of tf values for silver");
    for (PairOfInts pair : silverHist) {
        System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());
    }

    key.set("bronze");
    Writable w = reader.get(key, value);

    if (w == null) {
        System.out.println("the term bronze does not appear in the collection");
    }

    collection.close();
    reader.close();
}

From source file:LoadClue.java

License:Apache License

/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
    Path inputPath = new Path(args[0]);
    String tableName = args[1];
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(Uploader.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setInputFormatClass(ClueWebInputFormat.class);
    job.setMapperClass(Uploader.class);
    LoadClue.setTableName(tableName);

    // No reducers.  Just write straight to table.  Call initTableReducerJob
    // because it sets up the TableOutputFormat.
    TableMapReduceUtil.initTableReducerJob(tableName, null, job);
    TableMapReduceUtil.addDependencyJars(conf, TableOutputFormat.class);
    job.setNumReduceTasks(0);
    return job;
}
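
A hypothetical driver for configureJob (the configuration factory, input path, and table name here are illustrative assumptions, not part of the original source):

Configuration conf = HBaseConfiguration.create();
Job job = LoadClue.configureJob(conf, new String[] { "/data/clueweb", "clueweb_table" });
System.exit(job.waitForCompletion(true) ? 0 : 1);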

From source file:Text2FormatStorageMR.java

License:Open Source License

@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception {

    if (args.length != 2) {
        System.out.println("FormatFileMR <input> <output>");
        System.exit(-1);
    }

    JobConf conf = new JobConf(FormatStorageMR.class);

    conf.setJobName("Text2FormatMR");

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(4);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Unit.Record.class);

    conf.setMapperClass(TextFileTestMapper.class);
    conf.setReducerClass(FormatFileTestReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(FormatStorageOutputFormat.class);
    conf.set("mapred.output.compress", "flase");

    Head head = new Head();
    initHead(head);

    head.toJobConf(conf);

    FileInputFormat.setInputPaths(conf, args[0]);
    Path outputPath = new Path(args[1]);
    FileOutputFormat.setOutputPath(conf, outputPath);

    FileSystem fs = outputPath.getFileSystem(conf);
    fs.delete(outputPath, true);

    JobClient jc = new JobClient(conf);
    RunningJob rj = jc.submitJob(conf);

    String lastReport = "";
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS");
    long reportTime = System.currentTimeMillis();
    long maxReportInterval = 3 * 1000;
    while (!rj.isComplete()) {
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            // Ignore the interrupt and re-check job status on the next iteration.
        }

        int mapProgress = Math.round(rj.mapProgress() * 100);
        int reduceProgress = Math.round(rj.reduceProgress() * 100);

        String report = " map = " + mapProgress + "%,  reduce = " + reduceProgress + "%";

        if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) {

            String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
            System.out.println(output);
            lastReport = report;
            reportTime = System.currentTimeMillis();
        }
    }

    System.exit(0);

}

From source file:WriteHdfs.java

License:Open Source License

static void writehdfs(String filename, long length) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    FSDataOutputStream fos = fs.create(new Path(filename), true, 1024 * 1024, null);
    for (int i = 0; i < length / data.length; i++) {
        fos.write(data);
    }
    fos.close();
}

From source file:AnalyzeBigramRelativeFrequency.java

License:Apache License

public static void main(String[] args) {
    if (args.length != 1) {
        System.out.println("usage: [input-path]");
        System.exit(-1);
    }

    System.out.println("input path: " + args[0]);

    List<PairOfWritables<PairOfStrings, FloatWritable>> pairs = SequenceFileUtils
            .readDirectory(new Path(args[0]));

    List<PairOfWritables<PairOfStrings, FloatWritable>> list1 = Lists.newArrayList();
    List<PairOfWritables<PairOfStrings, FloatWritable>> list2 = Lists.newArrayList();

    for (PairOfWritables<PairOfStrings, FloatWritable> p : pairs) {
        PairOfStrings bigram = p.getLeftElement();

        if (bigram.getLeftElement().equals("light")) {
            list1.add(p);
        }
        if (bigram.getLeftElement().equals("contain")) {
            list2.add(p);
        }
    }

    // Sort by descending relative frequency, breaking ties by bigram.
    Comparator<PairOfWritables<PairOfStrings, FloatWritable>> byFreqDescending = new Comparator<PairOfWritables<PairOfStrings, FloatWritable>>() {
        public int compare(PairOfWritables<PairOfStrings, FloatWritable> e1,
                PairOfWritables<PairOfStrings, FloatWritable> e2) {
            if (e1.getRightElement().compareTo(e2.getRightElement()) == 0) {
                return e1.getLeftElement().compareTo(e2.getLeftElement());
            }

            return e2.getRightElement().compareTo(e1.getRightElement());
        }
    };

    Collections.sort(list1, byFreqDescending);

    int i = 0;
    for (PairOfWritables<PairOfStrings, FloatWritable> p : list1) {
        PairOfStrings bigram = p.getLeftElement();
        System.out.println(bigram + "\t" + p.getRightElement());
        i++;

        if (i > 10) {
            break;
        }
    }

    Collections.sort(list2, byFreqDescending);

    i = 0;
    for (PairOfWritables<PairOfStrings, FloatWritable> p : list2) {
        PairOfStrings bigram = p.getLeftElement();
        System.out.println(bigram + "\t" + p.getRightElement());
        i++;

        if (i > 10) {
            break;
        }
    }
}

From source file:BigramRelativeFrequency.java

License:Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool name: " + BigramRelativeFrequency.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - num reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(BigramRelativeFrequency.class.getSimpleName());
    job.setJarByClass(BigramRelativeFrequency.class);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(PairOfStrings.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setOutputKeyClass(PairOfStrings.class);
    job.setOutputValueClass(FloatWritable.class);
    //job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
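
A conventional entry point for this Tool (a sketch assuming BigramRelativeFrequency extends Configured and implements Tool, which the run(String[]) signature and getConf() calls suggest but the excerpt does not show):

public static void main(String[] args) throws Exception {
    ToolRunner.run(new BigramRelativeFrequency(), args);
}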

From source file:TestFormatStorageInputFormat.java

License:Open Source License

public static void main(String[] argv) throws IOException, SerDeException {
    try {
        if (argv.length != 2) {
            System.out.println("TestFormatStorageInputFormat <input> <output>");
            System.exit(-1);
        }

        JobConf conf = new JobConf(TestFormatStorageInputFormat.class);

        conf.setJobName("TestFormatStorageInputFormat");

        conf.setNumMapTasks(1);
        conf.setNumReduceTasks(1);

        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Unit.Record.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(FormatStorageOutputFormat.class);
        conf.set("mapred.output.compress", "flase");

        conf.set("mapred.input.dir", argv[0]);

        Head head = new Head();
        initHead(head);

        head.toJobConf(conf);

        FormatStorageSerDe serDe = initSerDe(conf);
        StandardStructObjectInspector oi = (StandardStructObjectInspector) serDe.getObjectInspector();
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();

        FileInputFormat.setInputPaths(conf, argv[0]);
        Path outputPath = new Path(argv[1]);
        FileOutputFormat.setOutputPath(conf, outputPath);

        InputFormat inputFormat = new FormatStorageInputFormat();
        InputSplit[] inputSplits = inputFormat.getSplits(conf, 1);
        if (inputSplits.length == 0) {
            System.out.println("inputSplits is empty");
            return;
        } else {
            System.out.println("get Splits:" + inputSplits.length);
        }

        int size = inputSplits.length;
        System.out.println("getSplits return size:" + size);
        for (int i = 0; i < size; i++) {
            FormatStorageSplit split = (FormatStorageSplit) inputSplits[i];
            System.out.printf("split:" + i + "offset:" + split.getStart() + "len:" + split.getLength() + "path:"
                    + conf.get(ConstVar.InputPath) + "beginLine:" + split.getBeginLine() + "endLine:"
                    + split.getEndLine() + "\n");
        }

        {
            int totalDelay = 0;
            RecordReader<WritableComparable, Writable> currRecReader = null;
            for (int i = 0; i < inputSplits.length; i++) {
                currRecReader = inputFormat.getRecordReader(inputSplits[i], conf, Reporter.NULL);

                WritableComparable key;
                Writable value;

                key = currRecReader.createKey();
                value = currRecReader.createValue();

                long begin = System.currentTimeMillis();
                int count = 0;
                while (currRecReader.next(key, value)) {
                    Record record = (Record) value;

                    // Deserialize to exercise the SerDe; the row itself is not otherwise used.
                    Object row = serDe.deserialize(record);
                    count++;
                }
                long end = System.currentTimeMillis();

                long delay = (end - begin) / 1000;
                totalDelay += delay;
                System.out.println(count + " records read, took " + delay + " s");
            }

            System.out.println("total delay:" + totalDelay);
        }
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println("get exception:" + e.getMessage());
    }
}

From source file:CategoriesInvertedIndex.java

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Inverted Index");
    job.setJarByClass(CategoriesInvertedIndex.class);
    job.setMapperClass(CategoriesMapper.class);
    job.setReducerClass(CategoriesReducer.class);
    job.setCombinerClass(CategoriesReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:ComRoughSetApproInputSampler.java

License:Apache License

/**
 * Write a partition file for the given job, using the Sampler provided.
 * Queries the sampler for a sample keyset, sorts by the output key
 * comparator, selects the keys for each rank, and writes to the destination
 * returned from {@link TotalOrderPartitioner#getPartitionFile}.
 */
@SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator
public static <K, V> void writePartitionFile(Job job, Sampler<K, V> sampler)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = job.getConfiguration();
    final InputFormat inf = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
    int numPartitions = job.getNumReduceTasks();
    K[] samples = (K[]) sampler.getSample(inf, job);
    LOG.info("Using " + samples.length + " samples");
    RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
    Arrays.sort(samples, comparator);
    Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf));
    FileSystem fs = dst.getFileSystem(conf);
    if (fs.exists(dst)) {
        fs.delete(dst, false);
    }
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(),
            NullWritable.class);
    NullWritable nullValue = NullWritable.get();
    float stepSize = samples.length / (float) numPartitions;
    int last = -1;
    for (int i = 1; i < numPartitions; ++i) {
        int k = Math.round(stepSize * i);
        // Advance k past keys equal to the previously written split point,
        // so that consecutive split points are distinct.
        while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
            ++k;
        }
        writer.append(samples[k], nullValue);
        last = k;
    }
    writer.close();
}
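
A hedged invocation sketch (assuming this class mirrors Hadoop's InputSampler and provides a compatible RandomSampler inner class; the job name, partition file path, sampling frequency, and sample count are illustrative):

Job job = Job.getInstance(new Configuration(), "total order sort");
job.setNumReduceTasks(4);
TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("/tmp/_partitions"));
ComRoughSetApproInputSampler.writePartitionFile(job,
        new ComRoughSetApproInputSampler.RandomSampler<Text, Text>(0.1, 10000));
job.setPartitionerClass(TotalOrderPartitioner.class);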