List of usage examples for the org.apache.hadoop.fs.Path constructor
public Path(URI aUri)
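None of the examples below calls the URI overload directly (they all build paths from plain strings), so here is a minimal, self-contained sketch of the constructor itself. The HDFS address and file names are illustrative assumptions, not taken from any of the examples.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PathFromUriExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical HDFS URI; scheme, authority, and path are all carried into the Path.
        Path hdfsPath = new Path(URI.create("hdfs://localhost:9000/data/input.csv"));

        // A scheme-less URI produces a relative Path; it is resolved against the
        // default filesystem and working directory only when it is actually used.
        Path relative = new Path(URI.create("data/input.csv"));

        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(hdfsPath.toUri(), conf);
        System.out.println("qualified: " + fs.makeQualified(hdfsPath));
        System.out.println("relative:  " + relative);
    }
}

For well-formed path strings the String-based constructor used in the examples below behaves the same way; the URI overload is mainly convenient when the scheme and authority have already been parsed elsewhere.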
From source file:ComRoughSetApproInputSampler.java
License:Apache License
/**
 * Driver for InputSampler from the command line.
 * Configures a JobConf instance and calls {@link #writePartitionFile}.
 */
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    ArrayList<String> otherArgs = new ArrayList<String>();
    Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                job.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-inFormat".equals(args[i])) {
                job.setInputFormatClass(Class.forName(args[++i]).asSubclass(InputFormat.class));
            } else if ("-keyClass".equals(args[i])) {
                job.setMapOutputKeyClass(Class.forName(args[++i]).asSubclass(WritableComparable.class));
            } else if ("-splitSample".equals(args[i])) {
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new SplitSampler<K, V>(numSamples, maxSplits);
            } else if ("-splitRandom".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else if ("-splitInterval".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new IntervalSampler<K, V>(pcnt, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    if (job.getNumReduceTasks() <= 1) {
        System.err.println("Sampler requires more than one reducer");
        return printUsage();
    }
    if (otherArgs.size() < 2) {
        System.out.println("ERROR: Wrong number of parameters: ");
        return printUsage();
    }
    if (null == sampler) {
        sampler = new RandomSampler<K, V>(0.1, 10000, 10);
    }
    // The last remaining argument names the partition file; the rest are input paths.
    Path outf = new Path(otherArgs.remove(otherArgs.size() - 1));
    TotalOrderPartitioner.setPartitionFile(getConf(), outf);
    for (String s : otherArgs) {
        FileInputFormat.addInputPath(job, new Path(s));
    }
    ComRoughSetApproInputSampler.<K, V>writePartitionFile(job, sampler);
    return 0;
}
From source file:First.java
License:Apache License
public int run(String[] args) throws Exception {
    String outputReducerType = "filesystem";
    if (args != null && args[0].startsWith(OUTPUT_REDUCER_VAR)) {
        String[] s = args[0].split("=");
        if (s != null && s.length == 2)
            outputReducerType = s[1];
    }
    logger.info("output reducer type: " + outputReducerType);

    for (int i = 2000; i < 2012; i++) {
        String columnName = Integer.toString(i);
        getConf().set(CONF_COLUMN_NAME, columnName);

        Job job = new Job(getConf(), "app");
        job.setJarByClass(First.class);
        job.setMapperClass(TokenizerMapper.class);

        if (outputReducerType.equalsIgnoreCase("filesystem")) {
            job.setCombinerClass(ReducerToFilesystem.class);
            job.setReducerClass(ReducerToFilesystem.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i));
        } else {
            job.setReducerClass(ReducerToCassandra.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(ByteBuffer.class);
            job.setOutputValueClass(List.class);
            job.setOutputFormatClass(ColumnFamilyOutputFormat.class);
            ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);
        }

        job.setInputFormatClass(ColumnFamilyInputFormat.class);
        ConfigHelper.setRpcPort(job.getConfiguration(), "9160");
        ConfigHelper.setInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner");
        ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY);

        SlicePredicate predicate = new SlicePredicate()
                .setColumn_names(Arrays.asList(ByteBuffer.wrap(columnName.getBytes())));
        ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

        job.waitForCompletion(true);
    }
    return 0;
}
From source file:MapReduce3.java
public static void main(String[] args) throws Exception {
    String dst = "hdfs://localhost:9000/data/2006a.csv";
    // String dstOut = "hdfs://localhost:9000/mapreduce/result3/1";
    String dstOut = "/Users/wendyzhuo/NetBeansProjects/final_Hadoop/src/output3/1";
    String outFiles = "/Users/wendyzhuo/NetBeansProjects/final_Hadoop/src/output3/2";

    Configuration hadoopConfig = new Configuration();
    hadoopConfig.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    hadoopConfig.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

    Job job = new Job(hadoopConfig);
    Job job2 = new Job(hadoopConfig);
    FileInputFormat.addInputPath(job, new Path(dst));
    FileOutputFormat.setOutputPath(job, new Path(dstOut));
    FileInputFormat.addInputPath(job2, new Path(dstOut));
    FileOutputFormat.setOutputPath(job2, new Path(outFiles));

    JobConf map1Conf = new JobConf(false);
    ChainMapper.addMapper(job, TempMapper.class, LongWritable.class, Text.class, CompositeKey_wd.class,
            IntWritable.class, map1Conf);
    JobConf reduceConf = new JobConf(false);
    ChainReducer.setReducer(job, TempReducer.class, CompositeKey_wd.class, IntWritable.class,
            CompositeKey_wd.class, IntWritable.class, reduceConf);
    JobConf map2Conf = new JobConf(false);
    ChainMapper.addMapper(job2, TempMapper2.class, LongWritable.class, Text.class, IntWritable.class,
            CompositeKey_wd.class, map2Conf);
    JobConf map3Conf = new JobConf(false);
    ChainReducer.setReducer(job2, TempReduce2.class, IntWritable.class, CompositeKey_wd.class, Text.class,
            IntWritable.class, map3Conf);

    // JobClient.runJob(job);
    // job.setMapperClass(TempMapper.class);
    // job.setReducerClass(TempReducer.class);

    // Output key/value classes for the two jobs.
    job.setOutputKeyClass(CompositeKey_wd.class);
    job.setOutputValueClass(IntWritable.class);
    job2.setMapOutputKeyClass(IntWritable.class);
    job2.setMapOutputValueClass(CompositeKey_wd.class);
    // job2.setSortComparatorClass(LongWritable.DecreasingComparator.class);

    // Run the two chained jobs in sequence.
    job.waitForCompletion(true);
    System.out.println("Finished1");
    job2.waitForCompletion(true);
    System.out.println("Finished2");
}
From source file:DistribCountingDriver.java
License:Apache License
public int run(String args[]) throws Exception {
    long job_start_time, job_end_time;
    long job_runtime;

    JobConf conf = new JobConf(getConf());
    int minFreqPercent = Integer.parseInt(args[0]);
    int datasetSize = Integer.parseInt(args[1]);
    conf.setInt("DISTRCOUNT.datasetSize", datasetSize);
    conf.setInt("DISTRCOUNT.minFreqPercent", minFreqPercent);

    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
    conf.setInt("mapred.task.timeout", 60000000);

    conf.setJarByClass(DistribCountingDriver.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(DistribCountingMapper.class);
    conf.setCombinerClass(DistribCountingCombiner.class);
    conf.setReducerClass(DistribCountingReducer.class);

    conf.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(conf, new Path(args[2]));
    FileOutputFormat.setOutputPath(conf, new Path(args[3]));

    job_start_time = System.currentTimeMillis();
    JobClient.runJob(conf);
    job_end_time = System.currentTimeMillis();

    job_runtime = (job_end_time - job_start_time) / 1000;
    System.out.println("total job runtime (seconds): " + job_runtime);
    return 0;
}
From source file:Txt2SeqConverter.java
License:Apache License
public static void main(String[] args) {
    if (args.length != 2) {
        // System.out.println("Usage: env HADOOP_CLASSPATH=.:$HADOOP_CLASSPATH hadoop Txt2SeqConverter input output");
        System.out.println("Usage: hadoop Txt2SeqConverter input output");
        System.exit(1);
    }
    FileSystem fs = null;
    String seqFileName = args[1];
    Configuration conf = new Configuration();
    try {
        fs = FileSystem.get(URI.create(seqFileName), conf);
    } catch (IOException e) {
        System.out.println("ERROR: " + e.getMessage());
    }
    Path path = new Path(seqFileName);

    LongWritable key = new LongWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        // writer = SequenceFile.createWriter(fs, conf, path, LongWritable.class, Text.class,
        //         SequenceFile.CompressionType.BLOCK);
        writer = SequenceFile.createWriter(fs, conf, path, LongWritable.class, Text.class,
                SequenceFile.CompressionType.BLOCK, new com.hadoop.compression.lzo.LzoCodec());
        BufferedReader br = new BufferedReader(new FileReader(args[0]));

        int transactionID = 0;
        String transaction = null;
        while ((transaction = br.readLine()) != null) {
            key.set(transactionID);
            value.set(transaction);
            writer.append(key, value);
            transactionID++;
        }
    } catch (IOException e) {
        System.out.println("ERROR: " + e.getMessage());
    } finally {
        IOUtils.closeStream(writer);
    }
}
From source file:AnalyzeBigramRelativeFrequencyJson.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        System.out.println("usage: [input-path]");
        System.exit(-1);
    }
    System.out.println("input path: " + args[0]);

    List<PairOfWritables<BigramRelativeFrequencyJson.MyTuple, FloatWritable>> pairs = SequenceFileUtils
            .readDirectory(new Path(args[0]));

    List<PairOfWritables<BigramRelativeFrequencyJson.MyTuple, FloatWritable>> list1 = Lists.newArrayList();
    List<PairOfWritables<BigramRelativeFrequencyJson.MyTuple, FloatWritable>> list2 = Lists.newArrayList();

    for (PairOfWritables<BigramRelativeFrequencyJson.MyTuple, FloatWritable> p : pairs) {
        BigramRelativeFrequencyJson.MyTuple bigram = p.getLeftElement();
        if (bigram.getJsonObject().get("Left").getAsString().equals("light")) {
            list1.add(p);
        }
        if (bigram.getJsonObject().get("Left").getAsString().equals("contain")) {
            list2.add(p);
        }
    }

    Collections.sort(list1,
            new Comparator<PairOfWritables<BigramRelativeFrequencyJson.MyTuple, FloatWritable>>() {
                public int compare(PairOfWritables<BigramRelativeFrequencyJson.MyTuple, FloatWritable> e1,
                        PairOfWritables<BigramRelativeFrequencyJson.MyTuple, FloatWritable> e2) {
                    if (e1.getRightElement().compareTo(e2.getRightElement()) == 0) {
                        return e1.getLeftElement().compareTo(e2.getLeftElement());
                    }
                    return e2.getRightElement().compareTo(e1.getRightElement());
                }
            });

    int i = 0;
    for (PairOfWritables<BigramRelativeFrequencyJson.MyTuple, FloatWritable> p : list1) {
        BigramRelativeFrequencyJson.MyTuple bigram = p.getLeftElement();
        System.out.println(bigram + "\t" + p.getRightElement());
        i++;
        if (i > 10) {
            break;
        }
    }

    Collections.sort(list2,
            new Comparator<PairOfWritables<BigramRelativeFrequencyJson.MyTuple, FloatWritable>>() {
                public int compare(PairOfWritables<BigramRelativeFrequencyJson.MyTuple, FloatWritable> e1,
                        PairOfWritables<BigramRelativeFrequencyJson.MyTuple, FloatWritable> e2) {
                    if (e1.getRightElement().compareTo(e2.getRightElement()) == 0) {
                        return e1.getLeftElement().compareTo(e2.getLeftElement());
                    }
                    return e2.getRightElement().compareTo(e1.getRightElement());
                }
            });

    i = 0;
    for (PairOfWritables<BigramRelativeFrequencyJson.MyTuple, FloatWritable> p : list2) {
        BigramRelativeFrequencyJson.MyTuple bigram = p.getLeftElement();
        System.out.println(bigram + "\t" + p.getRightElement());
        i++;
        if (i > 10) {
            break;
        }
    }
}
From source file:NgramMatrixBuilder.java
License:Apache License
/**
 * The main driver for the word count map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the
 *                     job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), NgramMatrixBuilder.class);
    conf.setJobName("ngrammatrixbuilder");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }
    TextInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));
    JobClient.runJob(conf);
    return 0;
}
From source file:TestIndexMergeMR.java
License:Open Source License
public void testIndexMergeMR() throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    String indexdir = "indexdir";
    String indexdir1 = "indexdir1";
    int filenum = 10;
    int recnum = 1000;
    short idx = 0;
    TestUtil.genifdfindex(indexdir, filenum, recnum, idx, true);

    StringBuffer sb = new StringBuffer();
    FileStatus[] ss = fs.listStatus(new Path(indexdir));
    for (FileStatus fileStatus : ss) {
        sb.append(fileStatus.getPath().toString()).append(",");
    }
    IndexMergeMR.running(sb.substring(0, sb.length() - 1), indexdir1, conf);

    IFormatDataFile ifdf = new IFormatDataFile(conf);
    ifdf.open(indexdir1 + "/part-00000");
    for (int i = 0; i < 100; i++) {
        ifdf.next().show();
    }
    ifdf.close();

    fs.delete(new Path(indexdir), true);
    fs.delete(new Path(indexdir1), true);
}
From source file:TorrentWeb.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    Job job = Job.getInstance(conf, "Torrent Web");
    job.setJarByClass(TorrentWeb.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(TorrentWebExtracter.class);
    job.setReducerClass(TorrentWebReducer.class);
    job.setInputFormatClass(WarcInputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Execute job and return status
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:TestColumnStorageOutputFormat.java
License:Open Source License
public static void main(String[] argv) throws IOException {
    try {
        if (argv.length != 2) {
            System.out.println("TestColumnStorageOutputFormat <output> <count>");
            System.exit(-1);
        }

        JobConf conf = new JobConf(TestColumnStorageOutputFormat.class);
        conf.setJobName("TestColumnStorageOutputFormat");
        conf.setNumMapTasks(1);
        conf.setNumReduceTasks(1);
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Unit.Record.class);
        conf.setOutputFormat(ColumnStorageOutputFormat.class);
        conf.set("mapred.output.compress", "false");
        conf.set("mapred.output.dir", argv[0]);

        Head head = new Head();
        initHead(head);
        head.toJobConf(conf);

        Path outputPath = new Path(argv[0]);
        FileOutputFormat.setOutputPath(conf, outputPath);
        FileSystem fs = FileSystem.get(conf);
        MyColumnOutputFormat output = new MyColumnOutputFormat(head, conf, outputPath);

        long begin = System.currentTimeMillis();
        int count = Integer.valueOf(argv[1]);
        String string = "hello konten";
        for (int i = 0; i < count; i++) {
            Record record = new Record((short) 210);
            for (short j = 0; j < 30; j++) {
                record.addValue(new FieldValue((byte) 1, (short) (j * 7 + 0)));
                record.addValue(new FieldValue((short) 2, (short) (j * 7 + 1)));
                record.addValue(new FieldValue((int) 3, (short) (j * 7 + 2)));
                record.addValue(new FieldValue((long) 4, (short) (j * 7 + 3)));
                record.addValue(new FieldValue((float) 5.5, (short) (j * 7 + 4)));
                record.addValue(new FieldValue((double) 6.6, (short) (j * 7 + 5)));
                record.addValue(new FieldValue((double) 7.7, (short) (j * 7 + 6)));
            }
            output.doWrite(record);

            if (i % 100000 == 0) {
                long end = System.currentTimeMillis();
                System.out.println(i + " records written, delay: " + (end - begin) / 1000 + "s");
            }
        }
        long end = System.currentTimeMillis();
        System.out.println(count + " records written in total, delay: " + (end - begin) / 1000 + "s");
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println("got exception: " + e.getMessage());
    }
}