Example usage for org.apache.hadoop.fs Path Path

List of usage examples for org.apache.hadoop.fs Path Path

Introduction

On this page you can find example usages of the org.apache.hadoop.fs Path constructor, drawn from the source files listed under Usage.

Prototype

public Path(URI aUri) 

Document

Construct a path from a URI
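
A minimal sketch of this constructor, assuming a hypothetical HDFS URI (the class name and URI below are illustrative only, not taken from the examples that follow):

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class PathFromUriExample {
    public static void main(String[] args) {
        // Hypothetical URI; any valid Hadoop filesystem URI works here.
        URI uri = URI.create("hdfs://namenode:8020/user/data/input");
        Path path = new Path(uri);
        System.out.println(path.toUri()); // hdfs://namenode:8020/user/data/input
    }
}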

Usage

From source file:PT1.java

License:Open Source License

static void testreadcolumnrand(String filename, int num, int size, String mode) throws Exception {
    Path path = new Path(filename);
    ArrayList<Short> vector = new ArrayList<Short>();

    if (mode == null || mode.equals("-1")) {
        for (short i = 0; i < 7; i++) {
            vector.add(i);
        }
    } else if (mode.equals("half")) {
        short x = 0;
        vector.add(x);
        x = 1;
        vector.add(x);
        x = 2;
        vector.add(x);
        x = 3;
        vector.add(x);
    } else {
        vector.add(Short.valueOf(mode));
    }

    Configuration conf = new Configuration();
    ColumnStorageClient client = new ColumnStorageClient(path, vector, conf);
    Random r = new Random();
    for (int i = 0; i < num; i++) {
        client.getRecordByLine(r.nextInt(size));
        if (i % 1000000 == 0) {
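            // progress checkpoint every 1,000,000 reads; left empty in the original source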
        }
    }
    client.close();

}

From source file:BP.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 10) {
        for (int i = 0; i < args.length; i++) {
            System.out.println("Args: " + i + " " + args[i]);
        }
        System.out.println(args.length);
        return printUsage();
    }

    edge_path = new Path(args[0]);
    prior_path = new Path(args[1]);
    output_path = new Path(args[2]);

    number_msg = Long.parseLong(args[3]);
    nreducer = Integer.parseInt(args[4]);
    nreducer = 1;
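    // NOTE: the reducer count parsed above is immediately overridden with a hard-coded value of 1.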
    max_iter = Integer.parseInt(args[5]);

    nstate = Integer.parseInt(args[7]);
    edge_potential_str = read_edge_potential(args[8]);

    int cur_iter = 1;
    if (!args[9].startsWith("new")) {
        cur_iter = Integer.parseInt(args[9].substring(4));
    }

    System.out.println("edge_path=" + edge_path.toString() + ", prior_path=" + prior_path.toString()
            + ", output_path=" + output_path.toString() + ", |E|=" + number_msg + ", nreducer=" + nreducer
            + ", maxiter=" + max_iter + ", nstate=" + nstate + ", edge_potential_str=" + edge_potential_str
            + ", cur_iter=" + cur_iter);

    fs = FileSystem.get(getConf());

    // Run Stage1 and Stage2.
    if (cur_iter == 1) {
        System.out.println("BP: Initializing messages...");
        JobClient.runJob(configInitMessage());
    }

    double converge_threshold = number_msg * EPS * nstate;
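    // convergence test below: stop iterating once the summed error drops under EPS scaled by the number of messages and states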

    int i;
    for (i = cur_iter; i <= max_iter; i++) {
        System.out.println("   *** ITERATION " + (i) + "/" + max_iter + " ***");
        JobClient.runJob(configUpdateMessage());
        JobClient.runJob(configCheckErr());
        JobClient.runJob(configSumErr());
        String line = readLocaldirOneline(sum_error_path.toString());
        fs.delete(check_error_path, true);
        fs.delete(sum_error_path, true);
        String[] parts = line.split("\t");
        int n = Integer.parseInt(parts[0]);
        double sum = Double.parseDouble(parts[1]);
        System.out.println("Converged Msg: " + (number_msg - n));
        System.out.println("Sum Error: " + sum);
        if (sum < converge_threshold) {
            break;
        }

        // rotate directory
        fs.delete(message_cur_path, true);
        fs.rename(message_next_path, message_cur_path);
    }
    System.out.println("CONVERGE_ITER " + i);
    System.out.println("BP: Computing beliefs...");
    JobClient.runJob(configComputeBelief());

    System.out.println("BP finished. The belief vector is in the HDFS " + args[2]);

    return 0;
}

From source file:StressTest.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // conf.addResource(new Path("../../env_vars"));

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: TweetCategorizer <in> <out>");
        System.exit(2);
    }

    Job job = new Job(conf, "categorize tweets");
    job.setJarByClass(TweetCategorizer.class);
    job.setMapperClass(TokenizerMapper.class);
    // job.setCombinerClass(IntSumReducer.class);
    // job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:DisplayFuzzyKMeans.java

License:Apache License

public static void main(String[] args) throws Exception {
    DistanceMeasure measure = new ManhattanDistanceMeasure();

    Path samples = new Path("samples");
    Path output = new Path("output");
    Configuration conf = new Configuration();
    HadoopUtil.delete(conf, output);
    HadoopUtil.delete(conf, samples);
    RandomUtils.useTestSeed();
    DisplayClustering.generateSamples();
    writeSampleData(samples);
    boolean runClusterer = true;
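    // when true, run the sequential fuzzy k-means clusterer; when false, run the classifier branch below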
    int maxIterations = 10;
    float threshold = 0.001F;
    float m = 1.1F;
    if (runClusterer) {
        runSequentialFuzzyKClusterer(conf, samples, output, measure, maxIterations, m, threshold);
    } else {
        int numClusters = 3;
        runSequentialFuzzyKClassifier(conf, samples, output, measure, numClusters, maxIterations, m, threshold);
    }
    new DisplayFuzzyKMeans();
}

From source file:FriendsJob.java

License:Apache License

/**
 * @param args
 */
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(FriendsJob.class);
    conf.setJobName("anagramcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(FriendsMapper.class);
    // conf.setCombinerClass(AnagramReducer.class);
    conf.setReducerClass(FriendsReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);

}

From source file:HoodieJavaStreamingApp.java

License:Apache License

/**
 *
 * @throws Exception
 */
public void run() throws Exception {
    // Spark session setup.
    SparkSession spark = SparkSession.builder().appName("Hoodie Spark Streaming APP")
            .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer").master("local[1]")
            .getOrCreate();
    JavaSparkContext jssc = new JavaSparkContext(spark.sparkContext());

    // folder path clean up and creation, preparing the environment
    FileSystem fs = FileSystem.get(jssc.hadoopConfiguration());
    fs.delete(new Path(streamingSourcePath), true);
    fs.delete(new Path(streamingCheckpointingPath), true);
    fs.delete(new Path(tablePath), true);
    fs.mkdirs(new Path(streamingSourcePath));

    // Generator of some records to be loaded in.
    HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator();

    List<String> records1 = DataSourceTestUtils.convertToStringList(dataGen.generateInserts("001", 100));
    Dataset<Row> inputDF1 = spark.read().json(jssc.parallelize(records1, 2));

    List<String> records2 = DataSourceTestUtils.convertToStringList(dataGen.generateUpdates("002", 100));

    Dataset<Row> inputDF2 = spark.read().json(jssc.parallelize(records2, 2));

    // setup the input for streaming
    Dataset<Row> streamingInput = spark.readStream().schema(inputDF1.schema()).json(streamingSourcePath);

    // start streaming and showing
    ExecutorService executor = Executors.newFixedThreadPool(2);

    // thread for Spark Structured Streaming
    Future<Void> streamFuture = executor.submit(new Callable<Void>() {
        public Void call() throws Exception {
            logger.info("===== Streaming Starting =====");
            stream(streamingInput);
            logger.info("===== Streaming Ends =====");
            return null;
        }
    });

    // thread for adding data to the streaming source and showing results over time
    Future<Void> showFuture = executor.submit(new Callable<Void>() {
        public Void call() throws Exception {
            logger.info("===== Showing Starting =====");
            show(spark, fs, inputDF1, inputDF2);
            logger.info("===== Showing Ends =====");
            return null;
        }
    });

    // let the threads run
    streamFuture.get();
    showFuture.get();

    executor.shutdown();
}

From source file:PerTaskTally.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(PerMapTally.class);
    job.setMapperClass(TokenizerMapper.class);
    // Aniket changes starts
    /* Here the partitioner is being called*/
    job.setPartitionerClass(WordPartitioner.class);
    // Aniket changes ends
    // Part 4 Aniket changes starts
    /* Here I am just disabling the combiner */
    // job.setCombinerClass(IntSumReducer.class);
    // Part 4 Aniket changes ends
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:AnalyzeBigramCount.java

License:Apache License

public static void main(String[] args) {
    if (args.length != 1) {
        System.out.println("usage: [input-path]");
        System.exit(-1);
    }

    System.out.println("input path: " + args[0]);
    List<PairOfWritables<Text, IntWritable>> bigrams = SequenceFileUtils.readDirectory(new Path(args[0]));

    Collections.sort(bigrams, new Comparator<PairOfWritables<Text, IntWritable>>() {
        public int compare(PairOfWritables<Text, IntWritable> e1, PairOfWritables<Text, IntWritable> e2) {
            if (e2.getRightElement().compareTo(e1.getRightElement()) == 0) {
                return e1.getLeftElement().compareTo(e2.getLeftElement());
            }

            return e2.getRightElement().compareTo(e1.getRightElement());
        }
    });

    int singletons = 0;
    int sum = 0;
    for (PairOfWritables<Text, IntWritable> bigram : bigrams) {
        sum += bigram.getRightElement().get();

        if (bigram.getRightElement().get() == 1) {
            singletons++;
        }
    }

    System.out.println("total number of unique bigrams: " + bigrams.size());
    System.out.println("total number of bigrams: " + sum);
    System.out.println("number of bigrams that appear only once: " + singletons);

    System.out.println("\nten most frequent bigrams: ");

    int cnt = 0;
    for (PairOfWritables<Text, IntWritable> bigram : bigrams) {
        System.out.println(bigram.getLeftElement() + "\t" + bigram.getRightElement());
        cnt++;

        if (cnt >= 10) {
            break;
        }
    }
}

From source file:WordCount_PerTaskTally.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();

    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount_PerTaskTally.class);
    job.setMapperClass(TokenizerMapper.class);

    // disable combiner

    // job.setCombinerClass(IntSumReducer.class);

    job.setPartitionerClass(WordPartitioner.class);
    job.setNumReduceTasks(5);

    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:ComputeCooccurrenceMatrixStripes.java

License:Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("window size").create(WINDOW));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;
    int window = cmdline.hasOption(WINDOW) ? Integer.parseInt(cmdline.getOptionValue(WINDOW)) : 2;

    LOG.info("Tool: " + ComputeCooccurrenceMatrixStripes.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - window: " + window);
    LOG.info(" - number of reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(ComputeCooccurrenceMatrixStripes.class.getSimpleName());
    job.setJarByClass(ComputeCooccurrenceMatrixStripes.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    job.getConfiguration().setInt("window", window);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(String2IntOpenHashMapWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(String2IntOpenHashMapWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}