List of usage examples for the org.apache.hadoop.fs.Path constructor
public Path(URI aUri)
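Most of the examples below actually build a Path from a String; for contrast, here is a minimal sketch of the URI-based constructor itself. The class name, host, port, and file path are placeholder assumptions, not taken from any of the examples.

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class PathFromUriExample {
    public static void main(String[] args) throws Exception {
        // Construct a Path directly from a URI; scheme, authority, and path are placeholders.
        URI uri = new URI("hdfs://namenode:8020/user/data/input.txt");
        Path path = new Path(uri);
        System.out.println(path.toUri());
    }
}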
From source file: PT1.java
License: Open Source License

static void testreadcolumnrand(String filename, int num, int size, String mode) throws Exception {
    Path path = new Path(filename);
    ArrayList<Short> vector = new ArrayList<Short>();
    if (mode == null || mode.equals("-1")) {
        for (short i = 0; i < 7; i++) {
            vector.add(i);
        }
    } else if (mode.equals("half")) {
        short x = 0;
        vector.add(x);
        x = 1;
        vector.add(x);
        x = 2;
        vector.add(x);
        x = 3;
        vector.add(x);
    } else {
        vector.add(Short.valueOf(mode));
    }

    Configuration conf = new Configuration();
    ColumnStorageClient client = new ColumnStorageClient(path, vector, conf);

    Random r = new Random();
    for (int i = 0; i < num; i++) {
        client.getRecordByLine(r.nextInt(size));
        if (i % 1000000 == 0) {
        }
    }

    client.close();
}
From source file: BP.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 10) {
        for (int i = 0; i < args.length; i++) {
            System.out.println("Args: " + i + " " + args[i]);
        }
        System.out.println(args.length);
        return printUsage();
    }

    edge_path = new Path(args[0]);
    prior_path = new Path(args[1]);
    output_path = new Path(args[2]);
    number_msg = Long.parseLong(args[3]);
    nreducer = Integer.parseInt(args[4]);
    nreducer = 1; // the parsed reducer count is immediately overridden to 1
    max_iter = Integer.parseInt(args[5]);
    nstate = Integer.parseInt(args[7]);
    edge_potential_str = read_edge_potential(args[8]);

    int cur_iter = 1;
    if (!args[9].startsWith("new")) {
        cur_iter = Integer.parseInt(args[9].substring(4));
    }

    System.out.println("edge_path=" + edge_path.toString() + ", prior_path=" + prior_path.toString()
            + ", output_path=" + output_path.toString() + ", |E|=" + number_msg + ", nreducer=" + nreducer
            + ", maxiter=" + max_iter + ", nstate=" + nstate + ", edge_potential_str=" + edge_potential_str
            + ", cur_iter=" + cur_iter);

    fs = FileSystem.get(getConf());

    // Run Stage1 and Stage2.
    if (cur_iter == 1) {
        System.out.println("BP: Initializing messages...");
        JobClient.runJob(configInitMessage());
    }

    double converge_threshold = number_msg * EPS * nstate;

    int i;
    for (i = cur_iter; i <= max_iter; i++) {
        System.out.println(" *** ITERATION " + (i) + "/" + max_iter + " ***");

        JobClient.runJob(configUpdateMessage());
        JobClient.runJob(configCheckErr());
        JobClient.runJob(configSumErr());

        String line = readLocaldirOneline(sum_error_path.toString());
        fs.delete(check_error_path, true);
        fs.delete(sum_error_path, true);
        String[] parts = line.split("\t");
        int n = Integer.parseInt(parts[0]);
        double sum = Double.parseDouble(parts[1]);
        System.out.println("Converged Msg: " + (number_msg - n));
        System.out.println("Sum Error: " + sum);
        if (sum < converge_threshold) {
            break;
        }

        // rotate directory
        fs.delete(message_cur_path);
        fs.rename(message_next_path, message_cur_path);
    }

    System.out.println("CONVERGE_ITER " + i);
    System.out.println("BP: Computing beliefs...");
    JobClient.runJob(configComputeBelief());

    System.out.println("BP finished. The belief vector is in the HDFS " + args[2]);

    return 0;
}
From source file: StressTest.java
License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // conf.addResource(new Path("../../env_vars"));

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: TweetCategorizer <in> <out>");
        System.exit(2);
    }

    Job job = new Job(conf, "categorize tweets");
    job.setJarByClass(TweetCategorizer.class);
    job.setMapperClass(TokenizerMapper.class);
    // job.setCombinerClass(IntSumReducer.class);
    // job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file: DisplayFuzzyKMeans.java
License: Apache License

public static void main(String[] args) throws Exception {
    DistanceMeasure measure = new ManhattanDistanceMeasure();

    Path samples = new Path("samples");
    Path output = new Path("output");
    Configuration conf = new Configuration();
    HadoopUtil.delete(conf, output);
    HadoopUtil.delete(conf, samples);

    RandomUtils.useTestSeed();
    DisplayClustering.generateSamples();
    writeSampleData(samples);

    boolean runClusterer = true;
    int maxIterations = 10;
    float threshold = 0.001F;
    float m = 1.1F;
    if (runClusterer) {
        runSequentialFuzzyKClusterer(conf, samples, output, measure, maxIterations, m, threshold);
    } else {
        int numClusters = 3;
        runSequentialFuzzyKClassifier(conf, samples, output, measure, numClusters, maxIterations, m, threshold);
    }
    new DisplayFuzzyKMeans();
}
From source file: FriendsJob.java
License: Apache License

/**
 * @param args
 */
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(FriendsJob.class);
    conf.setJobName("anagramcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(FriendsMapper.class);
    // conf.setCombinerClass(AnagramReducer.class);
    conf.setReducerClass(FriendsReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}
From source file: HoodieJavaStreamingApp.java
License: Apache License

/**
 * @throws Exception
 */
public void run() throws Exception {
    // Spark session setup
    SparkSession spark = SparkSession.builder().appName("Hoodie Spark Streaming APP")
            .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer").master("local[1]")
            .getOrCreate();
    JavaSparkContext jssc = new JavaSparkContext(spark.sparkContext());

    // folder path clean up and creation, preparing the environment
    FileSystem fs = FileSystem.get(jssc.hadoopConfiguration());
    fs.delete(new Path(streamingSourcePath), true);
    fs.delete(new Path(streamingCheckpointingPath), true);
    fs.delete(new Path(tablePath), true);
    fs.mkdirs(new Path(streamingSourcePath));

    // Generator of some records to be loaded in.
    HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator();

    List<String> records1 = DataSourceTestUtils.convertToStringList(dataGen.generateInserts("001", 100));
    Dataset<Row> inputDF1 = spark.read().json(jssc.parallelize(records1, 2));

    List<String> records2 = DataSourceTestUtils.convertToStringList(dataGen.generateUpdates("002", 100));
    Dataset<Row> inputDF2 = spark.read().json(jssc.parallelize(records2, 2));

    // setup the input for streaming
    Dataset<Row> streamingInput = spark.readStream().schema(inputDF1.schema()).json(streamingSourcePath);

    // start streaming and showing
    ExecutorService executor = Executors.newFixedThreadPool(2);

    // thread for Spark structured streaming
    Future<Void> streamFuture = executor.submit(new Callable<Void>() {
        public Void call() throws Exception {
            logger.info("===== Streaming Starting =====");
            stream(streamingInput);
            logger.info("===== Streaming Ends =====");
            return null;
        }
    });

    // thread for adding data to the streaming source and showing results over time
    Future<Void> showFuture = executor.submit(new Callable<Void>() {
        public Void call() throws Exception {
            logger.info("===== Showing Starting =====");
            show(spark, fs, inputDF1, inputDF2);
            logger.info("===== Showing Ends =====");
            return null;
        }
    });

    // let the threads run
    streamFuture.get();
    showFuture.get();

    executor.shutdown();
}
From source file: PerTaskTally.java
License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(PerMapTally.class);
    job.setMapperClass(TokenizerMapper.class);
    // Aniket changes starts
    /* Here the partitioner is being called */
    job.setPartitionerClass(WordPartitioner.class);
    // Aniket changes ends
    // Part 4 Aniket changes starts
    /* Here I am just disabling the combiner */
    // job.setCombinerClass(IntSumReducer.class);
    // Part 4 Aniket changes ends
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file: AnalyzeBigramCount.java
License: Apache License

public static void main(String[] args) {
    if (args.length != 1) {
        System.out.println("usage: [input-path]");
        System.exit(-1);
    }

    System.out.println("input path: " + args[0]);

    List<PairOfWritables<Text, IntWritable>> bigrams = SequenceFileUtils.readDirectory(new Path(args[0]));

    Collections.sort(bigrams, new Comparator<PairOfWritables<Text, IntWritable>>() {
        public int compare(PairOfWritables<Text, IntWritable> e1, PairOfWritables<Text, IntWritable> e2) {
            if (e2.getRightElement().compareTo(e1.getRightElement()) == 0) {
                return e1.getLeftElement().compareTo(e2.getLeftElement());
            }
            return e2.getRightElement().compareTo(e1.getRightElement());
        }
    });

    int singletons = 0;
    int sum = 0;
    for (PairOfWritables<Text, IntWritable> bigram : bigrams) {
        sum += bigram.getRightElement().get();
        if (bigram.getRightElement().get() == 1) {
            singletons++;
        }
    }

    System.out.println("total number of unique bigrams: " + bigrams.size());
    System.out.println("total number of bigrams: " + sum);
    System.out.println("number of bigrams that appear only once: " + singletons);

    System.out.println("\nten most frequent bigrams: ");

    int cnt = 0;
    for (PairOfWritables<Text, IntWritable> bigram : bigrams) {
        System.out.println(bigram.getLeftElement() + "\t" + bigram.getRightElement());
        cnt++;
        if (cnt >= 10) { // stop once ten bigrams have been printed
            break;
        }
    }
}
From source file: WordCount_PerTaskTally.java
License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();

    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }

    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount_PerTaskTally.class);
    job.setMapperClass(TokenizerMapper.class);

    // disable combiner
    // job.setCombinerClass(IntSumReducer.class);

    job.setPartitionerClass(WordPartitioner.class);
    job.setNumReduceTasks(5);

    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file: ComputeCooccurrenceMatrixStripes.java
License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("window size").create(WINDOW));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1;
    int window = cmdline.hasOption(WINDOW) ? Integer.parseInt(cmdline.getOptionValue(WINDOW)) : 2;

    LOG.info("Tool: " + ComputeCooccurrenceMatrixStripes.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - window: " + window);
    LOG.info(" - number of reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(ComputeCooccurrenceMatrixStripes.class.getSimpleName());
    job.setJarByClass(ComputeCooccurrenceMatrixStripes.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    job.getConfiguration().setInt("window", window);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(String2IntOpenHashMapWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(String2IntOpenHashMapWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}