List of usage examples for org.apache.hadoop.fs.FileSystem.get
public static FileSystem get(Configuration conf) throws IOException
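Before the per-project examples below, here is a minimal, self-contained sketch of the typical call pattern. The class name and the "/tmp" listing path are illustrative only, not taken from any of the source files below. FileSystem.get(conf) resolves the filesystem named by fs.defaultFS in the supplied Configuration: the local filesystem by default, HDFS when cluster configuration files are on the classpath.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemGetExample {
    public static void main(String[] args) throws IOException {
        // Reads core-site.xml/hdfs-site.xml from the classpath, if present.
        Configuration conf = new Configuration();

        // Returns the FileSystem for the fs.defaultFS scheme. FileSystem.get()
        // returns a cached, shared instance, which is why the examples below
        // generally do not close it explicitly.
        FileSystem fs = FileSystem.get(conf);

        // "/tmp" is an arbitrary example path.
        for (FileStatus status : fs.listStatus(new Path("/tmp"))) {
            System.out.println(status.getPath());
        }
    }
}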
From source file:BooleanRetrievalCompressed.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws IOException {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(LookupPostingsCompressed.class.getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.exit(-1);
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String collectionPath = cmdline.getOptionValue(COLLECTION);

    if (collectionPath.endsWith(".gz")) {
        System.out.println("gzipped collection is not seekable: use compressed version!");
        System.exit(-1);
    }

    FileSystem fs = FileSystem.get(new Configuration());
    initialize(indexPath, collectionPath, fs);

    String[] queries = { "outrageous fortune AND", "white rose AND", "means deceit AND",
            "white red OR rose AND pluck AND", "unhappy outrageous OR good your AND OR fortune AND" };

    for (String q : queries) {
        System.out.println("Query: " + q);
        runQuery(q);
        System.out.println("");
    }

    return 1;
}
From source file:HdfsReader.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("HdfsReader [FileSize i.e. 1g/10g/100g/200g]");
        return 1;
    }

    double fileSize;
    double fileSizeInMB;
    if (args[0].equals("1g")) {
        fileSize = 1073741824.0;
        fileSizeInMB = 1024.0;
    } else if (args[0].equals("10g")) {
        fileSize = 10737418240.0;
        fileSizeInMB = 10240.0;
    } else if (args[0].equals("100g")) {
        fileSize = 107374182400.0;
        fileSizeInMB = 102400.0;
    } else if (args[0].equals("200g")) {
        fileSize = 214748364800.0;
        fileSizeInMB = 204800.0;
    } else {
        throw new IllegalArgumentException("Invalid arg: " + args[0]);
    }

    String fileName = "read-" + args[0] + "-avg.txt";
    File avgFile = new File(fileName);
    PrintWriter avgPW = new PrintWriter(avgFile);
    fileName = "read-" + args[0] + "-min.txt";
    File minFile = new File(fileName);
    PrintWriter minPW = new PrintWriter(minFile);
    fileName = "read-" + args[0] + "-max.txt";
    File maxFile = new File(fileName);
    PrintWriter maxPW = new PrintWriter(maxFile);

    int numIters = 10;
    int bufferSize = 4096;
    long blockSize[] = new long[] { 67108864, 134217728, 268435456, 536870912, 1073741824 };
    short replication[] = new short[] { 1, 4 };
    String hdfsFile = "/hdfs_test/" + args[0] + "/1.in";

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path hdfsFilePath = new Path(hdfsFile);

    for (int i = 0; i < 5; i++) { // blockSize
        for (int j = 0; j < 2; j++) { // replication
            OutputStream os = fs.create(hdfsFilePath, true, bufferSize, replication[j], blockSize[i]);
            byte[] buf = new byte[bufferSize];
            for (int m = 0; m < bufferSize; m += 4) {
                buf[m] = (byte) m;
            }
            double numBufPerFile = fileSize / (double) bufferSize;
            for (double m = 0.0; m < numBufPerFile; m++) {
                os.write(buf);
            }
            os.close();

            long avg = 0, min = Long.MAX_VALUE, max = Long.MIN_VALUE;
            for (int k = 0; k < numIters; k++) {
                InputStream is = fs.open(hdfsFilePath);
                long startTime = System.currentTimeMillis();
                int bytesRead = is.read(buf);
                while (bytesRead != -1) {
                    bytesRead = is.read(buf);
                }
                is.close();
                long endTime = System.currentTimeMillis();
                long duration = (endTime - startTime);
                avg += duration;
                if (duration < min) {
                    min = duration;
                }
                if (duration > max) {
                    max = duration;
                }
            }

            // write result to output
            double avgBW = fileSizeInMB * 1000.0 * (double) numIters / (double) avg;
            avgPW.print(avgBW);
            avgPW.print("\t");
            double minBW = fileSizeInMB * 1000.0 / (double) max;
            minPW.print(minBW);
            minPW.print("\t");
            double maxBW = fileSizeInMB * 1000.0 / (double) min;
            maxPW.print(maxBW);
            maxPW.print("\t");
        }
        avgPW.println();
        minPW.println();
        maxPW.println();
    }
    avgPW.close();
    minPW.close();
    maxPW.close();
    return 0;
}
From source file:Relevance.java
License:Apache License
/**
 * Exact relevance is slower; non-exact relevance will have false positives.
 */
protected void batch_query(Tap source, Tap output, Fields wantedFields, RelevanceFunction func, Tap keysTap,
        String keyField, boolean useBloom, int bloom_bits, int bloom_hashes, boolean exact) throws IOException {
    if (!useBloom && !exact)
        throw new IllegalArgumentException("Must either use bloom filter or be exact, or both!");

    FileSystem fs = FileSystem.get(new Configuration());
    Pipe finalPipe = new Pipe("data");
    finalPipe = new Each(finalPipe, wantedFields, new Identity());

    Map<String, Tap> sources = new HashMap<String, Tap>();
    sources.put("data", source);

    Map properties = new HashMap();
    String bloomFilterPath = "/tmp/" + UUID.randomUUID().toString() + ".bloomfilter";

    if (useBloom) {
        String jobId = UUID.randomUUID().toString();
        LOG.info("Creating bloom filter");
        writeOutBloomFilter(keysTap, keyField, fs, bloomFilterPath, bloom_bits, bloom_hashes);
        properties.put("mapred.job.reuse.jvm.num.tasks", -1);
        if (!TEST_MODE) {
            properties.put("mapred.cache.files", "hdfs://" + bloomFilterPath);
        } else {
            properties.put("batch_query.relevance.file", bloomFilterPath);
        }
        LOG.info("Done creating bloom filter");
        finalPipe = new Each(finalPipe, wantedFields, getRelevanceFilter(func, jobId));
    }

    if (exact) {
        sources.put("relevant", keysTap);
        Pipe relevantRecords = new Pipe("relevant");
        relevantRecords = new Each(relevantRecords, new Fields(keyField), new Identity());
        finalPipe = new Each(finalPipe, wantedFields, getExactFilter(func),
                Fields.join(wantedFields, new Fields(ID, RELEVANT_OBJECT)));
        finalPipe = new CoGroup(finalPipe, new Fields(RELEVANT_OBJECT), relevantRecords, new Fields(keyField),
                Fields.join(wantedFields, new Fields(ID, RELEVANT_OBJECT), new Fields("__ignored")));
        finalPipe = new Each(finalPipe, Fields.join(wantedFields, new Fields(ID)), new Identity());
        if (func.canHaveMultipleMatches()) {
            finalPipe = new Distinct(finalPipe, new Fields(ID));
        }
        finalPipe = new Each(finalPipe, wantedFields, new Identity());
    }

    Flow flow = new FlowConnector(properties).connect("Relevance: " + func.getClass().getSimpleName(), sources,
            output, finalPipe);
    flow.complete();

    if (useBloom)
        fs.delete(new Path(bloomFilterPath), false);
}
From source file:FlinkBootstrap.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        throw new IllegalArgumentException("Provide `TaskManager` or `JobManager` parameter with config folder");
    }

    // Load the Hadoop S3 wrapper classes explicitly; without this a
    // ClassNotFoundException is thrown.
    Class.forName("org.apache.flink.runtime.fs.hdfs.HadoopFileSystem");
    Class.forName("org.apache.hadoop.fs.s3a.S3AFileSystem");

    // Verify S3 is accessible.
    Configuration conf = new Configuration();
    conf.addResource(new Path("config/hadoop/core-site.xml"));
    conf.addResource(new Path("config/hadoop/hdfs-site.xml"));
    FileSystem fs = FileSystem.get(conf);
    fs.listStatus(new Path("s3://dir"));

    if (args[0].equals("TaskManager")) {
        TaskManager.main(new String[] { "--configDir", args[1], });
    } else if (args[0].equals("JobManager")) {
        JobManager.main(new String[] { "--configDir", args[1], "--executionMode", "cluster", });
    } else {
        throw new IllegalArgumentException("Unknown parameter `" + args[0] + "`");
    }
}
From source file:SBP.java
License:Apache License
protected static void copyToLocalFile(Configuration conf, Path hdfs_path, Path local_path) throws Exception {
    FileSystem fs = FileSystem.get(conf);

    // read the result
    fs.copyToLocalFile(hdfs_path, local_path);
}
From source file:SBP.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 11) {
        for (int i = 0; i < args.length; i++) {
            System.out.println("Args: " + i + " " + args[i]);
        }
        System.out.println(args.length);
        return printUsage();
    }

    lambda = Double.parseDouble(args[10]);
    edge_path = new Path(args[0]);
    prior_path = new Path(args[1]);
    output_path = new Path(args[2]);
    Path prev_local_path = new Path("run_tmp/prev_local/");
    Path new_local_path = new Path("run_tmp/new_local/");
    Path tmp_output_path = new Path(output_path.toString());
    number_msg = Long.parseLong(args[3]);
    nreducer = Integer.parseInt(args[4]);
    nreducer = 1;
    max_iter = Integer.parseInt(args[5]);
    nstate = Integer.parseInt(args[7]);
    edge_potential_str = read_edge_potential(args[8]);

    int cur_iter = 1;
    if (args[9].startsWith("new") == false) {
        cur_iter = Integer.parseInt(args[9].substring(4));
    }

    System.out.println("edge_path=" + edge_path.toString() + ", prior_path=" + prior_path.toString()
            + ", output_path=" + output_path.toString() + ", |E|=" + number_msg + ", nreducer=" + nreducer
            + ", maxiter=" + max_iter + ", nstate=" + nstate + ", edge_potential_str=" + edge_potential_str
            + ", cur_iter=" + cur_iter + ", lambda=" + lambda);

    fs = FileSystem.get(getConf());

    // Run Stage1 and Stage2.
    if (cur_iter == 1) {
        System.out.println("BP: Initializing messages...");
        JobClient.runJob(configInitMessage());
    }

    double converge_threshold = number_msg * EPS * nstate;

    for (int i = cur_iter; i <= max_iter; i++) {
        System.out.println(" *** ITERATION " + (i) + "/" + max_iter + " ***");

        JobClient.runJob(configUpdateMessage());
        JobClient.runJob(configSmoothMessage());
        JobClient.runJob(configCheckErr());
        JobClient.runJob(configSumErr());

        String line = readLocaldirOneline(sum_error_path.toString());
        fs.delete(check_error_path, true);
        fs.delete(sum_error_path, true);
        String[] parts = line.split("\t");
        int n = Integer.parseInt(parts[0]);
        double sum = Double.parseDouble(parts[1]);
        System.out.println("Converged Msg: " + (number_msg - n));
        System.out.println("Sum Error: " + sum);
        if (sum < converge_threshold) {
            break;
        }

        // rotate directory
        fs.delete(message_cur_path);
        fs.delete(message_next_path);
        fs.rename(message_smooth_path, message_cur_path);
    }

    System.out.println("BP: Computing beliefs...");
    JobClient.runJob(configComputeBelief());

    System.out.println("BP finished. The belief vector is in the HDFS " + args[2]);

    return 0;
}
From source file:ExportStressTest.java
License:Apache License
public void createFile(int fileId) throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path dirPath = new Path("ExportStressTest");
    fs.mkdirs(dirPath);
    Path filePath = new Path(dirPath, "input-" + fileId);
    OutputStream os = fs.create(filePath);
    Writer w = new BufferedWriter(new OutputStreamWriter(os));
    for (int i = 0; i < RECORDS_PER_FILE; i++) {
        long v = (long) i + ((long) RECORDS_PER_FILE * (long) fileId);
        w.write("" + v + "," + ALPHABET + ALPHABET + ALPHABET + ALPHABET + "\n");
    }
    w.close();
    os.close();
}
From source file:ExportStressTest.java
License:Apache License
/** Create a set of data files to export. */
public void createData() throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path dirPath = new Path("ExportStressTest");
    if (fs.exists(dirPath)) {
        System.out.println("Export directory appears to already exist. Skipping data-gen.");
        return;
    }

    for (int i = 0; i < NUM_FILES; i++) {
        createFile(i);
    }
}
From source file:DumpRecordsExtended.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);

    LOG.info("Tool name: " + DumpRecordsExtended.class.getSimpleName());
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output: " + outputPath);

    Configuration conf = new Configuration();
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);

    Job job = Job.getInstance(conf);
    job.setJobName(DumpRecordsExtended.class.getSimpleName());
    job.setJarByClass(DumpRecordsExtended.class);
    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    job.waitForCompletion(true);

    return 0;
}
From source file:LookupQuery.java
License:Apache License
public static void initQuery(String[] args) throws IOException {
    indexPath = args[0];
    collectionPath = args[1];

    config = new Configuration();
    fs = FileSystem.get(config);
    reader = new MapFile.Reader(fs, indexPath, config);

    key = new Text();
    value = new ArrayListWritable<PairOfInts>();

    areThereMoreLookups = true;
    query = "";
    Qvalue = 0;
}