public static FileSystem get(Configuration conf) throws IOException 

Returns the configured FileSystem implementation.


From source file:BooleanRetrievalCompressed.java

 * Runs this tool./*  w ww .j ava 2 s  .  c o m*/
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws IOException {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX));
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());

    if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(LookupPostingsCompressed.class.getName(), options);

    String indexPath = cmdline.getOptionValue(INDEX);
    String collectionPath = cmdline.getOptionValue(COLLECTION);

    if (collectionPath.endsWith(".gz")) {
        System.out.println("gzipped collection is not seekable: use compressed version!");

    FileSystem fs = FileSystem.get(new Configuration());

    initialize(indexPath, collectionPath, fs);

    String[] queries = { "outrageous fortune AND", "white rose AND", "means deceit AND",
            "white red OR rose AND pluck AND", "unhappy outrageous OR good your AND OR fortune AND" };

    for (String q : queries) {
        System.out.println("Query: " + q);

    return 1;

From source file:HdfsReader.java

/**
 * Measures HDFS read times for a test file of one selected size.
 *
 * <p>For every combination of the block sizes and replication factors listed below, the file
 * {@code /hdfs_test/<size>/1.in} is created (overwriting any previous copy) and then read from
 * start to end {@code numIters} times. The elapsed wall-clock times are reduced to average,
 * minimum and maximum bandwidth figures (file size in MB divided by seconds).
 *
 * <p>NOTE(review): this excerpt looks truncated - several closing braces are missing, the loop
 * that should fill the file has no visible body, and nothing visible writes to or closes the
 * three {@code PrintWriter}s. Confirm against the complete source before relying on it.
 *
 * @param args {@code args[0]} selects the file size: {@code 1g}, {@code 10g}, {@code 100g} or
 *             {@code 200g}
 * @return {@code 1} if no argument was supplied, {@code 0} at the end of the visible code path
 * @throws IllegalArgumentException if {@code args[0]} is not one of the supported sizes
 * @throws Exception declared by the signature; the visible code performs file and HDFS I/O
 */
public int run(String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("HdfsReader [FileSize i.e. 1g/10g/100g/200g]");
        return 1;
    }

    // File size in bytes and in MB (1 MB = 1024 * 1024 bytes) for the selected label.
    double fileSize;
    double fileSizeInMB;
    if (args[0].equals("1g")) {
        fileSize = 1073741824.0;
        fileSizeInMB = 1024.0;
    } else if (args[0].equals("10g")) {
        fileSize = 10737418240.0;
        fileSizeInMB = 10240.0;
    } else if (args[0].equals("100g")) {
        fileSize = 107374182400.0;
        fileSizeInMB = 102400.0;
    } else if (args[0].equals("200g")) {
        fileSize = 214748364800.0;
        fileSizeInMB = 204800.0;
    } else {
        throw new IllegalArgumentException("Invalid arg: " + args[0]);

    // Three local result files, one each for average, minimum and maximum figures.
    // NOTE(review): no writes to these writers are visible in this excerpt.
    String fileName = "read-" + args[0] + "-avg.txt";
    File avgFile = new File(fileName);
    PrintWriter avgPW = new PrintWriter(avgFile);
    fileName = "read-" + args[0] + "-min.txt";
    File minFile = new File(fileName);
    PrintWriter minPW = new PrintWriter(minFile);
    fileName = "read-" + args[0] + "-max.txt";
    File maxFile = new File(fileName);
    PrintWriter maxPW = new PrintWriter(maxFile);

    int numIters = 10;
    int bufferSize = 4096;
    // Block sizes in bytes: 64 MB, 128 MB, 256 MB, 512 MB and 1 GB.
    long blockSize[] = new long[] { 67108864, 134217728, 268435456, 536870912, 1073741824 };
    short replication[] = new short[] { 1, 4 };
    String hdfsFile = "/hdfs_test/" + args[0] + "/1.in";
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path hdfsFilePath = new Path(hdfsFile);

    // The literal bounds 5 and 2 match the lengths of blockSize[] and replication[] above.
    for (int i = 0; i < 5; i++) { // blockSize
        for (int j = 0; j < 2; j++) { // replication
            // Create the test file with the current replication factor and block size.
            OutputStream os = fs.create(hdfsFilePath, true, bufferSize, replication[j], blockSize[i]);
            byte[] buf = new byte[bufferSize];
            // Only every fourth byte of the buffer is assigned; the rest stay zero.
            for (int m = 0; m < bufferSize; m += 4) {
                buf[m] = (byte) m;
            // Number of buffer-sized chunks that make up the whole file.
            double numBufPerFile = fileSize / (double) bufferSize;

            // NOTE(review): presumably this loop wrote buf to os once per chunk; the body and
            // the close of os are not visible in this excerpt - confirm.
            for (double m = 0.0; m < numBufPerFile; m++) {
            // avg accumulates the SUM of all durations; it is divided out when avgBW is computed.
            long avg = 0, min = Long.MAX_VALUE, max = Long.MIN_VALUE;
            for (int k = 0; k < numIters; k++) {
                InputStream is = fs.open(hdfsFilePath);

                // Time one complete sequential read of the file, in milliseconds.
                long startTime = System.currentTimeMillis();
                int bytesRead = is.read(buf);
                while (bytesRead != -1) {
                    bytesRead = is.read(buf);
                long endTime = System.currentTimeMillis();
                long duration = (endTime - startTime);
                avg += duration;
                if (duration < min) {
                    min = duration;
                if (duration > max) {
                    max = duration;
            // write result to output
            // MB * 1000 / milliseconds = MB per second; the slowest run gives the minimum
            // bandwidth and the fastest run gives the maximum.
            double avgBW = fileSizeInMB * 1000.0 * (double) numIters / (double) avg;
            double minBW = fileSizeInMB * 1000.0 / (double) max;
            double maxBW = fileSizeInMB * 1000.0 / (double) min;
    return 0;

From source file:Relevance.java

/**
 * Exact relevance is slower, non-exact relevance will have false positives.
 */
protected void batch_query(Tap source, Tap output, Fields wantedFields, RelevanceFunction func, Tap keysTap,
        String keyField, boolean useBloom, int bloom_bits, int bloom_hashes, boolean exact) throws IOException {
    // Builds a pipe assembly that reads "data" from source, optionally filters it through a
    // bloom-filter based relevance filter (useBloom) and/or an exact join against keysTap
    // (exact), and connects the result to output. At least one of the two modes is required.
    //
    // NOTE(review): this excerpt looks truncated - several closing braces are missing and no
    // statement that actually runs the connected flow is visible. Confirm against the complete
    // source before relying on it.
    if (!useBloom && !exact)
        throw new IllegalArgumentException("Must either use bloom filter or be exact, or both!");

    FileSystem fs = FileSystem.get(new Configuration());
    // Start from the "data" source, passing the wanted fields through unchanged.
    Pipe finalPipe = new Pipe("data");
    finalPipe = new Each(finalPipe, wantedFields, new Identity());

    Map<String, Tap> sources = new HashMap<String, Tap>();

    sources.put("data", source);
    Map properties = new HashMap();

    // A random file name under /tmp; the path is computed even when useBloom is false.
    String bloomFilterPath = "/tmp/" + UUID.randomUUID().toString() + ".bloomfilter";
    if (useBloom) {
        String jobId = UUID.randomUUID().toString();

        LOG.info("Creating bloom filter");
        writeOutBloomFilter(keysTap, keyField, fs, bloomFilterPath, bloom_bits, bloom_hashes);
        properties.put("mapred.job.reuse.jvm.num.tasks", -1);
        // Outside test mode the filter file is registered under "mapred.cache.files"; in test
        // mode its path is passed through a plain property instead.
        if (!TEST_MODE) {
            properties.put("mapred.cache.files", "hdfs://" + bloomFilterPath);
        } else {
            properties.put("batch_query.relevance.file", bloomFilterPath);
        LOG.info("Done creating bloom filter");

        // Apply the filter obtained from getRelevanceFilter for this function and job id.
        finalPipe = new Each(finalPipe, wantedFields, getRelevanceFilter(func, jobId));


    if (exact) {
        // Second source: the keys themselves, reduced to the key field.
        sources.put("relevant", keysTap);

        Pipe relevantRecords = new Pipe("relevant");
        relevantRecords = new Each(relevantRecords, new Fields(keyField), new Identity());

        // Apply getExactFilter(func), declaring ID and RELEVANT_OBJECT in addition to the
        // wanted fields.
        finalPipe = new Each(finalPipe, wantedFields, getExactFilter(func),
                Fields.join(wantedFields, new Fields(ID, RELEVANT_OBJECT)));

        // Group the data on RELEVANT_OBJECT together with the keys on keyField.
        finalPipe = new CoGroup(finalPipe, new Fields(RELEVANT_OBJECT), relevantRecords, new Fields(keyField),
                Fields.join(wantedFields, new Fields(ID, RELEVANT_OBJECT), new Fields("__ignored")));

        // Keep only the wanted fields plus ID.
        finalPipe = new Each(finalPipe, Fields.join(wantedFields, new Fields(ID)), new Identity());

        // Apply Distinct on ID when the function reports that it can have multiple matches.
        if (func.canHaveMultipleMatches()) {
            finalPipe = new Distinct(finalPipe, new Fields(ID));
        finalPipe = new Each(finalPipe, wantedFields, new Identity());

    Flow flow = new FlowConnector(properties).connect("Relevance: " + func.getClass().getSimpleName(), sources,
            output, finalPipe);

    // Remove the temporary bloom filter file (non-recursive delete).
    if (useBloom)
        fs.delete(new Path(bloomFilterPath), false);

From source file:FlinkBootstrap.java

public static void main(String[] args) throws Exception {

    if (args.length != 2) {
        throw new IllegalArgumentException(
                "Provide `TaskManager` or `JobManager` parameter with config folder");
    }/*  www.  ja  v a 2s  .co m*/

    //Load Hadoop S3 wrapper classes, due to ClassNotFound Exception without

    //Verify s3 is accessible
    Configuration conf = new Configuration();
    conf.addResource(new Path("config/hadoop/core-site.xml"));
    conf.addResource(new Path("config/hadoop/hdfs-site.xml"));
    FileSystem fs = FileSystem.get(conf);
    fs.listStatus(new Path("s3://dir"));

    if (args[0].equals("TaskManager")) {
        TaskManager.main(new String[] { "--configDir", args[1], });
    } else if (args[0].equals("JobManager")) {
        JobManager.main(new String[] { "--configDir", args[1], "--executionMode", "cluster", });
    } else {
        throw new IllegalArgumentException("Unknown parameter `" + args[0] + "`");

From source file:SBP.java

protected static void copyToLocalFile(Configuration conf, Path hdfs_path, Path local_path) throws Exception {
    FileSystem fs = FileSystem.get(conf);

    // read the result
    fs.copyToLocalFile(hdfs_path, local_path);

From source file:SBP.java

/**
 * Drives the iterative computation: parses the eleven command-line arguments into the
 * instance's settings, then loops from the current iteration up to {@code max_iter}, reading
 * an error summary each round and rotating the message directories.
 *
 * <p>NOTE(review): this excerpt looks truncated - closing braces are missing, the statements
 * that launch each stage are not visible, and the body of the convergence check is absent.
 * Paths such as {@code sum_error_path}, {@code check_error_path}, {@code message_smooth_path}
 * and {@code message_cur_path} are not assigned in the visible code. Confirm against the
 * complete source before relying on it.
 *
 * @param args exactly eleven arguments; the visible code reads indices 0-5 and 7-10
 * @return the result of {@code printUsage()} when the argument count is wrong, otherwise
 *         {@code 0}
 * @throws Exception declared by the signature; the visible code parses numbers and performs
 *         file system operations
 */
public int run(String[] args) throws Exception {
    if (args.length != 11) {
        // Echo what was received to help diagnose a wrong invocation.
        for (int i = 0; i < args.length; i++) {
            System.out.println("Args: " + i + " " + args[i]);
        }
        return printUsage();

    lambda = Double.parseDouble(args[10]);
    edge_path = new Path(args[0]);
    prior_path = new Path(args[1]);
    output_path = new Path(args[2]);
    Path prev_local_path = new Path("run_tmp/prev_local/");
    Path new_local_path = new Path("run_tmp/new_local/");
    Path tmp_output_path = new Path(output_path.toString());

    number_msg = Long.parseLong(args[3]);
    nreducer = Integer.parseInt(args[4]);
    // NOTE(review): the parsed reducer count is immediately overwritten with 1 - confirm
    // whether this is intentional.
    nreducer = 1;
    max_iter = Integer.parseInt(args[5]);

    // args[6] is not read anywhere in the visible code.
    nstate = Integer.parseInt(args[7]);
    edge_potential_str = read_edge_potential(args[8]);

    // Unless args[9] starts with "new", the starting iteration is parsed from the text after
    // its first four characters.
    int cur_iter = 1;
    if (args[9].startsWith("new") == false) {
        cur_iter = Integer.parseInt(args[9].substring(4));

    System.out.println("edge_path=" + edge_path.toString() + ", prior_path=" + prior_path.toString()
            + ", output_path=" + output_path.toString() + ", |E|=" + number_msg + ", nreducer=" + nreducer
            + ", maxiter=" + max_iter + ", nstate=" + nstate + ", edge_potential_str=" + edge_potential_str
            + ", cur_iter=" + cur_iter + ", lambda=" + lambda);

    fs = FileSystem.get(getConf());

    // Run Stage1 and Stage2.
    if (cur_iter == 1) {
        System.out.println("BP: Initializing messages...");

    // Threshold that the summed error is compared against below.
    double converge_threshold = number_msg * EPS * nstate;

    for (int i = cur_iter; i <= max_iter; i++) {
        System.out.println("   *** ITERATION " + (i) + "/" + max_iter + " ***");

        // The summary line is tab-separated: an integer count, then the summed error.
        String line = readLocaldirOneline(sum_error_path.toString());
        fs.delete(check_error_path, true);
        fs.delete(sum_error_path, true);
        String[] parts = line.split("\t");
        int n = Integer.parseInt(parts[0]);
        double sum = Double.parseDouble(parts[1]);
        System.out.println("Converged Msg: " + (number_msg - n));
        System.out.println("Sum Error: " + sum);
        // NOTE(review): the body of this convergence check is not visible in this excerpt.
        if (sum < converge_threshold) {
        // rotate directory
        fs.rename(message_smooth_path, message_cur_path);

    System.out.println("BP: Computing beliefs...");

    System.out.println("BP finished. The belief vector is in the HDFS " + args[2]);

    return 0;

From source file:ExportStressTest.java

public void createFile(int fileId) throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path dirPath = new Path("ExportStressTest");
    fs.mkdirs(dirPath);//ww w .ja v  a2s  .c om
    Path filePath = new Path(dirPath, "input-" + fileId);

    OutputStream os = fs.create(filePath);
    Writer w = new BufferedWriter(new OutputStreamWriter(os));
    for (int i = 0; i < RECORDS_PER_FILE; i++) {
        long v = (long) i + ((long) RECORDS_PER_FILE * (long) fileId);
        w.write("" + v + "," + ALPHABET + ALPHABET + ALPHABET + ALPHABET + "\n");


From source file:ExportStressTest.java

/** Create a set of data files to export. */
public void createData() throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path dirPath = new Path("ExportStressTest");
    if (fs.exists(dirPath)) {
        System.out.println("Export directory appears to already exist. Skipping data-gen.");
        return;/*from  w w  w  .  j a v  a  2s  .  co m*/

    for (int i = 0; i < NUM_FILES; i++) {

From source file:DumpRecordsExtended.java

 * Runs this tool.//from   w  w  w .j  a  v a  2  s  .  c  o m
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        return -1;

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);

    LOG.info("Tool name: " + DumpRecordsExtended.class.getSimpleName());
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output: " + outputPath);

    Configuration conf = new Configuration();
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);

    Job job = Job.getInstance(conf);


    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));



    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);


    return 0;

From source file:LookupQuery.java

public static void initQuery(String[] args) throws IOException {
    indexPath = args[0];/*from  w ww . ja va 2  s .com*/
    collectionPath = args[1];

    config = new Configuration();
    fs = FileSystem.get(config);
    reader = new MapFile.Reader(fs, indexPath, config);

    key = new Text();
    value = new ArrayListWritable<PairOfInts>();
    areThereMoreLookups = true;
    query = "";
    Qvalue = 0;
