Example usage for org.apache.hadoop.fs FileSystem delete

List of usage examples for org.apache.hadoop.fs FileSystem delete

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.FileSystem delete.

Prototype

public abstract boolean delete(Path f, boolean recursive) throws IOException;

Source Link

Document

Delete a file or directory. If the path is a directory and recursive is true, the directory is deleted together with its contents; if recursive is false and the directory is not empty, an IOException is thrown. For a regular file, recursive may be either value. Returns true if the delete succeeded.
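
Below is a minimal, self-contained sketch of the call (not taken from any of the source files listed under Usage; the class name and the /tmp/example-output path are purely illustrative). It deletes the path only if it exists, passing recursive = true so that a non-empty directory is removed along with its contents:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemDeleteExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical path, used only for illustration.
        Path out = new Path("/tmp/example-output");

        if (fs.exists(out)) {
            // recursive = true removes a directory together with its contents;
            // for a single file the flag may be either value.
            boolean deleted = fs.delete(out, true);
            System.out.println("Deleted " + out + ": " + deleted);
        }
    }
}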

Usage

From source file:Relevance.java

License:Apache License

/**
 * Exact relevance is slower, non-exact relevance will have false positives
 */
protected void batch_query(Tap source, Tap output, Fields wantedFields, RelevanceFunction func, Tap keysTap,
        String keyField, boolean useBloom, int bloom_bits, int bloom_hashes, boolean exact) throws IOException {
    if (!useBloom && !exact)
        throw new IllegalArgumentException("Must either use bloom filter or be exact, or both!");

    FileSystem fs = FileSystem.get(new Configuration());
    Pipe finalPipe = new Pipe("data");
    finalPipe = new Each(finalPipe, wantedFields, new Identity());

    Map<String, Tap> sources = new HashMap<String, Tap>();

    sources.put("data", source);
    Map properties = new HashMap();

    String bloomFilterPath = "/tmp/" + UUID.randomUUID().toString() + ".bloomfilter";
    if (useBloom) {
        String jobId = UUID.randomUUID().toString();

        LOG.info("Creating bloom filter");
        writeOutBloomFilter(keysTap, keyField, fs, bloomFilterPath, bloom_bits, bloom_hashes);
        properties.put("mapred.job.reuse.jvm.num.tasks", -1);
        if (!TEST_MODE) {
            properties.put("mapred.cache.files", "hdfs://" + bloomFilterPath);
        } else {
            properties.put("batch_query.relevance.file", bloomFilterPath);
        }
        LOG.info("Done creating bloom filter");

        finalPipe = new Each(finalPipe, wantedFields, getRelevanceFilter(func, jobId));

    }

    if (exact) {
        sources.put("relevant", keysTap);

        Pipe relevantRecords = new Pipe("relevant");
        relevantRecords = new Each(relevantRecords, new Fields(keyField), new Identity());

        finalPipe = new Each(finalPipe, wantedFields, getExactFilter(func),
                Fields.join(wantedFields, new Fields(ID, RELEVANT_OBJECT)));

        finalPipe = new CoGroup(finalPipe, new Fields(RELEVANT_OBJECT), relevantRecords, new Fields(keyField),
                Fields.join(wantedFields, new Fields(ID, RELEVANT_OBJECT), new Fields("__ignored")));

        finalPipe = new Each(finalPipe, Fields.join(wantedFields, new Fields(ID)), new Identity());

        if (func.canHaveMultipleMatches()) {
            finalPipe = new Distinct(finalPipe, new Fields(ID));
        }
        finalPipe = new Each(finalPipe, wantedFields, new Identity());
    }

    Flow flow = new FlowConnector(properties).connect("Relevance: " + func.getClass().getSimpleName(), sources,
            output, finalPipe);
    flow.complete();

    if (useBloom)
        fs.delete(new Path(bloomFilterPath), false);
}

From source file:WikipediaForwardIndexBuilder.java

License:Apache License

@SuppressWarnings("static-access")
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input").create(INPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("index file").create(INDEX_FILE_OPTION));
    options.addOption(OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr").hasArg()
            .withDescription("two-letter language code").create(LANGUAGE_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(INDEX_FILE_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    Path inputPath = new Path(cmdline.getOptionValue(INPUT_OPTION));
    String indexFile = cmdline.getOptionValue(INDEX_FILE_OPTION);

    String tmpPath = "tmp-" + WikipediaForwardIndexBuilder.class.getSimpleName() + "-" + RANDOM.nextInt(10000);

    if (!inputPath.isAbsolute()) {
        System.err.println("Error: " + INPUT_OPTION + " must be an absolute path!");
        return -1;
    }

    String language = null;
    if (cmdline.hasOption(LANGUAGE_OPTION)) {
        language = cmdline.getOptionValue(LANGUAGE_OPTION);
        if (language.length() != 2) {
            System.err.println("Error: \"" + language + "\" unknown language!");
            return -1;
        }
    }

    JobConf conf = new JobConf(getConf(), WikipediaForwardIndexBuilder.class);
    FileSystem fs = FileSystem.get(conf);

    LOG.info("Tool name: " + this.getClass().getName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - index file: " + indexFile);
    LOG.info(" - language: " + language);
    LOG.info("Note: This tool only works on block-compressed SequenceFiles!");

    conf.setJobName(String.format("BuildWikipediaForwardIndex[%s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath,
            INDEX_FILE_OPTION, indexFile, LANGUAGE_OPTION, language));

    conf.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, new Path(tmpPath));
    FileOutputFormat.setCompressOutput(conf, false);

    if (language != null) {
        conf.set("wiki.language", language);
    }

    conf.setInputFormat(NoSplitSequenceFileInputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(IdentityReducer.class);

    // Delete the output directory if it exists already.
    fs.delete(new Path(tmpPath), true);

    RunningJob job = JobClient.runJob(conf);

    Counters counters = job.getCounters();
    int blocks = (int) counters.getCounter(Blocks.Total);

    LOG.info("number of blocks: " + blocks);

    LOG.info("Writing index file...");
    LineReader reader = new LineReader(fs.open(new Path(tmpPath + "/part-00000")));
    FSDataOutputStream out = fs.create(new Path(indexFile), true);

    out.writeUTF(edu.umd.cloud9.collection.wikipedia.WikipediaForwardIndex.class.getCanonicalName());
    out.writeUTF(inputPath.toString());
    out.writeInt(blocks);

    int cnt = 0;
    Text line = new Text();
    while (reader.readLine(line) > 0) {
        String[] arr = line.toString().split("\\s+");

        int docno = Integer.parseInt(arr[0]);
        int offset = Integer.parseInt(arr[1]);
        short fileno = Short.parseShort(arr[2]);

        out.writeInt(docno);
        out.writeInt(offset);
        out.writeShort(fileno);

        cnt++;

        if (cnt % 100000 == 0) {
            LOG.info(cnt + " blocks written");
        }
    }

    reader.close();
    out.close();

    if (cnt != blocks) {
        throw new RuntimeException("Error: mismatch in block count!");
    }

    // Clean up.
    fs.delete(new Path(tmpPath), true);

    return 0;
}

From source file:TaskSearchWords.java

public static void main(String[] args) throws Exception {

    String hadoopServer = "ip-172-31-13-245.ap-southeast-1.compute.internal";

    Configuration conf = new Configuration();

    // the job tracker address, as defined in mapred-site.xml
    conf.set("mapred.job.tracker", hadoopServer + ":54311");

    // the default filesystem, as defined in hdfs-site.xml
    conf.set("fs.default.name", "hdfs://" + hadoopServer + ":9000");

    // set the mapper and reducer classes in the configuration
    conf.set("mapreduce.map.class", "TokenizerMapper");
    conf.set("mapreduce.reduce.class", "IntSumReducer");

    // point mapred.jar at the job jar so the cluster can load the job classes
    conf.set("mapred.jar", "C:\\GitRepos\\OCR\\HadoopTasks\\dist\\HadoopTasks.jar");

    //to pass parameters to mapred classes
    conf.set("RAWOCRCLOB",
            "Omeprazole_Cap E/C 10mg\n" + "Dressit Ster esDress\n" + "Flaminal Forte 15g\n"
                    + "Co-Magaldrox_Susp 195mg/220mg/5ml S/F\n" + "Antacid/Oxetacaine_Oral Susp S/F\n"
                    + "Simeticone_Susp 40mg/ml S/F\n" + "Infacol_Susp 40mg/ml S/F");

    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(TaskSearchWords.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path("/user/ubuntu/MedicinesProcessed.csv"));
    FileSystem fs = FileSystem.get(conf);
    Path out = new Path("/user/ubuntu/processed/");
    fs.delete(out, true);

    // finally, set the (now deleted) output path
    FileOutputFormat.setOutputPath(job, out);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:CountJob.java

License:Apache License

public static void doJob(String param, String args[], String msgs)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    conf.set(TokenizerMapper.PATTERN, args[2]);
    FileSystem hdfs = FileSystem.get(conf);
    Path tempOutput1 = new Path("/data/output/temp/" + param + "1");
    Path tempOutput2 = new Path("/data/output/temp/" + param + "2");
    if (hdfs.exists(tempOutput1) || hdfs.exists(tempOutput2)) {
        hdfs.delete(tempOutput1, true);
        hdfs.delete(tempOutput2, true);
    }

    Job job = new Job(conf, "word count");
    job.setJarByClass(CountJob.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, tempOutput1);
    job.waitForCompletion(true);

    Job sortJob1 = new Job(conf);
    sortJob1.setJobName("grep-sort");
    FileInputFormat.setInputPaths(sortJob1, tempOutput1);
    sortJob1.setInputFormatClass(SequenceFileInputFormat.class);
    sortJob1.setMapperClass(InverseMapper.class);
    sortJob1.setNumReduceTasks(1); // write a single file
    FileOutputFormat.setOutputPath(sortJob1, tempOutput2);
    sortJob1.setSortComparatorClass( // sort by decreasing freq
            LongWritable.DecreasingComparator.class);
    sortJob1.waitForCompletion(true);
    hdfs.delete(tempOutput1, true);

}

From source file:CountJob.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String msgs = "";
    doJob("1", args, msgs);
    doJob("2", args, msgs);
    FileSystem hdfs = FileSystem.get(conf);

    BufferedReader bfr = new BufferedReader(
            new InputStreamReader(hdfs.open(new Path("/data/output/temp/12/part-r-00000"))));
    BufferedReader bfr2 = new BufferedReader(
            new InputStreamReader(hdfs.open(new Path("/data/output/temp/22/part-r-00000"))));
    Boolean same = true;
    String line1;
    String line2;
    line1 = bfr.readLine();
    line2 = bfr2.readLine();
    while (same == true) {
        if ((line1 == null && line2 != null) || (line1 != null && line2 == null)) {
            same = false;
            break;
        } else if ((line1 == null && line2 == null)) {
            break;
        } else {
            if (line1.equals(line2)) {
                line1 = bfr.readLine();
                line2 = bfr2.readLine();
            } else {
                same = false;
                break;
            }
        }
    }
    if (same == true) {
        System.out.print("same " + same + "\n");
        Path localP = new Path("/tmp/output.txt");
        hdfs.copyToLocalFile(new Path("/data/output/temp/12/part-r-00000"), localP);
        hdfs.copyFromLocalFile(localP, new Path(args[1] + "/part-r-00000"));
        hdfs.createNewFile(new Path(args[1] + "/_SUCCESS"));
        System.out.print("created result");

    } else {

        System.out.print("Different");
        doJob("3", args, msgs);
        Path localP = new Path("/tmp/output.txt");
        hdfs.copyToLocalFile(new Path("/data/output/temp/32/part-r-00000"), localP);
        hdfs.copyFromLocalFile(localP, new Path(args[1] + "/part-r-00000"));
        hdfs.createNewFile(new Path(args[1] + "/_SUCCESS"));
        System.out.print("created result");

    }
    hdfs.delete(new Path("/data/output/temp/12/part-r-00000"), true);
    hdfs.delete(new Path("/data/output/temp/22/part-r-00000"), true);

}

From source file:Hw2Part1.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: <input file> <output directory>");
        System.exit(2);
    }

    //    FileSystem hdfs = FileSystem.get(conf);
    String target = "hdfs://localhost:9000/";
    FileSystem fs = FileSystem.get(URI.create(target), conf); // note: uses the explicit HDFS URI rather than the default filesystem
    Path outputpath = new Path(otherArgs[otherArgs.length - 1]);
    if (fs.exists(outputpath)) {
        fs.delete(outputpath, true);
    }

    Job job = Job.getInstance(conf, "Hw2Part1");

    job.setJarByClass(Hw2Part1.class);

    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumCombiner.class);
    job.setReducerClass(IntSumReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(InfoWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(InfoWritable.class);

    // add the input paths as given by command line
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }

    // add the output path as given by the command line
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:HdfsCacheReader.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("HdfsReader [FileSize i.e. 1g/10g/100g/200g]");
        return 1;
    }

    double fileSize;
    double fileSizeInMB;
    if (args[0].equals("1g")) {
        fileSize = 1073741824.0;
        fileSizeInMB = 1024.0;
    } else if (args[0].equals("10g")) {
        fileSize = 10737418240.0;
        fileSizeInMB = 10240.0;
    } else if (args[0].equals("100g")) {
        fileSize = 107374182400.0;
        fileSizeInMB = 102400.0;
    } else if (args[0].equals("200g")) {
        fileSize = 214748364800.0;
        fileSizeInMB = 204800.0;
    } else {
        throw new IllegalArgumentException("Invalid arg: " + args[0]);
    }

    String fileName = "cacheRead-" + args[0] + "-avg.txt";
    File avgFile = new File(fileName);
    PrintWriter avgPW = new PrintWriter(avgFile);
    fileName = "cacheRead-" + args[0] + "-min.txt";
    File minFile = new File(fileName);
    PrintWriter minPW = new PrintWriter(minFile);
    fileName = "cacheRead-" + args[0] + "-max.txt";
    File maxFile = new File(fileName);
    PrintWriter maxPW = new PrintWriter(maxFile);

    int numIters = 10;
    int bufferSize = 65536;
    long blockSize[] = new long[] { 67108864, 134217728, 268435456, 536870912, 1073741824 };
    short replication[] = new short[] { 1, 4 };
    String hdfsFile = "/hdfs_test/" + args[0] + "/1.in";
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path hdfsFilePath = new Path(hdfsFile);

    for (int i = 0; i < 5; i++) { // blockSize
        for (int j = 0; j < 2; j++) { // replication
            OutputStream os = fs.create(hdfsFilePath, true, bufferSize, replication[j], blockSize[i]);
            byte[] buf = new byte[bufferSize];
            for (int m = 0; m < bufferSize; m += 4) {
                buf[m] = (byte) m;
            }
            double numBufPerFile = fileSize / (double) bufferSize;

            for (double m = 0.0; m < numBufPerFile; m++) {
                os.write(buf);
            }
            os.close();
            String cmdStr = "/usr/local/hadoop/bin/hdfs cacheadmin -addDirective -path " + hdfsFile
                    + " -pool hdfs_test";
            Process p = Runtime.getRuntime().exec(cmdStr);
            p.waitFor();
            String cmdOutLine = "";
            StringBuffer cmdOut = new StringBuffer();
            BufferedReader cmdOutReader = new BufferedReader(new InputStreamReader(p.getInputStream()));
            while ((cmdOutLine = cmdOutReader.readLine()) != null) {
                cmdOut.append(cmdOutLine + "\n");
            }
            // System.out.println (cmdOut.toString());

            long avg = 0, min = Long.MAX_VALUE, max = Long.MIN_VALUE;
            for (int k = 0; k < numIters; k++) {
                FSDataInputStream in = fs.open(hdfsFilePath);
                ByteBuffer bbuf = null;
                ElasticByteBufferPool ebbp = new ElasticByteBufferPool();
                long startTime = System.currentTimeMillis();
                while ((bbuf = in.read(ebbp, bufferSize, EnumSet.of(ReadOption.SKIP_CHECKSUMS))) != null) {
                    in.releaseBuffer(bbuf);
                }
                long endTime = System.currentTimeMillis();
                in.close();
                long duration = (endTime - startTime);
                avg += duration;
                if (duration < min) {
                    min = duration;
                }
                if (duration > max) {
                    max = duration;
                }
            }
            // write result to output
            double avgBW = fileSizeInMB * 1000.0 * (double) numIters / (double) avg;
            avgPW.print(avgBW);
            avgPW.print("\t");
            double minBW = fileSizeInMB * 1000.0 / (double) max;
            minPW.print(minBW);
            minPW.print("\t");
            double maxBW = fileSizeInMB * 1000.0 / (double) min;
            maxPW.print(maxBW);
            maxPW.print("\t");
            cmdStr = "/usr/local/hadoop/bin/hdfs cacheadmin -removeDirectives -path " + hdfsFile;
            p = Runtime.getRuntime().exec(cmdStr);
            p.waitFor();
            cmdOutLine = "";
            cmdOut.setLength(0);
            cmdOutReader = new BufferedReader(new InputStreamReader(p.getInputStream()));
            while ((cmdOutLine = cmdOutReader.readLine()) != null) {
                cmdOut.append(cmdOutLine + "\n");
            }
            // System.out.println (cmdOut.toString());
            fs.delete(hdfsFilePath, true);
        }
        avgPW.println();
        minPW.println();
        maxPW.println();
    }
    avgPW.close();
    minPW.close();
    maxPW.close();
    return 0;
}

From source file:DataHBase.java

License:Open Source License

public void run(HashMap<String, String> config) throws Exception {

    // delete the previous output directory if it exists
    Path p = new Path(config.get("hdfs_output_dir"));
    FileSystem fs = FileSystem.get(new Configuration());
    if (fs.exists(p)) {
        fs.delete(p, true);
    }

    String junction = config.get("what_to_find"); // the name of the junction
    String date1 = config.get("date1");
    String date2 = config.get("date2");
    //date1 and date2 can be of a format YYYY-MM-DD
    if (date1.length() == 10)
        date1 = date1 + " 00:00:00";
    if (date2.length() == 10)
        date2 = date2 + " 23:59:59";
    System.out.println("Looking for data of " + junction + ": " + date1 + " - " + date2);

    //create timestamps (considering time zone!) to limit data
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    sdf.setTimeZone(TimeZone.getDefault());
    Long time1 = sdf.parse(date1).getTime();
    Long time2 = sdf.parse(date2).getTime();

    //run a job
    Configuration conf = HBaseConfiguration.create();
    conf.set("mapreduce.output.textoutputformat.separator", ","); //set comma as a delimiter

    Job job = new Job(conf, "Retrieve data from hbase");
    job.setJarByClass(DataHBase.class);

    Scan scan = new Scan();
    scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
    scan.setCacheBlocks(false); // don't set to true for MR jobs
    scan.setMaxVersions(1);
    scan.setTimeRange(time1, time2); //take a day we are interested in
    //set a filter for a junction name
    if (!junction.equals("")) {
        SingleColumnValueFilter filter = new SingleColumnValueFilter(Bytes.toBytes("data"),
                Bytes.toBytes("location_name"), CompareOp.EQUAL, Bytes.toBytes(junction));
        scan.setFilter(filter);
    }
    //add the specific columns to the output to limit the amount of data
    scan.addFamily(Bytes.toBytes("data"));

    TableMapReduceUtil.initTableMapperJob(config.get("hbase_table"), // input HBase table name
            scan, // Scan instance to control CF and attribute selection
            TableMap.class, // mapper
            Text.class, // mapper output key
            Text.class, // mapper output value
            job);

    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job, new Path(config.get("hdfs_output_dir")));

    job.waitForCompletion(true);

}

From source file:PiEstimator.java

License:Apache License

/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    // setup job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());

    jobConf.setInputFormat(SequenceFileInputFormat.class);

    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);

    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file, LongWritable.class,
                    LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}

From source file:TestBytesBloomFilter.java

License:Apache License

public void testSetSanity() throws IOException {
    FileSystem local = FileSystem.getLocal(new Configuration());

    BytesBloomFilter set = new BytesBloomFilter(1000000, 4);
    byte[] arr1 = new byte[] { 1, 2, 3, 4, 5, 6, 7 };
    byte[] arr2 = new byte[] { 11, 12, 5, -2 };
    byte[] arr3 = new byte[] { 3, 4, 5 };
    set.add(arr1);
    set.add(arr2);

    for (byte i = 0; i < (byte) 125; i++) {
        set.add(new byte[] { i });
    }

    assertTrue(set.mayContain(arr1));
    assertTrue(set.mayContain(arr2));

    for (byte i = 0; i < (byte) 125; i++) {
        assertTrue(set.mayContain(new byte[] { i }));
    }

    //technically this could be an invalid statement, but the probability is low and this is a sanity check
    assertFalse(set.mayContain(arr3));

    //now test that we can write and read from file just fine
    local.delete(new Path("/tmp/filter-test.bloomfilter"), false);
    DataOutputStream os = new DataOutputStream(new FileOutputStream("/tmp/filter-test.bloomfilter"));
    set.write(os);
    os.close();

    BytesBloomFilter set2 = new BytesBloomFilter();
    DataInputStream is = new DataInputStream(new FileInputStream("/tmp/filter-test.bloomfilter"));
    set2.readFields(is);

    assertTrue(set2.mayContain(arr1));
    assertTrue(set2.mayContain(arr2));

    for (byte i = 0; i < (byte) 125; i++) {
        assertTrue(set2.mayContain(new byte[] { i }));
    }

    //technically this could be an invalid statement, but the probability is low and this is a sanity check
    assertFalse(set2.mayContain(arr3));

}