List of usage examples for org.apache.hadoop.fs.FileSystem.delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
From source file:Relevance.java
License:Apache License
/**
 * Builds and runs a Cascading flow that filters the records of {@code source} by relevance to
 * the keys in {@code keysTap} and writes the surviving {@code wantedFields} to {@code output}.
 *
 * <p>Exact relevance is slower, non-exact relevance will have false positives.
 *
 * <p>When {@code useBloom} is set, a bloom filter is written to a uniquely named file under
 * {@code /tmp} before the flow is built. That file is removed when this method returns,
 * whether the flow succeeded or failed.
 *
 * @param source       tap registered as the "data" source of the flow
 * @param output       sink tap of the flow
 * @param wantedFields fields selected from the input and kept in the final output
 * @param func         relevance function handed to the bloom and exact filters
 * @param keysTap      tap holding the keys; used to build the bloom filter and, in exact mode,
 *                     registered as the "relevant" source
 * @param keyField     name of the key field inside {@code keysTap}
 * @param useBloom     whether to pre-filter with a bloom filter
 * @param bloom_bits   passed through to {@code writeOutBloomFilter}
 * @param bloom_hashes passed through to {@code writeOutBloomFilter}
 * @param exact        whether to co-group against the keys for an exact result
 * @throws IllegalArgumentException if neither {@code useBloom} nor {@code exact} is set
 * @throws IOException              if the file system cannot be accessed
 */
protected void batch_query(Tap source, Tap output, Fields wantedFields, RelevanceFunction func, Tap keysTap,
        String keyField, boolean useBloom, int bloom_bits, int bloom_hashes, boolean exact) throws IOException {
    if (!useBloom && !exact)
        throw new IllegalArgumentException("Must either use bloom filter or be exact, or both!");

    FileSystem fs = FileSystem.get(new Configuration());
    Pipe finalPipe = new Pipe("data");
    finalPipe = new Each(finalPipe, wantedFields, new Identity());

    Map<String, Tap> sources = new HashMap<String, Tap>();
    sources.put("data", source);

    Map<Object, Object> properties = new HashMap<Object, Object>();

    String bloomFilterPath = "/tmp/" + UUID.randomUUID().toString() + ".bloomfilter";
    try {
        if (useBloom) {
            String jobId = UUID.randomUUID().toString();

            LOG.info("Creating bloom filter");
            writeOutBloomFilter(keysTap, keyField, fs, bloomFilterPath, bloom_bits, bloom_hashes);
            properties.put("mapred.job.reuse.jvm.num.tasks", -1);
            if (!TEST_MODE) {
                properties.put("mapred.cache.files", "hdfs://" + bloomFilterPath);
            } else {
                properties.put("batch_query.relevance.file", bloomFilterPath);
            }
            LOG.info("Done creating bloom filter");

            finalPipe = new Each(finalPipe, wantedFields, getRelevanceFilter(func, jobId));
        }

        if (exact) {
            sources.put("relevant", keysTap);

            Pipe relevantRecords = new Pipe("relevant");
            relevantRecords = new Each(relevantRecords, new Fields(keyField), new Identity());

            finalPipe = new Each(finalPipe, wantedFields, getExactFilter(func),
                    Fields.join(wantedFields, new Fields(ID, RELEVANT_OBJECT)));

            finalPipe = new CoGroup(finalPipe, new Fields(RELEVANT_OBJECT), relevantRecords, new Fields(keyField),
                    Fields.join(wantedFields, new Fields(ID, RELEVANT_OBJECT), new Fields("__ignored")));

            finalPipe = new Each(finalPipe, Fields.join(wantedFields, new Fields(ID)), new Identity());

            // A record matching several keys appears once per match after the co-group.
            if (func.canHaveMultipleMatches()) {
                finalPipe = new Distinct(finalPipe, new Fields(ID));
            }
            finalPipe = new Each(finalPipe, wantedFields, new Identity());
        }

        Flow flow = new FlowConnector(properties).connect("Relevance: " + func.getClass().getSimpleName(), sources,
                output, finalPipe);
        flow.complete();
    } finally {
        // Remove the temporary bloom filter even when building or running the flow failed.
        if (useBloom)
            fs.delete(new Path(bloomFilterPath), false);
    }
}
From source file:WikipediaForwardIndexBuilder.java
License:Apache License
@SuppressWarnings("static-access") @Override/* ww w . j av a2 s . co m*/ public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input").create(INPUT_OPTION)); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("index file").create(INDEX_FILE_OPTION)); options.addOption(OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr").hasArg() .withDescription("two-letter language code").create(LANGUAGE_OPTION)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(INDEX_FILE_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } Path inputPath = new Path(cmdline.getOptionValue(INPUT_OPTION)); String indexFile = cmdline.getOptionValue(INDEX_FILE_OPTION); String tmpPath = "tmp-" + WikipediaForwardIndexBuilder.class.getSimpleName() + "-" + RANDOM.nextInt(10000); if (!inputPath.isAbsolute()) { System.err.println("Error: " + INPUT_OPTION + " must be an absolute path!"); return -1; } String language = null; if (cmdline.hasOption(LANGUAGE_OPTION)) { language = cmdline.getOptionValue(LANGUAGE_OPTION); if (language.length() != 2) { System.err.println("Error: \"" + language + "\" unknown language!"); return -1; } } JobConf conf = new JobConf(getConf(), WikipediaForwardIndexBuilder.class); FileSystem fs = FileSystem.get(conf); LOG.info("Tool name: " + this.getClass().getName()); LOG.info(" - input path: " + inputPath); LOG.info(" - index file: " + indexFile); LOG.info(" - language: " + language); LOG.info("Note: This tool only works on block-compressed SequenceFiles!"); 
conf.setJobName(String.format("BuildWikipediaForwardIndex[%s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath, INDEX_FILE_OPTION, indexFile, LANGUAGE_OPTION, language)); conf.setNumReduceTasks(1); FileInputFormat.setInputPaths(conf, inputPath); FileOutputFormat.setOutputPath(conf, new Path(tmpPath)); FileOutputFormat.setCompressOutput(conf, false); if (language != null) { conf.set("wiki.language", language); } conf.setInputFormat(NoSplitSequenceFileInputFormat.class); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(Text.class); conf.setMapRunnerClass(MyMapRunner.class); conf.setReducerClass(IdentityReducer.class); // Delete the output directory if it exists already. fs.delete(new Path(tmpPath), true); RunningJob job = JobClient.runJob(conf); Counters counters = job.getCounters(); int blocks = (int) counters.getCounter(Blocks.Total); LOG.info("number of blocks: " + blocks); LOG.info("Writing index file..."); LineReader reader = new LineReader(fs.open(new Path(tmpPath + "/part-00000"))); FSDataOutputStream out = fs.create(new Path(indexFile), true); out.writeUTF(edu.umd.cloud9.collection.wikipedia.WikipediaForwardIndex.class.getCanonicalName()); out.writeUTF(inputPath.toString()); out.writeInt(blocks); int cnt = 0; Text line = new Text(); while (reader.readLine(line) > 0) { String[] arr = line.toString().split("\\s+"); int docno = Integer.parseInt(arr[0]); int offset = Integer.parseInt(arr[1]); short fileno = Short.parseShort(arr[2]); out.writeInt(docno); out.writeInt(offset); out.writeShort(fileno); cnt++; if (cnt % 100000 == 0) { LOG.info(cnt + " blocks written"); } } reader.close(); out.close(); if (cnt != blocks) { throw new RuntimeException("Error: mismatch in block count!"); } // Clean up. fs.delete(new Path(tmpPath), true); return 0; }
From source file:TaskSearchWords.java
public static void main(String[] args) throws Exception { String hadoopServer = "ip-172-31-13-245.ap-southeast-1.compute.internal"; Configuration conf = new Configuration(); // this should be like defined in your mapred-site.xml conf.set("mapred.job.tracker", hadoopServer + ":54311"); // like defined in hdfs-site.xml conf.set("fs.default.name", "hdfs://" + hadoopServer + ":9000"); //setting mapred classes for HDFS to know which classes to process conf.set("mapreduce.map.class", "TokenizerMapper"); conf.set("mapreduce.reduce.class", "IntSumReducer"); //to prevent classdefnotfound exception conf.set("mapred.jar", "C:\\GitRepos\\OCR\\HadoopTasks\\dist\\HadoopTasks.jar"); //to pass parameters to mapred classes conf.set("RAWOCRCLOB", "Omeprazole_Cap E/C 10mg\n" + "Dressit Ster esDress\n" + "Flaminal Forte 15g\n" + "Co-Magaldrox_Susp 195mg/220mg/5ml S/F\n" + "Antacid/Oxetacaine_Oral Susp S/F\n" + "Simeticone_Susp 40mg/ml S/F\n" + "Infacol_Susp 40mg/ml S/F"); Job job = Job.getInstance(conf, "word count"); job.setJarByClass(TaskSearchWords.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path("/user/ubuntu/MedicinesProcessed.csv")); FileSystem fs = FileSystem.get(conf); Path out = new Path("/user/ubuntu/processed/"); fs.delete(out, true); //finally set the empty out path FileOutputFormat.setOutputPath(job, out); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:CountJob.java
License:Apache License
public static void doJob(String param, String args[], String msgs) throws IOException, ClassNotFoundException, InterruptedException { Configuration conf = new Configuration(); conf.set(TokenizerMapper.PATTERN, args[2]); FileSystem hdfs = FileSystem.get(conf); Path tempOutput1 = new Path("/data/output/temp/" + param + "1"); Path tempOutput2 = new Path("/data/output/temp/" + param + "2"); if (hdfs.exists(tempOutput1) || hdfs.exists(tempOutput2)) { hdfs.delete(tempOutput1, true); hdfs.delete(tempOutput2, true);/* w w w .j a va2 s . com*/ } Job job = new Job(conf, "word count"); job.setJarByClass(CountJob.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(LongSumReducer.class); job.setReducerClass(LongSumReducer.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, tempOutput1); job.waitForCompletion(true); Job sortJob1 = new Job(conf); sortJob1.setJobName("grep-sort"); FileInputFormat.setInputPaths(sortJob1, tempOutput1); sortJob1.setInputFormatClass(SequenceFileInputFormat.class); sortJob1.setMapperClass(InverseMapper.class); sortJob1.setNumReduceTasks(1); // write a single file FileOutputFormat.setOutputPath(sortJob1, tempOutput2); sortJob1.setSortComparatorClass( // sort by decreasing freq LongWritable.DecreasingComparator.class); sortJob1.waitForCompletion(true); hdfs.delete(tempOutput1, true); }
From source file:CountJob.java
License:Apache License
/**
 * Runs the counting pipeline twice and compares the two sorted result files line by line.
 * If they are identical the first result is published to {@code args[1]}; otherwise the
 * pipeline is run a third time and that result is published instead.
 *
 * @param args forwarded to {@code doJob}; {@code args[1]} is the directory that receives
 *             {@code part-r-00000} and the {@code _SUCCESS} marker
 * @throws Exception if a job or any file system operation fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String msgs = "";
    doJob("1", args, msgs);
    doJob("2", args, msgs);
    FileSystem hdfs = FileSystem.get(conf);

    Path firstResult = new Path("/data/output/temp/12/part-r-00000");
    Path secondResult = new Path("/data/output/temp/22/part-r-00000");

    boolean same = true;
    BufferedReader bfr = new BufferedReader(new InputStreamReader(hdfs.open(firstResult)));
    try {
        BufferedReader bfr2 = new BufferedReader(new InputStreamReader(hdfs.open(secondResult)));
        try {
            String line1 = bfr.readLine();
            String line2 = bfr2.readLine();
            // Stop at the first differing line, or when one file ends before the other.
            while (line1 != null || line2 != null) {
                if (line1 == null || !line1.equals(line2)) {
                    same = false;
                    break;
                }
                line1 = bfr.readLine();
                line2 = bfr2.readLine();
            }
        } finally {
            bfr2.close();
        }
    } finally {
        bfr.close();
    }

    if (same) {
        System.out.print("same " + same + "\n");
        publishResult(hdfs, firstResult, args[1]);
    } else {
        System.out.print("Different");
        doJob("3", args, msgs);
        publishResult(hdfs, new Path("/data/output/temp/32/part-r-00000"), args[1]);
    }

    hdfs.delete(firstResult, true);
    hdfs.delete(secondResult, true);
}

/** Copies {@code resultFile} into {@code outputDir} via a local temp file and adds a _SUCCESS marker. */
private static void publishResult(FileSystem hdfs, Path resultFile, String outputDir) throws IOException {
    Path localP = new Path("/tmp/output.txt");
    hdfs.copyToLocalFile(resultFile, localP);
    hdfs.copyFromLocalFile(localP, new Path(outputDir + "/part-r-00000"));
    hdfs.createNewFile(new Path(outputDir + "/_SUCCESS"));
    System.out.print("created result");
}
From source file:Hw2Part1.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: <input file> <output directory>"); System.exit(2);/*from ww w. java 2s . c om*/ } // FileSystem hdfs = FileSystem.get(conf); String target = "hdfs://localhost:9000/"; FileSystem fs = FileSystem.get(URI.create(target), conf);//is diffrent Path outputpath = new Path(otherArgs[otherArgs.length - 1]); if (fs.exists(outputpath)) { fs.delete(outputpath, true); } Job job = Job.getInstance(conf, "Hw2Part1"); job.setJarByClass(Hw2Part1.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumCombiner.class); job.setReducerClass(IntSumReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(InfoWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(InfoWritable.class); // add the input paths as given by command line for (int i = 0; i < otherArgs.length - 1; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } // add the output path as given by the command line FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:HdfsCacheReader.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 1) { System.err.println("HdfsReader [FileSize i.e. 1g/10g/100g/200g]"); return 1; }/* w w w.j ava2 s. c om*/ double fileSize; double fileSizeInMB; if (args[0].equals("1g")) { fileSize = 1073741824.0; fileSizeInMB = 1024.0; } else if (args[0].equals("10g")) { fileSize = 10737418240.0; fileSizeInMB = 10240.0; } else if (args[0].equals("100g")) { fileSize = 107374182400.0; fileSizeInMB = 102400.0; } else if (args[0].equals("200g")) { fileSize = 214748364800.0; fileSizeInMB = 204800.0; } else { throw new IllegalArgumentException("Invalid arg: " + args[0]); } String fileName = "cacheRead-" + args[0] + "-avg.txt"; File avgFile = new File(fileName); PrintWriter avgPW = new PrintWriter(avgFile); fileName = "cacheRead-" + args[0] + "-min.txt"; File minFile = new File(fileName); PrintWriter minPW = new PrintWriter(minFile); fileName = "cacheRead-" + args[0] + "-max.txt"; File maxFile = new File(fileName); PrintWriter maxPW = new PrintWriter(maxFile); int numIters = 10; int bufferSize = 65536; long blockSize[] = new long[] { 67108864, 134217728, 268435456, 536870912, 1073741824 }; short replication[] = new short[] { 1, 4 }; String hdfsFile = "/hdfs_test/" + args[0] + "/1.in"; Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); Path hdfsFilePath = new Path(hdfsFile); for (int i = 0; i < 5; i++) { // blockSize for (int j = 0; j < 2; j++) { // replication OutputStream os = fs.create(hdfsFilePath, true, bufferSize, replication[j], blockSize[i]); byte[] buf = new byte[bufferSize]; for (int m = 0; m < bufferSize; m += 4) { buf[m] = (byte) m; } double numBufPerFile = fileSize / (double) bufferSize; for (double m = 0.0; m < numBufPerFile; m++) { os.write(buf); } os.close(); String cmdStr = "/usr/local/hadoop/bin/hdfs cacheadmin -addDirective -path " + hdfsFile + " -pool hdfs_test"; Process p = Runtime.getRuntime().exec(cmdStr); p.waitFor(); String cmdOutLine = ""; StringBuffer cmdOut = new 
StringBuffer(); BufferedReader cmdOutReader = new BufferedReader(new InputStreamReader(p.getInputStream())); while ((cmdOutLine = cmdOutReader.readLine()) != null) { cmdOut.append(cmdOutLine + "\n"); } // System.out.println (cmdOut.toString()); long avg = 0, min = Long.MAX_VALUE, max = Long.MIN_VALUE; for (int k = 0; k < numIters; k++) { FSDataInputStream in = fs.open(hdfsFilePath); ByteBuffer bbuf = null; ElasticByteBufferPool ebbp = new ElasticByteBufferPool(); long startTime = System.currentTimeMillis(); while ((bbuf = in.read(ebbp, bufferSize, EnumSet.of(ReadOption.SKIP_CHECKSUMS))) != null) { in.releaseBuffer(bbuf); } long endTime = System.currentTimeMillis(); in.close(); long duration = (endTime - startTime); avg += duration; if (duration < min) { min = duration; } if (duration > max) { max = duration; } } // write result to output double avgBW = fileSizeInMB * 1000.0 * (double) numIters / (double) avg; avgPW.print(avgBW); avgPW.print("\t"); double minBW = fileSizeInMB * 1000.0 / (double) max; minPW.print(minBW); minPW.print("\t"); double maxBW = fileSizeInMB * 1000.0 / (double) min; maxPW.print(maxBW); maxPW.print("\t"); cmdStr = "/usr/local/hadoop/bin/hdfs cacheadmin -removeDirectives -path " + hdfsFile; p = Runtime.getRuntime().exec(cmdStr); p.waitFor(); cmdOutLine = ""; cmdOut.setLength(0); cmdOutReader = new BufferedReader(new InputStreamReader(p.getInputStream())); while ((cmdOutLine = cmdOutReader.readLine()) != null) { cmdOut.append(cmdOutLine + "\n"); } // System.out.println (cmdOut.toString()); fs.delete(hdfsFilePath, true); } avgPW.println(); minPW.println(); maxPW.println(); } avgPW.close(); minPW.close(); maxPW.close(); return 0; }
From source file:DataHBase.java
License:Open Source License
public void run(HashMap<String, String> config) throws Exception { //clean the former output if it exists Path p = new Path(config.get("hdfs_output_dir")); FileSystem fs = FileSystem.get(new Configuration()); if (fs.exists(p)) { fs.delete(p, true); }//from w ww . j av a 2s.com String junction = config.get("what_to_find"); // the name of the junction String date1 = config.get("date1"); String date2 = config.get("date2"); //date1 and date2 can be of a format YYYY-MM-DD if (date1.length() == 10) date1 = date1 + " 00:00:00"; if (date2.length() == 10) date2 = date2 + " 23:59:59"; System.out.println("Looking for data of " + junction + ": " + date1 + " - " + date2); //create timestamps (considering time zone!) to limit data SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); sdf.setTimeZone(TimeZone.getDefault()); Long time1 = sdf.parse(date1).getTime(); Long time2 = sdf.parse(date2).getTime(); //run a job Configuration conf = HBaseConfiguration.create(); conf.set("mapreduce.output.textoutputformat.separator", ","); //set comma as a delimiter Job job = new Job(conf, "Retrieve data from hbase"); job.setJarByClass(DataHBase.class); Scan scan = new Scan(); scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs scan.setCacheBlocks(false); // don't set to true for MR jobs scan.setMaxVersions(1); scan.setTimeRange(time1, time2); //take a day we are interested in //set a filter for a junction name if (!junction.equals("")) { SingleColumnValueFilter filter = new SingleColumnValueFilter(Bytes.toBytes("data"), Bytes.toBytes("location_name"), CompareOp.EQUAL, Bytes.toBytes(junction)); scan.setFilter(filter); } //add the specific columns to the output to limit the amount of data scan.addFamily(Bytes.toBytes("data")); TableMapReduceUtil.initTableMapperJob(config.get("hbase_table"), // input HBase table name scan, // Scan instance to control CF and attribute selection TableMap.class, // mapper Text.class, // mapper output key Text.class, 
// mapper output value job); job.setReducerClass(Reduce.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileOutputFormat.setOutputPath(job, new Path(config.get("hdfs_output_dir"))); job.waitForCompletion(true); }
From source file:PiEstimator.java
License:Apache License
/** * Run a map/reduce job for estimating Pi. * * @return the estimated value of Pi/* ww w .j av a 2s.c om*/ */ public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException { // setup job conf jobConf.setJobName(PiEstimator.class.getSimpleName()); jobConf.setInputFormat(SequenceFileInputFormat.class); jobConf.setOutputKeyClass(BooleanWritable.class); jobConf.setOutputValueClass(LongWritable.class); jobConf.setOutputFormat(SequenceFileOutputFormat.class); jobConf.setMapperClass(PiMapper.class); jobConf.setNumMapTasks(numMaps); jobConf.setReducerClass(PiReducer.class); jobConf.setNumReduceTasks(1); // turn off speculative execution, because DFS doesn't handle // multiple writers to the same file. jobConf.setSpeculativeExecution(false); // setup input/output directories final Path inDir = new Path(TMP_DIR, "in"); final Path outDir = new Path(TMP_DIR, "out"); FileInputFormat.setInputPaths(jobConf, inDir); FileOutputFormat.setOutputPath(jobConf, outDir); final FileSystem fs = FileSystem.get(jobConf); if (fs.exists(TMP_DIR)) { throw new IOException( "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists. 
Please remove it first."); } if (!fs.mkdirs(inDir)) { throw new IOException("Cannot create input directory " + inDir); } try { // generate an input file for each map task for (int i = 0; i < numMaps; ++i) { final Path file = new Path(inDir, "part" + i); final LongWritable offset = new LongWritable(i * numPoints); final LongWritable size = new LongWritable(numPoints); final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file, LongWritable.class, LongWritable.class, CompressionType.NONE); try { writer.append(offset, size); } finally { writer.close(); } System.out.println("Wrote input for Map #" + i); } // start a map/reduce job System.out.println("Starting Job"); final long startTime = System.currentTimeMillis(); JobClient.runJob(jobConf); final double duration = (System.currentTimeMillis() - startTime) / 1000.0; System.out.println("Job Finished in " + duration + " seconds"); // read outputs Path inFile = new Path(outDir, "reduce-out"); LongWritable numInside = new LongWritable(); LongWritable numOutside = new LongWritable(); SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf); try { reader.next(numInside, numOutside); } finally { reader.close(); } // compute estimated value return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get())) .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints)); } finally { fs.delete(TMP_DIR, true); } }
From source file:TestBytesBloomFilter.java
License:Apache License
public void testSetSanity() throws IOException { FileSystem local = FileSystem.getLocal(new Configuration()); BytesBloomFilter set = new BytesBloomFilter(1000000, 4); byte[] arr1 = new byte[] { 1, 2, 3, 4, 5, 6, 7 }; byte[] arr2 = new byte[] { 11, 12, 5, -2 }; byte[] arr3 = new byte[] { 3, 4, 5 }; set.add(arr1);/* ww w .j a v a 2 s . co m*/ set.add(arr2); for (byte i = 0; i < (byte) 125; i++) { set.add(new byte[] { i }); } assertTrue(set.mayContain(arr1)); assertTrue(set.mayContain(arr2)); for (byte i = 0; i < (byte) 125; i++) { assertTrue(set.mayContain(new byte[] { i })); } //technically this could be an invalid statement, but the probability is low and this is a sanity check assertFalse(set.mayContain(arr3)); //now test that we can write and read from file just fine local.delete(new Path("/tmp/filter-test.bloomfilter"), false); DataOutputStream os = new DataOutputStream(new FileOutputStream("/tmp/filter-test.bloomfilter")); set.write(os); os.close(); BytesBloomFilter set2 = new BytesBloomFilter(); DataInputStream is = new DataInputStream(new FileInputStream("/tmp/filter-test.bloomfilter")); set2.readFields(is); assertTrue(set2.mayContain(arr1)); assertTrue(set2.mayContain(arr2)); for (byte i = 0; i < (byte) 125; i++) { assertTrue(set2.mayContain(new byte[] { i })); } //technically this could be an invalid statement, but the probability is low and this is a sanity check assertFalse(set2.mayContain(arr3)); }