List of usage examples for org.apache.hadoop.mapred.JobConf#set

public void set(String name, String value)
Set the value of the name property.
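A minimal sketch of the call in isolation (the property name and values below are arbitrary placeholders, not keys used by the examples that follow):

import org.apache.hadoop.mapred.JobConf;

public class JobConfSetExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // set(name, value) stores an arbitrary string property in the job configuration.
        conf.set("example.property.name", "example-value");
        // Properties are read back with get(); the two-argument form supplies a default.
        System.out.println(conf.get("example.property.name", "default-value"));
    }
}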
From source file: eu.stratosphere.myriad.driver.hadoop.MyriadInputFormat.java
License: Apache License

public static void setOutputBase(JobConf conf, String outputBase) {
    conf.set("mapred.myriad.dgen.output.base", outputBase);
}
From source file: eu.stratosphere.myriad.driver.hadoop.MyriadInputFormat.java
License: Apache License

public static void setDatasetID(JobConf conf, String datasetID) {
    conf.set("mapred.myriad.dgen.dataset.id", datasetID);
}
From source file: FormatStorage1.MergeFileUtil.java
License: Open Source License

public static void run(String inputdir, String outputdir, Configuration conf) throws IOException {
    JobConf job = new JobConf(conf);
    job.setJobName("MergeFileUtil");
    job.setJarByClass(MergeFileUtil.class);
    FileSystem fs = FileSystem.get(job);
    if (fs.exists(new Path(outputdir))) {
        throw new IOException("outputdir: " + outputdir + " exist!!!");
    }
    FileStatus[] fss = fs.listStatus(new Path(inputdir));
    if (fss == null || fss.length <= 0) {
        throw new IOException("no input files");
    }
    IFormatDataFile ifdf = new IFormatDataFile(job);
    ifdf.open(fss[0].getPath().toString());
    job.set("ifdf.head.info", ifdf.fileInfo().head().toStr());
    ifdf.close();
    long wholesize = 0;
    for (FileStatus status : fss) {
        wholesize += status.getLen();
    }
    job.setNumReduceTasks(0);
    FileInputFormat.setInputPaths(job, inputdir);
    FileOutputFormat.setOutputPath(job, new Path(outputdir));
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(IRecord.class);
    job.setMapperClass(MergeMap.class);
    job.setInputFormat(CombineFormatStorageFileInputFormat.class);
    job.setOutputFormat(MergeIFormatOutputFormat.class);
    JobClient jc = new JobClient(job);
    RunningJob rjob = jc.submitJob(job);
    try {
        String lastReport = "";
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS");
        long reportTime = System.currentTimeMillis();
        long maxReportInterval = 3 * 1000;
        while (!rjob.isComplete()) {
            Thread.sleep(1000);
            int mapProgress = Math.round(rjob.mapProgress() * 100);
            int reduceProgress = Math.round(rjob.reduceProgress() * 100);
            String report = " map = " + mapProgress + "%, reduce = " + reduceProgress + "%";
            if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) {
                String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
                System.err.println(output);
                lastReport = report;
                reportTime = System.currentTimeMillis();
            }
        }
        LOG.info(rjob.getJobState());
    } catch (IOException e1) {
        e1.printStackTrace();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
From source file: FormatStorage1.MergeFileUtil.java
License: Open Source License

public static void runold(String inputdir, String outputdir, Configuration conf) throws IOException {
    JobConf job = new JobConf(conf);
    job.setJobName("MergeFileUtil");
    job.setJarByClass(MergeFileUtil.class);
    FileSystem fs = FileSystem.get(job);
    if (fs.exists(new Path(outputdir))) {
        throw new IOException("outputdir: " + outputdir + " exist!!!");
    }
    FileStatus[] fss = fs.listStatus(new Path(inputdir));
    if (fss == null || fss.length <= 0) {
        throw new IOException("no input files");
    }
    for (FileStatus status : fss) {
        if (status.isDir()) {
            throw new IOException("!!!input dir contains directory:\t" + status.getPath().toString());
        }
    }
    IFormatDataFile ifdf = new IFormatDataFile(job);
    ifdf.open(fss[0].getPath().toString());
    job.set("ifdf.head.info", ifdf.fileInfo().head().toStr());
    ifdf.close();
    long wholesize = 0;
    for (FileStatus status : fss) {
        wholesize += status.getLen();
    }
    long fl = 512 * 1024 * 1024;
    int reduces = (int) (wholesize / fl + 1);
    job.setNumReduceTasks(reduces);
    FileInputFormat.setInputPaths(job, inputdir);
    FileOutputFormat.setOutputPath(job, new Path(outputdir));
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(IRecord.class);
    job.setMapperClass(MergeMap.class);
    job.setReducerClass(MergeReduce.class);
    job.setInputFormat(MergeIFormatInputFormat.class);
    job.setOutputFormat(MergeIFormatOutputFormat.class);
    JobClient jc = new JobClient(job);
    RunningJob rjob = jc.submitJob(job);
    try {
        String lastReport = "";
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS");
        long reportTime = System.currentTimeMillis();
        long maxReportInterval = 3 * 1000;
        while (!rjob.isComplete()) {
            Thread.sleep(1000);
            int mapProgress = Math.round(rjob.mapProgress() * 100);
            int reduceProgress = Math.round(rjob.reduceProgress() * 100);
            String report = " map = " + mapProgress + "%, reduce = " + reduceProgress + "%";
            if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) {
                String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
                System.err.println(output);
                lastReport = report;
                reportTime = System.currentTimeMillis();
            }
        }
        LOG.info(rjob.getJobState());
    } catch (IOException e1) {
        e1.printStackTrace();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
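Note the design difference between the two variants: run() is map-only (setNumReduceTasks(0)) and leaves packing the input files into splits to CombineFormatStorageFileInputFormat, whereas runold() sizes a reduce phase from the total input, allocating roughly one reducer per 512 MB. For example, 3 GB of input gives (3 * 1024) / 512 + 1 = 7 reducers.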
From source file: FormatStorage1.MergeFileUtil1.java
License: Open Source License

public static void run(String inputdir, String outputdir, Configuration conf) throws IOException {
    JobConf job = new JobConf(conf);
    job.setJobName("MergeFileUtil1");
    job.setJarByClass(MergeFileUtil1.class);
    FileSystem fs = FileSystem.get(job);
    if (fs.exists(new Path(outputdir))) {
        throw new IOException("outputdir: " + outputdir + " exist!!!");
    }
    FileStatus[] fss = fs.listStatus(new Path(inputdir));
    if (fss == null || fss.length <= 0) {
        throw new IOException("no input files");
    }
    IFormatDataFile ifdf = new IFormatDataFile(job);
    ifdf.open(fss[0].getPath().toString());
    job.set("ifdf.head.info", ifdf.fileInfo().head().toStr());
    ifdf.close();
    long wholesize = 0;
    for (FileStatus status : fss) {
        wholesize += status.getLen();
    }
    job.setNumReduceTasks(0);
    FileInputFormat.setInputPaths(job, inputdir);
    FileOutputFormat.setOutputPath(job, new Path(outputdir));
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(IRecord.class);
    job.setMapperClass(MergeMap.class);
    job.setInputFormat(CombineFormatStorageFileInputFormat.class);
    job.setOutputFormat(MergeIFormatOutputFormat1.class);
    JobClient jc = new JobClient(job);
    RunningJob rjob = jc.submitJob(job);
    try {
        String lastReport = "";
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS");
        long reportTime = System.currentTimeMillis();
        long maxReportInterval = 3 * 1000;
        while (!rjob.isComplete()) {
            Thread.sleep(1000);
            int mapProgress = Math.round(rjob.mapProgress() * 100);
            int reduceProgress = Math.round(rjob.reduceProgress() * 100);
            String report = " map = " + mapProgress + "%, reduce = " + reduceProgress + "%";
            if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) {
                String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
                System.err.println(output);
                lastReport = report;
                reportTime = System.currentTimeMillis();
            }
        }
        LOG.info(rjob.getJobState());
    } catch (IOException e1) {
        e1.printStackTrace();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
From source file: FormatStorage1.MergeFileUtil1.java
License: Open Source License

public static void runold(String inputdir, String outputdir, Configuration conf) throws IOException {
    JobConf job = new JobConf(conf);
    job.setJobName("MergeFileUtil");
    job.setJarByClass(MergeFileUtil1.class);
    FileSystem fs = FileSystem.get(job);
    if (fs.exists(new Path(outputdir))) {
        throw new IOException("outputdir: " + outputdir + " exist!!!");
    }
    FileStatus[] fss = fs.listStatus(new Path(inputdir));
    if (fss == null || fss.length <= 0) {
        throw new IOException("no input files");
    }
    for (FileStatus status : fss) {
        if (status.isDir()) {
            throw new IOException("!!!input dir contains directory:\t" + status.getPath().toString());
        }
    }
    IFormatDataFile ifdf = new IFormatDataFile(job);
    ifdf.open(fss[0].getPath().toString());
    job.set("ifdf.head.info", ifdf.fileInfo().head().toStr());
    ifdf.close();
    long wholesize = 0;
    for (FileStatus status : fss) {
        wholesize += status.getLen();
    }
    long fl = 512 * 1024 * 1024;
    int reduces = (int) (wholesize / fl + 1);
    job.setNumReduceTasks(reduces);
    FileInputFormat.setInputPaths(job, inputdir);
    FileOutputFormat.setOutputPath(job, new Path(outputdir));
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(IRecord.class);
    job.setMapperClass(MergeMap.class);
    job.setReducerClass(MergeReduce.class);
    job.setInputFormat(MergeIFormatInputFormat.class);
    job.setOutputFormat(MergeIFormatOutputFormat.class);
    JobClient jc = new JobClient(job);
    RunningJob rjob = jc.submitJob(job);
    try {
        String lastReport = "";
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS");
        long reportTime = System.currentTimeMillis();
        long maxReportInterval = 3 * 1000;
        while (!rjob.isComplete()) {
            Thread.sleep(1000);
            int mapProgress = Math.round(rjob.mapProgress() * 100);
            int reduceProgress = Math.round(rjob.reduceProgress() * 100);
            String report = " map = " + mapProgress + "%, reduce = " + reduceProgress + "%";
            if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) {
                String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
                System.err.println(output);
                lastReport = report;
                reportTime = System.currentTimeMillis();
            }
        }
        LOG.info(rjob.getJobState());
    } catch (IOException e1) {
        e1.printStackTrace();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
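MergeFileUtil1 mirrors MergeFileUtil almost line for line: its map-only run() differs only in writing through MergeIFormatOutputFormat1, and its runold() is identical, down to reusing the job name "MergeFileUtil".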
From source file: fr.ens.biologie.genomique.eoulsan.modules.mgmt.hadoop.DistCp.java
License: LGPL

/**
 * Driver to copy srcPath to destPath depending on required protocol.
 * @param args arguments
 */
static void copy(final Configuration conf, final Arguments args) throws IOException {
    getLogger().info("srcPaths=" + args.srcs);
    getLogger().info("destPath=" + args.dst);
    checkSrcPath(conf, args.srcs);
    JobConf job = createJobConf(conf);
    if (args.preservedAttributes != null) {
        job.set(PRESERVE_STATUS_LABEL, args.preservedAttributes);
    }
    if (args.mapredSslConf != null) {
        job.set("dfs.client.https.keystore.resource", args.mapredSslConf);
    }
    // Initialize the mapper
    try {
        setup(conf, job, args);
        JobClient.runJob(job);
        finalize(conf, job, args.dst, args.preservedAttributes);
    } finally {
        // delete tmp
        fullyDelete(job.get(TMP_DIR_LABEL), job);
        // delete jobDirectory
        fullyDelete(job.get(JOB_DIR_LABEL), job);
    }
}
From source file: fr.ens.biologie.genomique.eoulsan.modules.mgmt.hadoop.DistCp.java
License: LGPL

/**
 * Initialize DFSCopyFileMapper specific job-configuration.
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 */
private static void setup(final Configuration conf, final JobConf jobConf, final Arguments args)
        throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

    // set boolean values
    final boolean update = args.flags.contains(Options.UPDATE);
    final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
    jobConf.setBoolean(Options.UPDATE.propertyname, update);
    jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
    jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
        args.flags.contains(Options.IGNORE_READ_FAILURES));
    jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname,
        args.flags.contains(Options.PRESERVE_STATUS));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path jobDirectory = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    long maxBytesPerMap = conf.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP);

    FileSystem dstfs = args.dst.getFileSystem(conf);
    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_distcp_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (null == parent) {
                // If dst is '/' on S3, it might not exist yet, but dst.getParent()
                // will return null. In this case, use '/' as its own parent to prevent
                // NPE errors below.
                parent = args.dst;
            }
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist,
        LongWritable.class, FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist,
        Text.class, Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist,
        Text.class, FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory OR we're updating/overwriting
    // the contents of the destination directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {
                        // skip file if the src and the dst files are the same.
                        skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst));
                        // skip file if it exceed file limit or size limit
                        skipfile |= fileCount == args.filelimit
                            || byteCount + child.getLen() > args.sizelimit;

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            // if (LOG.isTraceEnabled()) {
                            //     LOG.trace("adding file " + child.getPath());
                            // }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > maxBytesPerMap) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                            new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        getLogger().info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create" + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_distcp_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    if (dststatus != null && args.flags.contains(Options.DELETE)) {
        deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
    }

    Path tmpDir = new Path(
        (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
        "_distcp_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());

    // Explicitly create the tmpDir to ensure that it can be cleaned
    // up by fullyDelete() later.
    tmpDir.getFileSystem(conf).mkdirs(tmpDir);

    getLogger().info("srcCount=" + srcCount);
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(byteCount, jobConf);
}
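The sync() calls are what make the copy plan splittable: after every SYNC_FILE_MAX files or maxBytesPerMap bytes, a sync marker is written into the _distcp_src_files SequenceFile, so the file list can later be cut into per-map chunks at marker boundaries; setMapCount(byteCount, jobConf) then decides how many map tasks to ask for.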
From source file: gaffer.accumulo.inputformat.TestElementInputFormat.java
License: Apache License

public void testOnlyGetEdgeOnce(InputFormatType type) throws Exception {
    String INSTANCE_NAME = "A";
    String tableName = "testOnlyGetEdgeOnce" + type.name();
    MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
    Connector connector = mockInstance.getConnector("root", new PasswordToken(""));
    connector.securityOperations().changeUserAuthorizations("root",
        new Authorizations(visibilityString1, visibilityString2));
    TableUtils.createTable(connector, tableName, 30 * 24 * 60 * 60 * 1000L);
    Set<GraphElementWithStatistics> data = getData();
    AccumuloBackedGraph graph = new AccumuloBackedGraph(connector, tableName);
    graph.setAuthorizations(new Authorizations(visibilityString1, visibilityString2));
    graph.addGraphElementsWithStatistics(data);

    // Set up local conf
    JobConf conf = new JobConf();
    conf.set("fs.default.name", "file:///");
    conf.set("mapred.job.tracker", "local");
    FileSystem fs = FileSystem.getLocal(conf);
    Driver driver = new Driver(type);
    driver.setConf(conf);

    // Create output folder for MapReduce job
    String outputDir = tempFolder.newFolder().getAbsolutePath();
    FileUtils.deleteDirectory(outputDir);

    // Write properties file
    String accumuloPropertiesFilename = tempFolder.newFile().getAbsolutePath();
    BufferedWriter bw = new BufferedWriter(new FileWriter(accumuloPropertiesFilename));
    bw.write("accumulo.instance=" + INSTANCE_NAME + "\n");
    bw.write("accumulo.zookeepers=" + AccumuloConfig.MOCK_ZOOKEEPERS + "\n");
    bw.write("accumulo.table=" + tableName + "\n");
    bw.write("accumulo.user=root\n");
    bw.write("accumulo.password=\n");
    bw.close();

    // Run job
    assertEquals(0, driver.run(new String[] { accumuloPropertiesFilename, outputDir }));

    // Read results in
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(outputDir + "/part-m-00000"), conf);
    GraphElement element = new GraphElement();
    SetOfStatistics statistics = new SetOfStatistics();
    int count = 0;
    Set<GraphElementWithStatistics> results = new HashSet<GraphElementWithStatistics>();
    while (reader.next(element, statistics)) {
        count++;
        results.add(new GraphElementWithStatistics(element.clone(), statistics.clone()));
    }
    reader.close();

    // There should be 3 edges and 2 entities - 5 in total.
    // Note need to check count (and not just compare sets) as the point of the test is to
    // ensure we don't get the same edge back twice.
    assertEquals(5, count);
    Set<GraphElementWithStatistics> expectedResults = new HashSet<GraphElementWithStatistics>();
    Edge edge1 = new Edge("customer", "A", "product", "P", "purchase", "instore", true,
        visibilityString1 + "&" + visibilityString2, sevenDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics1 = new SetOfStatistics();
    statistics1.addStatistic("count", new Count(20));
    statistics1.addStatistic("anotherCount", new Count(1000000));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge1), statistics1));
    Edge edge2 = new Edge("customer", "A", "product", "P", "purchase", "instore", false,
        visibilityString2, sixDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics2 = new SetOfStatistics();
    statistics2.addStatistic("countSomething", new Count(123456));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge2), statistics2));
    Edge edge5 = new Edge("customer", "B", "product", "Q", "purchase", "instore", true,
        visibilityString2, sixDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics5 = new SetOfStatistics();
    statistics5.addStatistic("count", new Count(99));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge5), statistics5));
    Entity entity1 = new Entity("customer", "A", "purchase", "count", visibilityString1,
        sevenDaysBefore, sixDaysBefore);
    SetOfStatistics statisticsEntity1 = new SetOfStatistics();
    statisticsEntity1.addStatistic("entity_count", new Count(1000000));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(entity1), statisticsEntity1));
    Entity entity2 = new Entity("product", "R", "purchase", "count", visibilityString1,
        sevenDaysBefore, sixDaysBefore);
    SetOfStatistics statisticsEntity2 = new SetOfStatistics();
    statisticsEntity2.addStatistic("entity_count", new Count(12345));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(entity2), statisticsEntity2));
    assertEquals(results, expectedResults);
}
From source file: gaffer.accumulo.inputformat.TestElementInputFormat.java
License: Apache License

public void testAuthsAreEnforced(InputFormatType type) throws Exception {
    String INSTANCE_NAME = "A";
    String tableName = "testAuthsAreEnforced" + type.name();
    MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
    Connector connector = mockInstance.getConnector("root", new PasswordToken(""));
    // Only give them permission to see visibilityString1
    connector.securityOperations().changeUserAuthorizations("root", new Authorizations(visibilityString1));
    TableUtils.createTable(connector, tableName, 30 * 24 * 60 * 60 * 1000L);
    Set<GraphElementWithStatistics> data = getData();
    AccumuloBackedGraph graph = new AccumuloBackedGraph(connector, tableName);
    graph.addGraphElementsWithStatistics(data);

    // Set up local conf
    JobConf conf = new JobConf();
    conf.set("fs.default.name", "file:///");
    conf.set("mapred.job.tracker", "local");
    FileSystem fs = FileSystem.getLocal(conf);
    Driver driver = new Driver(type);
    driver.setConf(conf);

    // Create output folder for MapReduce job
    String outputDir = tempFolder.newFolder().getAbsolutePath();
    FileUtils.deleteDirectory(outputDir);

    // Write properties file
    String accumuloPropertiesFilename = tempFolder.newFile().getAbsolutePath();
    BufferedWriter bw = new BufferedWriter(new FileWriter(accumuloPropertiesFilename));
    bw.write("accumulo.instance=" + INSTANCE_NAME + "\n");
    bw.write("accumulo.zookeepers=" + AccumuloConfig.MOCK_ZOOKEEPERS + "\n");
    bw.write("accumulo.table=" + tableName + "\n");
    bw.write("accumulo.user=root\n");
    bw.write("accumulo.password=\n");
    bw.close();

    // Run job
    assertEquals(0, driver.run(new String[] { accumuloPropertiesFilename, outputDir }));

    // Read results in
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(outputDir + "/part-m-00000"), conf);
    GraphElement element = new GraphElement();
    SetOfStatistics statistics = new SetOfStatistics();
    Set<GraphElementWithStatistics> results = new HashSet<GraphElementWithStatistics>();
    int count = 0;
    while (reader.next(element, statistics)) {
        results.add(new GraphElementWithStatistics(element.clone(), statistics.clone()));
        count++;
    }
    reader.close();

    // There should be 1 edge and 2 entities - 3 in total
    assertEquals(3, count);
    Set<GraphElementWithStatistics> expectedResults = new HashSet<GraphElementWithStatistics>();
    Edge edge1 = new Edge("customer", "A", "product", "P", "purchase", "instore", true,
        visibilityString1, sevenDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics1 = new SetOfStatistics();
    statistics1.addStatistic("count", new Count(3));
    statistics1.addStatistic("anotherCount", new Count(1000000));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge1), statistics1));
    Entity entity1 = new Entity("customer", "A", "purchase", "count", visibilityString1,
        sevenDaysBefore, sixDaysBefore);
    SetOfStatistics statisticsEntity1 = new SetOfStatistics();
    statisticsEntity1.addStatistic("entity_count", new Count(1000000));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(entity1), statisticsEntity1));
    Entity entity2 = new Entity("product", "R", "purchase", "count", visibilityString1,
        sevenDaysBefore, sixDaysBefore);
    SetOfStatistics statisticsEntity2 = new SetOfStatistics();
    statisticsEntity2.addStatistic("entity_count", new Count(12345));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(entity2), statisticsEntity2));
    assertEquals(results, expectedResults);
}
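Contrast this with testOnlyGetEdgeOnce above: the input data is identical, but because "root" is granted only visibilityString1 here, Accumulo filters out every element protected by visibilityString2 on the server side, and the expected result count drops from 5 to 3.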