List of usage examples for org.apache.hadoop.fs.FileSystem#getLocal
public static LocalFileSystem getLocal(Configuration conf) throws IOException
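Before the project examples below, here is a minimal, self-contained sketch of the typical pattern: get the local file:// file system from a Configuration and round-trip a small file through it. This sketch is not taken from any of the projects below; the class name and the /tmp path are illustrative only.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;

public class GetLocalExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // getLocal returns the LocalFileSystem for this configuration
        LocalFileSystem fs = FileSystem.getLocal(conf);
        Path p = new Path("/tmp/getlocal-example.txt"); // illustrative path
        try (FSDataOutputStream out = fs.create(p, true)) { // true = overwrite if present
            out.writeUTF("hello");
        }
        try (FSDataInputStream in = fs.open(p)) {
            System.out.println(in.readUTF()); // prints "hello"
        }
        fs.delete(p, false); // clean up; false = non-recursive
    }
}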
From source file: edu.umn.cs.spatialHadoop.operations.CatUnion.java
License: Open Source License
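Reads shape-to-category mappings from a file on the local file system using a LineRecordReader, then converts the category names to numeric ids.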
/**
 * Read all categories from the category file
 * @param categoryFile
 * @param idToCategory
 * @throws IOException
 */
private static void readCategories(Path categoryFile, Map<Integer, Integer> idToCategory)
        throws IOException {
    Map<Integer, String> idToCatName = new HashMap<Integer, String>();
    FileSystem fsCategory = FileSystem.getLocal(new Configuration());
    long categoryFileSize = fsCategory.getFileStatus(categoryFile).getLen();
    if (categoryFileSize > 1024 * 1024)
        LOG.warn("Category file size is big: " + categoryFileSize);
    InputStream inCategory = fsCategory.open(categoryFile);
    LineRecordReader lineReader = new LineRecordReader(inCategory, 0, categoryFileSize, new Configuration());
    LongWritable lineOffset = lineReader.createKey();
    Text line = lineReader.createValue();

    Set<String> catNames = new TreeSet<String>();

    while (lineReader.next(lineOffset, line)) {
        int shape_id = TextSerializerHelper.consumeInt(line, ',');
        String cat_name = line.toString();
        catNames.add(cat_name);
        idToCatName.put(shape_id, cat_name);
    }

    lineReader.close();

    // Change category names to numbers
    Map<String, Integer> cat_name_to_id = new HashMap<String, Integer>();
    int cat_id = 0;
    for (String cat_name : catNames) {
        cat_name_to_id.put(cat_name, cat_id++);
    }

    for (Map.Entry<Integer, String> entry : idToCatName.entrySet()) {
        idToCategory.put(entry.getKey(), cat_name_to_id.get(entry.getValue()));
    }
}
From source file: edu.utsa.sifter.som.MainSOM.java
License: Apache License
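Gets the local file system in order to write document term vectors to a SequenceFile and read them back while building a SOM index.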
public static void main(String[] args)
        throws IOException, InterruptedException, CorruptIndexException, NoSuchFieldException {
    final File evPath = new File(args[0]);
    final File idxPath = new File(evPath, "primary-idx");

    final long begin = System.currentTimeMillis();
    // createIndex(path);
    final Path outPath = new Path(new Path(evPath.toString()), "docVectors.seq");
    final Configuration hadoopConf = new Configuration();
    final LocalFileSystem fs = FileSystem.getLocal(hadoopConf);
    final SequenceFile.Writer file = SequenceFile.createWriter(fs, hadoopConf, outPath, LongWritable.class,
            IntArrayWritable.class);

    final DirectoryReader dirReader = DirectoryReader.open(FSDirectory.open(idxPath));

    final SifterConfig conf = new SifterConfig();
    InputStream xmlProps = null;
    try {
        xmlProps = new FileInputStream("sifter_props.xml");
    } catch (FileNotFoundException ex) {
        ; // swallow exception
    }
    conf.loadFromXML(xmlProps); // safe with null

    final MainSOM builder = new MainSOM(dirReader, conf);
    IndexWriter writer = null;
    FileOutputStream somJSFile = null;
    try {
        builder.initTerms();
        builder.writeVectors(file);
        file.close();

        final SequenceFile.Reader seqRdr = new SequenceFile.Reader(fs, outPath, hadoopConf);
        writer = builder.createWriter(new File(evPath, "som-idx"), conf);

        somJSFile = new FileOutputStream(new File(evPath, "som.js"));
        final CharsetEncoder utf8 = Charset.forName("UTF-8").newEncoder();
        utf8.onMalformedInput(CodingErrorAction.IGNORE);
        final Writer somJS = new BufferedWriter(new OutputStreamWriter(somJSFile, utf8));

        builder.makeSOM(conf, seqRdr, writer, somJS);
        writer.forceMerge(1);
    } catch (Exception e) {
        e.printStackTrace(System.err);
    } finally {
        file.close();
        if (writer != null) {
            writer.close();
        }
        if (somJSFile != null) {
            somJSFile.close();
        }
        dirReader.close();

        System.out.println("Number of docs written: " + builder.getNumDocs());
        System.out.println("Number of outlier docs: " + builder.getNumOutliers());
        System.out.println("Total term dimensions: " + builder.getTermsMap().size());
        System.out.println("Max terms per doc: " + builder.getMaxDocTerms());
        System.out.println("Avg terms per doc: " + builder.getAvgDocTerms());
        System.out.println("Duration: " + ((System.currentTimeMillis() - begin) / 1000) + " seconds");

        conf.storeToXML(new FileOutputStream("sifter_props.xml"));
    }
}
From source file: example.TestLineRecordReader.java
License: Apache License
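Writes test input data to a file on the local file system and returns the file's path.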
/**
 * Writes the input test file
 *
 * @param conf
 * @return Path of the file created
 * @throws IOException
 */
private Path createInputFile(Configuration conf, String data) throws IOException {
    FileSystem localFs = FileSystem.getLocal(conf);
    Path file = new Path(inputDir, "test.txt");
    Writer writer = new OutputStreamWriter(localFs.create(file));
    try {
        writer.write(data);
    } finally {
        writer.close();
    }
    return file;
}
From source file: fi.aalto.seqpig.filter.MappabilityFilter.java
License: Open Source License
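A Pig UDF constructor that reads a SAM file header and a region file from the local file system, where the distributed cache has placed them.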
public MappabilityFilter(String mappability_threshold_s) throws Exception {
    double mappability_threshold = Double.parseDouble(mappability_threshold_s);

    Configuration conf = UDFContext.getUDFContext().getJobConf();

    // see https://issues.apache.org/jira/browse/PIG-2576
    if (conf == null || conf.get("mapred.task.id") == null) {
        // we are running on the frontend
        //decodeSAMFileHeader();
        return;
    }

    if (samfileheader == null) {
        this.samfileheader = "";
        try {
            FileSystem fs = FileSystem.getLocal(conf);
            BufferedReader headerin = new BufferedReader(
                    new InputStreamReader(fs.open(new Path("input_asciiheader"))));

            while (true) {
                String str = headerin.readLine();
                if (str == null)
                    break;
                else
                    this.samfileheader += str + "\n";
            }

            headerin.close();

            if (this.samfileheader.equals("")) {
                int errCode = 0;
                String errMsg = "MappabilityFilter: unable to read samfileheader from distributed cache!";
                throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT);
            } else {
                String msg = "successfully read samfileheader";
                warn(msg, PigWarning.UDF_WARNING_1);
                this.samfileheader_decoded = getSAMFileHeader();
            }
        } catch (Exception e) {
            String errMsg = "MappabilityFilter: ERROR: could not read BAM header: " + e.toString();
            int errCode = 0;
            throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT);
        }
    }

    if (regions == null) {
        regions = new TreeMap<RegionEntry, Boolean>();
        try {
            FileSystem fs = FileSystem.getLocal(conf);

            // this was required before creating symlinks to files in the distributed cache
            /*
            Path[] cacheFiles = DistributedCache.getLocalCacheFiles(conf);
            for (Path cachePath : cacheFiles) {
                String msg = "found cache file: " + cachePath.getName();
                warn(msg, PigWarning.UDF_WARNING_1);
                if (cachePath.getName().equals("input_regionfile")) {
                    BufferedReader regionin = new BufferedReader(new InputStreamReader(fs.open(cachePath)));
            BufferedReader regionin = new BufferedReader(new InputStreamReader(
                    fs.open(new Path(fs.getHomeDirectory(), new Path(regionfilename)))));
            */

            BufferedReader regionin = new BufferedReader(
                    new InputStreamReader(fs.open(new Path("./input_regionfile"))));

            regionin.readLine(); // throw away first line that describes columns

            while (true) {
                String str = regionin.readLine();
                if (str == null)
                    break;
                else {
                    String[] region_data = str.split("\t");

                    if (region_data[0] == null || region_data[1] == null || region_data[2] == null
                            || region_data[3] == null) {
                        int errCode = 0;
                        String errMsg = "MappabilityFilter: Error while reading region file input";
                        throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT);
                    }

                    if (Double.parseDouble(region_data[3]) >= mappability_threshold) {
                        int start = parseCoordinate(region_data[1]);
                        int end = parseCoordinate(region_data[2]);

                        if (end < start) {
                            int errCode = 0;
                            String errMsg = "MappabilityFilter: Error while reading region file input";
                            throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT);
                        }

                        if (region_size < 0) {
                            region_size = end - start + 1;
                            warn("setting region size to " + Integer.toString(region_size),
                                    PigWarning.UDF_WARNING_1);
                        }

                        SAMSequenceRecord ref = samfileheader_decoded.getSequence(region_data[0]);

                        if (ref == null)
                            try {
                                ref = samfileheader_decoded.getSequence(Integer.parseInt(region_data[0]));
                            } catch (NumberFormatException e) {
                                warn("unable to parse region entry!", PigWarning.UDF_WARNING_1);
                            }

                        if (ref == null) {
                            int errCode = 0;
                            String errMsg = "MappabilityFilter: Unable to find sequence record for region: " + str;
                            throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT);
                        }

                        RegionEntry e = new RegionEntry(ref.getSequenceIndex(), start, end);
                        regions.put(e, Boolean.TRUE);
                    }
                }
            }

            regionin.close();

            String msg = "successfully read region file with " + regions.size() + " regions";
            warn(msg, PigWarning.UDF_WARNING_1);
        } catch (Exception e) {
            String errMsg = "MappabilityFilter: ERROR: could not read region file: " + e.toString();
            int errCode = 0;
            throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT);
        }
    }
}
From source file: gaffer.accumulo.inputformat.TestElementInputFormat.java
License: Apache License
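Runs a MapReduce job against a mock Accumulo instance and reads the job's SequenceFile output from the local file system, checking that each edge comes back exactly once.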
public void testOnlyGetEdgeOnce(InputFormatType type) throws Exception {
    String INSTANCE_NAME = "A";
    String tableName = "testOnlyGetEdgeOnce" + type.name();
    MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
    Connector connector = mockInstance.getConnector("root", new PasswordToken(""));
    connector.securityOperations().changeUserAuthorizations("root",
            new Authorizations(visibilityString1, visibilityString2));
    TableUtils.createTable(connector, tableName, 30 * 24 * 60 * 60 * 1000L);
    Set<GraphElementWithStatistics> data = getData();
    AccumuloBackedGraph graph = new AccumuloBackedGraph(connector, tableName);
    graph.setAuthorizations(new Authorizations(visibilityString1, visibilityString2));
    graph.addGraphElementsWithStatistics(data);

    // Set up local conf
    JobConf conf = new JobConf();
    conf.set("fs.default.name", "file:///");
    conf.set("mapred.job.tracker", "local");
    FileSystem fs = FileSystem.getLocal(conf);
    Driver driver = new Driver(type);
    driver.setConf(conf);

    // Create output folder for MapReduce job
    String outputDir = tempFolder.newFolder().getAbsolutePath();
    FileUtils.deleteDirectory(outputDir);

    // Write properties file
    String accumuloPropertiesFilename = tempFolder.newFile().getAbsolutePath();
    BufferedWriter bw = new BufferedWriter(new FileWriter(accumuloPropertiesFilename));
    bw.write("accumulo.instance=" + INSTANCE_NAME + "\n");
    bw.write("accumulo.zookeepers=" + AccumuloConfig.MOCK_ZOOKEEPERS + "\n");
    bw.write("accumulo.table=" + tableName + "\n");
    bw.write("accumulo.user=root\n");
    bw.write("accumulo.password=\n");
    bw.close();

    // Run job
    assertEquals(0, driver.run(new String[] { accumuloPropertiesFilename, outputDir }));

    // Read results in
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(outputDir + "/part-m-00000"), conf);
    GraphElement element = new GraphElement();
    SetOfStatistics statistics = new SetOfStatistics();
    int count = 0;
    Set<GraphElementWithStatistics> results = new HashSet<GraphElementWithStatistics>();
    while (reader.next(element, statistics)) {
        count++;
        results.add(new GraphElementWithStatistics(element.clone(), statistics.clone()));
    }
    reader.close();

    // There should be 3 edges and 2 entities - 5 in total
    // Note need to check count (and not just compare sets) as the point of the test is to
    // ensure we don't get the same edge back twice
    assertEquals(5, count);
    Set<GraphElementWithStatistics> expectedResults = new HashSet<GraphElementWithStatistics>();
    Edge edge1 = new Edge("customer", "A", "product", "P", "purchase", "instore", true,
            visibilityString1 + "&" + visibilityString2, sevenDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics1 = new SetOfStatistics();
    statistics1.addStatistic("count", new Count(20));
    statistics1.addStatistic("anotherCount", new Count(1000000));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge1), statistics1));
    Edge edge2 = new Edge("customer", "A", "product", "P", "purchase", "instore", false, visibilityString2,
            sixDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics2 = new SetOfStatistics();
    statistics2.addStatistic("countSomething", new Count(123456));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge2), statistics2));
    Edge edge5 = new Edge("customer", "B", "product", "Q", "purchase", "instore", true, visibilityString2,
            sixDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics5 = new SetOfStatistics();
    statistics5.addStatistic("count", new Count(99));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge5), statistics5));
    Entity entity1 = new Entity("customer", "A", "purchase", "count", visibilityString1, sevenDaysBefore,
            sixDaysBefore);
    SetOfStatistics statisticsEntity1 = new SetOfStatistics();
    statisticsEntity1.addStatistic("entity_count", new Count(1000000));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(entity1), statisticsEntity1));
    Entity entity2 = new Entity("product", "R", "purchase", "count", visibilityString1, sevenDaysBefore,
            sixDaysBefore);
    SetOfStatistics statisticsEntity2 = new SetOfStatistics();
    statisticsEntity2.addStatistic("entity_count", new Count(12345));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(entity2), statisticsEntity2));

    assertEquals(results, expectedResults);
}
From source file: gaffer.accumulo.inputformat.TestElementInputFormat.java
License: Apache License
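Checks that visibility authorizations are enforced: a user authorized only for visibilityString1 sees just the matching elements in the job output.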
public void testAuthsAreEnforced(InputFormatType type) throws Exception {
    String INSTANCE_NAME = "A";
    String tableName = "testAuthsAreEnforced" + type.name();
    MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
    Connector connector = mockInstance.getConnector("root", new PasswordToken(""));
    // Only give them permission to see visibilityString1
    connector.securityOperations().changeUserAuthorizations("root", new Authorizations(visibilityString1));
    TableUtils.createTable(connector, tableName, 30 * 24 * 60 * 60 * 1000L);
    Set<GraphElementWithStatistics> data = getData();
    AccumuloBackedGraph graph = new AccumuloBackedGraph(connector, tableName);
    graph.addGraphElementsWithStatistics(data);

    // Set up local conf
    JobConf conf = new JobConf();
    conf.set("fs.default.name", "file:///");
    conf.set("mapred.job.tracker", "local");
    FileSystem fs = FileSystem.getLocal(conf);
    Driver driver = new Driver(type);
    driver.setConf(conf);

    // Create output folder for MapReduce job
    String outputDir = tempFolder.newFolder().getAbsolutePath();
    FileUtils.deleteDirectory(outputDir);

    // Write properties file
    String accumuloPropertiesFilename = tempFolder.newFile().getAbsolutePath();
    BufferedWriter bw = new BufferedWriter(new FileWriter(accumuloPropertiesFilename));
    bw.write("accumulo.instance=" + INSTANCE_NAME + "\n");
    bw.write("accumulo.zookeepers=" + AccumuloConfig.MOCK_ZOOKEEPERS + "\n");
    bw.write("accumulo.table=" + tableName + "\n");
    bw.write("accumulo.user=root\n");
    bw.write("accumulo.password=\n");
    bw.close();

    // Run job
    assertEquals(0, driver.run(new String[] { accumuloPropertiesFilename, outputDir }));

    // Read results in
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(outputDir + "/part-m-00000"), conf);
    GraphElement element = new GraphElement();
    SetOfStatistics statistics = new SetOfStatistics();
    Set<GraphElementWithStatistics> results = new HashSet<GraphElementWithStatistics>();
    int count = 0;
    while (reader.next(element, statistics)) {
        results.add(new GraphElementWithStatistics(element.clone(), statistics.clone()));
        count++;
    }
    reader.close();

    // There should be 1 edge and 2 entities - 3 in total
    assertEquals(3, count);
    Set<GraphElementWithStatistics> expectedResults = new HashSet<GraphElementWithStatistics>();
    Edge edge1 = new Edge("customer", "A", "product", "P", "purchase", "instore", true, visibilityString1,
            sevenDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics1 = new SetOfStatistics();
    statistics1.addStatistic("count", new Count(3));
    statistics1.addStatistic("anotherCount", new Count(1000000));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge1), statistics1));
    Entity entity1 = new Entity("customer", "A", "purchase", "count", visibilityString1, sevenDaysBefore,
            sixDaysBefore);
    SetOfStatistics statisticsEntity1 = new SetOfStatistics();
    statisticsEntity1.addStatistic("entity_count", new Count(1000000));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(entity1), statisticsEntity1));
    Entity entity2 = new Entity("product", "R", "purchase", "count", visibilityString1, sevenDaysBefore,
            sixDaysBefore);
    SetOfStatistics statisticsEntity2 = new SetOfStatistics();
    statisticsEntity2.addStatistic("entity_count", new Count(12345));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(entity2), statisticsEntity2));

    assertEquals(results, expectedResults);
}
From source file: gaffer.accumulo.inputformat.TestElementInputFormat.java
License: Apache License
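Checks that a job can restrict its authorizations to a subset of those granted to the user.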
public void testAuthsCanBeRestrictedByUser(InputFormatType type) throws Exception {
    String INSTANCE_NAME = "A";
    String tableName = "testAuthsCanBeRestrictedByUser" + type.name();
    MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
    Connector connector = mockInstance.getConnector("root", new PasswordToken(""));
    // Give them permission to see visibilityString1 and 2
    connector.securityOperations().changeUserAuthorizations("root",
            new Authorizations(visibilityString1, visibilityString2));
    TableUtils.createTable(connector, tableName, 30 * 24 * 60 * 60 * 1000L);
    Set<GraphElementWithStatistics> data = getData();
    AccumuloBackedGraph graph = new AccumuloBackedGraph(connector, tableName);
    graph.addGraphElementsWithStatistics(data);

    // Set up local conf
    JobConf conf = new JobConf();
    conf.set("fs.default.name", "file:///");
    conf.set("mapred.job.tracker", "local");
    FileSystem fs = FileSystem.getLocal(conf);
    // Choose to only see data with visibilityString1
    DriverRestrictedAuths driver = new DriverRestrictedAuths(type, new Authorizations(visibilityString1));
    driver.setConf(conf);

    // Create output folder for MapReduce job
    String outputDir = tempFolder.newFolder().getAbsolutePath();
    FileUtils.deleteDirectory(outputDir);

    // Write properties file
    String accumuloPropertiesFilename = tempFolder.newFile().getAbsolutePath();
    BufferedWriter bw = new BufferedWriter(new FileWriter(accumuloPropertiesFilename));
    bw.write("accumulo.instance=" + INSTANCE_NAME + "\n");
    bw.write("accumulo.zookeepers=" + AccumuloConfig.MOCK_ZOOKEEPERS + "\n");
    bw.write("accumulo.table=" + tableName + "\n");
    bw.write("accumulo.user=root\n");
    bw.write("accumulo.password=\n");
    bw.close();

    // Run job
    assertEquals(0, driver.run(new String[] { accumuloPropertiesFilename, outputDir }));

    // Read results in
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(outputDir + "/part-m-00000"), conf);
    GraphElement element = new GraphElement();
    SetOfStatistics statistics = new SetOfStatistics();
    Set<GraphElementWithStatistics> results = new HashSet<GraphElementWithStatistics>();
    int count = 0;
    while (reader.next(element, statistics)) {
        results.add(new GraphElementWithStatistics(element.clone(), statistics.clone()));
        count++;
    }
    reader.close();

    // There should be 1 edge and 2 entities - 3 in total
    assertEquals(3, count);
    Set<GraphElementWithStatistics> expectedResults = new HashSet<GraphElementWithStatistics>();
    Edge edge1 = new Edge("customer", "A", "product", "P", "purchase", "instore", true, visibilityString1,
            sevenDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics1 = new SetOfStatistics();
    statistics1.addStatistic("count", new Count(3));
    statistics1.addStatistic("anotherCount", new Count(1000000));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge1), statistics1));
    Entity entity1 = new Entity("customer", "A", "purchase", "count", visibilityString1, sevenDaysBefore,
            sixDaysBefore);
    SetOfStatistics statisticsEntity1 = new SetOfStatistics();
    statisticsEntity1.addStatistic("entity_count", new Count(1000000));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(entity1), statisticsEntity1));
    Entity entity2 = new Entity("product", "R", "purchase", "count", visibilityString1, sevenDaysBefore,
            sixDaysBefore);
    SetOfStatistics statisticsEntity2 = new SetOfStatistics();
    statisticsEntity2.addStatistic("entity_count", new Count(12345));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(entity2), statisticsEntity2));

    assertEquals(results, expectedResults);
}
From source file: gaffer.accumulo.inputformat.TestElementInputFormat.java
License: Apache License
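Checks that a post roll-up transform is applied to every element when the whole table is read.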
public void testPostRollUpTransformIsAppliedReadingWholeTable(InputFormatType type) throws Exception {
    String INSTANCE_NAME = "A";
    String tableName = "testPostRollUpTransformIsAppliedReadingWholeTable" + type.name();
    MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
    Connector connector = mockInstance.getConnector("root", new PasswordToken(""));
    connector.securityOperations().changeUserAuthorizations("root",
            new Authorizations(visibilityString1, visibilityString2));
    TableUtils.createTable(connector, tableName, 30 * 24 * 60 * 60 * 1000L);
    Set<GraphElementWithStatistics> data = getData();
    AccumuloBackedGraph graph = new AccumuloBackedGraph(connector, tableName);
    graph.addGraphElementsWithStatistics(data);

    // Add post roll-up transform
    String transformedSummaryType = "abc";

    // Set up local conf
    JobConf conf = new JobConf();
    conf.set("fs.default.name", "file:///");
    conf.set("mapred.job.tracker", "local");
    FileSystem fs = FileSystem.getLocal(conf);
    Driver driver = new Driver(type, new ExampleTransform(transformedSummaryType));
    driver.setConf(conf);

    // Create output folder for MapReduce job
    String outputDir = tempFolder.newFolder().getAbsolutePath();
    FileUtils.deleteDirectory(outputDir);

    // Write properties file
    String accumuloPropertiesFilename = tempFolder.newFile().getAbsolutePath();
    BufferedWriter bw = new BufferedWriter(new FileWriter(accumuloPropertiesFilename));
    bw.write("accumulo.instance=" + INSTANCE_NAME + "\n");
    bw.write("accumulo.zookeepers=" + AccumuloConfig.MOCK_ZOOKEEPERS + "\n");
    bw.write("accumulo.table=" + tableName + "\n");
    bw.write("accumulo.user=root\n");
    bw.write("accumulo.password=\n");
    bw.close();

    // Run job
    assertEquals(0, driver.run(new String[] { accumuloPropertiesFilename, outputDir }));

    // Read results in
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(outputDir + "/part-m-00000"), conf);
    GraphElement element = new GraphElement();
    SetOfStatistics statistics = new SetOfStatistics();
    Set<GraphElementWithStatistics> results = new HashSet<GraphElementWithStatistics>();
    while (reader.next(element, statistics)) {
        results.add(new GraphElementWithStatistics(element.clone(), statistics.clone()));
        assertEquals(transformedSummaryType, element.getSummaryType());
    }
    reader.close();

    // There should be 3 edges and 2 entities - 5 in total
    Set<GraphElementWithStatistics> expectedResults = new HashSet<GraphElementWithStatistics>();
    Edge edge1 = new Edge("customer", "A", "product", "P", transformedSummaryType, "instore", true,
            visibilityString1 + "&" + visibilityString2, sevenDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics1 = new SetOfStatistics();
    statistics1.addStatistic("count", new Count(20));
    statistics1.addStatistic("anotherCount", new Count(1000000));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge1), statistics1));
    Edge edge2 = new Edge("customer", "A", "product", "P", transformedSummaryType, "instore", false,
            visibilityString2, sixDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics2 = new SetOfStatistics();
    statistics2.addStatistic("countSomething", new Count(123456));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge2), statistics2));
    Edge edge5 = new Edge("customer", "B", "product", "Q", transformedSummaryType, "instore", true,
            visibilityString2, sixDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics5 = new SetOfStatistics();
    statistics5.addStatistic("count", new Count(99));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge5), statistics5));
    Entity entity1 = new Entity("customer", "A", transformedSummaryType, "count", visibilityString1,
            sevenDaysBefore, sixDaysBefore);
    SetOfStatistics statisticsEntity1 = new SetOfStatistics();
    statisticsEntity1.addStatistic("entity_count", new Count(1000000));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(entity1), statisticsEntity1));
    Entity entity2 = new Entity("product", "R", transformedSummaryType, "count", visibilityString1,
            sevenDaysBefore, sixDaysBefore);
    SetOfStatistics statisticsEntity2 = new SetOfStatistics();
    statisticsEntity2.addStatistic("entity_count", new Count(12345));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(entity2), statisticsEntity2));

    assertEquals(expectedResults, results);
}
From source file: gaffer.accumulo.inputformat.TestElementInputFormat.java
License: Apache License
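Checks that a post roll-up transform is applied when reading the data for one entity.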
public void testPostRollUpTransformIsAppliedReadingDataForOneEntity(InputFormatType type) throws Exception {
    String INSTANCE_NAME = "A";
    String tableName = "testPostRollUpTransformIsAppliedReadingDataForOneEntity" + type.name();
    MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
    Connector connector = mockInstance.getConnector("root", new PasswordToken(""));
    connector.securityOperations().changeUserAuthorizations("root",
            new Authorizations(visibilityString1, visibilityString2)); // TEMP _ CHECK
    TableUtils.createTable(connector, tableName, 30 * 24 * 60 * 60 * 1000L);
    Set<GraphElementWithStatistics> data = getData();
    AccumuloBackedGraph graph = new AccumuloBackedGraph(connector, tableName);
    graph.addGraphElementsWithStatistics(data);

    // Add post roll-up transform
    String transformedSummaryType = "abc";

    // Set up local conf
    JobConf conf = new JobConf();
    conf.set("fs.default.name", "file:///");
    conf.set("mapred.job.tracker", "local");
    FileSystem fs = FileSystem.getLocal(conf);
    DriverForCertainTypeValues driver = new DriverForCertainTypeValues(type,
            new ExampleTransform(transformedSummaryType));
    driver.setConf(conf);

    // Create output folder for MapReduce job
    String outputDir = tempFolder.newFolder().getAbsolutePath();
    FileUtils.deleteDirectory(outputDir);

    // Write properties file
    String accumuloPropertiesFilename = tempFolder.newFile().getAbsolutePath();
    BufferedWriter bw = new BufferedWriter(new FileWriter(accumuloPropertiesFilename));
    bw.write("accumulo.instance=" + INSTANCE_NAME + "\n");
    bw.write("accumulo.zookeepers=" + AccumuloConfig.MOCK_ZOOKEEPERS + "\n");
    bw.write("accumulo.table=" + tableName + "\n");
    bw.write("accumulo.user=root\n");
    bw.write("accumulo.password=\n");
    bw.close();

    // Run job
    assertEquals(0, driver.run(new String[] { accumuloPropertiesFilename, outputDir, "customer", "B" }));

    // Read results in
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(outputDir + "/part-m-00000"), conf);
    GraphElement element = new GraphElement();
    SetOfStatistics statistics = new SetOfStatistics();
    Set<GraphElementWithStatistics> results = new HashSet<GraphElementWithStatistics>();
    while (reader.next(element, statistics)) {
        results.add(new GraphElementWithStatistics(element.clone(), statistics.clone()));
        assertEquals(transformedSummaryType, element.getSummaryType());
    }
    reader.close();

    // There should be 1 edge
    Set<GraphElementWithStatistics> expectedResults = new HashSet<GraphElementWithStatistics>();
    Edge edge5 = new Edge("customer", "B", "product", "Q", transformedSummaryType, "instore", true,
            visibilityString2, sixDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics5 = new SetOfStatistics();
    statistics5.addStatistic("count", new Count(99));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge5), statistics5));

    assertEquals(expectedResults, results);
}
From source file: gaffer.accumulo.inputformat.TestElementInputFormat.java
License: Apache License
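Checks that a post roll-up transform is applied when reading the data for one range.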
public void testPostRollUpTransformIsAppliedReadingDataForOneRange(InputFormatType type) throws Exception {
    String INSTANCE_NAME = "A";
    String tableName = "testPostRollUpTransformIsAppliedReadingDataForOneRange" + type.name();
    MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
    Connector connector = mockInstance.getConnector("root", new PasswordToken(""));
    connector.securityOperations().changeUserAuthorizations("root",
            new Authorizations(visibilityString1, visibilityString2));
    TableUtils.createTable(connector, tableName, 30 * 24 * 60 * 60 * 1000L);
    Set<GraphElementWithStatistics> data = getData();
    AccumuloBackedGraph graph = new AccumuloBackedGraph(connector, tableName);
    graph.addGraphElementsWithStatistics(data);

    // Add post roll-up transform
    String transformedSummaryType = "abc";

    // Set up local conf
    JobConf conf = new JobConf();
    conf.set("fs.default.name", "file:///");
    conf.set("mapred.job.tracker", "local");
    FileSystem fs = FileSystem.getLocal(conf);
    DriverForRanges driver = new DriverForRanges(type, new ExampleTransform(transformedSummaryType));
    driver.setConf(conf);

    // Create output folder for MapReduce job
    String outputDir = tempFolder.newFolder().getAbsolutePath();
    FileUtils.deleteDirectory(outputDir);

    // Write properties file
    String accumuloPropertiesFilename = tempFolder.newFile().getAbsolutePath();
    BufferedWriter bw = new BufferedWriter(new FileWriter(accumuloPropertiesFilename));
    bw.write("accumulo.instance=" + INSTANCE_NAME + "\n");
    bw.write("accumulo.zookeepers=" + AccumuloConfig.MOCK_ZOOKEEPERS + "\n");
    bw.write("accumulo.table=" + tableName + "\n");
    bw.write("accumulo.user=root\n");
    bw.write("accumulo.password=\n");
    bw.close();

    // Run job
    assertEquals(0, driver
            .run(new String[] { accumuloPropertiesFilename, outputDir, "customer", "B", "customer", "B2" }));

    // Read results in
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(outputDir + "/part-m-00000"), conf);
    GraphElement element = new GraphElement();
    SetOfStatistics statistics = new SetOfStatistics();
    Set<GraphElementWithStatistics> results = new HashSet<GraphElementWithStatistics>();
    while (reader.next(element, statistics)) {
        results.add(new GraphElementWithStatistics(element.clone(), statistics.clone()));
        assertEquals(transformedSummaryType, element.getSummaryType());
    }
    reader.close();

    // There should be 1 edge
    Set<GraphElementWithStatistics> expectedResults = new HashSet<GraphElementWithStatistics>();
    Edge edge5 = new Edge("customer", "B", "product", "Q", transformedSummaryType, "instore", true,
            visibilityString2, sixDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics5 = new SetOfStatistics();
    statistics5.addStatistic("count", new Count(99));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge5), statistics5));

    assertEquals(results, expectedResults);
}