Example usage for org.apache.hadoop.fs FileSystem getLocal

List of usage examples for org.apache.hadoop.fs FileSystem getLocal

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.FileSystem.getLocal.

Prototype

public static LocalFileSystem getLocal(Configuration conf) throws IOException 

Document

Get the local FileSystem.
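
A minimal, self-contained sketch of the call is shown below; the class name and the path it checks are illustrative assumptions, not part of the Hadoop API.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;

public class GetLocalExample {
    public static void main(String[] args) throws IOException {
        // Obtain the FileSystem implementation backing the local disk (file:///).
        LocalFileSystem localFs = FileSystem.getLocal(new Configuration());

        // Use it like any other FileSystem, e.g. check whether a path exists.
        Path path = new Path("/tmp/example.txt"); // hypothetical path
        System.out.println(path + " exists: " + localFs.exists(path));
    }
}

The examples that follow use the returned LocalFileSystem for reading input files, writing SequenceFiles, and other local I/O.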

Usage

From source file:edu.umn.cs.spatialHadoop.operations.CatUnion.java

License:Open Source License

/**
 * Read all categories from the category file
 * @param categoryFile
 * @param idToCategory
 * @throws IOException
 */
private static void readCategories(Path categoryFile, Map<Integer, Integer> idToCategory) throws IOException {
    Map<Integer, String> idToCatName = new HashMap<Integer, String>();
    FileSystem fsCategory = FileSystem.getLocal(new Configuration());
    long categoryFileSize = fsCategory.getFileStatus(categoryFile).getLen();
    if (categoryFileSize > 1024 * 1024)
        LOG.warn("Category file size is big: " + categoryFileSize);
    InputStream inCategory = fsCategory.open(categoryFile);
    LineRecordReader lineReader = new LineRecordReader(inCategory, 0, categoryFileSize, new Configuration());
    LongWritable lineOffset = lineReader.createKey();
    Text line = lineReader.createValue();

    Set<String> catNames = new TreeSet<String>();

    while (lineReader.next(lineOffset, line)) {
        int shape_id = TextSerializerHelper.consumeInt(line, ',');
        String cat_name = line.toString();
        catNames.add(cat_name);
        idToCatName.put(shape_id, cat_name);
    }

    lineReader.close();

    // Change category names to numbers
    Map<String, Integer> cat_name_to_id = new HashMap<String, Integer>();
    int cat_id = 0;
    for (String cat_name : catNames) {
        cat_name_to_id.put(cat_name, cat_id++);
    }

    for (Map.Entry<Integer, String> entry : idToCatName.entrySet()) {
        idToCategory.put(entry.getKey(), cat_name_to_id.get(entry.getValue()));
    }
}

From source file:edu.utsa.sifter.som.MainSOM.java

License:Apache License

public static void main(String[] args)
        throws IOException, InterruptedException, CorruptIndexException, NoSuchFieldException {
    final File evPath = new File(args[0]);
    final File idxPath = new File(evPath, "primary-idx");

    final long begin = System.currentTimeMillis();

    // createIndex(path);
    final Path outPath = new Path(new Path(evPath.toString()), "docVectors.seq");
    final Configuration hadoopConf = new Configuration();
    final LocalFileSystem fs = FileSystem.getLocal(hadoopConf);
    final SequenceFile.Writer file = SequenceFile.createWriter(fs, hadoopConf, outPath, LongWritable.class,
            IntArrayWritable.class);

    final DirectoryReader dirReader = DirectoryReader.open(FSDirectory.open(idxPath));

    final SifterConfig conf = new SifterConfig();
    InputStream xmlProps = null;
    try {
        xmlProps = new FileInputStream("sifter_props.xml");
    } catch (FileNotFoundException ex) {
        ; // swallow exception
    }
    conf.loadFromXML(xmlProps); // safe with null

    final MainSOM builder = new MainSOM(dirReader, conf);
    IndexWriter writer = null;
    FileOutputStream somJSFile = null;
    try {
        builder.initTerms();
        builder.writeVectors(file);
        file.close();

        final SequenceFile.Reader seqRdr = new SequenceFile.Reader(fs, outPath, hadoopConf);
        writer = builder.createWriter(new File(evPath, "som-idx"), conf);

        somJSFile = new FileOutputStream(new File(evPath, "som.js"));
        final CharsetEncoder utf8 = Charset.forName("UTF-8").newEncoder();
        utf8.onMalformedInput(CodingErrorAction.IGNORE);
        final Writer somJS = new BufferedWriter(new OutputStreamWriter(somJSFile, utf8));
        builder.makeSOM(conf, seqRdr, writer, somJS);
        writer.forceMerge(1);
    } catch (Exception e) {
        e.printStackTrace(System.err);
    } finally {
        file.close();
        if (writer != null) {
            writer.close();
        }
        if (somJSFile != null) {
            somJSFile.close();
        }
        dirReader.close();

        System.out.println("Number of docs written: " + builder.getNumDocs());
        System.out.println("Number of outlier docs: " + builder.getNumOutliers());
        System.out.println("Total term dimensions: " + builder.getTermsMap().size());
        System.out.println("Max terms per doc: " + builder.getMaxDocTerms());
        System.out.println("Avg terms per doc: " + builder.getAvgDocTerms());
        System.out.println("Duration: " + ((System.currentTimeMillis() - begin) / 1000) + " seconds");

        conf.storeToXML(new FileOutputStream("sifter_props.xml"));
    }
}

From source file:example.TestLineRecordReader.java

License:Apache License

/**
 * Writes the input test file
 *
 * @param conf
 * @param data
 * @return Path of the file created
 * @throws IOException
 */
private Path createInputFile(Configuration conf, String data) throws IOException {
    FileSystem localFs = FileSystem.getLocal(conf);
    Path file = new Path(inputDir, "test.txt");
    Writer writer = new OutputStreamWriter(localFs.create(file));
    try {
        writer.write(data);
    } finally {
        writer.close();
    }
    return file;
}

From source file:fi.aalto.seqpig.filter.MappabilityFilter.java

License:Open Source License

public MappabilityFilter(String mappability_threshold_s) throws Exception {
    double mappability_threshold = Double.parseDouble(mappability_threshold_s);
    Configuration conf = UDFContext.getUDFContext().getJobConf();

    // see https://issues.apache.org/jira/browse/PIG-2576
    if (conf == null || conf.get("mapred.task.id") == null) {
        // we are running on the frontend
        //decodeSAMFileHeader();
        return;
    }

    if (samfileheader == null) {
        this.samfileheader = "";
        try {
            FileSystem fs = FileSystem.getLocal(conf);
            BufferedReader headerin = new BufferedReader(
                    new InputStreamReader(fs.open(new Path("input_asciiheader"))));

            while (true) {
                String str = headerin.readLine();
                if (str == null)
                    break;
                else
                    this.samfileheader += str + "\n";
            }
            headerin.close();

            if (this.samfileheader.equals("")) {
                int errCode = 0;
                String errMsg = "MappabilityFilter: unable to read samfileheader from distributed cache!";
                throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT);
            } else {
                String msg = "successfully read samfileheader";
                warn(msg, PigWarning.UDF_WARNING_1);
                this.samfileheader_decoded = getSAMFileHeader();
            }
        } catch (Exception e) {
            String errMsg = "MappabilityFilter: ERROR: could not read BAM header: " + e.toString();
            int errCode = 0;
            throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT);
        }
    }

    if (regions == null) {
        regions = new TreeMap<RegionEntry, Boolean>();
        try {
            FileSystem fs = FileSystem.getLocal(conf);
            // this was required before creating symlinks to files in the distributed cache
            /*Path[] cacheFiles = DistributedCache.getLocalCacheFiles(conf);
            for (Path cachePath : cacheFiles) {
            String msg = "found cache file: "+cachePath.getName();
            warn(msg, PigWarning.UDF_WARNING_1);
            if (cachePath.getName().equals("input_regionfile")) {
            BufferedReader regionin = new BufferedReader(new InputStreamReader(fs.open( cachePath )));
            BufferedReader regionin = new BufferedReader(new InputStreamReader(fs.open(new Path(fs.getHomeDirectory(), new Path(regionfilename)))));*/
            BufferedReader regionin = new BufferedReader(
                    new InputStreamReader(fs.open(new Path("./input_regionfile"))));
            regionin.readLine(); // throw away first line that describes columns

            while (true) {
                String str = regionin.readLine();

                if (str == null)
                    break;
                else {
                    String[] region_data = str.split("\t");

                    if (region_data[0] == null || region_data[1] == null || region_data[2] == null
                            || region_data[3] == null) {
                        int errCode = 0;
                        String errMsg = "MappabilityFilter: Error while reading region file input";

                        throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT);
                    }

                    if (Double.parseDouble(region_data[3]) >= mappability_threshold) {
                        int start = parseCoordinate(region_data[1]);
                        int end = parseCoordinate(region_data[2]);

                        if (end < start) {
                            int errCode = 0;
                            String errMsg = "MappabilityFilter: Error while reading region file input";
                            throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT);
                        }

                        if (region_size < 0) {
                            region_size = end - start + 1;
                            warn("setting region size to " + Integer.toString(region_size),
                                    PigWarning.UDF_WARNING_1);
                        }

                        SAMSequenceRecord ref = samfileheader_decoded.getSequence(region_data[0]);
                        if (ref == null)
                            try {
                                ref = samfileheader_decoded.getSequence(Integer.parseInt(region_data[0]));
                            } catch (NumberFormatException e) {
                                warn(new String("unable to parse region entry!"), PigWarning.UDF_WARNING_1);
                            }
                        if (ref == null) {
                            int errCode = 0;
                            String errMsg = "MappabilityFilter: Unable find sequence record for region: " + str;

                            throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT);
                        }
                        RegionEntry e = new RegionEntry(ref.getSequenceIndex(), start, end);
                        regions.put(e, new Boolean(true));
                    }
                }
            }
            regionin.close();

            String msg = "successfully read region file with " + regions.size() + " regions";
            warn(msg, PigWarning.UDF_WARNING_1);
        } catch (Exception e) {
            String errMsg = "MappabilityFilter: ERROR: could not region file: " + e.toString();
            int errCode = 0;

            throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT);
        }
    }
}

From source file:gaffer.accumulo.inputformat.TestElementInputFormat.java

License:Apache License

public void testOnlyGetEdgeOnce(InputFormatType type) throws Exception {
    String INSTANCE_NAME = "A";
    String tableName = "testOnlyGetEdgeOnce" + type.name();
    MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
    Connector connector = mockInstance.getConnector("root", new PasswordToken(""));
    connector.securityOperations().changeUserAuthorizations("root",
            new Authorizations(visibilityString1, visibilityString2));
    TableUtils.createTable(connector, tableName, 30 * 24 * 60 * 60 * 1000L);
    Set<GraphElementWithStatistics> data = getData();
    AccumuloBackedGraph graph = new AccumuloBackedGraph(connector, tableName);
    graph.setAuthorizations(new Authorizations(visibilityString1, visibilityString2));
    graph.addGraphElementsWithStatistics(data);

    // Set up local conf
    JobConf conf = new JobConf();
    conf.set("fs.default.name", "file:///");
    conf.set("mapred.job.tracker", "local");
    FileSystem fs = FileSystem.getLocal(conf);
    Driver driver = new Driver(type);
    driver.setConf(conf);

    // Create output folder for MapReduce job
    String outputDir = tempFolder.newFolder().getAbsolutePath();
    FileUtils.deleteDirectory(outputDir);

    // Write properties file
    String accumuloPropertiesFilename = tempFolder.newFile().getAbsolutePath();
    BufferedWriter bw = new BufferedWriter(new FileWriter(accumuloPropertiesFilename));
    bw.write("accumulo.instance=" + INSTANCE_NAME + "\n");
    bw.write("accumulo.zookeepers=" + AccumuloConfig.MOCK_ZOOKEEPERS + "\n");
    bw.write("accumulo.table=" + tableName + "\n");
    bw.write("accumulo.user=root\n");
    bw.write("accumulo.password=\n");
    bw.close();

    // Run job
    assertEquals(0, driver.run(new String[] { accumuloPropertiesFilename, outputDir }));

    // Read results in
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(outputDir + "/part-m-00000"), conf);
    GraphElement element = new GraphElement();
    SetOfStatistics statistics = new SetOfStatistics();
    int count = 0;
    Set<GraphElementWithStatistics> results = new HashSet<GraphElementWithStatistics>();
    while (reader.next(element, statistics)) {
        count++;
        results.add(new GraphElementWithStatistics(element.clone(), statistics.clone()));
    }
    reader.close();
    // There should be 3 edges and 2 entities - 5 in total
    // Note need to check count (and not just compare sets) as the point of the test is to
    // ensure we don't get the same edge back twice
    assertEquals(5, count);
    Set<GraphElementWithStatistics> expectedResults = new HashSet<GraphElementWithStatistics>();
    Edge edge1 = new Edge("customer", "A", "product", "P", "purchase", "instore", true,
            visibilityString1 + "&" + visibilityString2, sevenDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics1 = new SetOfStatistics();
    statistics1.addStatistic("count", new Count(20));
    statistics1.addStatistic("anotherCount", new Count(1000000));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge1), statistics1));
    Edge edge2 = new Edge("customer", "A", "product", "P", "purchase", "instore", false, visibilityString2,
            sixDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics2 = new SetOfStatistics();
    statistics2.addStatistic("countSomething", new Count(123456));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge2), statistics2));
    Edge edge5 = new Edge("customer", "B", "product", "Q", "purchase", "instore", true, visibilityString2,
            sixDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics5 = new SetOfStatistics();
    statistics5.addStatistic("count", new Count(99));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge5), statistics5));
    Entity entity1 = new Entity("customer", "A", "purchase", "count", visibilityString1, sevenDaysBefore,
            sixDaysBefore);
    SetOfStatistics statisticsEntity1 = new SetOfStatistics();
    statisticsEntity1.addStatistic("entity_count", new Count(1000000));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(entity1), statisticsEntity1));
    Entity entity2 = new Entity("product", "R", "purchase", "count", visibilityString1, sevenDaysBefore,
            sixDaysBefore);
    SetOfStatistics statisticsEntity2 = new SetOfStatistics();
    statisticsEntity2.addStatistic("entity_count", new Count(12345));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(entity2), statisticsEntity2));

    assertEquals(expectedResults, results);
}

From source file:gaffer.accumulo.inputformat.TestElementInputFormat.java

License:Apache License

public void testAuthsAreEnforced(InputFormatType type) throws Exception {
    String INSTANCE_NAME = "A";
    String tableName = "testAuthsAreEnforced" + type.name();
    MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
    Connector connector = mockInstance.getConnector("root", new PasswordToken(""));
    // Only give them permission to see visibilityString1
    connector.securityOperations().changeUserAuthorizations("root", new Authorizations(visibilityString1));
    TableUtils.createTable(connector, tableName, 30 * 24 * 60 * 60 * 1000L);
    Set<GraphElementWithStatistics> data = getData();
    AccumuloBackedGraph graph = new AccumuloBackedGraph(connector, tableName);
    graph.addGraphElementsWithStatistics(data);

    // Set up local conf
    JobConf conf = new JobConf();
    conf.set("fs.default.name", "file:///");
    conf.set("mapred.job.tracker", "local");
    FileSystem fs = FileSystem.getLocal(conf);
    Driver driver = new Driver(type);
    driver.setConf(conf);

    // Create output folder for MapReduce job
    String outputDir = tempFolder.newFolder().getAbsolutePath();
    FileUtils.deleteDirectory(outputDir);

    // Write properties file
    String accumuloPropertiesFilename = tempFolder.newFile().getAbsolutePath();
    BufferedWriter bw = new BufferedWriter(new FileWriter(accumuloPropertiesFilename));
    bw.write("accumulo.instance=" + INSTANCE_NAME + "\n");
    bw.write("accumulo.zookeepers=" + AccumuloConfig.MOCK_ZOOKEEPERS + "\n");
    bw.write("accumulo.table=" + tableName + "\n");
    bw.write("accumulo.user=root\n");
    bw.write("accumulo.password=\n");
    bw.close();

    // Run job
    assertEquals(0, driver.run(new String[] { accumuloPropertiesFilename, outputDir }));

    // Read results in
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(outputDir + "/part-m-00000"), conf);
    GraphElement element = new GraphElement();
    SetOfStatistics statistics = new SetOfStatistics();
    Set<GraphElementWithStatistics> results = new HashSet<GraphElementWithStatistics>();
    int count = 0;
    while (reader.next(element, statistics)) {
        results.add(new GraphElementWithStatistics(element.clone(), statistics.clone()));
        count++;
    }
    reader.close();

    // There should be 1 edge and 2 entities - 3 in total
    assertEquals(3, count);
    Set<GraphElementWithStatistics> expectedResults = new HashSet<GraphElementWithStatistics>();
    Edge edge1 = new Edge("customer", "A", "product", "P", "purchase", "instore", true, visibilityString1,
            sevenDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics1 = new SetOfStatistics();
    statistics1.addStatistic("count", new Count(3));
    statistics1.addStatistic("anotherCount", new Count(1000000));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge1), statistics1));
    Entity entity1 = new Entity("customer", "A", "purchase", "count", visibilityString1, sevenDaysBefore,
            sixDaysBefore);
    SetOfStatistics statisticsEntity1 = new SetOfStatistics();
    statisticsEntity1.addStatistic("entity_count", new Count(1000000));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(entity1), statisticsEntity1));
    Entity entity2 = new Entity("product", "R", "purchase", "count", visibilityString1, sevenDaysBefore,
            sixDaysBefore);
    SetOfStatistics statisticsEntity2 = new SetOfStatistics();
    statisticsEntity2.addStatistic("entity_count", new Count(12345));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(entity2), statisticsEntity2));

    assertEquals(expectedResults, results);
}

From source file:gaffer.accumulo.inputformat.TestElementInputFormat.java

License:Apache License

public void testAuthsCanBeRestrictedByUser(InputFormatType type) throws Exception {
    String INSTANCE_NAME = "A";
    String tableName = "testAuthsCanBeRestrictedByUser" + type.name();
    MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
    Connector connector = mockInstance.getConnector("root", new PasswordToken(""));
    // Give them permission to see visibilityString1 and 2
    connector.securityOperations().changeUserAuthorizations("root",
            new Authorizations(visibilityString1, visibilityString2));
    TableUtils.createTable(connector, tableName, 30 * 24 * 60 * 60 * 1000L);
    Set<GraphElementWithStatistics> data = getData();
    AccumuloBackedGraph graph = new AccumuloBackedGraph(connector, tableName);
    graph.addGraphElementsWithStatistics(data);

    // Set up local conf
    JobConf conf = new JobConf();
    conf.set("fs.default.name", "file:///");
    conf.set("mapred.job.tracker", "local");
    FileSystem fs = FileSystem.getLocal(conf);
    // Choose to only see data with visibilityString1
    DriverRestrictedAuths driver = new DriverRestrictedAuths(type, new Authorizations(visibilityString1));
    driver.setConf(conf);

    // Create output folder for MapReduce job
    String outputDir = tempFolder.newFolder().getAbsolutePath();
    FileUtils.deleteDirectory(outputDir);

    // Write properties file
    String accumuloPropertiesFilename = tempFolder.newFile().getAbsolutePath();
    BufferedWriter bw = new BufferedWriter(new FileWriter(accumuloPropertiesFilename));
    bw.write("accumulo.instance=" + INSTANCE_NAME + "\n");
    bw.write("accumulo.zookeepers=" + AccumuloConfig.MOCK_ZOOKEEPERS + "\n");
    bw.write("accumulo.table=" + tableName + "\n");
    bw.write("accumulo.user=root\n");
    bw.write("accumulo.password=\n");
    bw.close();

    // Run job
    assertEquals(0, driver.run(new String[] { accumuloPropertiesFilename, outputDir }));

    // Read results in
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(outputDir + "/part-m-00000"), conf);
    GraphElement element = new GraphElement();
    SetOfStatistics statistics = new SetOfStatistics();
    Set<GraphElementWithStatistics> results = new HashSet<GraphElementWithStatistics>();
    int count = 0;
    while (reader.next(element, statistics)) {
        results.add(new GraphElementWithStatistics(element.clone(), statistics.clone()));
        count++;
    }
    reader.close();

    // There should be 1 edge and 2 entities - 3 in total
    assertEquals(3, count);
    Set<GraphElementWithStatistics> expectedResults = new HashSet<GraphElementWithStatistics>();
    Edge edge1 = new Edge("customer", "A", "product", "P", "purchase", "instore", true, visibilityString1,
            sevenDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics1 = new SetOfStatistics();
    statistics1.addStatistic("count", new Count(3));
    statistics1.addStatistic("anotherCount", new Count(1000000));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge1), statistics1));
    Entity entity1 = new Entity("customer", "A", "purchase", "count", visibilityString1, sevenDaysBefore,
            sixDaysBefore);
    SetOfStatistics statisticsEntity1 = new SetOfStatistics();
    statisticsEntity1.addStatistic("entity_count", new Count(1000000));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(entity1), statisticsEntity1));
    Entity entity2 = new Entity("product", "R", "purchase", "count", visibilityString1, sevenDaysBefore,
            sixDaysBefore);
    SetOfStatistics statisticsEntity2 = new SetOfStatistics();
    statisticsEntity2.addStatistic("entity_count", new Count(12345));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(entity2), statisticsEntity2));

    assertEquals(expectedResults, results);
}

From source file:gaffer.accumulo.inputformat.TestElementInputFormat.java

License:Apache License

public void testPostRollUpTransformIsAppliedReadingWholeTable(InputFormatType type) throws Exception {
    String INSTANCE_NAME = "A";
    String tableName = "testPostRollUpTransformIsAppliedReadingWholeTable" + type.name();
    MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
    Connector connector = mockInstance.getConnector("root", new PasswordToken(""));
    connector.securityOperations().changeUserAuthorizations("root",
            new Authorizations(visibilityString1, visibilityString2));
    TableUtils.createTable(connector, tableName, 30 * 24 * 60 * 60 * 1000L);
    Set<GraphElementWithStatistics> data = getData();
    AccumuloBackedGraph graph = new AccumuloBackedGraph(connector, tableName);
    graph.addGraphElementsWithStatistics(data);

    // Add post roll-up transform
    String transformedSummaryType = "abc";

    // Set up local conf
    JobConf conf = new JobConf();
    conf.set("fs.default.name", "file:///");
    conf.set("mapred.job.tracker", "local");
    FileSystem fs = FileSystem.getLocal(conf);
    Driver driver = new Driver(type, new ExampleTransform(transformedSummaryType));
    driver.setConf(conf);

    // Create output folder for MapReduce job
    String outputDir = tempFolder.newFolder().getAbsolutePath();
    FileUtils.deleteDirectory(outputDir);

    // Write properties file
    String accumuloPropertiesFilename = tempFolder.newFile().getAbsolutePath();
    BufferedWriter bw = new BufferedWriter(new FileWriter(accumuloPropertiesFilename));
    bw.write("accumulo.instance=" + INSTANCE_NAME + "\n");
    bw.write("accumulo.zookeepers=" + AccumuloConfig.MOCK_ZOOKEEPERS + "\n");
    bw.write("accumulo.table=" + tableName + "\n");
    bw.write("accumulo.user=root\n");
    bw.write("accumulo.password=\n");
    bw.close();

    // Run job
    assertEquals(0, driver.run(new String[] { accumuloPropertiesFilename, outputDir }));

    // Read results in
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(outputDir + "/part-m-00000"), conf);
    GraphElement element = new GraphElement();
    SetOfStatistics statistics = new SetOfStatistics();
    Set<GraphElementWithStatistics> results = new HashSet<GraphElementWithStatistics>();
    while (reader.next(element, statistics)) {
        results.add(new GraphElementWithStatistics(element.clone(), statistics.clone()));
        assertEquals(transformedSummaryType, element.getSummaryType());
    }
    reader.close();

    // There should be 3 edges and 2 entities - 5 in total
    Set<GraphElementWithStatistics> expectedResults = new HashSet<GraphElementWithStatistics>();
    Edge edge1 = new Edge("customer", "A", "product", "P", transformedSummaryType, "instore", true,
            visibilityString1 + "&" + visibilityString2, sevenDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics1 = new SetOfStatistics();
    statistics1.addStatistic("count", new Count(20));
    statistics1.addStatistic("anotherCount", new Count(1000000));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge1), statistics1));
    Edge edge2 = new Edge("customer", "A", "product", "P", transformedSummaryType, "instore", false,
            visibilityString2, sixDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics2 = new SetOfStatistics();
    statistics2.addStatistic("countSomething", new Count(123456));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge2), statistics2));
    Edge edge5 = new Edge("customer", "B", "product", "Q", transformedSummaryType, "instore", true,
            visibilityString2, sixDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics5 = new SetOfStatistics();
    statistics5.addStatistic("count", new Count(99));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge5), statistics5));
    Entity entity1 = new Entity("customer", "A", transformedSummaryType, "count", visibilityString1,
            sevenDaysBefore, sixDaysBefore);
    SetOfStatistics statisticsEntity1 = new SetOfStatistics();
    statisticsEntity1.addStatistic("entity_count", new Count(1000000));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(entity1), statisticsEntity1));
    Entity entity2 = new Entity("product", "R", transformedSummaryType, "count", visibilityString1,
            sevenDaysBefore, sixDaysBefore);
    SetOfStatistics statisticsEntity2 = new SetOfStatistics();
    statisticsEntity2.addStatistic("entity_count", new Count(12345));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(entity2), statisticsEntity2));

    assertEquals(expectedResults, results);
}

From source file:gaffer.accumulo.inputformat.TestElementInputFormat.java

License:Apache License

public void testPostRollUpTransformIsAppliedReadingDataForOneEntity(InputFormatType type) throws Exception {
    String INSTANCE_NAME = "A";
    String tableName = "testPostRollUpTransformIsAppliedReadingDataForOneEntity" + type.name();
    MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
    Connector connector = mockInstance.getConnector("root", new PasswordToken(""));
    connector.securityOperations().changeUserAuthorizations("root",
            new Authorizations(visibilityString1, visibilityString2));// TEMP _ CHECK
    TableUtils.createTable(connector, tableName, 30 * 24 * 60 * 60 * 1000L);
    Set<GraphElementWithStatistics> data = getData();
    AccumuloBackedGraph graph = new AccumuloBackedGraph(connector, tableName);
    graph.addGraphElementsWithStatistics(data);

    // Add post roll-up transform
    String transformedSummaryType = "abc";

    // Set up local conf
    JobConf conf = new JobConf();
    conf.set("fs.default.name", "file:///");
    conf.set("mapred.job.tracker", "local");
    FileSystem fs = FileSystem.getLocal(conf);
    DriverForCertainTypeValues driver = new DriverForCertainTypeValues(type,
            new ExampleTransform(transformedSummaryType));
    driver.setConf(conf);

    // Create output folder for MapReduce job
    String outputDir = tempFolder.newFolder().getAbsolutePath();
    FileUtils.deleteDirectory(outputDir);

    // Write properties file
    String accumuloPropertiesFilename = tempFolder.newFile().getAbsolutePath();
    BufferedWriter bw = new BufferedWriter(new FileWriter(accumuloPropertiesFilename));
    bw.write("accumulo.instance=" + INSTANCE_NAME + "\n");
    bw.write("accumulo.zookeepers=" + AccumuloConfig.MOCK_ZOOKEEPERS + "\n");
    bw.write("accumulo.table=" + tableName + "\n");
    bw.write("accumulo.user=root\n");
    bw.write("accumulo.password=\n");
    bw.close();

    // Run job
    assertEquals(0, driver.run(new String[] { accumuloPropertiesFilename, outputDir, "customer", "B" }));

    // Read results in
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(outputDir + "/part-m-00000"), conf);
    GraphElement element = new GraphElement();
    SetOfStatistics statistics = new SetOfStatistics();
    Set<GraphElementWithStatistics> results = new HashSet<GraphElementWithStatistics>();
    while (reader.next(element, statistics)) {
        results.add(new GraphElementWithStatistics(element.clone(), statistics.clone()));
        assertEquals(transformedSummaryType, element.getSummaryType());
    }
    reader.close();

    // There should be 1 edge
    Set<GraphElementWithStatistics> expectedResults = new HashSet<GraphElementWithStatistics>();
    Edge edge5 = new Edge("customer", "B", "product", "Q", transformedSummaryType, "instore", true,
            visibilityString2, sixDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics5 = new SetOfStatistics();
    statistics5.addStatistic("count", new Count(99));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge5), statistics5));

    assertEquals(expectedResults, results);
}

From source file:gaffer.accumulo.inputformat.TestElementInputFormat.java

License:Apache License

public void testPostRollUpTransformIsAppliedReadingDataForOneRange(InputFormatType type) throws Exception {
    String INSTANCE_NAME = "A";
    String tableName = "testPostRollUpTransformIsAppliedReadingDataForOneRange" + type.name();
    MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
    Connector connector = mockInstance.getConnector("root", new PasswordToken(""));
    connector.securityOperations().changeUserAuthorizations("root",
            new Authorizations(visibilityString1, visibilityString2));
    TableUtils.createTable(connector, tableName, 30 * 24 * 60 * 60 * 1000L);
    Set<GraphElementWithStatistics> data = getData();
    AccumuloBackedGraph graph = new AccumuloBackedGraph(connector, tableName);
    graph.addGraphElementsWithStatistics(data);

    // Add post roll-up transform
    String transformedSummaryType = "abc";

    // Set up local conf
    JobConf conf = new JobConf();
    conf.set("fs.default.name", "file:///");
    conf.set("mapred.job.tracker", "local");
    FileSystem fs = FileSystem.getLocal(conf);
    DriverForRanges driver = new DriverForRanges(type, new ExampleTransform(transformedSummaryType));
    driver.setConf(conf);

    // Create output folder for MapReduce job
    String outputDir = tempFolder.newFolder().getAbsolutePath();
    FileUtils.deleteDirectory(outputDir);

    // Write properties file
    String accumuloPropertiesFilename = tempFolder.newFile().getAbsolutePath();
    BufferedWriter bw = new BufferedWriter(new FileWriter(accumuloPropertiesFilename));
    bw.write("accumulo.instance=" + INSTANCE_NAME + "\n");
    bw.write("accumulo.zookeepers=" + AccumuloConfig.MOCK_ZOOKEEPERS + "\n");
    bw.write("accumulo.table=" + tableName + "\n");
    bw.write("accumulo.user=root\n");
    bw.write("accumulo.password=\n");
    bw.close();

    // Run job
    assertEquals(0, driver
            .run(new String[] { accumuloPropertiesFilename, outputDir, "customer", "B", "customer", "B2" }));

    // Read results in
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(outputDir + "/part-m-00000"), conf);
    GraphElement element = new GraphElement();
    SetOfStatistics statistics = new SetOfStatistics();
    Set<GraphElementWithStatistics> results = new HashSet<GraphElementWithStatistics>();
    while (reader.next(element, statistics)) {
        results.add(new GraphElementWithStatistics(element.clone(), statistics.clone()));
        assertEquals(transformedSummaryType, element.getSummaryType());
    }
    reader.close();

    // There should be 1 edge
    Set<GraphElementWithStatistics> expectedResults = new HashSet<GraphElementWithStatistics>();
    Edge edge5 = new Edge("customer", "B", "product", "Q", transformedSummaryType, "instore", true,
            visibilityString2, sixDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics5 = new SetOfStatistics();
    statistics5.addStatistic("count", new Count(99));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge5), statistics5));

    assertEquals(expectedResults, results);
}