Example usage for org.apache.hadoop.mapred JobConf set

Introduction

This page lists example usages of the set method of org.apache.hadoop.mapred.JobConf.

Prototype

public void set(String name, String value) 

Document

Set the value of the name property.
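
A minimal sketch of the call itself (the property name below is arbitrary, chosen for illustration only):

import org.apache.hadoop.mapred.JobConf;

public class JobConfSetExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // Store an arbitrary string-valued property in the job configuration
        conf.set("example.property.name", "example-value");
        // JobConf inherits get(...) from Configuration, so the value can be read back
        System.out.println(conf.get("example.property.name"));
    }
}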

Usage

From source file: gaffer.accumulo.TestAccumuloBackedGraphUpdatingConf.java

License: Apache License

/**
 * Tests that calling {@link AccumuloBackedGraph#setOutgoingEdgesOnly} is correctly applied to
 * confs when a query range is specified.
 *
 * @throws Exception
 */
@Test
public void testSetConfigurationOutgoingEdgesOnlyQuery() throws Exception {
    String instanceName = "testSetConfigurationOutgoingEdgesOnlyQuery";
    AccumuloBackedGraph graph = setUpGraphAndMockAccumulo(instanceName);
    AccumuloConfig accumuloConfig = setUpAccumuloConfig(instanceName);

    // First query for customer|B: should find the edge customer|B -> product|Q as that
    // is outgoing from B
    JobConf conf = new JobConf();
    conf.set("fs.default.name", "file:///");
    conf.set("mapred.job.tracker", "local");
    graph.setOutgoingEdgesOnly();
    graph.setConfiguration(conf, new TypeValue("customer", "B"), accumuloConfig);
    FileSystem fs = FileSystem.getLocal(conf);

    // Run
    Driver driver = new Driver();
    driver.setConf(conf);
    String outputDir = tempFolder.newFolder().getAbsolutePath();
    FileUtils.deleteDirectory(outputDir);

    assertEquals(0, driver.run(new String[] { outputDir }));

    // Read results in
    Set<GraphElementWithStatistics> results = new HashSet<GraphElementWithStatistics>();
    int count = readResults(fs, new Path(outputDir), results);

    // There should be 1 edge
    assertEquals(1, count);

    Set<GraphElementWithStatistics> expectedResults = new HashSet<GraphElementWithStatistics>();

    Edge edge5 = new Edge("customer", "B", "product", "Q", "purchase", "instore", true, visibilityString2,
            sixDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics5 = new SetOfStatistics();
    statistics5.addStatistic("count", new Count(99));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge5), statistics5));

    assertEquals(expectedResults, results);

    // Now query for product|Q: should find no edges (as there are no outgoing edges from Q)
    conf = new JobConf();
    conf.set("fs.default.name", "file:///");
    conf.set("mapred.job.tracker", "local");
    graph.setOutgoingEdgesOnly();
    graph.setConfiguration(conf, new TypeValue("product", "Q"), accumuloConfig);

    // Run
    driver = new Driver();
    driver.setConf(conf);
    outputDir = tempFolder.newFolder().getAbsolutePath();
    FileUtils.deleteDirectory(outputDir);

    assertEquals(0, driver.run(new String[] { outputDir }));

    // Read results in
    results = new HashSet<GraphElementWithStatistics>();
    count = readResults(fs, new Path(outputDir), results);

    // There should be no results
    assertEquals(0, count);
}
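
A side note on the two conf.set calls above: fs.default.name and mapred.job.tracker are the old Hadoop 1 property names. On Hadoop 2 and later, the same local-mode setup is usually written with the renamed keys (as the createLocalConf example further down this page does); a minimal sketch:

JobConf conf = new JobConf();
// Hadoop 2+ names for the same settings; the deprecated keys
// fs.default.name and mapred.job.tracker are translated to these.
conf.set("fs.defaultFS", "file:///");
conf.set("mapreduce.jobtracker.address", "local");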

From source file: gaffer.accumulo.TestAccumuloBackedGraphUpdatingConf.java

License: Apache License

@Test
public void testSetConfigurationOutgoingEdgesOnlyQueryFromRanges() throws Exception {
    String instanceName = "testSetConfigurationOutgoingEdgesOnlyQueryFromRanges";
    AccumuloBackedGraph graph = setUpGraphAndMockAccumulo(instanceName);
    AccumuloConfig accumuloConfig = setUpAccumuloConfig(instanceName);

    // First query for the range of all customers - should find the three edges outgoing
    // from customers: A->P (directed), A->P (undirected) and B->Q
    JobConf conf = new JobConf();
    conf.set("fs.default.name", "file:///");
    conf.set("mapred.job.tracker", "local");
    graph.setReturnEdgesOnly();
    graph.setOutgoingEdgesOnly();
    graph.setConfigurationFromRanges(conf, new TypeValueRange("customer", "", "customer", "Z"), accumuloConfig);
    FileSystem fs = FileSystem.getLocal(conf);

    // Run
    Driver driver = new Driver();
    driver.setConf(conf);
    String outputDir = tempFolder.newFolder().getAbsolutePath();
    FileUtils.deleteDirectory(outputDir);

    assertEquals(0, driver.run(new String[] { outputDir }));

    // Read results in
    Set<GraphElementWithStatistics> results = new HashSet<GraphElementWithStatistics>();
    int count = readResults(fs, new Path(outputDir), results);

    // There should be 3 edges
    assertEquals(3, count);

    Set<GraphElementWithStatistics> expectedResults = new HashSet<GraphElementWithStatistics>();

    Edge edge1 = new Edge("customer", "A", "product", "P", "purchase", "instore", true, visibilityString1,
            sevenDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics1 = new SetOfStatistics();
    statistics1.addStatistic("count", new Count(20));
    statistics1.addStatistic("anotherCount", new Count(1000000));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge1), statistics1));
    Edge edge4 = new Edge("customer", "A", "product", "P", "purchase", "instore", false, visibilityString2,
            sixDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics4 = new SetOfStatistics();
    statistics4.addStatistic("countSomething", new Count(123456));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge4), statistics4));
    Edge edge5 = new Edge("customer", "B", "product", "Q", "purchase", "instore", true, visibilityString2,
            sixDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics5 = new SetOfStatistics();
    statistics5.addStatistic("count", new Count(99));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge5), statistics5));

    assertEquals(expectedResults, results);

    // Now query for all products - should find no edges (as there are no outgoing directed edges from products)
    conf = new JobConf();
    conf.set("fs.default.name", "file:///");
    conf.set("mapred.job.tracker", "local");
    graph.setReturnEdgesOnly();
    graph.setDirectedEdgesOnly();
    graph.setOutgoingEdgesOnly();
    graph.setConfigurationFromRanges(conf, new TypeValueRange("product", "", "product", "Z"), accumuloConfig);

    // Run
    driver = new Driver();
    driver.setConf(conf);
    outputDir = tempFolder.newFolder().getAbsolutePath();
    FileUtils.deleteDirectory(outputDir);

    assertEquals(0, driver.run(new String[] { outputDir }));

    // Read results in
    results = new HashSet<GraphElementWithStatistics>();
    count = readResults(fs, new Path(outputDir), results);

    // There should be no results
    assertEquals(0, count);
}

From source file: gaffer.accumulo.TestAccumuloBackedGraphUpdatingConf.java

License: Apache License

@Test
public void testSetConfigurationOutgoingEdgesOnlyQueryFromPairs() throws Exception {
    String instanceName = "testSetConfigurationOutgoingEdgesOnlyQueryFromPairs";
    AccumuloBackedGraph graph = setUpGraphAndMockAccumulo(instanceName);
    AccumuloConfig accumuloConfig = setUpAccumuloConfig(instanceName);

    // First query for pair customer|B, product|Q - should find edge B->Q
    JobConf conf = new JobConf();
    conf.set("fs.default.name", "file:///");
    conf.set("mapred.job.tracker", "local");
    graph.setOutgoingEdgesOnly();
    graph.setConfigurationFromPairs(conf,
            new Pair<TypeValue>(new TypeValue("customer", "B"), new TypeValue("product", "Q")), accumuloConfig);
    FileSystem fs = FileSystem.getLocal(conf);

    // Run
    Driver driver = new Driver();
    driver.setConf(conf);
    String outputDir = tempFolder.newFolder().getAbsolutePath();
    FileUtils.deleteDirectory(outputDir);

    assertEquals(0, driver.run(new String[] { outputDir }));

    // Read results in
    Set<GraphElementWithStatistics> results = new HashSet<GraphElementWithStatistics>();
    int count = readResults(fs, new Path(outputDir), results);

    // There should be 1 edge
    assertEquals(1, count);

    Set<GraphElementWithStatistics> expectedResults = new HashSet<GraphElementWithStatistics>();

    Edge edge5 = new Edge("customer", "B", "product", "Q", "purchase", "instore", true, visibilityString2,
            sixDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics5 = new SetOfStatistics();
    statistics5.addStatistic("count", new Count(99));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge5), statistics5));

    assertEquals(expectedResults, results);

    // Now query for pair product|Q, customer|B - shouldn't find any edges
    conf = new JobConf();
    conf.set("fs.default.name", "file:///");
    conf.set("mapred.job.tracker", "local");
    graph.setOutgoingEdgesOnly();
    graph.setConfigurationFromPairs(conf,
            new Pair<TypeValue>(new TypeValue("product", "Q"), new TypeValue("customer", "B")), accumuloConfig);

    // Run
    driver = new Driver();
    driver.setConf(conf);
    outputDir = tempFolder.newFolder().getAbsolutePath();
    FileUtils.deleteDirectory(outputDir);

    assertEquals(0, driver.run(new String[] { outputDir }));

    // Read results in
    results = new HashSet<GraphElementWithStatistics>();
    count = readResults(fs, new Path(outputDir), results);

    // There should be no results
    assertEquals(0, count);
}

From source file: gaffer.accumulo.TestAccumuloBackedGraphUpdatingConf.java

License: Apache License

@Test
public void testSetConfigurationIncomingEdgesOnlyQueryFromPairs() throws Exception {
    String instanceName = "testSetConfigurationIncomingEdgesOnlyQueryFromPairs";
    AccumuloBackedGraph graph = setUpGraphAndMockAccumulo(instanceName);
    AccumuloConfig accumuloConfig = setUpAccumuloConfig(instanceName);

    // First query for pair product|Q, customer|B - should find edge B->Q
    JobConf conf = new JobConf();
    conf.set("fs.default.name", "file:///");
    conf.set("mapred.job.tracker", "local");
    graph.setIncomingEdgesOnly();
    graph.setConfigurationFromPairs(conf,
            new Pair<TypeValue>(new TypeValue("product", "Q"), new TypeValue("customer", "B")), accumuloConfig);
    FileSystem fs = FileSystem.getLocal(conf);

    // Run
    Driver driver = new Driver();
    driver.setConf(conf);
    String outputDir = tempFolder.newFolder().getAbsolutePath();
    FileUtils.deleteDirectory(outputDir);

    assertEquals(0, driver.run(new String[] { outputDir }));

    // Read results in
    Set<GraphElementWithStatistics> results = new HashSet<GraphElementWithStatistics>();
    int count = readResults(fs, new Path(outputDir), results);

    // There should be 1 edge
    assertEquals(1, count);

    Set<GraphElementWithStatistics> expectedResults = new HashSet<GraphElementWithStatistics>();

    Edge edge5 = new Edge("customer", "B", "product", "Q", "purchase", "instore", true, visibilityString2,
            sixDaysBefore, fiveDaysBefore);
    SetOfStatistics statistics5 = new SetOfStatistics();
    statistics5.addStatistic("count", new Count(99));
    expectedResults.add(new GraphElementWithStatistics(new GraphElement(edge5), statistics5));

    assertEquals(expectedResults, results);

    // Now query for pair customer|B, product|Q - shouldn't find any edges
    conf = new JobConf();
    conf.set("fs.default.name", "file:///");
    conf.set("mapred.job.tracker", "local");
    graph.setIncomingEdgesOnly();
    graph.setConfigurationFromPairs(conf,
            new Pair<TypeValue>(new TypeValue("customer", "B"), new TypeValue("product", "Q")), accumuloConfig);

    // Run
    driver = new Driver();
    driver.setConf(conf);
    outputDir = tempFolder.newFolder().getAbsolutePath();
    FileUtils.deleteDirectory(outputDir);

    assertEquals(0, driver.run(new String[] { outputDir }));

    // Read results in
    results = new HashSet<GraphElementWithStatistics>();
    count = readResults(fs, new Path(outputDir), results);

    // There should be no results
    assertEquals(0, count);
}

From source file: gaffer.accumulostore.inputformat.InputFormatTest.java

License: Apache License

private void shouldReturnCorrectDataToMapReduceJob(final KeyPackage kp, final View view,
        final String instanceName, final Set<String> expectedResults) throws Exception {
    final AccumuloStore store = new MockAccumuloStore();
    final Schema schema = Schema.fromJson(StreamUtil.schemas(getClass()));
    final AccumuloProperties properties = AccumuloProperties
            .loadStoreProperties(StreamUtil.storeProps(getClass()));
    switch (kp) {
    case BYTE_ENTITY_KEY_PACKAGE:
        properties.setKeyPackageClass(ByteEntityKeyPackage.class.getName());
        properties.setInstanceName(instanceName + "_BYTE_ENTITY");
        break;
    case CLASSIC_KEY_PACKAGE:
        properties.setKeyPackageClass(ClassicKeyPackage.class.getName());
        properties.setInstanceName(instanceName + "_CLASSIC");
    }
    try {
        store.initialise(schema, properties);
    } catch (StoreException e) {
        fail("StoreException thrown: " + e);
    }
    setupGraph(store);

    // Set up local conf
    final JobConf conf = new JobConf();
    conf.set("fs.default.name", "file:///");
    conf.set("mapred.job.tracker", "local");
    final FileSystem fs = FileSystem.getLocal(conf);

    // Update configuration with instance, table name, etc.
    store.updateConfiguration(conf, view);

    // Run Driver
    final File outputFolder = testFolder.newFolder();
    FileUtils.deleteDirectory(outputFolder);
    final Driver driver = new Driver(outputFolder.getAbsolutePath());
    driver.setConf(conf);
    driver.run(new String[] {});

    // Read results and check correct
    final SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(outputFolder + "/part-m-00000"),
            conf);
    final Text text = new Text();
    final Set<String> results = new HashSet<>();
    while (reader.next(text)) {
        results.add(text.toString());
    }
    reader.close();
    assertEquals(expectedResults, results);
    FileUtils.deleteDirectory(outputFolder);
}

From source file: gaffer.accumulostore.integration.AddElementsFromHdfsIT.java

License: Apache License

private JobConf createLocalConf() throws StoreException {
    // Set up local conf
    final JobConf conf = new JobConf();
    conf.set("fs.defaultFS", "file:///");
    conf.set("mapreduce.jobtracker.address", "local");

    return conf;
}

From source file: gaffer.accumulostore.operation.hdfs.handler.job.AccumuloAddElementsFromHdfsJobFactory.java

License: Apache License

@Override
protected void setupJobConf(final JobConf jobConf, final AddElementsFromHdfs operation, final Store store)
        throws IOException {
    super.setupJobConf(jobConf, operation, store);
    jobConf.set(AccumuloStoreConstants.ACCUMULO_ELEMENT_CONVERTER_CLASS,
            ((AccumuloStore) store).getKeyPackage().getKeyConverter().getClass().getName());
}
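
The class name stored here is typically recovered on the task side by reflection. A hedged sketch of that read-back (the key string and helper class are hypothetical, standing in for AccumuloStoreConstants.ACCUMULO_ELEMENT_CONVERTER_CLASS and the store-internal loading code):

import org.apache.hadoop.mapred.JobConf;

public final class ConverterLoader {
    // Hypothetical key, standing in for AccumuloStoreConstants.ACCUMULO_ELEMENT_CONVERTER_CLASS
    private static final String CONVERTER_CLASS_KEY = "example.element.converter.class";

    public static Object loadConverter(final JobConf jobConf) throws Exception {
        // Read the class name back out of the configuration and instantiate it
        final String className = jobConf.get(CONVERTER_CLASS_KEY);
        return Class.forName(className).getDeclaredConstructor().newInstance();
    }
}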

From source file: gaffer.accumulostore.operation.hdfs.handler.job.factory.SampleDataForSplitPointsJobFactory.java

License: Apache License

protected void setupJobConf(final JobConf jobConf, final SampleDataForSplitPoints operation, final Store store)
        throws IOException {
    jobConf.set(SCHEMA, new String(store.getSchema().toJson(false), CommonConstants.UTF_8));
    jobConf.set(MAPPER_GENERATOR, operation.getMapperGeneratorClassName());
    jobConf.set(VALIDATE, String.valueOf(operation.isValidate()));
    jobConf.set(PROPORTION_TO_SAMPLE, String.valueOf(operation.getProportionToSample()));
    jobConf.set(AccumuloStoreConstants.ACCUMULO_ELEMENT_CONVERTER_CLASS,
            ((AccumuloStore) store).getKeyPackage().getKeyConverter().getClass().getName());
    Integer numTasks = operation.getNumMapTasks();
    if (null != numTasks) {
        jobConf.setNumMapTasks(numTasks);
    }
    jobConf.setNumReduceTasks(1);
}

From source file: gaffer.accumulostore.operation.hdfs.handler.job.SampleDataForSplitPointsJobFactory.java

License: Apache License

protected void setupJobConf(final JobConf jobConf, final SampleDataForSplitPoints operation, final Store store)
        throws IOException {
    jobConf.set(SCHEMA, new String(store.getSchema().toJson(false), CommonConstants.UTF_8));
    jobConf.set(MAPPER_GENERATOR, operation.getMapperGeneratorClassName());
    jobConf.set(VALIDATE, String.valueOf(operation.isValidate()));
    jobConf.set(PROPORTION_TO_SAMPLE, String.valueOf(operation.getProportionToSample()));
    Integer numTasks = operation.getNumMapTasks();
    if (null != numTasks) {
        jobConf.setNumMapTasks(numTasks);
    }
    numTasks = operation.getNumReduceTasks();
    if (null != numTasks) {
        jobConf.setNumReduceTasks(numTasks);
    }
}

From source file: gaffer.operation.simple.hdfs.handler.AbstractAddElementsFromHdfsJobFactory.java

License: Apache License

protected void setupJobConf(final JobConf jobConf, final AddElementsFromHdfs operation, final Store store)
        throws IOException {
    jobConf.set(DATA_SCHEMA, new String(store.getDataSchema().toJson(false), UTF_8_CHARSET));
    jobConf.set(STORE_SCHEMA, new String(store.getStoreSchema().toJson(false), UTF_8_CHARSET));
    jobConf.set(MAPPER_GENERATOR, operation.getMapperGeneratorClassName());
    jobConf.set(VALIDATE, String.valueOf(operation.isValidate()));
    Integer numTasks = operation.getNumMapTasks();
    if (null != numTasks) {
        jobConf.setNumMapTasks(numTasks);
    }
    numTasks = operation.getNumReduceTasks();
    if (null != numTasks) {
        jobConf.setNumReduceTasks(numTasks);
    }
}
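
Values stored with jobConf.set(...) in these factories are read back inside the map and reduce tasks through the corresponding get methods. A minimal sketch of the read-back side using the old mapred API (the property key and mapper are illustrative, not taken from the Gaffer source):

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class PropertyReadingMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, Text> {

    private boolean validate;

    @Override
    public void configure(final JobConf job) {
        // Read back a value the driver stored with conf.set(...), with a default
        // if it is absent; "example.validate" is a hypothetical key standing in
        // for keys such as VALIDATE in the factories above.
        validate = Boolean.parseBoolean(job.get("example.validate", "false"));
    }

    @Override
    public void map(final LongWritable key, final Text value,
            final OutputCollector<Text, Text> output, final Reporter reporter) throws IOException {
        if (validate) {
            // ... validation of the input record would go here ...
        }
        output.collect(value, value);
    }
}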