List of usage examples for the org.apache.hadoop.mapred.JobConf no-argument constructor
public JobConf()
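Before the project-specific examples below, here is a minimal sketch of what the no-argument constructor is typically for: create an empty JobConf, configure the job on it, and submit it through the old org.apache.hadoop.mapred API. This snippet is not taken from any of the source files listed here; the job name and the command-line input/output paths are illustrative, and the old-API identity mapper/reducer defaults are relied on so the example stays self-contained.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class JobConfExample {
    public static void main(String[] args) throws Exception {
        // An empty JobConf picks up the *-site.xml resources from the classpath,
        // just like a plain Configuration, and is then filled in by hand.
        JobConf conf = new JobConf();
        conf.setJobName("jobconf-example"); // arbitrary, illustrative name

        // No mapper/reducer classes are set, so the old-API IdentityMapper and
        // IdentityReducer defaults apply; with the default TextInputFormat the
        // records are (LongWritable, Text) pairs, hence these output classes.
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf); // submit and wait for completion
    }
}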
From source file: com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileTest.java
License: Apache License

@Test
public void testOneFile() throws IOException, InterruptedException {
    Path dir = new Path(tempFolder.getRoot().getAbsolutePath());

    CombineSequenceFileInputFormat<Text, Text> inputFormat = new CombineSequenceFileInputFormat<Text, Text>();
    Path inputFile = new Path(dir, "file1.txt");

    writeSequenceFile(inputFile);

    Job job = new Job(new JobConf());
    FileInputFormat.addInputPath(job, inputFile);

    List<InputSplit> splits = inputFormat.getSplits(job);
    assertEquals(1, splits.size());

    TaskAttemptID taskId = new TaskAttemptID("jt", 0, true, 0, 0);
    Configuration conf1 = new Configuration();
    TaskAttemptContext context1 = new TaskAttemptContext(conf1, taskId);
    RecordReader<Text, Text> rr = inputFormat.createRecordReader(splits.get(0), context1);
    rr.initialize(splits.get(0), context1);

    assertTrue(rr.nextKeyValue());
    assertEquals(key, rr.getCurrentKey());
    assertEquals(value, rr.getCurrentValue());

    assertFalse(rr.nextKeyValue());
    assertEquals(1.0f, rr.getProgress(), 0.1);
}
From source file: com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileTest.java
License: Apache License

@Test
public void testTwoFiles() throws IOException, InterruptedException {
    Path dir = new Path(tempFolder.getRoot().getAbsolutePath());

    CombineSequenceFileInputFormat<Text, Text> inputFormat = new CombineSequenceFileInputFormat<Text, Text>();
    Path inputFile1 = new Path(dir, "file1.txt");
    Path inputFile2 = new Path(dir, "file2.txt");

    writeSequenceFile(inputFile1);
    writeSequenceFile(inputFile2);

    Job job = new Job(new JobConf());
    FileInputFormat.addInputPath(job, inputFile1);
    FileInputFormat.addInputPath(job, inputFile2);

    List<InputSplit> splits = inputFormat.getSplits(job);
    assertEquals(1, splits.size());

    TaskAttemptID taskId = new TaskAttemptID("jt", 0, true, 0, 0);
    Configuration conf1 = new Configuration();
    TaskAttemptContext context1 = new TaskAttemptContext(conf1, taskId);
    RecordReader<Text, Text> rr = inputFormat.createRecordReader(splits.get(0), context1);
    rr.initialize(splits.get(0), context1);

    assertTrue(rr.nextKeyValue());
    assertEquals(key, rr.getCurrentKey());
    assertEquals(value, rr.getCurrentValue());
    assertEquals(0.5f, rr.getProgress(), 0.1);

    assertTrue(rr.nextKeyValue());
    assertEquals(key, rr.getCurrentKey());
    assertEquals(value, rr.getCurrentValue());

    assertFalse(rr.nextKeyValue());
    assertEquals(1.0f, rr.getProgress(), 0.1);
}
From source file: com.bah.culvert.CulvertOutputFormatTest.java
License: Apache License

@Ignore(value = "Filed as bug. #375")
@Test
public void testBasicOperation() throws Throwable {
    CulvertOutputFormat format = new CulvertOutputFormat();

    // the client only works with configurations
    JobConf conf = new JobConf();
    Client.setDatabaseAdapter(conf, InMemoryDB.class);

    /*
     * most of the stuff we set in the table properties because we use the
     * jobconf earlier for input stuff
     */
    Properties tblProps = CulvertHiveUtils.confToProps(conf);
    InMemoryDB db = new InMemoryDB();
    CColumn col = new CColumn("foo".getBytes(), "bar".getBytes());
    db.create("baz", Arrays.asList(col));

    CulvertHiveUtils.setCulvertConfigurationIsEmbedded(tblProps, true);
    CulvertHiveUtils.setCulvertTable(tblProps, "baz");

    final int[] i = { 0 };
    Progressable progress = new Progressable() {
        @Override
        public void progress() {
            i[0]++;
        }
    };

    RecordWriter writer = format.getHiveRecordWriter(conf, null, Put.class, true, tblProps, progress);
    writer.write(new Put(new CKeyValue("a".getBytes(), "b".getBytes(), "c".getBytes(), "d".getBytes())));
    Assert.assertEquals(1, i[0]);

    SeekingCurrentIterator it = db.getTableAdapter("baz").get(new Get(new CRange("a".getBytes())));

    // this is failing - looks like stuff has been put but isn't coming out of
    // the get
    Assert.assertTrue("Iterator should have a next value", it.hasNext());
    Result next = it.next();
    Assert.assertTrue("Result row should be 'a' byte equivalent",
            Arrays.equals("a".getBytes(), next.getRecordId()));
    Assert.assertTrue("Result should be ",
            Arrays.equals("d".getBytes(), next.getValue("b".getBytes(), "c".getBytes()).getValue()));
}
From source file: com.benchmark.mapred.terasort.TeraGen.java
License: Apache License

public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new JobConf(), new TeraGen(), args);
    System.exit(res);
}
From source file: com.benchmark.mapred.terasort.TeraSort.java
License: Apache License

/**
 * @param args
 */
public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new JobConf(), new TeraSort(), args);
    System.exit(res);
}
From source file: com.benchmark.mapred.terasort.TeraValidate.java
License: Apache License

/**
 * @param args
 */
public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new JobConf(), new TeraValidate(), args);
    System.exit(res);
}
From source file: com.bosscs.spark.mongodb.config.MongoDeepJobConfig.java
License: Apache License

/**
 * {@inheritDoc}
 */
@Override
public MongoDeepJobConfig<T> initialize() {
    validate();
    super.initialize();
    configHadoop = new JobConf();
    configHadoop = new Configuration();

    StringBuilder connection = new StringBuilder();
    connection.append("mongodb").append(":").append("//");

    if (username != null && password != null) {
        connection.append(username).append(":").append(password).append("@");
    }

    boolean firstHost = true;
    for (String hostName : host) {
        if (!firstHost) {
            connection.append(",");
        }
        connection.append(hostName);
        firstHost = false;
    }

    connection.append("/").append(catalog).append(".").append(table);

    StringBuilder options = new StringBuilder();
    boolean asignado = false;

    if (readPreference != null) {
        asignado = true;
        options.append("?readPreference=").append(readPreference);
    }

    if (replicaSet != null) {
        if (asignado) {
            options.append("&");
        } else {
            options.append("?");
        }
        options.append("replicaSet=").append(replicaSet);
    }

    connection.append(options);

    configHadoop.set(MongoConfigUtil.INPUT_URI, connection.toString());
    configHadoop.set(MongoConfigUtil.OUTPUT_URI, connection.toString());
    configHadoop.set(MongoConfigUtil.INPUT_SPLIT_SIZE, String.valueOf(splitSize));

    if (inputKey != null) {
        configHadoop.set(MongoConfigUtil.INPUT_KEY, inputKey);
    }

    configHadoop.set(MongoConfigUtil.SPLITS_USE_SHARDS, String.valueOf(useShards));
    configHadoop.set(MongoConfigUtil.CREATE_INPUT_SPLITS, String.valueOf(createInputSplit));
    configHadoop.set(MongoConfigUtil.SPLITS_USE_CHUNKS, String.valueOf(splitsUseChunks));

    if (query != null) {
        configHadoop.set(MongoConfigUtil.INPUT_QUERY, query.toString());
    }

    if (fields != null) {
        configHadoop.set(MongoConfigUtil.INPUT_FIELDS, fields.toString());
    }

    if (sort != null) {
        configHadoop.set(MongoConfigUtil.INPUT_SORT, sort);
    }

    if (username != null && password != null) {
        configHadoop.set(MongoConfigUtil.INPUT_URI, connection.toString());
    }

    if (customConfiguration != null) {
        Set<Map.Entry<String, Serializable>> set = customConfiguration.entrySet();
        Iterator<Map.Entry<String, Serializable>> iterator = set.iterator();
        while (iterator.hasNext()) {
            Map.Entry<String, Serializable> entry = iterator.next();
            configHadoop.set(entry.getKey(), entry.getValue().toString());
        }
    }

    return this;
}
From source file: com.cloudera.knittingboar.records.TestTwentyNewsgroupsCustomRecordParseOLRRun.java
License: Apache License

@Before
public void setup() throws Exception {
    defaultConf = new JobConf();
    defaultConf.set("fs.defaultFS", "file:///");
    localFs = FileSystem.getLocal(defaultConf);

    inputFileName = "kboar-shard-0.txt";
    baseDir = Files.createTempDir();
    File inputFile = new File(baseDir, inputFileName);
    TestingUtils.copyDecompressed(inputFileName + ".gz", inputFile);
    workDir = new Path(baseDir.getAbsolutePath());
}
From source file: com.cloudera.knittingboar.sgd.TestRunPOLRMasterAndSingleWorker.java
License: Apache License

@Before
public void setup() throws Exception {
    defaultConf = new JobConf();
    defaultConf.set("fs.defaultFS", "file:///");
    localFs = FileSystem.getLocal(defaultConf);

    inputFileName = "kboar-shard-0.txt";
    baseDir = Files.createTempDir();
    File inputFile = new File(baseDir, inputFileName);
    TestingUtils.copyDecompressed(inputFileName + ".gz", inputFile);
    workDir = new Path(baseDir.getAbsolutePath());

    configuration = new Configuration();

    // feature vector size
    configuration.setInt("com.cloudera.knittingboar.setup.FeatureVectorSize", 10000);
    configuration.setInt("com.cloudera.knittingboar.setup.numCategories", 20);

    // local input split path
    configuration.set("com.cloudera.knittingboar.setup.LocalInputSplitPath", "hdfs://127.0.0.1/input/0");
    configuration.set("com.cloudera.knittingboar.setup.RecordFactoryClassname",
            "com.cloudera.knittingboar.records.TwentyNewsgroupsRecordFactory");

    /*
     * // predictor label names
     * c.set("com.cloudera.knittingboar.setup.PredictorLabelNames", "x,y");
     * // predictor var types
     * c.set("com.cloudera.knittingboar.setup.PredictorVariableTypes", "numeric,numeric");
     * // target variables
     * c.set("com.cloudera.knittingboar.setup.TargetVariableName", "color");
     * // column header names
     * c.set("com.cloudera.knittingboar.setup.ColumnHeaderNames", "x,y,shape,color,k,k0,xx,xy,yy,a,b,c,bias");
     * //c.set("com.cloudera.knittingboar.setup.ColumnHeaderNames",
     * //    "\"x\",\"y\",\"shape\",\"color\",\"k\",\"k0\",\"xx\",\"xy\",\"yy\",\"a\",\"b\",\"c\",\"bias\"\n");
     */
}
From source file: com.cloudera.knittingboar.sgd.TestRunPOLRMasterAndTwoWorkers.java
License: Apache License

@Before
public void setup() throws Exception {
    defaultConf = new JobConf();
    defaultConf.set("fs.defaultFS", "file:///");
    localFs = FileSystem.getLocal(defaultConf);

    inputFileName = "kboar-shard-0.txt";
    baseDir = Files.createTempDir();
    File inputFile = new File(baseDir, inputFileName);
    TestingUtils.copyDecompressed(inputFileName + ".gz", inputFile);
    workDir = new Path(baseDir.getAbsolutePath());

    configuration = new Configuration();

    // feature vector size
    configuration.setInt("com.cloudera.knittingboar.setup.FeatureVectorSize", 10000);
    configuration.setInt("com.cloudera.knittingboar.setup.numCategories", 20);
    configuration.setInt("com.cloudera.knittingboar.setup.BatchSize", 200);

    // local input split path
    configuration.set("com.cloudera.knittingboar.setup.LocalInputSplitPath", "hdfs://127.0.0.1/input/0");
    configuration.set("com.cloudera.knittingboar.setup.RecordFactoryClassname",
            "com.cloudera.knittingboar.records.TwentyNewsgroupsRecordFactory");

    /*
     * // predictor label names
     * c.set("com.cloudera.knittingboar.setup.PredictorLabelNames", "x,y");
     * // predictor var types
     * c.set("com.cloudera.knittingboar.setup.PredictorVariableTypes", "numeric,numeric");
     * // target variables
     * c.set("com.cloudera.knittingboar.setup.TargetVariableName", "color");
     * // column header names
     * c.set("com.cloudera.knittingboar.setup.ColumnHeaderNames", "x,y,shape,color,k,k0,xx,xy,yy,a,b,c,bias");
     * //c.set("com.cloudera.knittingboar.setup.ColumnHeaderNames",
     * //    "\"x\",\"y\",\"shape\",\"color\",\"k\",\"k0\",\"xx\",\"xy\",\"yy\",\"a\",\"b\",\"c\",\"bias\"\n");
     */
}