Example usage for org.apache.hadoop.mapred JobConf JobConf

Introduction

This page shows example usage of the org.apache.hadoop.mapred.JobConf no-argument constructor, JobConf().

Prototype

public JobConf() 

Source Link

Document

Construct a map/reduce job configuration.
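The no-argument constructor creates an empty job configuration that picks up the default Hadoop resources from the classpath and is then filled in by the caller. Below is a minimal sketch of how such a JobConf is typically populated and submitted with the old mapred API; the class name, job name, and command-line paths are illustrative placeholders, not values taken from the examples that follow.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class JobConfExample {
    public static void main(String[] args) throws Exception {
        // Construct an empty map/reduce job configuration; default resources
        // (core-site.xml, mapred-site.xml) are loaded from the classpath.
        JobConf conf = new JobConf();
        conf.setJarByClass(JobConfExample.class);
        conf.setJobName("jobconf-example"); // illustrative job name

        // No mapper/reducer is set, so the identity classes are used and the
        // output types match what TextInputFormat produces.
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        // Input and output paths are placeholders taken from the command line.
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}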

Usage

From source file:com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileTest.java

License:Apache License

@Test
public void testOneFile() throws IOException, InterruptedException {
    Path dir = new Path(tempFolder.getRoot().getAbsolutePath());

    CombineSequenceFileInputFormat<Text, Text> inputFormat = new CombineSequenceFileInputFormat<Text, Text>();
    Path inputFile = new Path(dir, "file1.txt");

    writeSequenceFile(inputFile);

    Job job = new Job(new JobConf());

    FileInputFormat.addInputPath(job, inputFile);

    List<InputSplit> splits = inputFormat.getSplits(job);
    assertEquals(1, splits.size());

    TaskAttemptID taskId = new TaskAttemptID("jt", 0, true, 0, 0);
    Configuration conf1 = new Configuration();
    TaskAttemptContext context1 = new TaskAttemptContext(conf1, taskId);

    RecordReader<Text, Text> rr = inputFormat.createRecordReader(splits.get(0), context1);
    rr.initialize(splits.get(0), context1);
    assertTrue(rr.nextKeyValue());

    assertEquals(key, rr.getCurrentKey());
    assertEquals(value, rr.getCurrentValue());

    assertFalse(rr.nextKeyValue());
    assertEquals(1.0f, rr.getProgress(), 0.1);
}

From source file:com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileTest.java

License:Apache License

@Test
public void testTwoFiles() throws IOException, InterruptedException {
    Path dir = new Path(tempFolder.getRoot().getAbsolutePath());

    CombineSequenceFileInputFormat<Text, Text> inputFormat = new CombineSequenceFileInputFormat<Text, Text>();
    Path inputFile1 = new Path(dir, "file1.txt");
    Path inputFile2 = new Path(dir, "file2.txt");

    writeSequenceFile(inputFile1);
    writeSequenceFile(inputFile2);

    Job job = new Job(new JobConf());

    FileInputFormat.addInputPath(job, inputFile1);
    FileInputFormat.addInputPath(job, inputFile2);

    List<InputSplit> splits = inputFormat.getSplits(job);
    assertEquals(1, splits.size());

    TaskAttemptID taskId = new TaskAttemptID("jt", 0, true, 0, 0);
    Configuration conf1 = new Configuration();
    TaskAttemptContext context1 = new TaskAttemptContext(conf1, taskId);

    RecordReader<Text, Text> rr = inputFormat.createRecordReader(splits.get(0), context1);
    rr.initialize(splits.get(0), context1);
    assertTrue(rr.nextKeyValue());

    assertEquals(key, rr.getCurrentKey());
    assertEquals(value, rr.getCurrentValue());

    assertEquals(0.5f, rr.getProgress(), 0.1);

    assertTrue(rr.nextKeyValue());

    assertEquals(key, rr.getCurrentKey());
    assertEquals(value, rr.getCurrentValue());

    assertFalse(rr.nextKeyValue());
    assertEquals(1.0f, rr.getProgress(), 0.1);
}

From source file:com.bah.culvert.CulvertOutputFormatTest.java

License:Apache License

@Ignore(value = "Filed as bug. #375")
@Test
public void testBasicOperation() throws Throwable {
    CulvertOutputFormat format = new CulvertOutputFormat();
    // the client only works with configurations
    JobConf conf = new JobConf();
    Client.setDatabaseAdapter(conf, InMemoryDB.class);
    /*
     * most of the stuff we set in the table properties because we use the
     * jobconf earlier for input stuff
     */
    Properties tblProps = CulvertHiveUtils.confToProps(conf);
    InMemoryDB db = new InMemoryDB();
    CColumn col = new CColumn("foo".getBytes(), "bar".getBytes());
    db.create("baz", Arrays.asList(col));
    CulvertHiveUtils.setCulvertConfigurationIsEmbedded(tblProps, true);
    CulvertHiveUtils.setCulvertTable(tblProps, "baz");
    final int[] i = { 0 };
    Progressable progress = new Progressable() {

        @Override
        public void progress() {
            i[0]++;
        }
    };
    RecordWriter writer = format.getHiveRecordWriter(conf, null, Put.class, true, tblProps, progress);
    writer.write(new Put(new CKeyValue("a".getBytes(), "b".getBytes(), "c".getBytes(), "d".getBytes())));
    Assert.assertEquals(1, i[0]);

    SeekingCurrentIterator it = db.getTableAdapter("baz").get(new Get(new CRange("a".getBytes())));
    // this is failing - looks like stuff has been put but isn't coming out of
    // the get
    Assert.assertTrue("Iterator should have a next value", it.hasNext());
    Result next = it.next();
    Assert.assertTrue("Result row should be 'a' byte equivalent",
            Arrays.equals("a".getBytes(), next.getRecordId()));
    Assert.assertTrue("Result should be ",
            Arrays.equals("d".getBytes(), next.getValue("b".getBytes(), "c".getBytes()).getValue()));
}

From source file:com.benchmark.mapred.terasort.TeraGen.java

License:Apache License

public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new JobConf(), new TeraGen(), args);
    System.exit(res);
}

From source file:com.benchmark.mapred.terasort.TeraSort.java

License:Apache License

/**
 * @param args
 */
public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new JobConf(), new TeraSort(), args);
    System.exit(res);
}

From source file:com.benchmark.mapred.terasort.TeraValidate.java

License:Apache License

/**
 * @param args
 */
public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new JobConf(), new TeraValidate(), args);
    System.exit(res);
}

From source file:com.bosscs.spark.mongodb.config.MongoDeepJobConfig.java

License:Apache License

/**
 * {@inheritDoc}
 */
@Override
public MongoDeepJobConfig<T> initialize() {
    validate();
    super.initialize();
    // JobConf extends Configuration, so a single assignment is sufficient here
    configHadoop = new JobConf();
    StringBuilder connection = new StringBuilder();

    connection.append("mongodb").append(":").append("//");

    if (username != null && password != null) {
        connection.append(username).append(":").append(password).append("@");
    }

    boolean firstHost = true;
    for (String hostName : host) {
        if (!firstHost) {
            connection.append(",");
        }
        connection.append(hostName);
        firstHost = false;
    }

    connection.append("/").append(catalog).append(".").append(table);

    StringBuilder options = new StringBuilder();
    boolean asignado = false;

    if (readPreference != null) {
        asignado = true;
        options.append("?readPreference=").append(readPreference);
    }

    if (replicaSet != null) {
        if (asignado) {
            options.append("&");
        } else {
            options.append("?");
        }
        options.append("replicaSet=").append(replicaSet);
    }

    connection.append(options);

    configHadoop.set(MongoConfigUtil.INPUT_URI, connection.toString());

    configHadoop.set(MongoConfigUtil.OUTPUT_URI, connection.toString());

    configHadoop.set(MongoConfigUtil.INPUT_SPLIT_SIZE, String.valueOf(splitSize));

    if (inputKey != null) {
        configHadoop.set(MongoConfigUtil.INPUT_KEY, inputKey);
    }

    configHadoop.set(MongoConfigUtil.SPLITS_USE_SHARDS, String.valueOf(useShards));

    configHadoop.set(MongoConfigUtil.CREATE_INPUT_SPLITS, String.valueOf(createInputSplit));

    configHadoop.set(MongoConfigUtil.SPLITS_USE_CHUNKS, String.valueOf(splitsUseChunks));

    if (query != null) {
        configHadoop.set(MongoConfigUtil.INPUT_QUERY, query.toString());
    }

    if (fields != null) {
        configHadoop.set(MongoConfigUtil.INPUT_FIELDS, fields.toString());
    }

    if (sort != null) {
        configHadoop.set(MongoConfigUtil.INPUT_SORT, sort);
    }

    if (username != null && password != null) {
        configHadoop.set(MongoConfigUtil.INPUT_URI, connection.toString());
    }

    if (customConfiguration != null) {
        Set<Map.Entry<String, Serializable>> set = customConfiguration.entrySet();
        Iterator<Map.Entry<String, Serializable>> iterator = set.iterator();
        while (iterator.hasNext()) {
            Map.Entry<String, Serializable> entry = iterator.next();
            configHadoop.set(entry.getKey(), entry.getValue().toString());
        }
    }

    return this;
}

From source file:com.cloudera.knittingboar.records.TestTwentyNewsgroupsCustomRecordParseOLRRun.java

License:Apache License

@Before
public void setup() throws Exception {
    defaultConf = new JobConf();
    defaultConf.set("fs.defaultFS", "file:///");
    localFs = FileSystem.getLocal(defaultConf);
    inputFileName = "kboar-shard-0.txt";
    baseDir = Files.createTempDir();
    File inputFile = new File(baseDir, inputFileName);
    TestingUtils.copyDecompressed(inputFileName + ".gz", inputFile);
    workDir = new Path(baseDir.getAbsolutePath());
}

From source file:com.cloudera.knittingboar.sgd.TestRunPOLRMasterAndSingleWorker.java

License:Apache License

@Before
public void setup() throws Exception {
    defaultConf = new JobConf();
    defaultConf.set("fs.defaultFS", "file:///");
    localFs = FileSystem.getLocal(defaultConf);
    inputFileName = "kboar-shard-0.txt";
    baseDir = Files.createTempDir();
    File inputFile = new File(baseDir, inputFileName);
    TestingUtils.copyDecompressed(inputFileName + ".gz", inputFile);
    workDir = new Path(baseDir.getAbsolutePath());
    configuration = new Configuration();
    // feature vector size
    configuration.setInt("com.cloudera.knittingboar.setup.FeatureVectorSize", 10000);
    configuration.setInt("com.cloudera.knittingboar.setup.numCategories", 20);
    // local input split path
    configuration.set("com.cloudera.knittingboar.setup.LocalInputSplitPath", "hdfs://127.0.0.1/input/0");
    configuration.set("com.cloudera.knittingboar.setup.RecordFactoryClassname",
            "com.cloudera.knittingboar.records.TwentyNewsgroupsRecordFactory");
    /*
     * // predictor label names
     * c.set( "com.cloudera.knittingboar.setup.PredictorLabelNames", "x,y" );
     * // predictor var types
     * c.set( "com.cloudera.knittingboar.setup.PredictorVariableTypes", "numeric,numeric" );
     * // target variables
     * c.set( "com.cloudera.knittingboar.setup.TargetVariableName", "color" );
     * // column header names
     * c.set( "com.cloudera.knittingboar.setup.ColumnHeaderNames", "x,y,shape,color,k,k0,xx,xy,yy,a,b,c,bias" );
     * //c.set( "com.cloudera.knittingboar.setup.ColumnHeaderNames", "\"x\",\"y\",\"shape\",\"color\",\"k\",\"k0\",\"xx\",\"xy\",\"yy\",\"a\",\"b\",\"c\",\"bias\"\n" );
     */
}

From source file:com.cloudera.knittingboar.sgd.TestRunPOLRMasterAndTwoWorkers.java

License:Apache License

@Before
public void setup() throws Exception {
    defaultConf = new JobConf();
    defaultConf.set("fs.defaultFS", "file:///");
    localFs = FileSystem.getLocal(defaultConf);
    inputFileName = "kboar-shard-0.txt";
    baseDir = Files.createTempDir();
    File inputFile = new File(baseDir, inputFileName);
    TestingUtils.copyDecompressed(inputFileName + ".gz", inputFile);
    workDir = new Path(baseDir.getAbsolutePath());
    configuration = new Configuration();
    // feature vector size
    configuration.setInt("com.cloudera.knittingboar.setup.FeatureVectorSize", 10000);
    configuration.setInt("com.cloudera.knittingboar.setup.numCategories", 20);
    configuration.setInt("com.cloudera.knittingboar.setup.BatchSize", 200);
    // local input split path
    configuration.set("com.cloudera.knittingboar.setup.LocalInputSplitPath", "hdfs://127.0.0.1/input/0");
    configuration.set("com.cloudera.knittingboar.setup.RecordFactoryClassname",
            "com.cloudera.knittingboar.records.TwentyNewsgroupsRecordFactory");
    /*    // predictor label names
        c.set( "com.cloudera.knittingboar.setup.PredictorLabelNames", "x,y" );
        // predictor var types
        c.set( "com.cloudera.knittingboar.setup.PredictorVariableTypes", "numeric,numeric" );
                
        // target variables
        c.set( "com.cloudera.knittingboar.setup.TargetVariableName", "color" );
            
        // column header names
        c.set( "com.cloudera.knittingboar.setup.ColumnHeaderNames", "x,y,shape,color,k,k0,xx,xy,yy,a,b,c,bias" );
        //c.set( "com.cloudera.knittingboar.setup.ColumnHeaderNames", "\"x\",\"y\",\"shape\",\"color\",\"k\",\"k0\",\"xx\",\"xy\",\"yy\",\"a\",\"b\",\"c\",\"bias\"\n" );
     */
}