Example usage for org.apache.hadoop.mapred JobConf JobConf

List of usage examples for org.apache.hadoop.mapred JobConf JobConf

Introduction

On this page you can find example usages of the org.apache.hadoop.mapred JobConf constructor, JobConf.

Prototype

public JobConf(boolean loadDefaults) 

Document

A new map/reduce configuration where the behavior of reading from the default resources can be turned off.
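
The boolean constructor itself is not exercised in the examples below, which build a JobConf from an existing Configuration or a job class instead. As a minimal sketch of the loadDefaults form (the class name JobConfExample and the property name my.custom.property are assumptions made purely for illustration):

import org.apache.hadoop.mapred.JobConf;

public class JobConfExample {
    public static void main(String[] args) {
        // Passing false skips reading the default resources (e.g. core-default.xml,
        // core-site.xml), so the configuration starts empty and only explicitly
        // set properties are visible.
        JobConf job = new JobConf(false);
        job.set("my.custom.property", "value"); // hypothetical property, for illustration only
        System.out.println(job.get("my.custom.property"));
    }
}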

Usage

From source file:com.datasalt.pangool.benchmark.urlresolution.HadoopUrlResolution.java

License:Apache License

public final static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: urlresolution <url-map> <url-register> <out>");
        System.exit(2);
    }
    JobConf job = new JobConf(conf);
    FileSystem fS = FileSystem.get(conf);
    fS.delete(new Path(otherArgs[2]), true);

    MultipleInputs.addInputPath(job, new Path(otherArgs[0]), TextInputFormat.class, UrlMapClass.class);
    MultipleInputs.addInputPath(job, new Path(otherArgs[1]), TextInputFormat.class, UrlRegisterMapClass.class);

    job.setJarByClass(HadoopUrlResolution.class);

    job.setPartitionerClass(KeyPartitioner.class);
    job.setOutputValueGroupingComparator(GroupingComparator.class);

    job.setMapOutputKeyClass(UrlRegJoinUrlMap.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

    Job j = new Job(job);
    j.setReducerClass(Reduce.class);
    j.waitForCompletion(true);
}

From source file:com.datascience.cascading.CsvSchemeTest.java

License:Apache License

/**
 * Tests the content of an output path against the given expected path.
 */
@SuppressWarnings("unchecked")
private void testPaths(String actual, String expected) throws Exception {

    Tap outputTest = new Hfs(new TextLine(), actual);
    Tap expectedTest = new Hfs(new TextLine(), expected);

    FlowProcess outputProcess = new HadoopFlowProcess(new JobConf(new Configuration()));
    FlowProcess expectedProcess = new HadoopFlowProcess(new JobConf(new Configuration()));

    TupleEntryIterator outputIterator = outputTest.openForRead(outputProcess);
    TupleEntryIterator expectedIterator = expectedTest.openForRead(expectedProcess);

    List<String> outputList = new ArrayList<>();
    while (outputIterator.hasNext()) {
        outputList.add(outputIterator.next().getTuple().getString(1));
    }

    List<String> expectedList = new ArrayList<>();
    while (expectedIterator.hasNext()) {
        expectedList.add(expectedIterator.next().getTuple().getString(1));
    }

    assertTrue(outputList.equals(expectedList));

}

From source file:com.datascience.hadoop.CsvInputFormatTest.java

License:Apache License

@Before
public void initialize() throws IOException {
    helper = new CsvHelper();
    String[] columns = { "id", "first name", "last name" };
    conf = helper.buildConfiguration(",", "true", "\n", columns);
    jobConf = new JobConf(conf);
    fs = FileSystem.get(conf);
}

From source file:com.datascience.hadoop.CsvInputFormatTest.java

License:Apache License

/**
 * Tests if CSVInputFormat returns a valid Record Reader.
 */
@Test
public void formatShouldReturnValidRecordReader() throws IOException {
    JobConf jobConf = new JobConf(conf);
    CsvInputFormat format = helper.createCSVInputFormat(conf);
    File inputFile = helper.getFile("/input/with-headers.txt.gz");
    Path inputPath = new Path(inputFile.getAbsoluteFile().toURI().toString());
    FileSplit split = helper.createFileSplit(inputPath, 0, inputFile.length());
    assertTrue(helper.createRecordReader(format, split, jobConf) instanceof CsvRecordReader);
}

From source file:com.datascience.hadoop.CsvOutputFormatTest.java

License:Apache License

/**
 * Tests that CsvOutputFormat returns a CsvRecordWriter when output compression is enabled.
 */
@Test
public void shouldBeAbleToWriteCompressedFormat() throws IOException {
    conf.set("mapreduce.output.fileoutputformat.compress", "true");
    conf.set("mapreduce.output.fileoutputformat.outputdir", "src/test/resources/output");
    conf.set("mapreduce.task.attempt.id", "attempt_200707121733_0003_m_00005_0");
    jobConf = new JobConf(conf);
    fs = FileSystem.get(conf);

    CsvOutputFormat format = ReflectionUtils.newInstance(CsvOutputFormat.class, conf);
    assertTrue(format.getRecordWriter(fs, jobConf, "output", null) instanceof CsvRecordWriter);
}

From source file:com.datascience.hadoop.CsvOutputFormatTest.java

License:Apache License

/**
 * Tests that CsvOutputFormat returns a CsvRecordWriter when output compression is disabled.
 */
@Test
public void shouldBeAbleToWriteNonCompressedFormat() throws IOException {
    conf.set("mapreduce.output.fileoutputformat.compress", "false");
    conf.set("mapreduce.output.fileoutputformat.outputdir", "src/test/resources/output");
    conf.set("mapreduce.task.attempt.id", "attempt_200707121733_0003_m_00005_0");
    jobConf = new JobConf(conf);
    fs = FileSystem.get(conf);

    CsvOutputFormat format = ReflectionUtils.newInstance(CsvOutputFormat.class, conf);
    assertTrue(format.getRecordWriter(fs, jobConf, "output", null) instanceof CsvRecordWriter);
}

From source file:com.datascience.hadoop.CsvRecordReaderTest.java

License:Apache License

@Test
public void readingExtraColumnsWhenNotStrict() throws IOException {

    helper = new CsvHelper();
    String[] columns = { "id", "first name", "last name", "city", "zip" };
    conf = helper.buildConfiguration("\t", "true", "\n", columns);
    conf.setBoolean(CsvInputFormat.STRICT_MODE, false);
    jobConf = new JobConf(conf);
    fs = FileSystem.get(conf);
    testForReadAllRecordsNotStrict("/input/with-extra-columns.txt", 7);
}

From source file:com.datascience.hadoop.CsvRecordReaderTest.java

License:Apache License

@Test(expected = CsvParseException.class)
public void readingExtraColumnsWhenStrict() throws IOException {

    helper = new CsvHelper();
    String[] columns = { "id", "first name", "last name", "city", "zip" };
    conf = helper.buildConfiguration("\t", "true", "\n", columns);
    conf.setBoolean(CsvInputFormat.STRICT_MODE, true);
    jobConf = new JobConf(conf);
    fs = FileSystem.get(conf);
    testForReadAllRecords("/input/with-extra-columns.txt", 5, 5);
}

From source file:com.datascience.hadoop.CsvRecordReaderTest.java

License:Apache License

@Test(expected = RuntimeException.class)
public void readerShouldNotParseErrorRecords() throws IOException {
    conf.set(CsvInputFormat.CSV_READER_QUOTE_CHARACTER, "\"");

    jobConf = new JobConf(conf);
    fs = FileSystem.get(conf);

    testForReadAllRecords("/input/skipped-lines.txt", 3, 4);
}

From source file:com.datatorrent.demos.mroperator.LineIndexer.java

License:Open Source License

/**
 * The actual main() method for our program; this is the
 * "driver" for the MapReduce job./*from w w w.jav a  2  s. c  o  m*/
 */
public static void main(String[] args) {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(LineIndexer.class);

    conf.setJobName("LineIndexer");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(conf, new Path("input"));
    FileOutputFormat.setOutputPath(conf, new Path("output"));

    conf.setMapperClass(LineIndexMapper.class);
    conf.setReducerClass(LineIndexReducer.class);

    client.setConf(conf);

    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}