Example usage for org.apache.hadoop.mapred JobConf JobConf

List of usage examples for org.apache.hadoop.mapred JobConf JobConf

Introduction

On this page you can find example usages of the org.apache.hadoop.mapred JobConf constructor, JobConf.

Prototype

public JobConf(boolean loadDefaults) 

Document

A new map/reduce configuration where the behavior of reading from the default resources can be turned off.
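
The boolean constructor itself is not exercised in the examples below, which build a JobConf from an existing Configuration or a job class instead. As a minimal sketch of the loadDefaults form (the class name JobConfExample and the property name my.custom.property are assumptions made purely for illustration):

import org.apache.hadoop.mapred.JobConf;

public class JobConfExample {
    public static void main(String[] args) {
        // Passing false skips reading the default resources (e.g. core-default.xml,
        // core-site.xml), so the configuration starts empty and only explicitly
        // set properties are visible.
        JobConf job = new JobConf(false);
        job.set("my.custom.property", "value"); // hypothetical property, for illustration only
        System.out.println(job.get("my.custom.property"));
    }
}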

Usage

From source file:com.datasalt.pangool.benchmark.urlresolution.HadoopUrlResolution.java

License:Apache License

public final static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: urlresolution <url-map> <url-register> <out>");
        System.exit(2);
    }
    JobConf job = new JobConf(conf);
    FileSystem fS = FileSystem.get(conf);
    fS.delete(new Path(otherArgs[2]), true);

    MultipleInputs.addInputPath(job, new Path(otherArgs[0]), TextInputFormat.class, UrlMapClass.class);
    MultipleInputs.addInputPath(job, new Path(otherArgs[1]), TextInputFormat.class, UrlRegisterMapClass.class);

    job.setJarByClass(HadoopUrlResolution.class);

    job.setPartitionerClass(KeyPartitioner.class);
    job.setOutputValueGroupingComparator(GroupingComparator.class);

    job.setMapOutputKeyClass(UrlRegJoinUrlMap.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

    Job j = new Job(job);
    j.setReducerClass(Reduce.class);
    j.waitForCompletion(true);
}

From source file:com.datascience.cascading.CsvSchemeTest.java

License:Apache License

/**
 * Tests the content of an output path against the given expected path.
 */
@SuppressWarnings("unchecked")
private void testPaths(String actual, String expected) throws Exception {

    Tap outputTest = new Hfs(new TextLine(), actual);
    Tap expectedTest = new Hfs(new TextLine(), expected);

    FlowProcess outputProcess = new HadoopFlowProcess(new JobConf(new Configuration()));
    FlowProcess expectedProcess = new HadoopFlowProcess(new JobConf(new Configuration()));

    TupleEntryIterator outputIterator = outputTest.openForRead(outputProcess);
    TupleEntryIterator expectedIterator = expectedTest.openForRead(expectedProcess);

    List<String> outputList = new ArrayList<>();
    while (outputIterator.hasNext()) {
        outputList.add(outputIterator.next().getTuple().getString(1));
    }

    List<String> expectedList = new ArrayList<>();
    while (expectedIterator.hasNext()) {
        expectedList.add(expectedIterator.next().getTuple().getString(1));
    }

    assertTrue(outputList.equals(expectedList));

}

From source file:com.datascience.hadoop.CsvInputFormatTest.java

License:Apache License

@Before
public void initialize() throws IOException {
    helper = new CsvHelper();
    String[] columns = { "id", "first name", "last name" };
    conf = helper.buildConfiguration(",", "true", "\n", columns);
    jobConf = new JobConf(conf);
    fs = FileSystem.get(conf);
}

From source file:com.datascience.hadoop.CsvInputFormatTest.java

License:Apache License

/**
 * Tests if CSVInputFormat returns a valid Record Reader.
 */
@Test
public void formatShouldReturnValidRecordReader() throws IOException {
    JobConf jobConf = new JobConf(conf);
    CsvInputFormat format = helper.createCSVInputFormat(conf);
    File inputFile = helper.getFile("/input/with-headers.txt.gz");
    Path inputPath = new Path(inputFile.getAbsoluteFile().toURI().toString());
    FileSplit split = helper.createFileSplit(inputPath, 0, inputFile.length());
    assertTrue(helper.createRecordReader(format, split, jobConf) instanceof CsvRecordReader);
}

From source file:com.datascience.hadoop.CsvOutputFormatTest.java

License:Apache License

/**
 * Tests that CsvOutputFormat returns a CsvRecordWriter when output compression is enabled.
 */
@Test
public void shouldBeAbleToWriteCompressedFormat() throws IOException {
    conf.set("mapreduce.output.fileoutputformat.compress", "true");
    conf.set("mapreduce.output.fileoutputformat.outputdir", "src/test/resources/output");
    conf.set("mapreduce.task.attempt.id", "attempt_200707121733_0003_m_00005_0");
    jobConf = new JobConf(conf);
    fs = FileSystem.get(conf);

    CsvOutputFormat format = ReflectionUtils.newInstance(CsvOutputFormat.class, conf);
    assertTrue(format.getRecordWriter(fs, jobConf, "output", null) instanceof CsvRecordWriter);
}

From source file:com.datascience.hadoop.CsvOutputFormatTest.java

License:Apache License

/**
 * Tests that CsvOutputFormat returns a CsvRecordWriter when output compression is disabled.
 */
@Test
public void shouldBeAbleToWriteNonCompressedFormat() throws IOException {
    conf.set("mapreduce.output.fileoutputformat.compress", "false");
    conf.set("mapreduce.output.fileoutputformat.outputdir", "src/test/resources/output");
    conf.set("mapreduce.task.attempt.id", "attempt_200707121733_0003_m_00005_0");
    jobConf = new JobConf(conf);
    fs = FileSystem.get(conf);

    CsvOutputFormat format = ReflectionUtils.newInstance(CsvOutputFormat.class, conf);
    assertTrue(format.getRecordWriter(fs, jobConf, "output", null) instanceof CsvRecordWriter);
}

From source file:com.datascience.hadoop.CsvRecordReaderTest.java

License:Apache License

@Test
public void readingExtraColumnsWhenNotStrict() throws IOException {

    helper = new CsvHelper();
    String[] columns = { "id", "first name", "last name", "city", "zip" };
    conf = helper.buildConfiguration("\t", "true", "\n", columns);
    conf.setBoolean(CsvInputFormat.STRICT_MODE, false);
    jobConf = new JobConf(conf);
    fs = FileSystem.get(conf);
    testForReadAllRecordsNotStrict("/input/with-extra-columns.txt", 7);
}

From source file:com.datascience.hadoop.CsvRecordReaderTest.java

License:Apache License

@Test(expected = CsvParseException.class)
public void readingExtraColumnsWhenStrict() throws IOException {

    helper = new CsvHelper();
    String[] columns = { "id", "first name", "last name", "city", "zip" };
    conf = helper.buildConfiguration("\t", "true", "\n", columns);
    conf.setBoolean(CsvInputFormat.STRICT_MODE, true);
    jobConf = new JobConf(conf);
    fs = FileSystem.get(conf);
    testForReadAllRecords("/input/with-extra-columns.txt", 5, 5);
}

From source file:com.datascience.hadoop.CsvRecordReaderTest.java

License:Apache License

@Test(expected = RuntimeException.class)
public void readerShouldNotParseErrorRecords() throws IOException {
    conf.set(CsvInputFormat.CSV_READER_QUOTE_CHARACTER, "\"");

    jobConf = new JobConf(conf);
    fs = FileSystem.get(conf);

    testForReadAllRecords("/input/skipped-lines.txt", 3, 4);
}

From source file:com.datatorrent.demos.mroperator.LineIndexer.java

License:Open Source License

/**
 * The actual main() method for our program; this is the
 * "driver" for the MapReduce job./*from w w w.jav a  2  s. c  o  m*/
 */
public static void main(String[] args) {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(LineIndexer.class);

    conf.setJobName("LineIndexer");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(conf, new Path("input"));
    FileOutputFormat.setOutputPath(conf, new Path("output"));

    conf.setMapperClass(LineIndexMapper.class);
    conf.setReducerClass(LineIndexReducer.class);

    client.setConf(conf);

    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}