List of usage examples for the org.apache.hadoop.mapred.JobConf constructor
public JobConf(boolean loadDefaults)
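None of the examples below actually passes a boolean to the constructor; they use the related overloads that take a Configuration or a jar-bearing class. For orientation, here is a minimal sketch of the common JobConf constructor overloads from the standard Hadoop mapred API (the class name JobConfSketch is hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;

public class JobConfSketch {
    public static void main(String[] args) {
        // loadDefaults = true reads the default resources (core-default.xml,
        // core-site.xml, ...); false starts from an empty configuration.
        JobConf withDefaults = new JobConf(true);
        JobConf empty = new JobConf(false);

        // Copy settings from an existing Configuration (the pattern used below).
        Configuration conf = new Configuration();
        JobConf fromConf = new JobConf(conf);

        // Additionally locate and set the job jar containing the given class.
        JobConf withJar = new JobConf(conf, JobConfSketch.class);
    }
}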
From source file:com.datasalt.pangool.benchmark.urlresolution.HadoopUrlResolution.java
License:Apache License
public final static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: urlresolution <url-map> <url-register> <out>");
        System.exit(2);
    }
    JobConf job = new JobConf(conf);
    FileSystem fS = FileSystem.get(conf);
    fS.delete(new Path(otherArgs[2]), true);
    MultipleInputs.addInputPath(job, new Path(otherArgs[0]), TextInputFormat.class, UrlMapClass.class);
    MultipleInputs.addInputPath(job, new Path(otherArgs[1]), TextInputFormat.class, UrlRegisterMapClass.class);
    job.setJarByClass(HadoopUrlResolution.class);
    job.setPartitionerClass(KeyPartitioner.class);
    job.setOutputValueGroupingComparator(GroupingComparator.class);
    job.setMapOutputKeyClass(UrlRegJoinUrlMap.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));
    Job j = new Job(job);
    j.setReducerClass(Reduce.class);
    j.waitForCompletion(true);
}
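Note the API mixing in this driver: inputs, partitioner, and grouping comparator are configured through the old mapred API on the JobConf, and the same configuration is then wrapped in a new-API Job to attach the reducer. This works because JobConf extends Configuration, so it can be passed wherever a Configuration is expected.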
From source file:com.datascience.cascading.CsvSchemeTest.java
License:Apache License
/**
 * Tests the content of an output path against the given expected path.
 */
@SuppressWarnings("unchecked")
private void testPaths(String actual, String expected) throws Exception {
    Tap outputTest = new Hfs(new TextLine(), actual);
    Tap expectedTest = new Hfs(new TextLine(), expected);
    FlowProcess outputProcess = new HadoopFlowProcess(new JobConf(new Configuration()));
    FlowProcess expectedProcess = new HadoopFlowProcess(new JobConf(new Configuration()));
    TupleEntryIterator outputIterator = outputTest.openForRead(outputProcess);
    TupleEntryIterator expectedIterator = expectedTest.openForRead(expectedProcess);
    List<String> outputList = new ArrayList<>();
    while (outputIterator.hasNext()) {
        outputList.add(outputIterator.next().getTuple().getString(1));
    }
    List<String> expectedList = new ArrayList<>();
    while (expectedIterator.hasNext()) {
        expectedList.add(expectedIterator.next().getTuple().getString(1));
    }
    assertTrue(outputList.equals(expectedList));
}
From source file:com.datascience.hadoop.CsvInputFormatTest.java
License:Apache License
@Before
public void initialize() throws IOException {
    helper = new CsvHelper();
    String[] columns = { "id", "first name", "last name" };
    conf = helper.buildConfiguration(",", "true", "\n", columns);
    jobConf = new JobConf(conf);
    fs = FileSystem.get(conf);
}
From source file:com.datascience.hadoop.CsvInputFormatTest.java
License:Apache License
/**
 * Tests that CsvInputFormat returns a valid RecordReader.
 */
@Test
public void formatShouldReturnValidRecordReader() throws IOException {
    JobConf jobConf = new JobConf(conf);
    CsvInputFormat format = helper.createCSVInputFormat(conf);
    File inputFile = helper.getFile("/input/with-headers.txt.gz");
    Path inputPath = new Path(inputFile.getAbsoluteFile().toURI().toString());
    FileSplit split = helper.createFileSplit(inputPath, 0, inputFile.length());
    assertTrue(helper.createRecordReader(format, split, jobConf) instanceof CsvRecordReader);
}
From source file:com.datascience.hadoop.CsvOutputFormatTest.java
License:Apache License
/**
 * Tests that the OutputFormat creates a correct RecordWriter instance when compression is enabled.
 */
@Test
public void shouldBeAbleToWriteCompressedFormat() throws IOException {
    conf.set("mapreduce.output.fileoutputformat.compress", "true");
    conf.set("mapreduce.output.fileoutputformat.outputdir", "src/test/resources/output");
    conf.set("mapreduce.task.attempt.id", "attempt_200707121733_0003_m_00005_0");
    jobConf = new JobConf(conf);
    fs = FileSystem.get(conf);
    CsvOutputFormat format = ReflectionUtils.newInstance(CsvOutputFormat.class, conf);
    assertTrue(format.getRecordWriter(fs, jobConf, "output", null) instanceof CsvRecordWriter);
}
From source file:com.datascience.hadoop.CsvOutputFormatTest.java
License:Apache License
/**
 * Tests that the OutputFormat creates a correct RecordWriter instance when compression is disabled.
 */
@Test
public void shouldBeAbleToWriteNonCompressedFormat() throws IOException {
    conf.set("mapreduce.output.fileoutputformat.compress", "false");
    conf.set("mapreduce.output.fileoutputformat.outputdir", "src/test/resources/output");
    conf.set("mapreduce.task.attempt.id", "attempt_200707121733_0003_m_00005_0");
    jobConf = new JobConf(conf);
    fs = FileSystem.get(conf);
    CsvOutputFormat format = ReflectionUtils.newInstance(CsvOutputFormat.class, conf);
    assertTrue(format.getRecordWriter(fs, jobConf, "output", null) instanceof CsvRecordWriter);
}
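Both tests toggle compression by setting mapreduce.output.fileoutputformat.compress directly. The same switch can be flipped through the old-API FileOutputFormat helpers; a minimal sketch (the exact property names these helpers write vary by Hadoop version):

import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class CompressionSketch {
    public static void main(String[] args) {
        JobConf jobConf = new JobConf(true); // loadDefaults = true
        // Enable output compression and choose an explicit codec.
        FileOutputFormat.setCompressOutput(jobConf, true);
        FileOutputFormat.setOutputCompressorClass(jobConf, GzipCodec.class);
    }
}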
From source file:com.datascience.hadoop.CsvRecordReaderTest.java
License:Apache License
@Test
public void readingExtraColumnsWhenNotStrict() throws IOException {
    helper = new CsvHelper();
    String[] columns = { "id", "first name", "last name", "city", "zip" };
    conf = helper.buildConfiguration("\t", "true", "\n", columns);
    conf.setBoolean(CsvInputFormat.STRICT_MODE, false);
    jobConf = new JobConf(conf);
    fs = FileSystem.get(conf);
    testForReadAllRecordsNotStrict("/input/with-extra-columns.txt", 7);
}
From source file:com.datascience.hadoop.CsvRecordReaderTest.java
License:Apache License
@Test(expected = CsvParseException.class)
public void readingExtraColumnsWhenStrict() throws IOException {
    helper = new CsvHelper();
    String[] columns = { "id", "first name", "last name", "city", "zip" };
    conf = helper.buildConfiguration("\t", "true", "\n", columns);
    conf.setBoolean(CsvInputFormat.STRICT_MODE, true);
    jobConf = new JobConf(conf);
    fs = FileSystem.get(conf);
    testForReadAllRecords("/input/with-extra-columns.txt", 5, 5);
}
From source file:com.datascience.hadoop.CsvRecordReaderTest.java
License:Apache License
@Test(expected = RuntimeException.class)
public void readerShouldNotParseErrorRecords() throws IOException {
    conf.set(CsvInputFormat.CSV_READER_QUOTE_CHARACTER, "\"");
    jobConf = new JobConf(conf);
    fs = FileSystem.get(conf);
    testForReadAllRecords("/input/skipped-lines.txt", 3, 4);
}
From source file:com.datatorrent.demos.mroperator.LineIndexer.java
License:Open Source License
/**
 * The actual main() method for our program; this is the
 * "driver" for the MapReduce job.
 */
public static void main(String[] args) {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(LineIndexer.class);
    conf.setJobName("LineIndexer");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(conf, new Path("input"));
    FileOutputFormat.setOutputPath(conf, new Path("output"));
    conf.setMapperClass(LineIndexMapper.class);
    conf.setReducerClass(LineIndexReducer.class);
    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
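The mapper and reducer referenced by this driver are not part of the listing; a hypothetical skeleton consistent with the driver's Text/Text output types might look like this (the word-splitting and offset-as-location logic are assumptions, not the original source):

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

// Hypothetical mapper: emits (word, location) pairs for a line index.
public class LineIndexMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, Text> {
    public void map(LongWritable key, Text value,
            OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
        Text location = new Text(key.toString()); // byte offset of the line
        for (String word : value.toString().split("\\s+")) {
            if (!word.isEmpty()) {
                output.collect(new Text(word), location);
            }
        }
    }
}

// Hypothetical reducer: concatenates all locations recorded for a word.
class LineIndexReducer extends MapReduceBase
        implements Reducer<Text, Text, Text, Text> {
    public void reduce(Text key, Iterator<Text> values,
            OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
        StringBuilder locations = new StringBuilder();
        while (values.hasNext()) {
            if (locations.length() > 0) {
                locations.append(",");
            }
            locations.append(values.next().toString());
        }
        output.collect(key, new Text(locations.toString()));
    }
}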