List of usage examples for org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
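Before the collected examples, here is a minimal, self-contained sketch of the most common TextOutputFormat usage: as the output format of a plain MapReduce job. MyMapper and MyReducer are hypothetical placeholders, not classes from any example below.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

    public class TextOutputFormatDriver {
        public static void main(String[] args) throws Exception {
            Job job = Job.getInstance(new Configuration(), "text-output-example");
            job.setJarByClass(TextOutputFormatDriver.class);
            // MyMapper and MyReducer stand in for your own mapper and reducer classes
            job.setMapperClass(MyMapper.class);
            job.setReducerClass(MyReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            // TextOutputFormat writes each record as "key<TAB>value" on its own line
            job.setOutputFormatClass(TextOutputFormat.class);
            TextInputFormat.addInputPath(job, new Path(args[0]));
            TextOutputFormat.setOutputPath(job, new Path(args[1]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }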
From source file:br.puc_rio.ele.lvc.interimage.common.udf.CompressedJsonStorage.java
License:Apache License
    @Override
    public OutputFormat getOutputFormat() throws IOException {
        // We will use TextOutputFormat, the default Hadoop output format for
        // text. The key is unused and the value will be a Text (a string
        // writable type) that we store our JSON data in.
        return new TextOutputFormat<LongWritable, Text>();
    }
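In a Pig StoreFunc like this one, getOutputFormat is normally paired with a setStoreLocation override that points the output format at the store location. The sketch below shows that standard pairing; it is an assumption about this class, not code taken from CompressedJsonStorage.

    @Override
    public void setStoreLocation(String location, Job job) throws IOException {
        // TextOutputFormat inherits setOutputPath from FileOutputFormat
        TextOutputFormat.setOutputPath(job, new Path(location));
    }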
From source file:com.acme.io.JsonStorage.java
License:Apache License
    /**
     * Return the OutputFormat associated with StoreFunc. This will be called
     * on the front end during planning and on the backend during execution.
     *
     * @return the {@link OutputFormat} associated with StoreFunc
     * @throws IOException if an exception occurs while constructing the
     *         OutputFormat
     */
    @Override
    public OutputFormat getOutputFormat() throws IOException {
        // We will use TextOutputFormat, the default Hadoop output format for
        // text. The key is unused and the value will be a Text (a string
        // writable type) that we store our JSON data in.
        return new TextOutputFormat<LongWritable, Text>();
    }
From source file:com.blackberry.logdriver.pig.FirstItemOnlyStoreFunc.java
License:Apache License
    @SuppressWarnings("rawtypes")
    @Override
    public OutputFormat getOutputFormat() throws IOException {
        return new TextOutputFormat<Text, NullWritable>();
    }
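A note on the type parameters: TextOutputFormat's record writer treats a null or NullWritable key or value specially, writing only the non-null side and omitting the tab separator. With <Text, NullWritable> as here, each record therefore comes out as a single bare line containing just the key.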
From source file:com.hadoop.mapreduce.TestLzoTextInputFormat.java
License:Open Source License
    /**
     * Creates an lzo file with random data.
     *
     * @param outputDir Output directory.
     * @param fs File system we're using.
     * @param attemptContext Task attempt context, contains task id etc.
     * @param charsToOutput How many characters of random data to write (at minimum).
     * @return md5 digest of the data written.
     * @throws IOException
     * @throws InterruptedException
     */
    private byte[] createTestInput(Path outputDir, FileSystem fs, TaskAttemptContext attemptContext,
            int charsToOutput) throws IOException, InterruptedException {
        TextOutputFormat<Text, Text> output = new TextOutputFormat<Text, Text>();
        RecordWriter<Text, Text> rw = null;
        md5.reset();
        try {
            rw = output.getRecordWriter(attemptContext);
            char[] chars = "abcdefghijklmnopqrstuvwxyz\u00E5\u00E4\u00F6".toCharArray();
            Random r = new Random(System.currentTimeMillis());
            Text key = new Text();
            Text value = new Text();
            int charsMax = chars.length - 1;
            for (int i = 0; i < charsToOutput;) {
                i += fillText(chars, r, charsMax, key);
                i += fillText(chars, r, charsMax, value);
                rw.write(key, value);
                md5.update(key.getBytes(), 0, key.getLength());
                // text output format writes tab between the key and value
                md5.update("\t".getBytes("UTF-8"));
                md5.update(value.getBytes(), 0, value.getLength());
            }
        } finally {
            if (rw != null) {
                rw.close(attemptContext);
                OutputCommitter committer = output.getOutputCommitter(attemptContext);
                committer.commitTask(attemptContext);
                committer.cleanupJob(attemptContext);
            }
        }
        byte[] result = md5.digest();
        md5.reset();
        return result;
    }
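As the comment inside the loop notes, TextOutputFormat puts a tab between key and value by default. The separator can be changed through the job configuration; a minimal sketch, assuming a Job instance named job (these are the same two property keys, hadoop2 and hadoop1 respectively, that the Flink examples further down this page set):

    Configuration conf = job.getConfiguration();
    conf.set("mapreduce.output.textoutputformat.separator", ","); // hadoop2 key
    conf.set("mapred.textoutputformat.separator", ",");           // hadoop1 key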
From source file:com.mozilla.grouperfish.pig.storage.LDACStorage.java
License:Apache License
    @SuppressWarnings("rawtypes")
    @Override
    public OutputFormat getOutputFormat() throws IOException {
        return new TextOutputFormat<LongWritable, Text>();
    }
From source file:de.l3s.archivepig.ExtractionStorage.java
License:Open Source License
    @Override
    public OutputFormat getOutputFormat() throws IOException {
        return new TextOutputFormat<WritableComparable, Text>();
    }
From source file:eu.stratosphere.hadoopcompatibility.mapreduce.example.WordCount.java
License:Apache License
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: WordCount <input path> <result path>");
            return;
        }

        final String inputPath = args[0];
        final String outputPath = args[1];

        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(1);

        // Set up the Hadoop Input Format
        Job job = Job.getInstance();
        HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(
                new TextInputFormat(), LongWritable.class, Text.class, job);
        TextInputFormat.addInputPath(job, new Path(inputPath));

        // Create a Stratosphere job with it
        DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);

        // Tokenize the line and convert from Writable "Text" to String for better handling
        DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());

        // Sum up the words
        DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);

        // Convert String back to Writable "Text" for use with Hadoop Output Format
        DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());

        // Set up Hadoop Output Format
        HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, IntWritable>(
                new TextOutputFormat<Text, IntWritable>(), job);
        // set the separator for both keys, since this test is being executed
        // with both types (hadoop1 and hadoop2 profile)
        hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
        hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " ");
        TextOutputFormat.setOutputPath(job, new Path(outputPath));

        // Output & Execute
        hadoopResult.output(hadoopOutputFormat);
        env.execute("Word Count");
    }
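Neither Tokenizer nor HadoopDatatypeMapper appears in this listing. As an assumption based on the standard word-count pattern and the FlatMapFunction/Collector API of this Flink/Stratosphere generation, Tokenizer plausibly looks something like:

    public static final class Tokenizer
            implements FlatMapFunction<Tuple2<LongWritable, Text>, Tuple2<String, Integer>> {
        @Override
        public void flatMap(Tuple2<LongWritable, Text> value, Collector<Tuple2<String, Integer>> out) {
            // normalize the line, split it into words, and emit a (word, 1) pair per word
            for (String token : value.f1.toString().toLowerCase().split("\\W+")) {
                if (token.length() > 0) {
                    out.collect(new Tuple2<String, Integer>(token, 1));
                }
            }
        }
    }

The same two helper classes are referenced by the nearly identical WordCount examples below.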
From source file:org.apache.flink.hadoopcompatibility.mapreduce.example.WordCount.java
License:Apache License
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: WordCount <input path> <result path>");
            return;
        }

        final String inputPath = args[0];
        final String outputPath = args[1];

        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(1);

        // Set up the Hadoop Input Format
        Job job = Job.getInstance();
        HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(
                new TextInputFormat(), LongWritable.class, Text.class, job);
        TextInputFormat.addInputPath(job, new Path(inputPath));

        // Create a Flink job with it
        DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);

        // Tokenize the line and convert from Writable "Text" to String for better handling
        DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());

        // Sum up the words
        DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);

        // Convert String back to Writable "Text" for use with Hadoop Output Format
        DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());

        // Set up Hadoop Output Format
        HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, IntWritable>(
                new TextOutputFormat<Text, IntWritable>(), job);
        // set the separator for both keys, since this test is being executed
        // with both types (hadoop1 and hadoop2 profile)
        hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
        hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " ");
        TextOutputFormat.setOutputPath(job, new Path(outputPath));

        // Output & Execute
        hadoopResult.output(hadoopOutputFormat);
        env.execute("Word Count");
    }
From source file:org.apache.flink.test.hadoop.mapreduce.WordCountMapreduceITCase.java
License:Apache License
    @Override
    protected void testProgram() throws Exception {
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Tuple2<LongWritable, Text>> input = env.readHadoopFile(new TextInputFormat(),
                LongWritable.class, Text.class, textPath);

        DataSet<String> text = input.map(new MapFunction<Tuple2<LongWritable, Text>, String>() {
            @Override
            public String map(Tuple2<LongWritable, Text> value) throws Exception {
                return value.f1.toString();
            }
        });

        DataSet<Tuple2<String, Integer>> counts =
                // split up the lines in pairs (2-tuples) containing: (word,1)
                text.flatMap(new Tokenizer())
                        // group by the tuple field "0" and sum up tuple field "1"
                        .groupBy(0).sum(1);

        DataSet<Tuple2<Text, LongWritable>> words = counts
                .map(new MapFunction<Tuple2<String, Integer>, Tuple2<Text, LongWritable>>() {
                    @Override
                    public Tuple2<Text, LongWritable> map(Tuple2<String, Integer> value) throws Exception {
                        return new Tuple2<Text, LongWritable>(new Text(value.f0), new LongWritable(value.f1));
                    }
                });

        // Set up Hadoop Output Format
        Job job = Job.getInstance();
        HadoopOutputFormat<Text, LongWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, LongWritable>(
                new TextOutputFormat<Text, LongWritable>(), job);
        job.getConfiguration().set("mapred.textoutputformat.separator", " ");
        TextOutputFormat.setOutputPath(job, new Path(resultPath));

        // Output & Execute
        words.output(hadoopOutputFormat);
        env.execute("Hadoop Compat WordCount");
    }
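Note that this test sets only the hadoop1-era mapred.textoutputformat.separator key, whereas the WordCount examples on this page set mapreduce.output.textoutputformat.separator as well, so that the separator takes effect under both the hadoop1 and hadoop2 profiles.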
From source file:org.apache.flink.test.hadoopcompatibility.mapreduce.example.WordCount.java
License:Apache License
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: WordCount <input path> <result path>");
            return;
        }

        final String inputPath = args[0];
        final String outputPath = args[1];

        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Set up the Hadoop Input Format
        Job job = Job.getInstance();
        HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(
                new TextInputFormat(), LongWritable.class, Text.class, job);
        TextInputFormat.addInputPath(job, new Path(inputPath));

        // Create a Flink job with it
        DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);

        // Tokenize the line and convert from Writable "Text" to String for better handling
        DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());

        // Sum up the words
        DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);

        // Convert String back to Writable "Text" for use with Hadoop Output Format
        DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());

        // Set up Hadoop Output Format
        HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, IntWritable>(
                new TextOutputFormat<Text, IntWritable>(), job);
        // set the separator for both keys, since this test is being executed
        // with both types (hadoop1 and hadoop2 profile)
        hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
        hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " ");
        TextOutputFormat.setOutputPath(job, new Path(outputPath));

        // Output & Execute
        hadoopResult.output(hadoopOutputFormat);
        env.execute("Word Count");
    }