Example usage for org.apache.hadoop.mapreduce.lib.output TextOutputFormat TextOutputFormat

Introduction

On this page you can find example usage of the org.apache.hadoop.mapreduce.lib.output.TextOutputFormat constructor, TextOutputFormat().

Prototype

public TextOutputFormat()
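
Before the collected examples, here is a minimal driver sketch showing where the class fits in a plain MapReduce job. The paths are placeholders and the identity map/reduce defaults are relied on, so this is an illustration rather than code from any of the sources below; in a plain job the class is registered via setOutputFormatClass, while frameworks such as Pig and Flink instantiate it directly with new TextOutputFormat<K, V>(), as the examples that follow show.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class TextOutputFormatDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "text-output-format-example");
        job.setJarByClass(TextOutputFormatDriver.class);

        // TextInputFormat produces (LongWritable offset, Text line) pairs;
        // with the default identity mapper and reducer they pass through
        // unchanged, so the output key/value classes match them.
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        TextInputFormat.addInputPath(job, new Path(args[0]));    // placeholder input
        TextOutputFormat.setOutputPath(job, new Path(args[1]));  // placeholder output
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}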

Usage

From source file:br.puc_rio.ele.lvc.interimage.common.udf.CompressedJsonStorage.java

License:Apache License

@Override
public OutputFormat getOutputFormat() throws IOException {
    // We will use TextOutputFormat, the default Hadoop output format for
    // text.  The key is unused and the value will be a
    // Text (a string writable type) that we store our JSON data in.
    return new TextOutputFormat<LongWritable, Text>();
}
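
For context, the StoreFunc half that feeds this output format is sketched below under stated assumptions: the prepareToWrite/putNext signatures are Pig's, but the writer field and the tupleToJson helper are hypothetical stand-ins, not the actual CompressedJsonStorage code.

// Sketch of the write path that pairs with getOutputFormat() above.
private RecordWriter<LongWritable, Text> writer;

@Override
public void prepareToWrite(RecordWriter writer) {
    this.writer = writer;  // Pig hands over TextOutputFormat's record writer
}

@Override
public void putNext(Tuple tuple) throws IOException {
    try {
        // The key is unused (null), so TextOutputFormat writes only the
        // value; tupleToJson is a hypothetical JSON serializer.
        writer.write(null, new Text(tupleToJson(tuple)));
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
}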

From source file:com.acme.io.JsonStorage.java

License:Apache License

/**
 * Return the OutputFormat associated with StoreFunc.  This will be called
 * on the front end during planning and on the backend during
 * execution.
 * @return the {@link OutputFormat} associated with StoreFunc
 * @throws IOException if an exception occurs while constructing the 
 * OutputFormat
 *
 */
@Override
public OutputFormat getOutputFormat() throws IOException {
    // We will use TextOutputFormat, the default Hadoop output format for
    // text.  The key is unused and the value will be a
    // Text (a string writable type) that we store our JSON data in.
    return new TextOutputFormat<LongWritable, Text>();
}

From source file:com.blackberry.logdriver.pig.FirstItemOnlyStoreFunc.java

License:Apache License

@SuppressWarnings("rawtypes")
@Override
public OutputFormat getOutputFormat() throws IOException {
    return new TextOutputFormat<Text, NullWritable>();
}
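
A note on the <Text, NullWritable> parameterization: TextOutputFormat's line writer skips a null or NullWritable key or value entirely, separator included, so this StoreFunc emits just the key text, one record per line. For example:

// Only the key and a trailing newline are written:
rw.write(new Text("first-item"), NullWritable.get());   // line: first-item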

From source file:com.hadoop.mapreduce.TestLzoTextInputFormat.java

License:Open Source License

/**
 * Creates an lzo file with random data.
 *
 * @param outputDir Output directory.
 * @param fs File system we're using.
 * @param attemptContext Task attempt context, contains task id etc.
 * @param charsToOutput Approximate number of characters to write.
 * @return MD5 digest of the data that was written.
 * @throws IOException
 * @throws InterruptedException
 */
private byte[] createTestInput(Path outputDir, FileSystem fs, TaskAttemptContext attemptContext,
        int charsToOutput) throws IOException, InterruptedException {

    TextOutputFormat<Text, Text> output = new TextOutputFormat<Text, Text>();
    RecordWriter<Text, Text> rw = null;

    md5.reset();

    try {
        rw = output.getRecordWriter(attemptContext);

        char[] chars = "abcdefghijklmnopqrstuvwxyz\u00E5\u00E4\u00F6".toCharArray();

        Random r = new Random(System.currentTimeMillis());
        Text key = new Text();
        Text value = new Text();
        int charsMax = chars.length - 1;
        for (int i = 0; i < charsToOutput;) {
            i += fillText(chars, r, charsMax, key);
            i += fillText(chars, r, charsMax, value);
            rw.write(key, value);
            md5.update(key.getBytes(), 0, key.getLength());
            // text output format writes tab between the key and value
            md5.update("\t".getBytes("UTF-8"));
            md5.update(value.getBytes(), 0, value.getLength());
        }
    } finally {
        if (rw != null) {
            rw.close(attemptContext);
            OutputCommitter committer = output.getOutputCommitter(attemptContext);
            committer.commitTask(attemptContext);
            committer.cleanupJob(attemptContext);
        }
    }

    byte[] result = md5.digest();
    md5.reset();
    return result;
}
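
The fillText helper is not part of this excerpt. A plausible reconstruction is sketched below; it is an assumption about the real TestLzoTextInputFormat code, which may choose lengths differently.

// Hypothetical reconstruction: fill 'text' with a random-length run of
// random characters from 'chars' and report how many were appended.
private int fillText(char[] chars, Random r, int charsMax, Text text) {
    int len = r.nextInt(charsMax) + 1;   // exact bounds in the real test may differ
    StringBuilder sb = new StringBuilder(len);
    for (int i = 0; i < len; i++) {
        sb.append(chars[r.nextInt(chars.length)]);
    }
    text.set(sb.toString());
    return len;
}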

From source file:com.mozilla.grouperfish.pig.storage.LDACStorage.java

License:Apache License

@SuppressWarnings("rawtypes")
@Override
public OutputFormat getOutputFormat() throws IOException {
    return new TextOutputFormat<LongWritable, Text>();
}

From source file:de.l3s.archivepig.ExtractionStorage.java

License:Open Source License

@Override
public OutputFormat getOutputFormat() throws IOException {
    return new TextOutputFormat<WritableComparable, Text>();
}

From source file:eu.stratosphere.hadoopcompatibility.mapreduce.example.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: WordCount <input path> <result path>");
        return;
    }

    final String inputPath = args[0];
    final String outputPath = args[1];

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setDegreeOfParallelism(1);

    // Set up the Hadoop Input Format
    Job job = Job.getInstance();
    HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(
            new TextInputFormat(), LongWritable.class, Text.class, job);
    TextInputFormat.addInputPath(job, new Path(inputPath));

    // Create a Stratosphere job with it
    DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);

    // Tokenize the line and convert from Writable "Text" to String for better handling
    DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());

    // Sum up the words
    DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);

    // Convert String back to Writable "Text" for use with Hadoop Output Format
    DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());

    // Set up Hadoop Output Format
    HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, IntWritable>(
            new TextOutputFormat<Text, IntWritable>(), job);
    hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
    hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " "); // set the value for both, since this test
    // is being executed with both types (hadoop1 and hadoop2 profile)
    TextOutputFormat.setOutputPath(job, new Path(outputPath));

    // Output & Execute
    hadoopResult.output(hadoopOutputFormat);
    env.execute("Word Count");
}
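
TextOutputFormat writes each record as key, separator, value, newline, with a tab as the default separator; the two set(...) calls above install a space under both the hadoop1-era ("mapred.") and hadoop2 ("mapreduce.") property names. With the mapreduce API alone, only the newer key is needed (a sketch, assuming the job variable from above):

// the hadoop2 mapreduce API reads only this key; the default is "\t"
job.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");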

From source file:org.apache.flink.hadoopcompatibility.mapreduce.example.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: WordCount <input path> <result path>");
        return;
    }

    final String inputPath = args[0];
    final String outputPath = args[1];

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setDegreeOfParallelism(1);

    // Set up the Hadoop Input Format
    Job job = Job.getInstance();
    HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(
            new TextInputFormat(), LongWritable.class, Text.class, job);
    TextInputFormat.addInputPath(job, new Path(inputPath));

    // Create a Flink job with it
    DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);

    // Tokenize the line and convert from Writable "Text" to String for better handling
    DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());

    // Sum up the words
    DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);

    // Convert String back to Writable "Text" for use with Hadoop Output Format
    DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());

    // Set up Hadoop Output Format
    HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, IntWritable>(
            new TextOutputFormat<Text, IntWritable>(), job);
    hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
    hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " "); // set the value for both, since this test
    // is being executed with both types (hadoop1 and hadoop2 profile)
    TextOutputFormat.setOutputPath(job, new Path(outputPath));

    // Output & Execute
    hadoopResult.output(hadoopOutputFormat);
    env.execute("Word Count");
}

From source file:org.apache.flink.test.hadoop.mapreduce.WordCountMapreduceITCase.java

License:Apache License

@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<LongWritable, Text>> input = env.readHadoopFile(new TextInputFormat(), LongWritable.class,
            Text.class, textPath);

    DataSet<String> text = input.map(new MapFunction<Tuple2<LongWritable, Text>, String>() {
        @Override
        public String map(Tuple2<LongWritable, Text> value) throws Exception {
            return value.f1.toString();
        }
    });

    DataSet<Tuple2<String, Integer>> counts =
            // split up the lines in pairs (2-tuples) containing: (word,1)
            text.flatMap(new Tokenizer())
                    // group by the tuple field "0" and sum up tuple field "1"
                    .groupBy(0).sum(1);

    DataSet<Tuple2<Text, LongWritable>> words = counts
            .map(new MapFunction<Tuple2<String, Integer>, Tuple2<Text, LongWritable>>() {

                @Override
                public Tuple2<Text, LongWritable> map(Tuple2<String, Integer> value) throws Exception {
                    return new Tuple2<Text, LongWritable>(new Text(value.f0), new LongWritable(value.f1));
                }
            });

    // Set up Hadoop Output Format
    Job job = Job.getInstance();
    HadoopOutputFormat<Text, LongWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, LongWritable>(
            new TextOutputFormat<Text, LongWritable>(), job);
    job.getConfiguration().set("mapred.textoutputformat.separator", " ");
    TextOutputFormat.setOutputPath(job, new Path(resultPath));

    // Output & Execute
    words.output(hadoopOutputFormat);
    env.execute("Hadoop Compat WordCount");
}

From source file:org.apache.flink.test.hadoopcompatibility.mapreduce.example.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: WordCount <input path> <result path>");
        return;
    }

    final String inputPath = args[0];
    final String outputPath = args[1];

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Set up the Hadoop Input Format
    Job job = Job.getInstance();
    HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(
            new TextInputFormat(), LongWritable.class, Text.class, job);
    TextInputFormat.addInputPath(job, new Path(inputPath));

    // Create a Flink job with it
    DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);

    // Tokenize the line and convert from Writable "Text" to String for better handling
    DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());

    // Sum up the words
    DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);

    // Convert String back to Writable "Text" for use with Hadoop Output Format
    DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());

    // Set up Hadoop Output Format
    HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, IntWritable>(
            new TextOutputFormat<Text, IntWritable>(), job);
    hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
    hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " "); // set the value for both, since this test
    // is being executed with both types (hadoop1 and hadoop2 profile)
    TextOutputFormat.setOutputPath(job, new Path(outputPath));

    // Output & Execute
    hadoopResult.output(hadoopOutputFormat);
    env.execute("Word Count");
}
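
For reference, a successful run with a plain MapReduce driver leaves the familiar layout under the result path (a sketch; part numbering depends on the number of reduce tasks, and wrappers such as Flink's HadoopOutputFormat may name files slightly differently):

<result path>/
    _SUCCESS
    part-r-00000    one "word count" pair per line, separated as configured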