List of usage examples for org.apache.hadoop.mapreduce.lib.input.TextInputFormat
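As background for the examples below: TextInputFormat is Hadoop's default input format for plain text files. Records are lines; the key is the byte offset of the line in the file (a LongWritable) and the value is the line's contents (a Text). A minimal sketch of a map-only job wired to it follows; the class names and the echo mapper are illustrative, not taken from any example on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class TextInputDriver {

    // Mapper that echoes each line's byte offset and content unchanged.
    public static class EchoMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws java.io.IOException, InterruptedException {
            context.write(key, value); // key = byte offset, value = line
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "text-input-demo");
        job.setJarByClass(TextInputDriver.class);
        job.setInputFormatClass(TextInputFormat.class); // the default; set here for clarity
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setMapperClass(EchoMapper.class);
        job.setNumReduceTasks(0); // map-only job
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}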
From source file: ar.com.datatsunami.pig.FixedWidthLoader.java
License: Apache License

@SuppressWarnings("rawtypes")
@Override
public InputFormat getInputFormat() throws IOException {
    return new TextInputFormat();
}
From source file: com.acme.io.JsonLoader.java
License: Apache License

/**
 * This will be called during planning on the front end. This is the
 * instance of InputFormat (rather than the class name) because the
 * load function may need to instantiate the InputFormat in order
 * to control how it is constructed.
 * @return the InputFormat associated with this loader.
 * @throws IOException if there is an exception during InputFormat
 *         construction
 */
@SuppressWarnings("unchecked")
public InputFormat getInputFormat() throws IOException {
    // We will use TextInputFormat, the default Hadoop input format for
    // text. It has a LongWritable key that we will ignore, and the value
    // is a Text (a string writable) that the JSON data is in.
    return new TextInputFormat();
}
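To show where getInputFormat() fits in a Pig loader's lifecycle, here is a hedged skeleton of the surrounding LoadFunc methods. The class name and the pass-through handling of the line are illustrative assumptions, not the actual JsonLoader; a real loader would parse the JSON in getNext().

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.pig.LoadFunc;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;

// Sketch of a complete LoadFunc built around TextInputFormat; illustrative only.
public class SketchJsonLoader extends LoadFunc {
    @SuppressWarnings("rawtypes")
    private RecordReader reader;

    @Override
    public void setLocation(String location, Job job) throws IOException {
        // Point the InputFormat at the data; Pig calls this on front end and back end.
        FileInputFormat.setInputPaths(job, new Path(location));
    }

    @Override
    @SuppressWarnings("rawtypes")
    public InputFormat getInputFormat() throws IOException {
        return new TextInputFormat(); // one line of text per record
    }

    @Override
    @SuppressWarnings("rawtypes")
    public void prepareToRead(RecordReader reader, PigSplit split) {
        this.reader = reader; // Pig hands back a reader created from our InputFormat
    }

    @Override
    public Tuple getNext() throws IOException {
        try {
            if (!reader.nextKeyValue()) {
                return null; // end of this split
            }
            Text line = (Text) reader.getCurrentValue();
            // A real loader would parse the JSON here; this sketch passes the raw line through.
            return TupleFactory.getInstance().newTuple(line.toString());
        } catch (InterruptedException e) {
            throw new IOException(e);
        }
    }
}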
From source file: com.linkedin.cubert.pig.piggybank.storage.avro.AvroStorage.java
License: Apache License

@SuppressWarnings("rawtypes")
@Override
public InputFormat getInputFormat() throws IOException {
    AvroStorageLog.funcCall("getInputFormat");
    InputFormat result = null;
    if (inputAvroSchema != null) {
        result = new PigAvroInputFormat(inputAvroSchema, ignoreBadFiles,
                schemaToMergedSchemaMap, useMultipleSchemas);
    } else {
        result = new TextInputFormat();
    }
    return result;
}
From source file: com.mozilla.pig.load.DateRangeLoader.java
License: Apache License

@Override
@SuppressWarnings("rawtypes")
public InputFormat getInputFormat() throws IOException {
    return new TextInputFormat();
}
From source file: com.qq.pig.udf.CustomJsonLoader.java
License: Apache License

@SuppressWarnings("unchecked")
public InputFormat getInputFormat() throws IOException {
    // We will use TextInputFormat, the default Hadoop input format for
    // text. It has a LongWritable key that we will ignore, and the value
    // is a Text (a string writable) that the JSON data is in.
    return new TextInputFormat();
}
From source file: com.streamsets.pipeline.stage.origin.hdfs.cluster.ClusterHdfsSource.java
License: Apache License

private List<Map.Entry> previewTextBatch(FileStatus fileStatus, int batchSize)
        throws IOException, InterruptedException {
    TextInputFormat textInputFormat = new TextInputFormat();
    InputSplit fileSplit = new FileSplit(fileStatus.getPath(), 0, fileStatus.getLen(), null);
    TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(hadoopConf,
            TaskAttemptID.forName("attempt_1439420318532_0011_m_000000_0"));
    RecordReader<LongWritable, Text> recordReader =
            textInputFormat.createRecordReader(fileSplit, taskAttemptContext);
    recordReader.initialize(fileSplit, taskAttemptContext);
    boolean hasNext = recordReader.nextKeyValue();
    List<Map.Entry> batch = new ArrayList<>();
    while (hasNext && batch.size() < batchSize) {
        batch.add(new Pair(fileStatus.getPath().toUri().getPath() + "::" + recordReader.getCurrentKey(),
                String.valueOf(recordReader.getCurrentValue())));
        hasNext = recordReader.nextKeyValue(); // unlike Iterator.hasNext(), this actually advances
    }
    return batch;
}
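A note on the new-API reader contract this example relies on: createRecordReader() only constructs the reader, initialize() must be called before the first nextKeyValue(), and nextKeyValue() both advances and reports whether a record was read. The generic consumption loop therefore looks like the following sketch (the process() handler is hypothetical):

// Generic consumption loop for any new-API RecordReader (names illustrative).
recordReader.initialize(fileSplit, taskAttemptContext);
while (recordReader.nextKeyValue()) {                 // advances AND reports availability
    LongWritable key = recordReader.getCurrentKey();  // byte offset of the line
    Text value = recordReader.getCurrentValue();      // the line itself
    process(key, value);                              // hypothetical per-record handler
}
recordReader.close();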
From source file: crunch.MaxTemperature.java
License: Apache License

// Note: despite appearing on this page, this test uses the old-style
// org.apache.hadoop.mapred.TextInputFormat API (configure/getSplits/getRecordReader),
// not org.apache.hadoop.mapreduce.lib.input.TextInputFormat.
@Test
public void text() throws Exception {
    String input = "On the top of the Crumpetty Tree\n"
            + "The Quangle Wangle sat,\n"
            + "But his face you could not see,\n"
            + "On account of his Beaver Hat.";
    writeInput(input);

    TextInputFormat format = new TextInputFormat();
    format.configure(conf);
    InputSplit[] splits = format.getSplits(conf, 1);
    RecordReader<LongWritable, Text> recordReader = format.getRecordReader(splits[0], conf, Reporter.NULL);
    checkNextLine(recordReader, 0, "On the top of the Crumpetty Tree");
    checkNextLine(recordReader, 33, "The Quangle Wangle sat,");
    checkNextLine(recordReader, 57, "But his face you could not see,");
    checkNextLine(recordReader, 89, "On account of his Beaver Hat.");
}
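For comparison, a hedged sketch of the same first-line check against the new org.apache.hadoop.mapreduce API. It assumes 'conf' is the test's Configuration and that the file written by writeInput() has been added to the job's input paths; the test name is illustrative.

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.util.List;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

@Test
public void textNewApi() throws Exception {
    Job job = Job.getInstance(conf);
    TextInputFormat format = new TextInputFormat();
    List<InputSplit> splits = format.getSplits(job);
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
    RecordReader<LongWritable, Text> reader = format.createRecordReader(splits.get(0), context);
    reader.initialize(splits.get(0), context); // required before the first nextKeyValue()

    assertTrue(reader.nextKeyValue());
    assertEquals(0L, reader.getCurrentKey().get());
    assertEquals("On the top of the Crumpetty Tree", reader.getCurrentValue().toString());
    reader.close();
}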
From source file: crunch.MaxTemperature.java
License: Apache License

@Override
public InputFormat getInputFormat() {
    return new TextInputFormat();
}
From source file: eu.stratosphere.hadoopcompatibility.mapreduce.example.WordCount.java
License: Apache License

public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: WordCount <input path> <result path>");
        return;
    }
    final String inputPath = args[0];
    final String outputPath = args[1];

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setDegreeOfParallelism(1);

    // Set up the Hadoop Input Format
    Job job = Job.getInstance();
    HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(
            new TextInputFormat(), LongWritable.class, Text.class, job);
    TextInputFormat.addInputPath(job, new Path(inputPath));

    // Create a Stratosphere job with it
    DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);

    // Tokenize the line and convert from Writable "Text" to String for better handling
    DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());

    // Sum up the words
    DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);

    // Convert String back to Writable "Text" for use with Hadoop Output Format
    DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());

    // Set up Hadoop Output Format
    HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, IntWritable>(
            new TextOutputFormat<Text, IntWritable>(), job);
    // Set the separator under both keys, since this test is executed with both
    // the hadoop1 and hadoop2 profiles
    hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
    hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " ");
    TextOutputFormat.setOutputPath(job, new Path(outputPath));

    // Output & Execute
    hadoopResult.output(hadoopOutputFormat);
    env.execute("Word Count");
}
From source file: org.apache.flink.hadoopcompatibility.mapreduce.example.WordCount.java
License: Apache License

public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: WordCount <input path> <result path>");
        return;
    }
    final String inputPath = args[0];
    final String outputPath = args[1];

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setDegreeOfParallelism(1);

    // Set up the Hadoop Input Format
    Job job = Job.getInstance();
    HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(
            new TextInputFormat(), LongWritable.class, Text.class, job);
    TextInputFormat.addInputPath(job, new Path(inputPath));

    // Create a Flink job with it
    DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);

    // Tokenize the line and convert from Writable "Text" to String for better handling
    DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());

    // Sum up the words
    DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);

    // Convert String back to Writable "Text" for use with Hadoop Output Format
    DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());

    // Set up Hadoop Output Format
    HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, IntWritable>(
            new TextOutputFormat<Text, IntWritable>(), job);
    // Set the separator under both keys, since this test is executed with both
    // the hadoop1 and hadoop2 profiles
    hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
    hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " ");
    TextOutputFormat.setOutputPath(job, new Path(outputPath));

    // Output & Execute
    hadoopResult.output(hadoopOutputFormat);
    env.execute("Word Count");
}
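The Tokenizer referenced in both word-count examples is not shown on this page. A plausible minimal implementation for this job's types is sketched below, using the FlatMapFunction interface from later Flink releases; the Stratosphere-era class it replaces may have differed, so treat this as an assumption rather than the project's actual code.

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

// Sketch of a tokenizer compatible with the flatMap call above:
// Hadoop (offset, line) pairs in, (word, 1) pairs out.
public static final class Tokenizer
        implements FlatMapFunction<Tuple2<LongWritable, Text>, Tuple2<String, Integer>> {
    @Override
    public void flatMap(Tuple2<LongWritable, Text> value, Collector<Tuple2<String, Integer>> out) {
        for (String token : value.f1.toString().toLowerCase().split("\\W+")) {
            if (!token.isEmpty()) {
                out.collect(new Tuple2<String, Integer>(token, 1));
            }
        }
    }
}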