Example usage for org.apache.hadoop.io Text toString

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text.toString() drawn from open-source projects.

Prototype

@Override
public String toString() 

Document

Convert text back to string
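
As a quick illustration (a minimal sketch, not taken from the projects below), a Text object stores its contents as UTF-8 bytes and toString() decodes them back into a java.lang.String:

import org.apache.hadoop.io.Text;

public class TextToStringExample {
    public static void main(String[] args) {
        Text text = new Text("hello hadoop");
        // toString() decodes the UTF-8 bytes held by the Text object
        String s = text.toString();
        System.out.println(s); // prints: hello hadoop
    }
}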

Usage

From source file:com.ckelsel.hadoop.MaxTemperature.AppMapper.java

License:Open Source License

@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    // convert the incoming line of input to a plain String for parsing
    String line = value.toString();
}
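
The snippet above only converts the incoming line to a String; a fuller max-temperature mapper would then parse a year and a temperature out of that String and emit them. The following is a hypothetical sketch (not the original AppMapper source), assuming a fixed-width weather record and Text/IntWritable output types:

@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    String line = value.toString();
    // hypothetical field offsets for a fixed-width record; adjust to the real input format
    String year = line.substring(15, 19);
    int airTemperature = Integer.parseInt(line.substring(87, 92).trim());
    context.write(new Text(year), new IntWritable(airTemperature));
}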

From source file:com.cloudera.castagna.logparser.mr.StatusCodesStatsMapper.java

License:Apache License

@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    log.debug("< ({}, {})", key, value);

    try {
        Map<String, String> logLine = parser.parseLine(value.toString());

        if (logLine.get(LogParser.STATUS_CODE) != null) {
            StringBuilder outKey = new StringBuilder();
            outKey.append(logLine.get(LogParser.TIME_YEAR));
            outKey.append(Constants.SEPARATOR);
            outKey.append(logLine.get(LogParser.TIME_MONTH));
            outKey.append(Constants.SEPARATOR);
            outKey.append(logLine.get(LogParser.TIME_DAY));
            outKey.append(Constants.SEPARATOR);
            outKey.append(logLine.get(LogParser.TIME_HOUR));
            outKey.append(Constants.SEPARATOR);
            outKey.append(logLine.get(LogParser.TIME_MINUTE));
            //            outKey.append(Constants.SPACE);
            //            outKey.append(logLine.get(LogParser.URL));

            StringBuilder outValue = new StringBuilder();
            outValue.append(logLine.get(LogParser.STATUS_CODE));
            outValue.append(Constants.COLON);
            outValue.append(Constants.ONE);

            outTextKey.clear();
            outTextKey.set(outKey.toString());

            outTextValue.clear();
            outTextValue.set(outValue.toString());

            context.write(outTextKey, outTextValue);
            log.debug("> ({}, {})", outTextKey, outTextValue);
        } else {
            // TODO
        }
    } catch (ParseException e) {
        log.debug("Error parsing: {} {}", key, value);
    }
}

From source file:com.cloudera.castagna.logparser.mr.StatusCodesStatsReducer.java

License:Apache License

@Override
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    Map<String, Integer> counters = new TreeMap<String, Integer>();
    Iterator<Text> iter = values.iterator();
    while (iter.hasNext()) {
        Text value = iter.next();
        log.debug("< ({}, {})", key, value);
        Utils.increment(counters, value);
    }

    StringBuilder outValue = new StringBuilder();

    String[] ks = key.toString().split(Constants.SPACE);
    String date = ks[0];
    if (ks.length > 1) {
        String url = ks[1];
        outTextKey.clear();
        outTextKey.set(url);

        outValue.append(date);
        outValue.append(Constants.TAB);
    } else {
        outTextKey.clear();
        outTextKey.set(date);
    }

    int total = Utils.total(counters);

    outValue.append(total);
    outValue.append(Constants.TAB);
    for (String k : counters.keySet()) {
        outValue.append(k);
        outValue.append(Constants.COLON);
        outValue.append(counters.get(k));
        outValue.append(Constants.TAB);
    }

    outTextValue.clear();
    outTextValue.set(outValue.toString());

    context.write(outTextKey, outTextValue);
    log.debug("> ({}, {})", outTextKey, outTextValue);
}

From source file:com.cloudera.castagna.logparser.mr.TranscodeLogsMapper.java

License:Apache License

@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    log.debug("< ({}, {})", key, value);

    try {
        Map<String, String> logLine = parser.parseLine(value.toString());

        StringBuilder outValue = new StringBuilder();
        outValue.append(logLine.get(LogParser.REMOTE_HOSTNAME));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.USERNAME));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.HTTP_METHOD));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.URL));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIME_YEAR));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIME_MONTH));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIME_DAY));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIME_HOUR));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIME_MINUTE));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIME_SECOND));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIMESTAMP));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.STATUS_CODE));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.SIZE));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.ELAPSED_TIME));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.USER_AGENT));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.REFERER));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get("JSESSIONID"));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get("SITESERVER"));
        outValue.append(Constants.TAB);

        outTextValue.clear();
        outTextValue.set(outValue.toString());

        context.write(NullWritable.get(), outTextValue);
        log.debug("> ({}, {})", NullWritable.get(), outTextValue);
    } catch (ParseException e) {
        log.debug("Error parsing: {} {}", key, value);
    }
}

From source file:com.cloudera.castagna.logparser.Utils.java

License:Apache License

public static void increment(Map<String, Integer> counters, Text value) {
    for (String v : value.toString().split(Constants.SPACE)) {
        String[] vs = v.split(Constants.COLON);
        String status_code = vs[0];
        int count = Integer.parseInt(vs[1]);
        if (counters.containsKey(status_code)) {
            counters.put(status_code, count + counters.get(status_code));
        } else {
            counters.put(status_code, count);
        }
    }
}
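
A brief usage sketch for the helper above, assuming Constants.SPACE is a single space and Constants.COLON is a colon (as the "statusCode:count" values produced by StatusCodesStatsMapper suggest):

Map<String, Integer> counters = new TreeMap<String, Integer>();
Utils.increment(counters, new Text("200:3 404:1"));
Utils.increment(counters, new Text("200:2"));
// counters now holds {200=5, 404=1}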

From source file:com.cloudera.cdk.morphline.hadoop.sequencefile.ReadSequenceFileTest.java

License:Apache License

/**
 * Returns a mapping of expected keys -> records.
 */
private HashMap<String, Record> createTextSequenceFile(File file, int numRecords) throws IOException {
    HashMap<String, Record> map = new HashMap<String, Record>();
    SequenceFile.Metadata metadata = new SequenceFile.Metadata(getMetadataForSequenceFile());
    FSDataOutputStream out = new FSDataOutputStream(new FileOutputStream(file), null);
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(new Configuration(), out, Text.class, Text.class,
                SequenceFile.CompressionType.NONE, null, metadata);
        for (int i = 0; i < numRecords; ++i) {
            Text key = new Text("key" + i);
            Text value = new Text("value" + i);
            writer.append(key, value);
            Record record = new Record();
            record.put("key", key);
            record.put("value", value);
            map.put(key.toString(), record);
        }
    } finally {
        Closeables.closeQuietly(writer);
    }
    return map;
}

From source file:com.cloudera.cdk.morphline.hadoop.sequencefile.ReadSequenceFileTest.java

License:Apache License

/**
 * Returns a mapping of expected keys -> records.
 */
private HashMap<String, Record> createMyWritableSequenceFile(File file, int numRecords) throws IOException {
    HashMap<String, Record> map = new HashMap<String, Record>();
    SequenceFile.Metadata metadata = new SequenceFile.Metadata(getMetadataForSequenceFile());
    FSDataOutputStream out = new FSDataOutputStream(new FileOutputStream(file), null);
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(new Configuration(), out, Text.class,
                ParseTextMyWritableBuilder.MyWritable.class, SequenceFile.CompressionType.NONE, null, metadata);
        for (int i = 0; i < numRecords; ++i) {
            Text key = new Text("key" + i);
            ParseTextMyWritableBuilder.MyWritable value = new ParseTextMyWritableBuilder.MyWritable("value", i);
            writer.append(key, value);
            Record record = new Record();
            record.put("key", key);
            record.put("value", value);
            map.put(key.toString(), record);
        }
    } finally {
        Closeables.closeQuietly(writer);
    }
    return map;
}

From source file:com.cloudera.dataflow.spark.HadoopFileFormatPipelineTest.java

License:Open Source License

@Test
public void testSequenceFile() throws Exception {
    populateFile();

    Pipeline p = Pipeline.create(PipelineOptionsFactory.create());
    @SuppressWarnings("unchecked")
    Class<? extends FileInputFormat<IntWritable, Text>> inputFormatClass = (Class<? extends FileInputFormat<IntWritable, Text>>) (Class<?>) SequenceFileInputFormat.class;
    HadoopIO.Read.Bound<IntWritable, Text> read = HadoopIO.Read.from(inputFile.getAbsolutePath(),
            inputFormatClass, IntWritable.class, Text.class);
    PCollection<KV<IntWritable, Text>> input = p.apply(read);
    @SuppressWarnings("unchecked")
    Class<? extends FileOutputFormat<IntWritable, Text>> outputFormatClass = (Class<? extends FileOutputFormat<IntWritable, Text>>) (Class<?>) TemplatedSequenceFileOutputFormat.class;
    @SuppressWarnings("unchecked")
    HadoopIO.Write.Bound<IntWritable, Text> write = HadoopIO.Write.to(outputFile.getAbsolutePath(),
            outputFormatClass, IntWritable.class, Text.class);
    input.apply(write.withoutSharding());
    EvaluationResult res = SparkPipelineRunner.create().run(p);
    res.close();

    IntWritable key = new IntWritable();
    Text value = new Text();
    try (Reader reader = new Reader(new Configuration(), Reader.file(new Path(outputFile.toURI())))) {
        int i = 0;
        while (reader.next(key, value)) {
            assertEquals(i, key.get());
            assertEquals("value-" + i, value.toString());
            i++;
        }
    }
}

From source file:com.cloudera.impala.TestUdf.java

License:Apache License

public Text evaluate(Text a, Text b) {
    if (a == null || b == null)
        return null;
    return new Text(a.toString() + b.toString());
}
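
A minimal sketch of calling this UDF directly:

TestUdf udf = new TestUdf();
Text joined = udf.evaluate(new Text("foo"), new Text("bar"));
// joined.toString() returns "foobar"; evaluate returns null if either argument is null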

From source file:com.cloudera.knittingboar.io.TestSplitCalcs.java

License:Apache License

/**
 * Use TextInputFormat.getSplits() to test pulling split info.
 *
 * @throws IOException
 */
public void testGetSplits() throws IOException {

    TextInputFormat input = new TextInputFormat();

    JobConf job = new JobConf(defaultConf);
    Path file = new Path(workDir, "testGetSplits.txt");

    int tmp_file_size = 200000;

    long block_size = localFs.getDefaultBlockSize();

    System.out.println("default block size: " + (block_size / 1024 / 1024) + "MB");

    Writer writer = new OutputStreamWriter(localFs.create(file));
    try {
        for (int i = 0; i < tmp_file_size; i++) {
            writer.write(
                    "a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, 1, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, 99");
            writer.write("\n");
        }
    } finally {
        writer.close();
    }

    System.out.println("file write complete");

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;

    //    localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, file);

    // try splitting the file in a variety of sizes
    TextInputFormat format = new TextInputFormat();
    format.configure(job);
    LongWritable key = new LongWritable();
    Text value = new Text();

    int numSplits = 1;

    InputSplit[] splits = format.getSplits(job, numSplits);

    LOG.info("requested " + numSplits + " splits, splitting: got =        " + splits.length);

    assertEquals(2, splits.length);

    System.out.println("---- debug splits --------- ");

    for (int x = 0; x < splits.length; x++) {

        System.out.println("> Split [" + x + "]: " + splits[x].getLength() + ", " + splits[x].toString() + ", "
                + splits[x].getLocations()[0]);

        RecordReader<LongWritable, Text> reader = format.getRecordReader(splits[x], job, reporter);
        try {
            int count = 0;
            while (reader.next(key, value)) {

                if (count == 0) {
                    System.out.println("first: " + value.toString());
                    assertTrue(value.toString().contains("a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p"));
                }

                count++;
            }

            System.out.println("last: " + value.toString());

            assertTrue(value.toString().contains("a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p"));

        } finally {
            reader.close();
        }

    } // for each split

}