Example usage for org.apache.hadoop.io Text toString

List of usage examples for org.apache.hadoop.io Text toString

Introduction

On this page you can find example usages of org.apache.hadoop.io Text toString.

Prototype

@Override
public String toString() 

Source Link

Document

Converts the text back to a string.

Usage

From source file:authordetect.input.SingleBookReader.java

/**
 * Tells whether the given line marks the beginning of a book.
 *
 * <p>A line qualifies when its lower-cased text contains both "start" and "gutenberg".
 *
 * @param line the line to inspect
 * @return {@code true} if both markers occur in the lower-cased line, {@code false} otherwise
 */
private boolean isBookStart(Text line) {
    final String lowered = line.toString().toLowerCase();
    if (!lowered.contains("start")) {
        return false;
    }
    return lowered.contains("gutenberg");
}

From source file:average.AverageMapper.java

/**
 * Splits a comma-separated input line and emits its first field as the key and its
 * second field as the value.
 *
 * @param _key     input key; not used by this mapper
 * @param value    one line of comma-separated input
 * @param output   collector receiving the (first field, second field) pair
 * @param reporter progress reporter; not used by this mapper
 * @throws IOException if the collector fails to accept the pair
 */
@Override
public void map(LongWritable _key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    // NOTE(review): a line with fewer than two fields still fails with
    // ArrayIndexOutOfBoundsException, exactly as before - confirm the input always has them.
    String[] fields = value.toString().split(",");
    output.collect(new Text(fields[0]), new Text(fields[1]));
}

From source file:average.AverageReducer.java

/**
 * Emits the arithmetic mean of the integer values grouped under a key.
 *
 * <p>The key {@code "0Student_Id"} is treated specially: its values are not read and the
 * literal text {@code "Average"} is emitted for it instead.
 *
 * @param _key     the grouping key
 * @param values   the values for the key; each must be parseable by {@link Integer#parseInt(String)}
 * @param output   collector receiving the key paired with the mean (or with "Average")
 * @param reporter progress reporter; not used by this reducer
 * @throws IOException if the collector fails to accept the pair
 * @throws NumberFormatException if a value is not a parseable integer
 */
@Override
public void reduce(Text _key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    if ("0Student_Id".equals(_key.toString())) {
        output.collect(_key, new Text("Average"));
        return;
    }

    // Accumulate in a long so that summing many int values cannot silently wrap around.
    long sum = 0L;
    int count = 0;
    while (values.hasNext()) {
        sum += Integer.parseInt(values.next().toString());
        count++;
    }

    // Floating-point division: with no values this yields NaN rather than throwing.
    double average = sum / (double) count;
    output.collect(_key, new Text(Double.toString(average)));
}

From source file:averagerating_youtube.AvgRating_CommCountMapper.java

/**
 * Emits one tuple per comma-separated input line, keyed by the line's first field.
 *
 * <p>The seventh field (index 6) is parsed as the rating; an empty field is recorded as 0.
 * Each emitted tuple carries a comment count of 1 together with that rating.
 *
 * @param key     input key; not used by this mapper
 * @param value   one line of comma-separated input
 * @param context context the (first field, tuple) pair is written to
 * @throws IOException          if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
    final String[] columns = value.toString().split(",");
    final String id = columns[0];
    final String ratingText = columns[6];

    if (ratingText.isEmpty()) {
        this.v_rate = 0;
    } else {
        this.v_rate = Float.parseFloat(ratingText);
    }

    video_name.set(id);
    outTuple.setComment_count(1);
    outTuple.setVideo_rating(this.v_rate);
    context.write(video_name, outTuple);
}

From source file:averagetemperature.AverageTemperatureMapper.java

/**
 * Emits (second field, eleventh field as an int) for each comma-separated input line whose
 * eleventh field (index 10) is numeric.
 *
 * <p>Lines whose eleventh field fails the {@code StringUtils.isNumeric} check, or passes it
 * but still cannot be parsed as an {@code int}, produce no output.
 *
 * @param key      input key; not used by this mapper
 * @param value    one line of comma-separated input
 * @param output   collector receiving the (second field, parsed int) pair
 * @param reporter progress reporter; not used by this mapper
 * @throws IOException if the collector fails to accept the pair
 */
@Override
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {
    String[] fields = value.toString().split(",");
    String datePart = fields[1];
    String temp = fields[10];

    if (!StringUtils.isNumeric(temp)) {
        return;
    }

    try {
        output.collect(new Text(datePart), new IntWritable(Integer.parseInt(temp)));
    } catch (NumberFormatException ignored) {
        // Deliberately skipped: the record passed the numeric check but is still not a
        // parseable int (presumably out of int range), so it is dropped like any other
        // non-numeric record.
    }
}

From source file:azkaban.common.web.JsonSequenceFileViewer.java

License:Apache License

/**
 * Writes records {@code startLine} through {@code endLine} (1-based, inclusive) of a sequence
 * file to {@code output}, one {@code key\t=>\tvalue} line per record.
 *
 * <p>The key and value schemas are read from the file metadata entries {@code key.schema} and
 * {@code value.schema}. Output stops early when the reader runs out of records. The reader is
 * always closed before this method returns.
 *
 * @param reader    the sequence file reader; closed by this method
 * @param output    destination for the rendered records; flushed after each record
 * @param startLine number of the first record to print
 * @param endLine   number of the last record to print
 * @throws IOException if reading from the sequence file fails
 */
public void displaySequenceFile(SequenceFile.Reader reader, PrintWriter output, int startLine, int endLine)
        throws IOException {

    if (logger.isDebugEnabled()) {
        logger.debug("display json file");
    }

    try {
        final BytesWritable rawKey = new BytesWritable();
        final BytesWritable rawValue = new BytesWritable();
        final Text keySchema = reader.getMetadata().get(new Text("key.schema"));
        final Text valueSchema = reader.getMetadata().get(new Text("value.schema"));

        final JsonTypeSerializer keySerializer = new JsonTypeSerializer(keySchema.toString());
        final JsonTypeSerializer valueSerializer = new JsonTypeSerializer(valueSchema.toString());

        // Consume (and discard) every record that precedes the requested window.
        int line = 1;
        while (line < startLine) {
            reader.next(rawKey, rawValue);
            line++;
        }

        // Render the requested window, stopping as soon as the reader is exhausted.
        // NOTE(review): getBytes() may expose a backing buffer longer than the current
        // record - confirm the serializer tolerates trailing bytes.
        line = startLine;
        while (line <= endLine && reader.next(rawKey, rawValue)) {
            output.write(safeToString(keySerializer.toObject(rawKey.getBytes())));
            output.write("\t=>\t");
            output.write(safeToString(valueSerializer.toObject(rawValue.getBytes())));
            output.write("\n");
            output.flush();
            line++;
        }
    } finally {
        reader.close();
    }
}

From source file:azkaban.viewer.hdfs.JsonSequenceFileViewer.java

License:Apache License

/**
 * Writes records {@code startLine} through {@code endLine} (1-based, inclusive) of a sequence
 * file to {@code output}, one {@code key\t=>\tvalue} line per record.
 *
 * <p>The key and value schemas are read from the file metadata entries {@code key.schema} and
 * {@code value.schema}. Output stops early when the reader runs out of records. This method
 * does not close the reader.
 *
 * @param reader    the sequence file reader to read records from
 * @param output    destination for the rendered records; flushed after each record
 * @param startLine number of the first record to print
 * @param endLine   number of the last record to print
 * @throws IOException if reading from the sequence file fails
 */
public void displaySequenceFile(AzkabanSequenceFileReader.Reader reader, PrintWriter output, int startLine,
        int endLine) throws IOException {

    if (logger.isDebugEnabled()) {
        logger.debug("display json file");
    }

    final BytesWritable keyBuffer = new BytesWritable();
    final BytesWritable valueBuffer = new BytesWritable();
    final Text keySchema = reader.getMetadata().get(new Text("key.schema"));
    final Text valueSchema = reader.getMetadata().get(new Text("value.schema"));

    final JsonTypeSerializer keySerializer = new JsonTypeSerializer(keySchema.toString());
    final JsonTypeSerializer valueSerializer = new JsonTypeSerializer(valueSchema.toString());

    // Read past the records that come before the first requested one.
    int position = 1;
    while (position < startLine) {
        reader.next(keyBuffer, valueBuffer);
        position++;
    }

    // Print the requested records until the window ends or the reader has nothing left.
    for (int current = startLine; current <= endLine; current++) {
        final boolean hasRecord = reader.next(keyBuffer, valueBuffer);
        if (!hasRecord) {
            return;
        }
        final String renderedKey = safeToString(keySerializer.toObject(keyBuffer.getBytes()));
        output.write(renderedKey);
        output.write("\t=>\t");
        final String renderedValue = safeToString(valueSerializer.toObject(valueBuffer.getBytes()));
        output.write(renderedValue);
        output.write("\n");
        output.flush();
    }
}

From source file:babel.prep.datedcorpus.DatedLangFilesOutputFormat.java

License:Apache License

/**
 * Derives the output file name for a record from its key.
 *
 * <p>The key is split on {@code DatedCorpusGenMapper.DATE_LANG_SEP} into exactly two parts:
 * a leading token, used as the top-level directory, and a millisecond timestamp. The result
 * has the form {@code <token>/<year>/<year>-<month>-<day><EXTENSION>}, with month and day
 * not zero-padded. Keys that do not split into two parts, whose timestamp is not a valid
 * long, or whose year falls outside 2000-2011 map to {@code REJECTED_FILE}.
 *
 * @param key  the record key to derive the file name from
 * @param ver  the record value; not used here
 * @param name the default leaf name; not used here
 * @return the relative file name for the record, or {@code REJECTED_FILE}
 */
protected String generateFileNameForKeyValue(Text key, Text ver, String name) {
    // String.split never returns null, so only the token count needs checking.
    String[] toks = key.toString().split(DatedCorpusGenMapper.DATE_LANG_SEP);
    if (toks.length != 2) {
        return REJECTED_FILE;
    }

    long timestamp;
    try {
        timestamp = Long.parseLong(toks[1]);
    } catch (NumberFormatException e) {
        // A non-numeric timestamp is just another malformed key: reject it instead of
        // letting the exception escape.
        return REJECTED_FILE;
    }

    // Calendar.getInstance() uses the default time zone and locale of the running JVM.
    Calendar cal = Calendar.getInstance();
    cal.setTimeInMillis(timestamp);

    int year = cal.get(Calendar.YEAR);
    int month = cal.get(Calendar.MONTH) + 1; // Calendar months are zero-based
    int day = cal.get(Calendar.DAY_OF_MONTH);

    if (year < 2000 || year > 2011) {
        return REJECTED_FILE;
    }
    return toks[0] + File.separator + year + File.separator + year + "-" + month + "-" + day
            + EXTENSION;
}

From source file:babel.prep.datedcorpus.DatedLangFilesOutputFormat.java

License:Apache License

/**
 * Creates a record writer that appends each record's value text to a single dump file
 * located at {@code name} under the job's output path.
 *
 * <p>Any existing file at that location is deleted first; otherwise the parent directory is
 * created. Every {@code write} call opens the file in append mode, writes the value and
 * closes the file again; the key is not written.
 *
 * @param fs       file system used to remove a stale dump file or create its parent directory
 * @param job      job configuration the output path is read from
 * @param name     file name, relative to the job's output path
 * @param progress progress callback; not used here
 * @return a writer whose {@code write} appends the value text and whose {@code close} is a no-op
 * @throws IOException if preparing the output location fails
 */
public RecordWriter<Text, Text> getBaseRecordWriter(final FileSystem fs, JobConf job, String name,
        final Progressable progress) throws IOException {
    final Path dumpFile = new Path(FileOutputFormat.getOutputPath(job), name);

    // Get the old copy out of the way
    if (fs.exists(dumpFile)) {
        fs.delete(dumpFile, true);
    } else {
        fs.mkdirs(dumpFile.getParent());
    }

    return new RecordWriter<Text, Text>() {
        public synchronized void write(Text key, Text versText) throws IOException {
            try {
                BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                        new FileOutputStream(new File(dumpFile.toUri()), true), DEFAULT_CHARSET));
                try {
                    writer.write(versText.toString());
                } finally {
                    // Always release the file handle, even when the write itself fails.
                    writer.close();
                }
            } catch (Exception e) {
                // Keep the original message but attach the cause so the stack trace survives.
                throw new RuntimeException("Error writing page versions: " + e.toString(), e);
            }
        }

        public synchronized void close(Reporter reporter) throws IOException {
            // Nothing to release: each write opens and closes the file itself.
        }
    };
}

From source file:babel.prep.extract.PageExtReducer.java

License:Apache License

/**
 * Builds a {@code Page} from all chunks grouped under a key and emits it when it has at
 * least one version and a non-empty URL; otherwise counts it as ignored.
 *
 * @param key      the grouping key, also passed to the {@code Page} constructor as text
 * @param values   the chunks grouped under the key
 * @param output   collector receiving the (key, page) pair for accepted pages
 * @param reporter progress reporter; not used by this reducer
 * @throws IOException if the collector fails to accept the pair
 */
public void reduce(Text key, Iterator<NutchChunk> values, OutputCollector<Text, Page> output, Reporter reporter)
        throws IOException {
    // A single page may bundle several versions.
    final Page page = new Page(key.toString(), values);
    final int versionCount = page.numVersions();

    final boolean worthKeeping = versionCount > 0 && page.pageURL().length() > 0;
    if (!worthKeeping) {
        NutchPageExtractor.Stats.incIgnoredPages();
        return;
    }

    NutchPageExtractor.Stats.incPages();
    NutchPageExtractor.Stats.incVersions(versionCount);
    output.collect(key, page);
}