List of usage examples for org.apache.hadoop.io.Text#toString()
@Override
public String toString()
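Text stores its contents as UTF-8-encoded bytes, and toString() decodes those bytes back into a java.lang.String. A minimal standalone sketch (the class name is illustrative):

import org.apache.hadoop.io.Text;

public class TextToStringDemo {
    public static void main(String[] args) {
        Text text = new Text("hello hadoop");
        String s = text.toString(); // decodes the UTF-8 bytes into a String
        System.out.println(s);      // prints "hello hadoop"
    }
}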
From source file:com.googlecode.hive.serde.FixedLengthAndDelimitedSerde.java
License:Apache License
@Override
public Object deserialize(Writable blob) throws SerDeException {
    if (inputFormatString == null) {
        throw new SerDeException(
                "This table does not have serde property \"" + INPUT_FORMAT_STRING + "\"!");
    }
    Text rowText = (Text) blob;
    Map<Integer, String> columnValues = getColumnValues(rowText.toString());

    // If the row does not match, ignore the line and return null.
    if (columnValues.keySet().size() != numColumns) {
        unmatchedRows++;
        if (unmatchedRows >= nextUnmatchedRows) {
            nextUnmatchedRows = getNextNumberToDisplay(nextUnmatchedRows);
            // Report the row
            LOG.warn(unmatchedRows + " unmatched rows are found: " + rowText);
        }
        return null;
    }

    // Otherwise, return the row.
    for (int c = 0; c < numColumns; c++) {
        try {
            row.set(c, columnValues.get(c));
        } catch (RuntimeException e) {
            partialMatchedRows++;
            if (partialMatchedRows >= nextPartialMatchedRows) {
                nextPartialMatchedRows = getNextNumberToDisplay(nextPartialMatchedRows);
                // Report the row
                LOG.warn(partialMatchedRows + " partially unmatched rows are found,"
                        + " cannot find column number " + c + ": " + rowText);
            }
            row.set(c, null);
        }
    }
    return row;
}
From source file:com.gsvic.csmr.CSMRMapper.java
License:Apache License
@Override
public void map(Text key, VectorWritable value, Context context) throws IOException, InterruptedException {
    DocumentWritable p = new DocumentWritable(new Text(key.toString()), new VectorWritable(value.get()));
    context.write(new IntWritable(1), p);
}
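Note the defensive copies here: Hadoop reuses the same Text and VectorWritable instances across calls to map(), so the mapper materializes fresh objects via key.toString() and value.get() rather than holding references that the framework would overwrite on the next record.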
From source file:com.gsvic.csmr.DocumentWritable.java
License:Apache License
public DocumentWritable(Text key, VectorWritable value) {
    this.key = new Text(key.toString());
    this.value = new VectorWritable(value.get());
}
From source file:com.gsvic.csmr.io.InputData.java
License:Apache License
/**
 * Reads a vectorized text file (tf or tf-idf vectors).
 *
 * @param conf the Hadoop configuration
 * @param input the path of the sequence file to read
 * @return the vectorized text file in a HashMap
 * @throws IOException
 */
public static HashMap<Text, VectorWritable> vectorizedTextReader(Configuration conf, Path input)
        throws IOException {
    FileSystem fs = FileSystem.get(conf);
    HashMap<Text, VectorWritable> dcf = new HashMap<>();
    Text key = new Text();
    VectorWritable value = new VectorWritable();

    // Close the reader even if iteration fails.
    try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, input, conf)) {
        while (reader.next(key, value)) {
            // Copy key and value, since reader.next() reuses the same instances.
            dcf.put(new Text(key.toString()), new VectorWritable(value.get()));
        }
    }
    return dcf;
}
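A minimal caller sketch for the reader above, assuming the Mahout VectorWritable used by the surrounding CSMR code; the vector file path is a hypothetical placeholder:

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.mahout.math.VectorWritable;

public class VectorReaderDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path vectors = new Path("/tmp/tfidf-vectors/part-r-00000"); // hypothetical path
        HashMap<Text, VectorWritable> docs = InputData.vectorizedTextReader(conf, vectors);
        for (Map.Entry<Text, VectorWritable> entry : docs.entrySet()) {
            // toString() decodes the UTF-8 document id; get() returns the Mahout vector
            System.out.println(entry.getKey().toString() + " -> " + entry.getValue().get());
        }
    }
}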
From source file:com.hadoop.examples.anagrams.AnagramReducer.java
License:Apache License
public void reduce(Text anagramKey, Iterator<Text> anagramValues, OutputCollector<Text, Text> results,
        Reporter reporter) throws IOException {
    String output = "";
    while (anagramValues.hasNext()) {
        Text anagram = anagramValues.next();
        output = output + anagram.toString() + "~";
    }
    StringTokenizer outputTokenizer = new StringTokenizer(output, "~");
    // Only emit keys that map to at least two anagrams.
    if (outputTokenizer.countTokens() >= 2) {
        output = output.replace("~", ",");
        outputKey.set(anagramKey.toString());
        outputValue.set(output);
        results.collect(outputKey, outputValue);
    }
}
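The loop above concatenates into a String and then re-tokenizes the result just to count entries. A StringBuilder-based variant that tracks the count directly avoids the extra pass; a minimal sketch of the same reduce body, assuming the same outputKey and outputValue fields:

StringBuilder joined = new StringBuilder();
int count = 0;
while (anagramValues.hasNext()) {
    if (count > 0) {
        joined.append(','); // same comma-separated output format
    }
    joined.append(anagramValues.next().toString());
    count++;
}
// Only emit keys that map to at least two anagrams.
if (count >= 2) {
    outputKey.set(anagramKey.toString());
    outputValue.set(joined.toString());
    results.collect(outputKey, outputValue);
}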
From source file:com.hadoop.examples.geolocation.GeoLocationMapper.java
License:Apache License
public void map(LongWritable key, Text value, OutputCollector<Text, Text> outputCollector, Reporter reporter)
        throws IOException {
    String dataRow = value.toString();
    // Since these are tab-separated files, tokenize on tab.
    StringTokenizer dataTokenizer = new StringTokenizer(dataRow, "\t");
    String articleName = dataTokenizer.nextToken();
    String pointType = dataTokenizer.nextToken();
    String geoPoint = dataTokenizer.nextToken();
    // We know that this data row is a GEO RSS type point.
    if (GEO_RSS_URI.equals(pointType)) {
        // Now we process the GEO point data.
        StringTokenizer st = new StringTokenizer(geoPoint, " ");
        String strLat = st.nextToken();
        String strLong = st.nextToken();
        double lat = Double.parseDouble(strLat);
        double lang = Double.parseDouble(strLong);
        long roundedLat = Math.round(lat);
        long roundedLong = Math.round(lang);
        String locationKey = "(" + String.valueOf(roundedLat) + "," + String.valueOf(roundedLong) + ")";
        String locationName = URLDecoder.decode(articleName, "UTF-8");
        locationName = locationName.replace("_", " ");
        locationName = locationName + ":(" + lat + "," + lang + ")";
        geoLocationKey.set(locationKey);
        geoLocationName.set(locationName);
        outputCollector.collect(geoLocationKey, geoLocationName);
    }
}
From source file:com.hadoop.examples.geolocation.GeoLocationReducer.java
License:Apache License
public void reduce(Text geoLocationKey, Iterator<Text> geoLocationValues, OutputCollector<Text, Text> results,
        Reporter reporter) throws IOException {
    // In this case the reducer just builds a list so that the data can be used later.
    String outputText = "";
    while (geoLocationValues.hasNext()) {
        Text locationName = geoLocationValues.next();
        outputText = outputText + locationName.toString() + " ,";
    }
    outputKey.set(geoLocationKey.toString());
    outputValue.set(outputText);
    results.collect(outputKey, outputValue);
}
From source file:com.hadoop.oozie.examples.DemoMapper.java
License:Open Source License
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {
    String line = value.toString();
    StringTokenizer tokenizer = new StringTokenizer(line);
    while (tokenizer.hasMoreTokens()) {
        word.set(tokenizer.nextToken());
        output.collect(word, one);
    }
}
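A minimal driver sketch for running a word-count mapper like this with the old org.apache.hadoop.mapred API, assuming DemoMapper implements the old Mapper interface; the driver class, reducer, and paths are illustrative, not part of the original example:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class DemoDriver {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(DemoDriver.class);
        conf.setJobName("demo-wordcount");
        conf.setOutputKeyClass(Text.class);          // keys are words
        conf.setOutputValueClass(IntWritable.class); // values are counts
        conf.setMapperClass(DemoMapper.class);       // the mapper shown above
        // A reducer summing the IntWritable counts per word would be set here,
        // e.g. conf.setReducerClass(DemoReducer.class); (DemoReducer is hypothetical)
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        JobClient.runJob(conf);
    }
}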
From source file:com.hdfs.concat.crush.Crush.java
License:Apache License
private void cloneOutput() throws IOException {
    List<FileStatus> listStatus = getOutputMappings();

    /*
     * Initialize to empty list, in which case swap() will be a no-op. The reference is then replaced with a real
     * list, which is used in the subsequent iterations.
     */
    List<Path> crushInput = emptyList();

    Text srcFile = new Text();
    Text crushOut = new Text();
    Text prevCrushOut = new Text();

    for (FileStatus partFile : listStatus) {
        Path path = partFile.getPath();
        Reader reader = new Reader(fs, path, fs.getConf());
        try {
            while (reader.next(srcFile, crushOut)) {
                if (!crushOut.equals(prevCrushOut)) {
                    swap(crushInput, prevCrushOut.toString());
                    prevCrushOut.set(crushOut);
                    crushInput = new LinkedList<Path>();
                }
                crushInput.add(new Path(srcFile.toString()));
            }
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                LOG.warn("Trapped exception when closing " + path, e);
            }
        }
        swap(crushInput, prevCrushOut.toString());
    }
}
From source file:com.hdfs.concat.crush.Crush.java
License:Apache License
/**
 * Moves the skipped files to the output directory. Called when operating in normal (non-clone) mode.
 */
private void moveOutput() throws IOException {
    List<FileStatus> listStatus = getOutputMappings();

    Text srcFile = new Text();
    Text crushOut = new Text();

    Set<String> crushOutputFiles = new HashSet<String>(nBuckets);

    for (FileStatus partFile : listStatus) {
        Path path = partFile.getPath();
        Reader reader = new Reader(fs, path, fs.getConf());
        try {
            while (reader.next(srcFile, crushOut)) {
                crushOutputFiles.add(new Path(crushOut.toString()).toUri().getPath());
            }
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                LOG.warn("Trapped exception when closing " + path, e);
            }
        }
    }

    assert crushOutputFiles.size() == nBuckets;

    /*
     * The crush output files will appear in a subdirectory of the output directory. The subdirectory will be the
     * full path of the input directory that was crushed. E.g.
     *
     * Crush input:
     * /user/me/input/dir1/file1
     * /user/me/input/dir1/file2
     * /user/me/input/dir2/file3
     * /user/me/input/dir2/file4
     * /user/me/input/dir3/dir4/file5
     * /user/me/input/dir3/dir4/file6
     *
     * Crush output:
     * /user/me/output/user/me/input/dir1/crushed_file ...
     * /user/me/output/user/me/input/dir2/crushed_file ...
     * /user/me/output/user/me/input/dir2/dir3/dir4/crushed_file ...
     *
     * We need to collapse this down to:
     * /user/me/output/dir1/crushed_file ...
     * /user/me/output/dir2/crushed_file ...
     * /user/me/output/dir2/dir3/dir4/crushed_file ...
     */
    String srcDirName = fs.makeQualified(srcDir).toUri().getPath();
    String destName = fs.makeQualified(dest).toUri().getPath();
    String partToReplace = fs.makeQualified(outDir).toUri().getPath() + "/crush" + srcDirName;

    print(Verbosity.INFO, "\n\nCopying crush files to " + destName);

    for (String crushOutputFile : crushOutputFiles) {
        Path srcPath = new Path(crushOutputFile);
        Path destPath = new Path(destName + crushOutputFile.substring(partToReplace.length())).getParent();
        rename(srcPath, destPath, null);
    }

    print(Verbosity.INFO, "\n\nMoving skipped files to " + destName);

    /*
     * Don't forget to move the files that were not crushed to the output dir so that the output dir has all the
     * data that was in the input dir, the difference being there are fewer files in the output dir.
     */
    for (String name : skippedFiles) {
        Path srcPath = new Path(name);
        Path destPath = new Path(destName + name.substring(srcDirName.length())).getParent();
        rename(srcPath, destPath, null);
    }
}