Example usage for org.apache.hadoop.io Text toString

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text#toString().

Prototype

@Override
public String toString() 

Document

Convert text back to string
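
To make the round trip concrete, here is a minimal, self-contained sketch (plain Java; the sample strings are arbitrary):

import org.apache.hadoop.io.Text;

public class TextToStringDemo {
    public static void main(String[] args) {
        // Text stores its contents as UTF-8 bytes; toString() decodes them
        // back into a java.lang.String.
        Text text = new Text("caf\u00e9");
        System.out.println(text.toString()); // prints: café

        // Mappers commonly reuse a single Text instance; toString() always
        // reflects the current contents.
        text.set("second value");
        System.out.println(text.toString()); // prints: second value
    }
}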

Usage

From source file:com.m6d.filecrush.crush.CrushTest.java

License:Apache License

@Test
public void bucketing() throws Exception {
    File in = tmp.newFolder("in");

    Counters expectedCounters = new Counters();
    List<String> expectedBucketFiles = new ArrayList<String>();

    /*
     * Create a hierarchy of directories. Directories are distinguished by a trailing slash in these comments.
     *
     *   1/
     *         1.1/
     *               file1 10 bytes
     *               file2 20 bytes
     *               file3 30 bytes
     *               file4 41 bytes
     *               file5 15 bytes
     *               file6 30 bytes
     *               file7   20 bytes
     *         1.2/
     *               file1 20 bytes
     *               file2 10 bytes
     *         1.3/
     *   2/
     *         file1 70 bytes
     *         file2 30 bytes
     *         file3 25 bytes
     *         file4 30 bytes
     *         file5 35 bytes
     *         2.1/
     *               file1 10 bytes
     *         2.2/
     *               file1 25 bytes
     *               file2 15 bytes
     *               file3 35 bytes
     *         2.3/
     *               file1 41 bytes
     *               file2 10 bytes
     *         2.4/
     *               2.4.1/
     *                     file1 100 bytes
     *                     file2   30 bytes
     *               2.4.2/
     *                     file1 20 bytes
     *                     file2 20 bytes
     *                     file3 10 bytes
     */

    /*
     * in contains 2 dirs and no files so it is skipped.
     *
     *    in/
     *          1/
     *          2/
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    tmp.newFolder("in/1");
    File dir2 = tmp.newFolder("in/2");

    /*
     * in/1 contains three dirs and no files so it is skipped.
     *
     *    in/
     *          1/
     *                1.1/
     *                1.2/
     *                1.3/
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    File dir1_1 = tmp.newFolder("in/1/1.1");
    File dir1_2 = tmp.newFolder("in/1/1.2");
    tmp.newFolder("in/1/1.3");

    /*
     * in/2 contains five files and four dirs.
     *
     *    in/
     *          2/
     *               file1 70 bytes
     *               file2 30 bytes
     *               file3 25 bytes
     *               file4 30 bytes
     *               file5 35 bytes
     *                2.1/
     *                2.2/
     *                2.3/
     *                2.4/
     *
     *    0             1             2
     *    file5 35      file2 30      file4 30
     *                  file3 25
     *
     * Buckets 0 and 2 have a single file each so they are ignored.
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 5);
    expectedCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, 2);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 3);

    File dir2_1 = tmp.newFolder("in/2/2.1");
    File dir2_2 = tmp.newFolder("in/2/2.2");
    File dir2_3 = tmp.newFolder("in/2/2.3");
    tmp.newFolder("in/2/2.4");

    createFile(dir2, "file1", 70);
    createFile(dir2, "file2", 30);
    createFile(dir2, "file3", 25);
    createFile(dir2, "file4", 30);
    createFile(dir2, "file5", 35);

    expectedBucketFiles
            .add(format("%s   %s", dir2.getAbsolutePath() + "-1", new File(dir2, "file2").getAbsolutePath()));
    expectedBucketFiles
            .add(format("%s   %s", dir2.getAbsolutePath() + "-1", new File(dir2, "file3").getAbsolutePath()));

    /*
     * in/1/1.1 contains seven files and no dirs.
     *
     *    in/
     *          1/
     *                1.1/
     *                     file1 10 bytes
     *                     file2 20 bytes
     *                     file3 30 bytes
     *                     file4 41 bytes
     *                     file5 15 bytes
     *                     file6 30 bytes
     *                     file7   20 bytes
     *
     *    0             1             2
     *    file3 30      file6 30      file2 20
     *    file5 15      file1 10      file7 20
     *
     * file4 is > 50 * 0.8 so it is ignored.
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 7);
    expectedCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, 6);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 1);

    createFile(dir1_1, "file1", 10);
    createFile(dir1_1, "file2", 20);
    createFile(dir1_1, "file3", 30);
    createFile(dir1_1, "file4", 41);
    createFile(dir1_1, "file5", 15);
    createFile(dir1_1, "file6", 30);
    createFile(dir1_1, "file7", 20);

    expectedBucketFiles.add(
            format("%s   %s", dir1_1.getAbsolutePath() + "-0", new File(dir1_1, "file3").getAbsolutePath()));
    expectedBucketFiles.add(
            format("%s   %s", dir1_1.getAbsolutePath() + "-0", new File(dir1_1, "file5").getAbsolutePath()));
    expectedBucketFiles.add(
            format("%s   %s", dir1_1.getAbsolutePath() + "-1", new File(dir1_1, "file6").getAbsolutePath()));
    expectedBucketFiles.add(
            format("%s   %s", dir1_1.getAbsolutePath() + "-1", new File(dir1_1, "file1").getAbsolutePath()));
    expectedBucketFiles.add(
            format("%s   %s", dir1_1.getAbsolutePath() + "-2", new File(dir1_1, "file2").getAbsolutePath()));
    expectedBucketFiles.add(
            format("%s   %s", dir1_1.getAbsolutePath() + "-2", new File(dir1_1, "file7").getAbsolutePath()));

    /*
     * in/1/1.2 contains two files.
     *
     *    in/
     *          1/
     *                1.2/
     *                     file1 20 bytes
     *                     file2 10 bytes
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 2);
    expectedCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, 2);

    createFile(dir1_2, "file1", 20);
    createFile(dir1_2, "file2", 10);

    expectedBucketFiles.add(
            format("%s   %s", dir1_2.getAbsolutePath() + "-0", new File(dir1_2, "file1").getAbsolutePath()));
    expectedBucketFiles.add(
            format("%s   %s", dir1_2.getAbsolutePath() + "-0", new File(dir1_2, "file2").getAbsolutePath()));

    /*
     * in/1/1.3 is empty.
     *
     *    in/
     *          1/
     *                1.3/
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    tmp.newFolder("in/1/1.3");

    /*
     * in/2/2.1 contains one file.
     *
     *    in/
     *          2/
     *                2.1/
     *                     file1 10 bytes
     *
     * Single file dirs are ignored.
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 1);

    createFile(dir2_1, "file1", 10);

    /*
     * in/2/2.2 contains three files.
     *
     *    in/
     *          2/
     *                2.2/
     *                     file1 25 bytes
     *                     file2 15 bytes
     *                     file3 35 bytes
     *
     *    0             1
     *    file3 35      file1 25
     *                  file2 15
     *
     * Bucket 0 with a single file is ignored.
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 3);
    expectedCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, 2);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 1);

    createFile(dir2_2, "file1", 25);
    createFile(dir2_2, "file2", 15);
    createFile(dir2_2, "file3", 35);

    expectedBucketFiles.add(
            format("%s   %s", dir2_2.getAbsolutePath() + "-1", new File(dir2_2, "file1").getAbsolutePath()));
    expectedBucketFiles.add(
            format("%s   %s", dir2_2.getAbsolutePath() + "-1", new File(dir2_2, "file2").getAbsolutePath()));

    /*
     * in/2/2.3 contains 2 files.
     *
     *    in/
     *          2/
     *                2.3/
     *                     file1 41 bytes
     *                     file2 10 bytes
     *
     * file1 is too big, leaving file2 as a single file, which is also ignored.
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 2);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 2);

    createFile(dir2_3, "file1", 41);
    createFile(dir2_3, "file2", 10);

    /*
     * in/2/2.4 contains two subdirectories and no files.
     *
     *    in/
     *          2/
     *               2.4/
     *                     2.4.1/
     *                     2.4.2/
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    tmp.newFolder("in/2/2.4");

    File dir2_4_1 = tmp.newFolder("in/2/2.4/2.4.1");
    File dir2_4_2 = tmp.newFolder("in/2/2.4/2.4.2");

    /*
     *    in/
     *          2/
     *               2.4/
     *                     2.4.1/
     *                           file1 100 bytes
     *                           file2   30 bytes
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 2);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 2);

    createFile(dir2_4_1, "file1", 100);
    createFile(dir2_4_1, "file2", 30);

    /*
     *    in/
     *          2/
     *               2.4/
     *                     2.4.2/
     *                           file1 20 bytes
     *                           file2 20 bytes
     *                           file3 10 bytes
     *   0
     *   file1 20
     *   file2 20
     *   file3 10
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 3);
    expectedCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, 3);

    createFile(dir2_4_2, "file1", 20);
    createFile(dir2_4_2, "file2", 20);
    createFile(dir2_4_2, "file3", 10);

    expectedBucketFiles.add(format("%s   %s", dir2_4_2.getAbsolutePath() + "-0",
            new File(dir2_4_2, "file1").getAbsolutePath()));
    expectedBucketFiles.add(format("%s   %s", dir2_4_2.getAbsolutePath() + "-0",
            new File(dir2_4_2, "file2").getAbsolutePath()));
    expectedBucketFiles.add(format("%s   %s", dir2_4_2.getAbsolutePath() + "-0",
            new File(dir2_4_2, "file3").getAbsolutePath()));

    Crush crush = new Crush();

    crush.setConf(job);
    crush.setFileSystem(fileSystem);

    /*
     * Call these in the same order that run() does.
     */
    crush.createJobConfAndParseArgs("--compress=none", "--max-file-blocks=1", in.getAbsolutePath(),
            new File(tmp.getRoot(), "out").getAbsolutePath(), "20101124171730");
    crush.writeDirs();

    /*
     * Verify bucket contents.
     */

    List<String> actualBucketFiles = new ArrayList<String>();

    Text key = new Text();
    Text value = new Text();

    Reader reader = new Reader(FileSystem.get(job), crush.getBucketFiles(), job);
    while (reader.next(key, value)) {
        actualBucketFiles.add(format("%s\t%s", key, value));
    }
    reader.close();

    Collections.sort(expectedBucketFiles);
    Collections.sort(actualBucketFiles);

    assertThat(actualBucketFiles, equalTo(expectedBucketFiles));

    /*
     * Verify the partition map.
     */
    Reader partitionMapReader = new Reader(FileSystem.get(job), crush.getPartitionMap(), job);

    IntWritable partNum = new IntWritable();

    Map<String, Integer> actualPartitions = new HashMap<String, Integer>();

    while (partitionMapReader.next(key, partNum)) {
        actualPartitions.put(key.toString(), partNum.get());
    }

    partitionMapReader.close();

    /*
     * These crush files need to be allocated into 5 partitions:
     *
     * in/2-1             55 bytes
     * in/1/1.1-0         45 bytes
     * in/1/1.1-2         40 bytes
     * in/1/1.1-1         40 bytes
     * in/1/1.2-0         30 bytes
     * in/2/2.2-1         40 bytes
     * in/2/2.4/2.4.2-0   50 bytes
     *
     *    0               1                     2                3                4
     *    in/2-1 55       in/2/2.4/2.4.2-0 50   in/1/1.1-0 45    in/1/1.1-2 40    in/1/1.1-1 40
     *                                                           in/2/2.2-1 40    in/1/1.2-0 30
     */
    Map<String, Integer> expectedPartitions = new HashMap<String, Integer>();

    //TODO: this may not be deterministic due to jvm/hashmap/filesystem
    expectedPartitions.put(dir2.getAbsolutePath() + "-1", 0);
    expectedPartitions.put(dir2_4_2.getAbsolutePath() + "-0", 1);
    expectedPartitions.put(dir1_1.getAbsolutePath() + "-0", 2);
    expectedPartitions.put(dir1_1.getAbsolutePath() + "-2", 4);
    expectedPartitions.put(dir2_2.getAbsolutePath() + "-1", 3);
    expectedPartitions.put(dir1_1.getAbsolutePath() + "-1", 3);
    expectedPartitions.put(dir1_2.getAbsolutePath() + "-0", 4);

    assertThat(actualPartitions, equalTo(expectedPartitions));

    /*
     * Verify counters.
     */
    Counters actualCounters = new Counters();

    DataInputStream countersStream = FileSystem.get(job).open(crush.getCounters());

    actualCounters.readFields(countersStream);

    countersStream.close();

    assertThat(actualCounters, equalTo(expectedCounters));
}
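
Reading the expectations together clarifies the bucketing rule this test exercises: with --max-file-blocks=1 and what appears to be a 50-byte block size configured elsewhere in the test (the "50 * 0.8" comment above implies it), files larger than 0.8 x 50 = 40 bytes are skipped, the remaining files are packed into buckets of at most one block, and any bucket left holding a single file is skipped as well.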

From source file:com.malsolo.hadoop.elephant.guide.MaxTemperatureMapper.java

@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

    String line = value.toString();

    String firstWord = line.substring(0, 7);
    //        System.out.printf("First word %s\n", firstWord);
    switch (firstWord) {
    case STATION:
        //                System.out.println("Skip first line");
        break;
    case DOTS:
        //                System.out.println("Skip second line");
        break;
    default:
        String date = line.substring(102, 111);
        //                String temperature = line.substring(253, 258);
        //                System.out.printf("Processing %s with %s\n", date, temperature);
        int airTemperature;
        if (line.charAt(253) == '+') {
            airTemperature = Integer.parseInt(line.substring(254, 258).trim());
        } else {
            airTemperature = Integer.parseInt(line.substring(253, 258).trim());
        }
        //              String quality = line.substring(280, 281);
        if (airTemperature != MISSING_POSITIVE
                && airTemperature != MISSING_NEGATIVE /*&& quality.matches(QUALITY)*/) {
            context.write(new Text(date), new IntWritable(airTemperature));
        }
        break;
    }
}
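
As a worked example of the fixed-width parsing above (assuming NCDC-style records, which this layout implies): a record whose temperature field at offsets 253-257 reads +0123 takes the first branch, parses to 123, and is emitted against the date field at offsets 102-110, provided 123 matches neither MISSING_POSITIVE nor MISSING_NEGATIVE.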

From source file:com.mapr.db.utils.ImportCSV_MR.java

License:Apache License

public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

    logger.info("Processing " + value.toString());
    System.out.println("Processing " + value.toString());
    DBDocument document = readAndImportCSV(value.toString());
    Value id = document.getId();
    logger.info("ID: " + id.getString());
    System.out.println("ID: " + id.getString());
    context.write(id, document);

}

From source file:com.mapred.DepartmentMap.java

protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    MRUtils.prepareDepartmentMap(value, outputMap);
    if (outputMap == null) {
        System.out.println("DepartmentMap >> bad row with value = " + value.toString());
        return;
    }

    IntWritable department = new IntWritable(Integer.parseInt(outputMap.get(MRUtils.DEPARTMENT_ID).toString()));
    depKey.setDepartment(department);
    depKey.setHireDate(null);
    context.write(depKey, outputMap);
}

From source file:com.mapred.EmployeeMap.java

@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    MRUtils.prepareEmployeeMap(value, outputMap);
    if (outputMap == null) {
        System.out.println("EmployeeMap >> bad row with value = " + value.toString());
        return;
    }

    IntWritable department = new IntWritable(Integer.parseInt(outputMap.get(MRUtils.DEPARTMENT_ID).toString()));
    depKey.setDepartment(department);
    depKey.setHireDate(new Text(outputMap.get(MRUtils.WORK_START).toString()));
    context.write(depKey, outputMap);
}

From source file:com.marcolotz.MRComponents.SerializerConverter.java

License:Creative Commons License

/**
 * A helper for deserializing a String: it creates a new Text, reads the
 * field from the input, and returns the contents as a String.
 *
 * @param dataInput the stream to read from
 * @return the string that was read
 * @throws IOException if reading from the stream fails
 */
public static String readString(DataInput dataInput) throws IOException {
    Text text = new Text();
    text.readFields(dataInput);
    return text.toString();
}
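
The write side is not shown in this source; a hypothetical companion helper (the name writeString is ours, not the project's) that frames the string the same way might look like:

public static void writeString(DataOutput dataOutput, String value) throws IOException {
    // Mirror readString: wrap the String in a Text and let Text emit its
    // length-prefixed, UTF-8 encoded representation.
    Text text = new Text(value);
    text.write(dataOutput);
}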

From source file:com.marklogic.contentpump.RDFInputFormat.java

License:Apache License

protected String getServerVersion(TaskAttemptContext context) throws IOException {
    //Restores the object from the configuration.
    Configuration conf = context.getConfiguration();
    Text version = DefaultStringifier.load(conf, ConfigConstants.CONF_ML_VERSION, Text.class);
    return version.toString();
}
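
For context, a value loaded this way must first have been placed into the configuration with the matching DefaultStringifier.store call. A minimal sketch, reusing the ConfigConstants.CONF_ML_VERSION key from the source above (the version string is only illustrative):

public static void storeServerVersion(Configuration conf, String version) throws IOException {
    // Counterpart of the load above: serialize the value into the
    // configuration under the key that getServerVersion() reads.
    DefaultStringifier.store(conf, new Text(version), ConfigConstants.CONF_ML_VERSION);
}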

From source file:com.marklogic.contentpump.RDFWritable.java

License:Apache License

@SuppressWarnings("unchecked")
@Override
public void readFields(DataInput in) throws IOException {
    byte hasCollection = in.readByte();
    if (hasCollection != 0) {
        Text t = new Text();
        t.readFields(in);
        graphUri = t.toString();
    }
    byte valueType = in.readByte();
    switch (valueType) {
    case 0:
        value = (VALUE) new Text();
        ((Text) value).readFields(in);
        break;
    case 1:
        value = (VALUE) new MarkLogicNode();
        ((MarkLogicNode) value).readFields(in);
        break;
    case 2:
        value = (VALUE) new BytesWritable();
        ((BytesWritable) value).readFields(in);
        break;
    default:
        throw new IOException("incorrect type");
    }
    type = valueType;
    byte hasPerms = in.readByte();
    if (hasPerms != 0) {
        int length = hasPerms;
        permissions = new ContentPermission[length];
        for (int i = 0; i < length; i++) {
            Text t = new Text();
            t.readFields(in);
            String role = t.toString();
            t.readFields(in);
            String perm = t.toString();
            ContentCapability capability = null;
            if (perm.equalsIgnoreCase(ContentCapability.READ.toString())) {
                capability = ContentCapability.READ;
            } else if (perm.equalsIgnoreCase(ContentCapability.EXECUTE.toString())) {
                capability = ContentCapability.EXECUTE;
            } else if (perm.equalsIgnoreCase(ContentCapability.INSERT.toString())) {
                capability = ContentCapability.INSERT;
            } else if (perm.equalsIgnoreCase(ContentCapability.UPDATE.toString())) {
                capability = ContentCapability.UPDATE;
            } else {
                LOG.error("Illegal permission: " + perm);
            }
            permissions[i] = new ContentPermission(capability, role);
        }

    }
}
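
Spelled out, the wire format this readFields() consumes is: one byte flagging an optional graph URI (a Text), one type-tag byte selecting Text, MarkLogicNode, or BytesWritable for the value, and one byte that doubles as the permission count, followed by that many role/capability Text pairs.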

From source file:com.marklogic.contentpump.utilities.TransformHelper.java

License:Apache License

private static String getTypeFromMap(String uri) {
    int idx = uri.lastIndexOf(".");
    Text format = null;
    if (idx != -1) {
        String suff = uri.substring(idx + 1, uri.length());
        if (suff.equalsIgnoreCase("xml"))
            return "xml";
        format = (Text) TransformOutputFormat.mimetypeMap.get(new Text(suff));
    }
    if (format == null) {
        return "binary";
    } else {
        return format.toString();
    }
}

From source file:com.me.neu.Popular_question.Mapper1.java

@Override
public void map(LongWritable k1, Text v1, OutputCollector<Text, Text> output, Reporter rprtr)
        throws IOException {
    String[] line = v1.toString().split(",");
    String[] tagNames = line[1].split(">");
    String favCount = line[2];
    for (String tagName : tagNames) {
        output.collect(new Text(tagName), new Text(line[0] + "," + favCount));
    }

}
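
Reading off the code: an input line such as Q1,java>hadoop,5 (assuming the id,tags,favCount column layout the splits imply) yields the output pairs (java, "Q1,5") and (hadoop, "Q1,5").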