Example usage for org.apache.hadoop.io.Text.toString()

List of usage examples for org.apache.hadoop.io.Text.toString()

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text.toString().

Prototype

@Override
public String toString() 

Source Link

Document

Convert text back to string

Usage

From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyMapper.java

License:Apache License

/**
 * Maps every path in {@code pathList} through a fresh {@code CopyMapper} and verifies:
 * the target files exist with matching replication, block size and checksum; the
 * PATHS_COPIED and BYTES_COPIED counters; and the message counts carried in the
 * records collected by the in-memory writer. Any exception fails the test.
 */
@Test
public void testRun() {
    try {
        deleteState();
        createSourceData();

        FileSystem fs = cluster.getFileSystem();
        CopyMapper copyMapper = new CopyMapper();
        StatusReporter reporter = new StubStatusReporter();
        InMemoryWriter writer = new InMemoryWriter();
        Mapper<Text, FileStatus, NullWritable, Text>.Context context = getMapperContext(copyMapper, reporter,
                writer);
        copyMapper.setup(context);

        for (Path path : pathList) {
            copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
                    fs.getFileStatus(path), context);
        }
        // Check that the maps worked.
        for (Path path : pathList) {
            // Literal replacement: the paths are plain strings, so regex metacharacters in
            // SOURCE_PATH (or '$' / '\' in TARGET_PATH) must not be interpreted.
            final Path targetPath = new Path(path.toString().replace(SOURCE_PATH, TARGET_PATH));
            Assert.assertTrue(fs.exists(targetPath));
            Assert.assertTrue(fs.isFile(targetPath) == fs.isFile(path));
            Assert.assertEquals(fs.getFileStatus(path).getReplication(),
                    fs.getFileStatus(targetPath).getReplication());
            Assert.assertEquals(fs.getFileStatus(path).getBlockSize(),
                    fs.getFileStatus(targetPath).getBlockSize());
            // Checksums are only compared for regular files.
            Assert.assertTrue(
                    !fs.isFile(targetPath) || fs.getFileChecksum(targetPath).equals(fs.getFileChecksum(path)));
        }

        Assert.assertEquals(pathList.size(), reporter.getCounter(CopyMapper.Counter.PATHS_COPIED).getValue());
        // Here file is compressed file. So, we should compare the file length
        // with the number of bytes read
        long totalSize = 0;
        for (Path path : pathList) {
            totalSize += fs.getFileStatus(path).getLen();
        }
        Assert.assertEquals(totalSize, reporter.getCounter(CopyMapper.Counter.BYTES_COPIED).getValue());

        // Each collected record is expected to have four delimiter-separated fields,
        // the last of which is a message count.
        long totalCounterValue = 0;
        for (Text value : writer.values()) {
            String[] fields = value.toString().split(ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER);
            Assert.assertEquals(4, fields.length);
            totalCounterValue += Long.parseLong(fields[3]);
        }
        Assert.assertEquals(nFiles * NUMBER_OF_MESSAGES_PER_FILE, totalCounterValue);
        testCopyingExistingFiles(fs, copyMapper, context);
    } catch (Exception e) {
        LOG.error("Unexpected exception: ", e);
        // Fail with the cause in the message instead of a bare assertTrue(false).
        Assert.fail("Unexpected exception: " + e);
    }
}

From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyMapper.java

License:Apache License

/**
 * Exercises {@code CopyMapper} against source files that have been deleted after their
 * {@code FileStatus} was captured, so each copy is attempted on a missing file.
 *
 * @param ignoreFailures value set for the IGNORE_FAILURES switch; when {@code true} the
 *        mapper must not throw and every collected value must start with "FAIL:", when
 *        {@code false} an exception is expected
 */
private void doTestIgnoreFailures(boolean ignoreFailures) {
    try {
        deleteState();
        createSourceData();

        FileSystem fs = cluster.getFileSystem();
        CopyMapper copyMapper = new CopyMapper();
        StatusReporter reporter = new StubStatusReporter();
        InMemoryWriter writer = new InMemoryWriter();
        Mapper<Text, FileStatus, NullWritable, Text>.Context context = getMapperContext(copyMapper, reporter,
                writer);

        Configuration configuration = context.getConfiguration();
        configuration.setBoolean(DistCpOptionSwitch.IGNORE_FAILURES.getConfigLabel(), ignoreFailures);
        configuration.setBoolean(DistCpOptionSwitch.OVERWRITE.getConfigLabel(), true);
        configuration.setBoolean(DistCpOptionSwitch.SKIP_CRC.getConfigLabel(), true);
        copyMapper.setup(context);

        for (Path path : pathList) {
            final FileStatus fileStatus = fs.getFileStatus(path);
            if (!fileStatus.isDir()) {
                // Remove the file first so the subsequent map() call has nothing to copy.
                fs.delete(path, true);
                copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)), fileStatus,
                        context);
            }
        }
        if (ignoreFailures) {
            for (Text value : writer.values()) {
                Assert.assertTrue(value.toString() + " is not skipped", value.toString().startsWith("FAIL:"));
            }
        }
        // Reaching this point without an exception is only valid when failures are ignored.
        Assert.assertTrue("There should have been an exception.", ignoreFailures);
    } catch (Exception e) {
        // Log BEFORE asserting: a failing assertion throws, which previously meant the
        // stack trace was lost in exactly the case where it is needed.
        LOG.error("Exception while mapping deleted files (ignoreFailures=" + ignoreFailures + "): ", e);
        Assert.assertTrue("Unexpected exception: " + e.getMessage(), !ignoreFailures);
    }
}

From source file:com.inmobi.conduit.distcp.tools.TestCopyListing.java

License:Apache License

/**
 * Builds a copy listing for a single source file and checks that the first record in
 * the resulting sequence file has an empty relative path.
 */
@Test
public void testBuildListingForSingleFile() {
    FileSystem fs = null;
    String testRootString = "/singleFileListing";
    Path testRoot = new Path(testRootString);
    SequenceFile.Reader reader = null;
    try {
        fs = FileSystem.get(getConf());
        if (fs.exists(testRoot)) {
            TestDistCpUtils.delete(fs, testRootString);
        }

        // NOTE(review): the child paths below start with '/', so they may resolve as
        // absolute paths rather than under testRoot - confirm this is intended.
        Path sourceFile = new Path(testRoot, "/source/foo/bar/source.txt");
        Path decoyFile = new Path(testRoot, "/target/moo/source.txt");
        Path targetFile = new Path(testRoot, "/target/moo/target.txt");

        TestDistCpUtils.createFile(fs, sourceFile.toString());
        TestDistCpUtils.createFile(fs, decoyFile.toString());
        TestDistCpUtils.createFile(fs, targetFile.toString());

        List<Path> srcPaths = new ArrayList<Path>();
        srcPaths.add(sourceFile);

        DistCpOptions options = new DistCpOptions(srcPaths, targetFile);
        CopyListing listing = new SimpleCopyListing(getConf(), CREDENTIALS);

        final Path listFile = new Path(testRoot, "/tmp/fileList.seq");
        listing.buildListing(listFile, options);

        reader = new SequenceFile.Reader(fs, listFile, getConf());
        FileStatus fileStatus = new FileStatus();
        Text relativePath = new Text();
        Assert.assertTrue(reader.next(relativePath, fileStatus));
        // assertEquals reports the actual value on failure, unlike assertTrue(equals).
        Assert.assertEquals("", relativePath.toString());
    } catch (Exception e) {
        // Log first: Assert.fail throws, so anything placed after it never runs.
        LOG.error("Unexpected exception: ", e);
        Assert.fail("Unexpected exception encountered.");
    } finally {
        // Release the reader before removing the files it may still have open.
        IOUtils.closeStream(reader);
        TestDistCpUtils.delete(fs, testRootString);
    }
}

From source file:com.inmobi.conduit.distcp.tools.TestFileBasedCopyListing.java

License:Apache License

/**
 * Reads the listing sequence file and checks every record against {@code map}, then
 * checks the number of records read.
 *
 * @param listFile sequence file of (relative path, file status) records
 * @param count expected number of records; when 0 the file is not opened at all
 * @throws IOException if the sequence file cannot be opened or read
 */
private void checkResult(Path listFile, int count) throws IOException {
    if (count == 0) {
        return;
    }

    int recCount = 0;
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, listFile, config);
    try {
        Text relPath = new Text();
        FileStatus fileStatus = new FileStatus();
        while (reader.next(relPath, fileStatus)) {
            // Expected value first, so a failure message labels the two sides correctly.
            Assert.assertEquals(map.get(relPath.toString()), fileStatus.getPath().toUri().getPath());
            recCount++;
        }
    } finally {
        IOUtils.closeStream(reader);
    }
    Assert.assertEquals(count, recCount);
}

From source file:com.inmobi.conduit.distcp.tools.TestGlobbedCopyListing.java

License:Apache License

/**
 * Reads every (relative path, file status) record from the listing and compares the
 * resulting path-to-relative-path mapping with {@code expectedValues}.
 *
 * @param listingPath sequence file produced by the copy listing
 * @throws Exception if the listing cannot be opened, read or closed
 */
private void verifyContents(Path listingPath) throws Exception {
    Map<String, String> actualValues = new HashMap<String, String>();
    SequenceFile.Reader reader = new SequenceFile.Reader(cluster.getFileSystem(), listingPath,
            new Configuration());
    try {
        Text key = new Text();
        FileStatus value = new FileStatus();
        while (reader.next(key, value)) {
            actualValues.put(value.getPath().toString(), key.toString());
        }
    } finally {
        // The reader was previously never closed, leaking the underlying stream.
        reader.close();
    }

    Assert.assertEquals(expectedValues.size(), actualValues.size());
    for (Map.Entry<String, String> entry : actualValues.entrySet()) {
        // Expected value first, so a failure message labels the two sides correctly.
        Assert.assertEquals(expectedValues.get(entry.getKey()), entry.getValue());
    }
}

From source file:com.inmobi.conduit.local.CopyMapper.java

License:Apache License

/**
 * Gzips one source file to a temporary path, renames it into the destination directory
 * as {@code <collector>-<file name>.gz}, and, when auditing is enabled, writes one
 * counter record per entry collected during compression.
 *
 * @param key destination directory, as text
 * @param value status of the source file; collector and category are taken from the
 *        names of its parent and grandparent directories
 * @param context mapper context supplying the configuration and the output
 * @throws IOException on file system failures
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
public void map(Text key, FileStatus value, Context context) throws IOException, InterruptedException {
    Path src = value.getPath();
    String dest = key.toString();
    String collector = src.getParent().getName();
    String category = src.getParent().getParent().getName();

    // The map is only allocated when auditing is enabled (enabled by default).
    Map<Long, Long> received = null;
    if (context.getConfiguration().getBoolean(ConduitConstants.AUDIT_ENABLED_KEY, true)) {
        received = new HashMap<Long, Long>();
    }

    // Use a configuration whose default file system is the configured source one.
    Configuration srcConf = new Configuration();
    srcConf.set(FS_DEFAULT_NAME_KEY, context.getConfiguration().get(SRC_FS_DEFAULT_NAME_KEY));

    FileSystem fs = FileSystem.get(srcConf);
    Path target = getTempPath(context, src, category, collector);
    if (FileUtil.gzip(src, target, srcConf, received)) {
        LOG.info("File " + src + " is empty hence returning without compressing");
        return;
    }
    // move to final destination
    fs.mkdirs(new Path(dest).makeQualified(fs));
    String destnFilename = collector + "-" + src.getName() + ".gz";
    // Hadoop paths always use '/', so use Path.SEPARATOR rather than the local
    // platform's File.separator.
    Path destPath = new Path(dest + Path.SEPARATOR + destnFilename);
    LOG.info("Renaming file " + target + " to " + destPath);
    if (!fs.rename(target, destPath)) {
        // rename() reports failure through its return value; surface it instead of
        // dropping it silently.
        LOG.warn("Failed to rename file " + target + " to " + destPath);
    }
    if (received != null) {
        for (Entry<Long, Long> entry : received.entrySet()) {
            String counterNameValue = getCounterNameValue(category, destnFilename, entry.getKey(),
                    entry.getValue());
            context.write(NullWritable.get(), new Text(counterNameValue));
        }
    }
}

From source file:com.inmobi.databus.local.CopyMapper.java

License:Apache License

/**
 * Gzips one source file to a temporary path and renames it into the destination
 * directory as {@code <collector>-<file name>.gz}.
 *
 * @param key source file path, as text; collector and category are taken from the names
 *        of its parent and grandparent directories
 * @param value destination directory, as text
 * @param context mapper context supplying the configuration
 * @throws IOException on file system failures
 * @throws InterruptedException declared by the mapper contract
 */
@Override
public void map(Text key, Text value, Context context) throws IOException, InterruptedException {
    Path src = new Path(key.toString());
    String dest = value.toString();
    String collector = src.getParent().getName();
    String category = src.getParent().getParent().getName();

    FileSystem fs = FileSystem.get(context.getConfiguration());
    Path target = getTempPath(context, src, category, collector);
    FileUtil.gzip(src, target, context.getConfiguration());
    // move to final destination
    fs.mkdirs(new Path(dest).makeQualified(fs));
    // Hadoop paths always use '/', so use Path.SEPARATOR rather than the local
    // platform's File.separator.
    Path destPath = new Path(dest + Path.SEPARATOR + collector + "-" + src.getName() + ".gz");
    LOG.info("Renaming file " + target + " to " + destPath);
    if (!fs.rename(target, destPath)) {
        // rename() reports failure through its return value; surface it instead of
        // dropping it silently.
        LOG.warn("Failed to rename file " + target + " to " + destPath);
    }
}

From source file:com.intel.hadoop.graphbuilder.idnormalize.mapreduce.HashIdMapper.java

License:Open Source License

/**
 * Emits each vertex line keyed by the current id and then advances the id; lines the
 * graph parser does not recognise as vertex data are dropped.
 */
@Override
public void map(LongWritable key, Text val, OutputCollector<IntWritable, Text> out, Reporter arg3)
        throws IOException {
    final String line = val.toString();
    if (!graphparser.isVertexData(line)) {
        return;
    }
    out.collect(new IntWritable(curId), new Text(line));
    curId++;
}

From source file:com.intel.hadoop.graphbuilder.idnormalize.mapreduce.SortDictMapper.java

License:Open Source License

/**
 * Assigns a dictionary line to one of {@code numChunks} partitions and emits the
 * unchanged line keyed by that partition number.
 *
 * <p>The first token of the line is the new vertex id. When {@code hashRawVid} is set,
 * the second token is parsed as the raw id and its hash selects the partition;
 * otherwise the hash of the new id (as a {@code Long}) is used. Lines with too few
 * tokens are logged and skipped.
 *
 * @throws IOException if the collector fails
 */
@Override
public void map(LongWritable key, Text val, OutputCollector<IntWritable, Text> out, Reporter reporter)
        throws IOException {
    String line = val.toString();
    StringTokenizer tokenizer = new StringTokenizer(line);

    try {
        String vid = tokenizer.nextToken();
        int hash;
        if (hashRawVid) { // partition by old vid
            Object rawId = vidparser.getValue(tokenizer.nextToken());
            hash = rawId.hashCode() % numChunks;
        } else { // partition by new vid
            hash = Long.valueOf(vid).hashCode() % numChunks;
        }
        // Java's % keeps the sign of the dividend, and hashCode() may be negative in
        // either branch (Long.hashCode is negative when bit 31 of the folded value is
        // set), so normalise into [0, numChunks) for both.
        if (hash < 0) {
            hash += numChunks;
        }
        out.collect(new IntWritable(hash), val);
    } catch (NoSuchElementException e) {
        // Pass the exception to the logger instead of printing it to stderr.
        LOG.error("Error parsing vertex dictionary: " + line, e);
    }

}

From source file:com.intel.hadoop.graphbuilder.idnormalize.mapreduce.TransEdgeMapper.java

License:Open Source License

/**
 * Rewrites an edge line so that its source is replaced by the id found in the
 * dictionary, and emits it keyed by the partition of the target vertex.
 *
 * <p>Lines that are not edge data are ignored. The dictionary is reloaded whenever the
 * source vertex falls into a partition other than the one currently loaded. If the
 * source is missing from the dictionary, the problem is logged and nothing is emitted.
 */
@Override
public void map(LongWritable key, Text value, OutputCollector<IntWritable, Text> out, Reporter reporter)
        throws IOException {
    final String line = value.toString();
    if (!graphparser.isEdgeData(line)) {
        return;
    }

    Edge<VidType, ?> edge = graphparser.parseEdge(line, vidparser, edataparser);

    // Partition of the source vertex, shifted into [0, numChunks) when negative.
    int sourceChunk = edge.source().hashCode() % numChunks;
    if (sourceChunk < 0) {
        sourceChunk += numChunks;
    }
    if (sourceChunk != dictionaryId) {
        dictionaryId = sourceChunk;
        loadDictionary();
    }

    if (!dict.containsKey(edge.source())) {
        LOG.error("TransEdgeMapper: Cannot find key " + edge.source().toString());
        LOG.error("Line: " + line);
        return;
    }

    long srcId = dict.get(edge.source());

    // Partition of the target vertex, normalised the same way.
    int targetChunk = edge.target().hashCode() % numChunks;
    if (targetChunk < 0) {
        targetChunk += numChunks;
    }

    Text translated = new Text(srcId + "\t" + edge.target().toString() + "\t" + edge.EdgeData().toString());
    out.collect(new IntWritable(targetChunk), translated);
}