Example usage for org.apache.hadoop.io Text compareTo

List of usage examples for org.apache.hadoop.io Text compareTo

Introduction

This page collects usage examples for the compareTo method of org.apache.hadoop.io.Text.

Prototype

@Override
public int compareTo(BinaryComparable other) 

Source Link

Document

Compare bytes from {@link #getBytes()}.

Usage

From source file:boa.datagen.SeqSortMerge.java

License:Apache License

/**
 * Returns the index of the smallest key in {@code keys}, skipping empty keys
 * at positions 1 and above.
 *
 * The element at index 0 is always the initial minimum, whether or not it is
 * empty.
 *
 * NOTE(review): if {@code keys[0]} is empty and Text orders an empty value
 * before every non-empty one, no later key can replace it and 0 is always
 * returned - confirm callers expect that.
 *
 * @param keys candidate keys; must contain at least one element
 * @return index of the selected key
 */
private static int min(Text[] keys) {
    int smallestIdx = 0;
    Text smallest = keys[0];
    for (int pos = 1; pos < keys.length; pos++) {
        Text candidate = keys[pos];
        if (candidate.toString().isEmpty()) {
            // empty keys are never selected over the current minimum
            continue;
        }
        if (candidate.compareTo(smallest) >= 0) {
            continue;
        }
        smallestIdx = pos;
        smallest = candidate;
    }
    return smallestIdx;
}

From source file:cienciaCelularMR.KeyMcell.java

/**
 * Compares this key with another {@code KeyMcell}.
 *
 * Returns 0 when {@code equals} reports the two objects equal, -1 when they
 * are not equal but their {@code idUsuario} values compare equal, and 1 in
 * every other case.
 *
 * NOTE(review): the result never reflects which {@code idUsuario} is smaller,
 * so two keys with different ids yield 1 in both directions - verify that
 * callers do not rely on a consistent ordering.
 *
 * @param o the object to compare with; cast to {@code KeyMcell}
 * @return 0, -1 or 1 as described above
 */
@Override
public int compareTo(Object o) {
    KeyMcell other = (KeyMcell) o;
    Text ownId = this.idUsuario;
    Text otherId = other.idUsuario;

    if (this.equals(o)) {
        return 0;
    }
    if (otherId.compareTo(ownId) == 0) {
        return -1;
    }
    return 1;
}

From source file:com.alexholmes.hadooputils.sort.TextArrayWritable.java

License:Apache License

/**
 * Compares two arrays of {@code Text} element by element.
 *
 * Only the positions present in both arrays are examined; the first pair that
 * differs decides the result.
 *
 * NOTE(review): when one array is a prefix of the other the result is 0, so
 * arrays of different length can compare as equal - confirm this is intended.
 *
 * @param o the array to compare against
 * @return the first non-zero element comparison, or 0 if none differs
 */
public int compareTo(TextArrayWritable o) {
    Writable[] mine = get();
    Writable[] theirs = o.get();
    int shared = Math.min(mine.length, theirs.length);
    int idx = 0;
    while (idx < shared) {
        int order = ((Text) mine[idx]).compareTo((Text) theirs[idx]);
        if (order != 0) {
            return order;
        }
        idx++;
    }
    return 0;
}

From source file:com.facebook.presto.accumulo.AccumuloClient.java

License:Apache License

/**
 * Gets the TabletServer hostname for where the given key is located in the given table.
 * <p>
 * The metadata scanner is closed on every path (null key, normal scan, or failure
 * during the scan) before the result is computed.
 *
 * @param table Fully-qualified table name
 * @param key Key to locate, or null to ask for the location of the first tablet
 * @return The tablet location; the result of getDefaultTabletLocation(table) when the
 *         scan finds nothing; or an empty Optional if an error occurs
 */
private Optional<String> getTabletLocation(String table, Key key) {
    try {
        // Get the Accumulo table ID so we can scan some fun stuff
        String tableId = connector.tableOperations().tableIdMap().get(table);

        // Create our scanner against the metadata table, fetching 'loc' family
        Scanner scanner = connector.createScanner("accumulo.metadata", auths);
        Optional<String> location = Optional.empty();
        try {
            scanner.fetchColumnFamily(new Text("loc"));

            // Set the scan range to just this table, from the table ID to the default tablet
            // row, which is the last listed tablet
            Key defaultTabletRow = new Key(tableId + '<');
            Key start = new Key(tableId);
            Key end = defaultTabletRow.followingKey(PartialKey.ROW);
            scanner.setRange(new Range(start, end));

            if (key == null) {
                // if the key is null, then it is -inf, so get first tablet location
                Iterator<Entry<Key, Value>> iter = scanner.iterator();
                if (iter.hasNext()) {
                    location = Optional.of(iter.next().getValue().toString());
                }
            } else {
                // Else, we will need to scan through the tablet location data and find the location

                // Create some text objects to do comparison for what we are looking for
                Text splitCompareKey = new Text();
                key.getRow(splitCompareKey);
                Text scannedCompareKey = new Text();

                // Scan the table!
                for (Entry<Key, Value> entry : scanner) {
                    // Get the bytes of the key
                    byte[] keyBytes = entry.getKey().getRow().copyBytes();

                    // If the last byte is <, then we have hit the default tablet, so use this location
                    if (keyBytes[keyBytes.length - 1] == '<') {
                        location = Optional.of(entry.getValue().toString());
                        break;
                    }

                    // Skip the first three bytes of the metadata row
                    // (presumably the table ID plus separator - TODO confirm)
                    scannedCompareKey.set(keyBytes, 3, keyBytes.length - 3);

                    // Compare the keys, moving along the tablets until the location is found.
                    // NOTE(review): a key greater than the first scanned row breaks out at once,
                    // while a smaller key keeps overwriting the location until the default tablet
                    // is reached - confirm this comparison direction is intended.
                    if (scannedCompareKey.getLength() > 0) {
                        int compareTo = splitCompareKey.compareTo(scannedCompareKey);
                        if (compareTo <= 0) {
                            location = Optional.of(entry.getValue().toString());
                        } else {
                            // all future tablets will be greater than this key
                            break;
                        }
                    }
                }
            }
        } finally {
            // Always release the scanner: previously it was closed only on the non-null-key
            // path and leaked when key was null or the scan threw.
            scanner.close();
        }

        // If we were unable to find the location for some reason, return the default tablet
        // location
        return location.isPresent() ? location : getDefaultTabletLocation(table);
    } catch (Exception e) {
        // Swallow this exception so the query does not fail due to being unable
        // to locate the tablet server for the provided Key.
        // This is purely an optimization, but we will want to log the error.
        LOG.error("Failed to get tablet location, returning dummy location", e);
        return Optional.empty();
    }
}

From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java

License:Apache License

/**
 * Removes entries from the target that are not present in the source listing.
 *
 * Builds a listing of the target final path, sorts both the source and target
 * listings, then walks the two sorted listings in step. Every target entry
 * without an equal relative path in the source is deleted (recursively) if it
 * still exists.
 *
 * Both sequence-file readers are opened inside the try block so that neither
 * leaks if opening the second one fails.
 *
 * @param conf job configuration supplying the listing file path and the target final path
 * @throws IOException if a listing cannot be read or a target entry cannot be deleted
 */
private void deleteMissing(Configuration conf) throws IOException {
    LOG.info("-delete option is enabled. About to remove entries from target that are missing in source");

    Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
    FileSystem clusterFS = sourceListing.getFileSystem(conf);
    Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);

    Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
    CopyListing target = new GlobbedCopyListing(conf, null);

    List<Path> targets = new ArrayList<Path>(1);
    Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
    targets.add(targetFinalPath);
    DistCpOptions options = new DistCpOptions(targets, new Path("/NONE"));

    target.buildListing(targetListing, options);
    Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
    long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();

    SequenceFile.Reader sourceReader = null;
    SequenceFile.Reader targetReader = null;

    long deletedEntries = 0;
    try {
        sourceReader = new SequenceFile.Reader(clusterFS, sortedSourceListing, conf);
        targetReader = new SequenceFile.Reader(clusterFS, sortedTargetListing, conf);

        FileStatus srcFileStatus = new FileStatus();
        Text srcRelPath = new Text();
        FileStatus trgtFileStatus = new FileStatus();
        Text trgtRelPath = new Text();

        FileSystem targetFS = targetFinalPath.getFileSystem(conf);
        boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
        while (targetReader.next(trgtRelPath, trgtFileStatus)) {
            // Advance the source until it is no longer behind the current target entry
            while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
                srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
            }

            // Present in both listings: keep it
            if (srcAvailable && trgtRelPath.equals(srcRelPath)) {
                continue;
            }

            // An entry that no longer exists counts as successfully removed
            boolean result = (!targetFS.exists(trgtFileStatus.getPath())
                    || targetFS.delete(trgtFileStatus.getPath(), true));
            if (result) {
                LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
                deletedEntries++;
            } else {
                throw new IOException("Unable to delete " + trgtFileStatus.getPath());
            }
            HadoopCompat.progress(taskAttemptContext);
            HadoopCompat.setStatus(taskAttemptContext, "Deleting missing files from target. ["
                    + targetReader.getPosition() * 100 / totalLen + "%]");
        }
    } finally {
        IOUtils.closeStream(sourceReader);
        IOUtils.closeStream(targetReader);
    }
    LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
}

From source file:com.kylinolap.job.hadoop.cube.RowKeyDistributionCheckerMapper.java

License:Apache License

/**
 * Adds the size of this record to the first entry of {@code keyList} that is
 * greater than the record key.
 *
 * The size is the sum of the key length and the value length. Only the first
 * matching entry is updated; if no entry is greater than the key, nothing is
 * recorded.
 *
 * @param key record key
 * @param value record value
 * @param context mapper context (not used here)
 */
@Override
public void map(Text key, Text value, Context context) throws IOException, InterruptedException {
    for (Text boundary : keyList) {
        if (key.compareTo(boundary) >= 0) {
            continue;
        }
        Long total = resultMap.get(boundary);
        long recordSize = key.getLength() + value.getLength();
        total += recordSize;
        resultMap.put(boundary, total);
        // only the first boundary above the key is credited
        return;
    }
}

From source file:com.marcolotz.MRComponents.KeyStructureWritable.java

License:Creative Commons License

/**
 * Orders key structures by their series instance UID, compared as {@code Text}.
 *
 * @param comparedKeyStruct the key structure to compare against
 * @return the result of comparing this UID with the other UID
 */
@Override
public int compareTo(KeyStructureWritable comparedKeyStruct) {
    final Text ownUid = new Text(getSeriesInstanceUID());
    final Text otherUid = new Text(comparedKeyStruct.getSeriesInstanceUID());
    final int order = ownUid.compareTo(otherUid);
    return order;
}

From source file:com.pinterest.hdfsbackup.distcp.DistCp.java

License:Apache License

/**
 * Delete the dst files/dirs which do not exist in src.
 *
 * Steps: (1) list everything below {@code dstroot} into the sequence file
 * {@code _distcp_dst_lsr} under {@code jobdir}; (2) sort that file into
 * {@code _distcp_dst_lsr_sorted}; (3) walk the sorted listing alongside
 * {@code dstsorted} and remove, through {@code FsShell -rmr}, every listed
 * path that has no equal key in {@code dstsorted}.
 *
 * @param dstfs file system used to list the destination
 * @param dstroot status of the destination root; must be a directory
 * @param dstsorted sorted sequence file of expected destination paths
 * @param jobfs file system holding the job directory
 * @param jobdir directory receiving the temporary listing files
 * @param jobconf configuration for sequence-file I/O and sorting
 * @param conf configuration handed to the shell that performs deletions
 * @throws IOException if dstroot is not a directory, or a deletion fails
 */
static private void deleteNonexisting(FileSystem dstfs, FileStatus dstroot, Path dstsorted, FileSystem jobfs,
        Path jobdir, JobConf jobconf, Configuration conf) throws IOException {
    if (!dstroot.isDir()) {
        throw new IOException("dst must be a directory when option " + Options.DELETE.cmd
                + " is set, but dst (= " + dstroot.getPath() + ") is not a directory.");
    }

    //write dst lsr results
    final Path listingFile = new Path(jobdir, "_distcp_dst_lsr");
    final SequenceFile.Writer listingWriter = SequenceFile.createWriter(jobfs, jobconf, listingFile, Text.class,
            FileStatus.class, SequenceFile.CompressionType.NONE);
    try {
        //do lsr to get all file statuses in dstroot
        final Stack<FileStatus> pending = new Stack<FileStatus>();
        pending.push(dstroot);
        while (!pending.isEmpty()) {
            final FileStatus current = pending.pop();
            if (!current.isDir()) {
                continue;
            }
            for (FileStatus child : dstfs.listStatus(current.getPath())) {
                String relative = makeRelative(dstroot.getPath(), child.getPath());
                listingWriter.append(new Text(relative), child);
                pending.push(child);
            }
        }
    } finally {
        checkAndClose(listingWriter);
    }

    //sort lsr results
    final Path sortedListing = new Path(jobdir, "_distcp_dst_lsr_sorted");
    SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs, new Text.Comparator(), Text.class,
            FileStatus.class, jobconf);
    sorter.sort(listingFile, sortedListing);

    //compare lsr list and dst list
    SequenceFile.Reader lsrin = null;
    SequenceFile.Reader dstin = null;
    try {
        lsrin = new SequenceFile.Reader(jobfs, sortedListing, jobconf);
        dstin = new SequenceFile.Reader(jobfs, dstsorted, jobconf);

        //compare sorted lsr list and sorted dst list
        final Text lsrpath = new Text();
        final FileStatus lsrstatus = new FileStatus();
        final Text dstpath = new Text();
        final Text dstfrom = new Text();
        final FsShell shell = new FsShell(conf);
        final String[] shellargs = { "-rmr", null };

        boolean hasnext = dstin.next(dstpath, dstfrom);
        while (lsrin.next(lsrpath, lsrstatus)) {
            //advance the expected list until it is no longer behind the listed path
            int cmp = dstpath.compareTo(lsrpath);
            while (hasnext && cmp < 0) {
                hasnext = dstin.next(dstpath, dstfrom);
                cmp = dstpath.compareTo(lsrpath);
            }

            if (cmp == 0) {
                //lsrpath exists in dst, skip it
                hasnext = dstin.next(dstpath, dstfrom);
                continue;
            }

            //lsrpath does not exist, delete it
            String doomed = new Path(dstroot.getPath(), lsrpath.toString()).toString();
            if (shellargs[1] != null && isAncestorPath(shellargs[1], doomed)) {
                //the previously removed path is an ancestor per isAncestorPath, nothing to do
                continue;
            }
            shellargs[1] = doomed;
            int exitCode = 0;
            try {
                exitCode = shell.run(shellargs);
            } catch (Exception e) {
                throw new IOException("Exception from shell.", e);
            }
            if (exitCode != 0) {
                throw new IOException(
                        "\"" + shellargs[0] + " " + shellargs[1] + "\" returns non-zero value " + exitCode);
            }
        }
    } finally {
        checkAndClose(lsrin);
        checkAndClose(dstin);
    }
}

From source file:com.scaleunlimited.cascading.DistCp.java

License:Apache License

/**
 * Delete the dst files/dirs which do not exist in src.
 *
 * A listing of everything below {@code dstroot} is written to
 * {@code _distcp_dst_lsr} in {@code jobdir}, sorted into
 * {@code _distcp_dst_lsr_sorted}, and then compared against
 * {@code dstsorted}. Listed paths with no equal key in {@code dstsorted} are
 * removed with {@code FsShell -rmr}.
 *
 * @param dstfs file system used to list the destination
 * @param dstroot status of the destination root; must be a directory
 * @param dstsorted sorted sequence file of expected destination paths
 * @param jobfs file system holding the job directory
 * @param jobdir directory receiving the temporary listing files
 * @param jobconf configuration for sequence-file I/O and sorting
 * @param conf configuration handed to the shell that performs deletions
 * @throws IOException if dstroot is not a directory, or a deletion fails
 */
static private void deleteNonexisting(FileSystem dstfs, FileStatus dstroot, Path dstsorted, FileSystem jobfs,
        Path jobdir, JobConf jobconf, Configuration conf) throws IOException {
    if (!dstroot.isDir()) {
        throw new IOException("dst must be a directory when option " + Options.DELETE.cmd
                + " is set, but dst (= " + dstroot.getPath() + ") is not a directory.");
    }

    //write dst lsr results
    final Path rawListing = new Path(jobdir, "_distcp_dst_lsr");
    final SequenceFile.Writer out = SequenceFile.createWriter(jobfs, jobconf, rawListing, Text.class,
            FileStatus.class, SequenceFile.CompressionType.NONE);
    try {
        //do lsr to get all file statuses in dstroot
        final Stack<FileStatus> toVisit = new Stack<FileStatus>();
        toVisit.push(dstroot);
        while (!toVisit.isEmpty()) {
            final FileStatus node = toVisit.pop();
            if (node.isDir()) {
                final FileStatus[] children = dstfs.listStatus(node.getPath());
                for (FileStatus child : children) {
                    final Text relativeKey = new Text(makeRelative(dstroot.getPath(), child.getPath()));
                    out.append(relativeKey, child);
                    toVisit.push(child);
                }
            }
        }
    } finally {
        checkAndClose(out);
    }

    //sort lsr results
    final Path orderedListing = new Path(jobdir, "_distcp_dst_lsr_sorted");
    SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs, new Text.Comparator(), Text.class,
            FileStatus.class, jobconf);
    sorter.sort(rawListing, orderedListing);

    //compare lsr list and dst list
    SequenceFile.Reader lsrin = null;
    SequenceFile.Reader dstin = null;
    try {
        lsrin = new SequenceFile.Reader(jobfs, orderedListing, jobconf);
        dstin = new SequenceFile.Reader(jobfs, dstsorted, jobconf);

        //compare sorted lsr list and sorted dst list
        final Text lsrpath = new Text();
        final FileStatus lsrstatus = new FileStatus();
        final Text dstpath = new Text();
        final Text dstfrom = new Text();
        final FsShell shell = new FsShell(conf);
        final String[] shellargs = { "-rmr", null };

        boolean more = dstin.next(dstpath, dstfrom);
        while (lsrin.next(lsrpath, lsrstatus)) {
            int order = dstpath.compareTo(lsrpath);
            while (more && order < 0) {
                more = dstin.next(dstpath, dstfrom);
                order = dstpath.compareTo(lsrpath);
            }

            if (order == 0) {
                //lsrpath exists in dst, skip it
                more = dstin.next(dstpath, dstfrom);
                continue;
            }

            //lsrpath does not exist, delete it
            final String victim = new Path(dstroot.getPath(), lsrpath.toString()).toString();
            final boolean alreadyCovered = shellargs[1] != null && isAncestorPath(shellargs[1], victim);
            if (alreadyCovered) {
                continue;
            }
            shellargs[1] = victim;
            int status = 0;
            try {
                status = shell.run(shellargs);
            } catch (Exception e) {
                throw new IOException("Exception from shell.", e);
            }
            if (status != 0) {
                throw new IOException(
                        "\"" + shellargs[0] + " " + shellargs[1] + "\" returns non-zero value " + status);
            }
        }
    } finally {
        checkAndClose(lsrin);
        checkAndClose(dstin);
    }
}

From source file:fr.ens.biologie.genomique.eoulsan.modules.mgmt.hadoop.DistCp.java

License:LGPL

/**
 * Delete the dst files/dirs which do not exist in src.
 *
 * The destination tree is listed into {@code _distcp_dst_lsr} under
 * {@code jobdir}, sorted into {@code _distcp_dst_lsr_sorted}, and then
 * compared against {@code dstsorted}. Any listed path without an equal key in
 * {@code dstsorted} is removed with {@code FsShell -rmr}.
 *
 * NOTE(review): the listing writer uses {@code dstroot.getClass()} as value
 * class while the sorter is given {@code FileStatus.class} - presumably these
 * always coincide; verify.
 *
 * @param dstfs file system used to list the destination
 * @param dstroot status of the destination root; must be a directory
 * @param dstsorted sorted sequence file of expected destination paths
 * @param jobfs file system holding the job directory
 * @param jobdir directory receiving the temporary listing files
 * @param jobconf configuration for sequence-file I/O and sorting
 * @param conf configuration handed to the shell that performs deletions
 * @throws IOException if dstroot is not a directory, or a deletion fails
 */
static private void deleteNonexisting(final FileSystem dstfs, final FileStatus dstroot, final Path dstsorted,
        final FileSystem jobfs, final Path jobdir, final JobConf jobconf, final Configuration conf)
        throws IOException {
    if (!dstroot.isDir()) {
        throw new IOException("dst must be a directory when option " + Options.DELETE.cmd
                + " is set, but dst (= " + dstroot.getPath() + ") is not a directory.");
    }

    // write dst lsr results
    final Path unsorted = new Path(jobdir, "_distcp_dst_lsr");
    final SequenceFile.Writer sink = SequenceFile.createWriter(jobfs, jobconf, unsorted, Text.class,
            dstroot.getClass(), SequenceFile.CompressionType.NONE);
    try {
        // do lsr to get all file statuses in dstroot
        final Stack<FileStatus> work = new Stack<>();
        work.push(dstroot);
        while (!work.isEmpty()) {
            final FileStatus entry = work.pop();
            if (!entry.isDir()) {
                continue;
            }
            for (FileStatus child : dstfs.listStatus(entry.getPath())) {
                String relative = makeRelative(dstroot.getPath(), child.getPath());
                sink.append(new Text(relative), child);
                work.push(child);
            }
        }
    } finally {
        checkAndClose(sink);
    }

    // sort lsr results
    final Path sorted = new Path(jobdir, "_distcp_dst_lsr_sorted");
    SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs, new Text.Comparator(), Text.class,
            FileStatus.class, jobconf);
    sorter.sort(unsorted, sorted);

    // compare lsr list and dst list
    SequenceFile.Reader lsrin = null;
    SequenceFile.Reader dstin = null;
    try {
        lsrin = new SequenceFile.Reader(jobfs, sorted, jobconf);
        dstin = new SequenceFile.Reader(jobfs, dstsorted, jobconf);

        // compare sorted lsr list and sorted dst list
        final Text lsrpath = new Text();
        final FileStatus lsrstatus = new FileStatus();
        final Text dstpath = new Text();
        final Text dstfrom = new Text();
        final FsShell shell = new FsShell(conf);
        final String[] shellargs = { "-rmr", null };

        boolean dstRemaining = dstin.next(dstpath, dstfrom);
        while (lsrin.next(lsrpath, lsrstatus)) {
            // catch the expected list up with the current listed path
            int relation = dstpath.compareTo(lsrpath);
            while (dstRemaining && relation < 0) {
                dstRemaining = dstin.next(dstpath, dstfrom);
                relation = dstpath.compareTo(lsrpath);
            }

            if (relation == 0) {
                // lsrpath exists in dst, skip it
                dstRemaining = dstin.next(dstpath, dstfrom);
                continue;
            }

            // lsrpath does not exist, delete it
            String target = new Path(dstroot.getPath(), lsrpath.toString()).toString();
            if (shellargs[1] != null && isAncestorPath(shellargs[1], target)) {
                continue;
            }
            shellargs[1] = target;
            int rc = 0;
            try {
                rc = shell.run(shellargs);
            } catch (Exception e) {
                throw new IOException("Exception from shell.", e);
            }
            if (rc != 0) {
                throw new IOException(
                        "\"" + shellargs[0] + " " + shellargs[1] + "\" returns non-zero value " + rc);
            }
        }
    } finally {
        checkAndClose(lsrin);
        checkAndClose(dstin);
    }
}