List of usage examples for org.apache.hadoop.io Text compareTo
@Override public int compareTo(BinaryComparable other)
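Text.compareTo is inherited from BinaryComparable: it lexicographically compares the two instances' serialized UTF-8 bytes and returns a negative value, zero, or a positive value. Every example below relies on this byte-level ordering. As a minimal, self-contained sketch (the class name is illustrative; assumes hadoop-common on the classpath):

import org.apache.hadoop.io.Text;

public class TextCompareToDemo {
    public static void main(String[] args) {
        Text a = new Text("apple");
        Text b = new Text("banana");

        // Lexicographic comparison of the UTF-8 encoded bytes:
        System.out.println(a.compareTo(b));                 // negative: "apple" sorts first
        System.out.println(a.compareTo(new Text("apple"))); // 0: identical bytes
        System.out.println(b.compareTo(a));                 // positive: "banana" sorts after
    }
}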
From source file: org.apache.accumulo.testing.core.randomwalk.image.Write.java
License: Apache License

@Override
public void visit(State state, RandWalkEnv env, Properties props) throws Exception {
    MultiTableBatchWriter mtbw = env.getMultiTableBatchWriter();
    BatchWriter imagesBW = mtbw.getBatchWriter(state.getString("imageTableName"));
    BatchWriter indexBW = mtbw.getBatchWriter(state.getString("indexTableName"));

    String uuid = UUID.randomUUID().toString();

    Mutation m = new Mutation(new Text(uuid));

    // create a fake image between 4KB and 1MB
    int maxSize = Integer.parseInt(props.getProperty("maxSize"));
    int minSize = Integer.parseInt(props.getProperty("minSize"));

    Random rand = new Random();
    int numBytes = rand.nextInt(maxSize - minSize) + minSize;
    byte[] imageBytes = new byte[numBytes];
    rand.nextBytes(imageBytes);

    m.put(CONTENT_COLUMN_FAMILY, IMAGE_COLUMN_QUALIFIER, new Value(imageBytes));

    // store size
    m.put(META_COLUMN_FAMILY, new Text("size"), new Value(String.format("%d", numBytes).getBytes(UTF_8)));

    // store hash
    MessageDigest alg = MessageDigest.getInstance("SHA-1");
    alg.update(imageBytes);
    byte[] hash = alg.digest();
    m.put(META_COLUMN_FAMILY, SHA1_COLUMN_QUALIFIER, new Value(hash));

    // update write counts
    state.set("numWrites", state.getLong("numWrites") + 1);
    Long totalWrites = state.getLong("totalWrites") + 1;
    state.set("totalWrites", totalWrites);

    // set count
    m.put(META_COLUMN_FAMILY, COUNT_COLUMN_QUALIFIER, new Value(String.format("%d", totalWrites).getBytes(UTF_8)));

    // add mutation
    imagesBW.addMutation(m);

    // now add mutation to index
    Text row = new Text(hash);
    m = new Mutation(row);
    m.put(META_COLUMN_FAMILY, UUID_COLUMN_QUALIFIER, new Value(uuid.getBytes(UTF_8)));
    indexBW.addMutation(m);

    Text lastRow = (Text) state.get("lastIndexRow");
    if (lastRow.compareTo(row) < 0) {
        state.set("lastIndexRow", new Text(row));
    }
}
From source file: org.apache.gora.accumulo.util.SignedBinaryEncoderTest.java
License: Apache License

@Test
public void testShort() {
    short s = Short.MIN_VALUE;
    Text prev = null;
    SignedBinaryEncoder encoder = new SignedBinaryEncoder();
    while (true) {
        byte[] enc = encoder.encodeShort(s);
        assertEquals(s, encoder.decodeShort(enc));
        Text current = new Text(enc);
        if (prev != null)
            assertTrue(prev.compareTo(current) < 0);
        prev = current;
        s++;
        if (s == Short.MAX_VALUE)
            break;
    }
}
From source file: org.apache.gora.accumulo.util.SignedBinaryEncoderTest.java
License: Apache License

private void testInt(int start, int finish) {
    int i = start;
    Text prev = null;
    SignedBinaryEncoder encoder = new SignedBinaryEncoder();
    while (true) {
        byte[] enc = encoder.encodeInt(i);
        assertEquals(i, encoder.decodeInt(enc));
        Text current = new Text(enc);
        if (prev != null)
            assertTrue(prev.compareTo(current) < 0);
        prev = current;
        i++;
        if (i == finish)
            break;
    }
}
From source file: org.apache.gora.accumulo.util.SignedBinaryEncoderTest.java
License: Apache License

private void testLong(long start, long finish) {
    long l = start;
    Text prev = null;
    SignedBinaryEncoder encoder = new SignedBinaryEncoder();
    while (true) {
        byte[] enc = encoder.encodeLong(l);
        assertEquals(l, encoder.decodeLong(enc));
        Text current = new Text(enc);
        if (prev != null)
            assertTrue(prev.compareTo(current) < 0);
        prev = current;
        l++;
        if (l == finish)
            break;
    }
}
From source file: org.apache.kylin.engine.mr.steps.RowKeyDistributionCheckerMapper.java
License: Apache License

@Override
public void doMap(Text key, Text value, Context context) throws IOException, InterruptedException {
    for (Text t : keyList) {
        if (key.compareTo(t) < 0) {
            Long v = resultMap.get(t);
            long length = (long) key.getLength() + value.getLength();
            v += length;
            resultMap.put(t, v);
            break;
        }
    }
}
From source file: org.apache.rya.accumulo.mr.merge.mappers.MergeToolMapper.java
License: Apache License

/**
 * Since both Scanners will return sorted data, if the two key-values are
 * equal, then both Scanners can advance to the next comparison. If the Key
 * from Scanner1 sorts before the Key from Scanner2, then that Key doesn't
 * exist in the table from Scanner2 which means Scanner1 should advance. If
 * the Key from Scanner2 sorts before the Key from Scanner1, then that Key
 * doesn't exist in the table from Scanner1 which means Scanner2 should
 * advance.
 * @param key1 the {@link RyaStatement} from the parent instance table.
 * @param key2 the {@link RyaStatement} from the child instance table.
 * @return the {@link CompareKeysResult}.
 * @throws MutationsRejectedException
 * @throws IOException
 * @throws InterruptedException
 * @throws TripleRowResolverException
 */
private CompareKeysResult compareKeys(final RyaStatement key1, final RyaStatement key2)
        throws MutationsRejectedException, IOException, InterruptedException, TripleRowResolverException {
    log.trace("key1 = " + key1);
    log.trace("key2 = " + key2);
    if (key1 == null && key2 == null) {
        // Reached the end of the parent and child table.
        return CompareKeysResult.FINISHED;
    } else if (key1 == null) {
        // Reached the end of the parent table so add the remaining child keys if they meet the time criteria.
        final Date t2 = normalizeDate(new Date(key2.getTimestamp()), false);
        // Move on to next comparison (do nothing) or add this child key to parent
        final boolean doNothing = usesStartTime && t2.before(startTime);
        return doNothing ? CompareKeysResult.ADVANCE_CHILD : CompareKeysResult.ADVANCE_CHILD_AND_ADD;
    } else if (key2 == null) {
        // Reached the end of the child table so delete the remaining parent keys if they meet the time criteria.
        final Date t1 = normalizeDate(new Date(key1.getTimestamp()), true);
        // Move on to next comparison (do nothing) or delete this key from parent
        final boolean doNothing = usesStartTime
                && (copyToolInputTime != null
                        && (t1.before(copyToolInputTime) || (t1.after(copyToolInputTime) && t1.after(startTime)))
                        || (copyToolInputTime == null && t1.after(startTime)));
        return doNothing ? CompareKeysResult.ADVANCE_PARENT : CompareKeysResult.ADVANCE_PARENT_AND_DELETE;
    } else {
        // There are 2 keys to compare
        final Map<TABLE_LAYOUT, TripleRow> map1 = parentRyaContext.serializeTriple(key1);
        final Text row1 = new Text(map1.get(TABLE_LAYOUT.SPO).getRow());
        final Map<TABLE_LAYOUT, TripleRow> map2 = childRyaContext.serializeTriple(key2);
        final Text row2 = new Text(map2.get(TABLE_LAYOUT.SPO).getRow());
        final Date t1 = normalizeDate(new Date(key1.getTimestamp()), true);
        final Date t2 = normalizeDate(new Date(key2.getTimestamp()), false);

        if (row1.compareTo(row2) < 0) {
            // Parent key sort order was before the child key sort order
            // so it doesn't exist in the child table.
            // What does this mean? Was it added by the parent after the child was cloned? (Meaning we should leave it)
            // Or did the child delete it after it was cloned? (Meaning we should delete it)
            final boolean doNothing = usesStartTime
                    && (copyToolInputTime != null
                            && (t1.before(copyToolInputTime) || (t1.after(copyToolInputTime) && t1.after(startTime)))
                            || (copyToolInputTime == null && t1.after(startTime)));
            return doNothing ? CompareKeysResult.ADVANCE_PARENT : CompareKeysResult.ADVANCE_PARENT_AND_DELETE;
        } else if (row1.compareTo(row2) > 0) {
            // Parent key sort order was after the child key sort order
            // so it doesn't exist in the parent table.
            // What does this mean? Was it deleted by the parent after the child was cloned? (Meaning we should leave it)
            // Or did the child add it after it was cloned? (Meaning we should add it)
            final boolean doNothing = usesStartTime && t2.before(startTime);
            return doNothing ? CompareKeysResult.ADVANCE_CHILD : CompareKeysResult.ADVANCE_CHILD_AND_ADD;
        } else {
            // Rows are the same. So just check if column visibility needs to be updated and
            // move on to the next parent and child keys.
            return CompareKeysResult.ADVANCE_BOTH;
        }
    }
}
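The javadoc above describes the classic advance-the-smaller-side pattern for walking two sorted scanners in lockstep; a single Text.compareTo result decides which scanner moves. A stripped-down, hypothetical sketch of just that decision (the Advance and SortedMergeStep names are illustrative, not part of the Rya code):

import org.apache.hadoop.io.Text;

// Illustrative three-way merge decision over two sorted key streams.
enum Advance { BOTH, LEFT, RIGHT }

final class SortedMergeStep {
    static Advance decide(Text left, Text right) {
        int cmp = left.compareTo(right);
        if (cmp == 0) {
            return Advance.BOTH;   // same key on both sides
        } else if (cmp < 0) {
            return Advance.LEFT;   // key is missing from the right-hand table
        } else {
            return Advance.RIGHT;  // key is missing from the left-hand table
        }
    }
}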
From source file: org.apache.tez.runtime.library.common.sort.impl.TestIFile.java
License: Apache License

@Test(timeout = 5000)
//Test appendValue feature
public void testAppendValue() throws IOException {
    List<KVPair> data = KVDataGen.generateTestData(false, rnd.nextInt(100));
    IFile.Writer writer = new IFile.Writer(defaultConf, localFs, outputPath, Text.class, IntWritable.class,
            codec, null, null);

    Text previousKey = null;
    for (KVPair kvp : data) {
        if ((previousKey != null && previousKey.compareTo(kvp.getKey()) == 0)) {
            writer.appendValue(kvp.getvalue());
        } else {
            writer.append(kvp.getKey(), kvp.getvalue());
        }
        previousKey = kvp.getKey();
    }
    writer.close();

    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, codec);
}
From source file: org.apache.tez.runtime.library.common.sort.impl.TestIFile.java
License: Apache License

private Writer writeTestFile(IFile.Writer writer, boolean rle, boolean repeatKeys, List<KVPair> data,
        CompressionCodec codec) throws IOException {
    assertNotNull(writer);

    Text previousKey = null;
    for (KVPair kvp : data) {
        if (repeatKeys && (previousKey != null && previousKey.compareTo(kvp.getKey()) == 0)) {
            //RLE is enabled in IFile when IFile.REPEAT_KEY is set
            writer.append(IFile.REPEAT_KEY, kvp.getvalue());
        } else {
            writer.append(kvp.getKey(), kvp.getvalue());
        }
        previousKey = kvp.getKey();
    }
    writer.close();

    LOG.info("Uncompressed: " + writer.getRawLength());
    LOG.info("CompressedSize: " + writer.getCompressedLength());
    return writer;
}
From source file: org.commoncrawl.service.queryserver.query.DomainListQuery.java
License: Open Source License

@Override
protected long executeLocal(FileSystem remoteFileSystem, Configuration conf,
        DatabaseIndexV2.MasterDatabaseIndex index, EventLoop eventLoop, File tempFirDir,
        QueryRequest<DomainListQueryInfo, Text, SubDomainMetadata> requestObject) throws IOException {

    Path mergeResultsPath = new Path(
            getLocalQueryResultsPathPrefix(requestObject) + getMergedResultsFileName());

    LOG.info("Execute Local called for Query:" + getQueryId() + " MergeResultsPath is:" + mergeResultsPath);

    // get a local file system object
    FileSystem localFileSystem = FileSystem.getLocal(conf);

    //LOG.info("Executing LocalQuery - checking if MergedFile:" + mergeResultsPath + " Exists");
    // if source merged results path does not exist ...
    if (!localFileSystem.exists(mergeResultsPath)) {
        LOG.info("Execute Local for Query:" + getQueryId() + " Source MergeFile:" + mergeResultsPath
                + " Not Found. Checking for parts files");
        // collect parts ...
        Vector<Path> parts = new Vector<Path>();
        FileStatus fileStatusArray[] = remoteFileSystem
                .globStatus(new Path(getHDFSQueryResultsPath(), "part-*"));
        if (fileStatusArray.length == 0) {
            LOG.error("Execute Local for Query:" + getQueryId() + " FAILED. No Parts Files Found!");
            throw new IOException("Remote Component Part Files Not Found");
        }
        for (FileStatus part : fileStatusArray) {
            //LOG.info("Found Part:" + part);
            parts.add(part.getPath());
        }

        LOG.info("Execute Local for Query:" + getQueryId() + " Initializing Merger");
        SequenceFileSpillWriter<Text, SubDomainMetadata> mergedFileSpillWriter = new SequenceFileSpillWriter<Text, SubDomainMetadata>(
                localFileSystem, conf, mergeResultsPath, Text.class, SubDomainMetadata.class,
                new PositionBasedSequenceFileIndex.PositionBasedIndexWriter(localFileSystem,
                        PositionBasedSequenceFileIndex.getIndexNameFromBaseName(mergeResultsPath)),
                false);

        try {
            SequenceFileMerger<Text, SubDomainMetadata> merger = new SequenceFileMerger<Text, SubDomainMetadata>(
                    remoteFileSystem, conf, parts, mergedFileSpillWriter, Text.class, SubDomainMetadata.class,
                    new RawKeyValueComparator<Text, SubDomainMetadata>() {

                        DataInputBuffer key1Stream = new DataInputBuffer();
                        DataInputBuffer key2Stream = new DataInputBuffer();

                        @Override
                        public int compareRaw(byte[] key1Data, int key1Offset, int key1Length, byte[] key2Data,
                                int key2Offset, int key2Length, byte[] value1Data, int value1Offset,
                                int value1Length, byte[] value2Data, int value2Offset, int value2Length)
                                throws IOException {
                            key1Stream.reset(key1Data, key1Offset, key1Length);
                            key2Stream.reset(key2Data, key2Offset, key2Length);

                            WritableUtils.readVInt(key1Stream);
                            WritableUtils.readVInt(key2Stream);

                            return BytesWritable.Comparator.compareBytes(key1Data, key1Stream.getPosition(),
                                    key1Length - key1Stream.getPosition(), key2Data, key2Stream.getPosition(),
                                    key2Length - key2Stream.getPosition());
                        }

                        @Override
                        public int compare(Text key1, SubDomainMetadata value1, Text key2,
                                SubDomainMetadata value2) {
                            return key1.compareTo(key2);
                        }
                    });

            try {
                LOG.info("Execute Local for Query:" + getQueryId() + " Running Merger");
                merger.mergeAndSpill(null);
                LOG.info("Execute Local for Query:" + getQueryId()
                        + " Merge Successfull.. Deleting Merge Inputs");
                for (Path inputPath : parts) {
                    remoteFileSystem.delete(inputPath, false);
                }
            } catch (IOException e) {
                LOG.error("Execute Local for Query:" + getQueryId() + " Merge Failed with Exception:"
                        + CCStringUtils.stringifyException(e));
                throw e;
            } finally {
                LOG.info("** CLOSING MERGER");
                merger.close();
            }
        } finally {
            LOG.info("** FLUSHING SPILLWRITER");
            mergedFileSpillWriter.close();
        }
    }

    // now check for query specific merge file ...
    Path queryResultsPath = new Path(getLocalQueryResultsPathPrefix(requestObject)
            + getOutputFileNameBasedOnSortByField(requestObject.getClientQueryInfo().getSortByField()));

    LOG.info("Execute Local for Query:" + getQueryId() + " Checking for QueryResultsPath:" + queryResultsPath);

    if (!localFileSystem.exists(queryResultsPath)) {
        LOG.info("Exectue Local for Query:" + getQueryId() + " Results File:" + queryResultsPath
                + " does not exist. Running sort and merge process");
        LOG.info("Execute Local for Query:" + getQueryId() + " Allocating SpillWriter with output to:"
                + queryResultsPath);
        // allocate a spill writer ...
        SequenceFileSpillWriter<Text, SubDomainMetadata> sortedResultsFileSpillWriter = new SequenceFileSpillWriter<Text, SubDomainMetadata>(
                localFileSystem, conf, queryResultsPath, Text.class, SubDomainMetadata.class,
                new PositionBasedSequenceFileIndex.PositionBasedIndexWriter(localFileSystem,
                        PositionBasedSequenceFileIndex.getIndexNameFromBaseName(queryResultsPath)),
                false);

        try {
            LOG.info("Execute Local for Query:" + getQueryId() + " Allocating MergeSortSpillWriter");
            // and connect it to the merge spill writer ...
            MergeSortSpillWriter<Text, SubDomainMetadata> mergeSortSpillWriter = new MergeSortSpillWriter<Text, SubDomainMetadata>(
                    conf, sortedResultsFileSpillWriter, localFileSystem, new Path(tempFirDir.getAbsolutePath()),
                    /*
                    new RawKeyValueComparator<Text, SubDomainMetadata>() {

                        SubDomainMetadata value1 = new SubDomainMetadata();
                        SubDomainMetadata value2 = new SubDomainMetadata();

                        @Override
                        public int compare(Text key1, SubDomainMetadata value1, Text key2, SubDomainMetadata value2) {
                            return value1.getUrlCount() - value2.getUrlCount();
                        }

                        @Override
                        public int compareRaw(byte[] key1Data, int key1Offset, int key1Length,
                                byte[] key2Data, int key2Offset, int key2Length,
                                byte[] value1Data, int value1Offset, int value1Length,
                                byte[] value2Data, int value2Offset, int value2Length) throws IOException {
                            value1.clear();
                            value2.clear();
                            value1.readFields(new DataInputStream(new ByteArrayInputStream(value1Data, value1Offset, value1Length)));
                            value2.readFields(new DataInputStream(new ByteArrayInputStream(value2Data, value2Offset, value2Length)));
                            return compare(null, value1, null, value2);
                        }
                    },
                    */
                    new OptimizedKeyGeneratorAndComparator<Text, SubDomainMetadata>() {

                        @Override
                        public void generateOptimizedKeyForPair(Text key, SubDomainMetadata value,
                                org.commoncrawl.hadoop.mergeutils.OptimizedKeyGeneratorAndComparator.OptimizedKey optimizedKeyOut)
                                throws IOException {
                            optimizedKeyOut.setLongKeyValue(value.getUrlCount());
                        }

                        @Override
                        public int getGeneratedKeyType() {
                            return OptimizedKey.KEY_TYPE_LONG;
                        }
                    }, Text.class, SubDomainMetadata.class, false, null);

            try {
                // create a vector representing the single input segment
                Vector<Path> singleInputSegment = new Vector<Path>();

                LOG.info("Execute Local for Query:" + getQueryId() + " Adding MergeResultsPath:"
                        + mergeResultsPath + " as input for Merger");
                singleInputSegment.add(mergeResultsPath);

                // create a SequenceFileReader
                SequenceFileReader<Text, SubDomainMetadata> mergeSegmentReader = new SequenceFileReader<Text, SubDomainMetadata>(
                        localFileSystem, conf, singleInputSegment, mergeSortSpillWriter, Text.class,
                        SubDomainMetadata.class);

                try {
                    LOG.info("Execute Local for Query:" + getQueryId() + " calling readAndSpill");
                    mergeSegmentReader.readAndSpill();
                    LOG.info("Execute Local for Query:" + getQueryId() + " readAndSpill finished");
                } finally {
                    if (mergeSegmentReader != null) {
                        mergeSegmentReader.close();
                    }
                }
            } finally {
                if (mergeSortSpillWriter != null) {
                    mergeSortSpillWriter.close();
                }
            }
        } finally {
            if (sortedResultsFileSpillWriter != null) {
                sortedResultsFileSpillWriter.close();
            }
        }
    }

    //LOG.info("Allocating SequenceFileIndex object for DomainListQuery Id:" + getQueryId() + " with Path:" + queryResultsPath);
    PositionBasedSequenceFileIndex<Text, SubDomainMetadata> indexFile = new PositionBasedSequenceFileIndex<Text, SubDomainMetadata>(
            localFileSystem, queryResultsPath, Text.class, SubDomainMetadata.class);
    //LOG.info("SequenceFileIndex object for DomainListQuery Id:" + getQueryId() + " with Path:" + queryResultsPath + " returned record count:" + indexFile.getRecordCount());

    return indexFile.getRecordCount();
}
From source file: org.huahinframework.core.io.Key.java
License: Apache License

/**
 * {@inheritDoc}
 */
@Override
public int compareTo(Key key) {
    Text one = (Text) this.identifier();
    Text other = (Text) key.identifier();
    return one.compareTo(other);
}