List of usage examples for org.apache.hadoop.io Text compareTo
@Override public int compareTo(BinaryComparable other)
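Text.compareTo is inherited from BinaryComparable: it lexicographically compares the two instances' serialized UTF-8 bytes and returns a negative value, zero, or a positive value. Every example below relies on this byte-level ordering. As a minimal, self-contained sketch (the class name is illustrative; assumes hadoop-common on the classpath):

import org.apache.hadoop.io.Text;

public class TextCompareToDemo {
    public static void main(String[] args) {
        Text a = new Text("apple");
        Text b = new Text("banana");

        // Lexicographic comparison of the UTF-8 encoded bytes:
        System.out.println(a.compareTo(b));                 // negative: "apple" sorts first
        System.out.println(a.compareTo(new Text("apple"))); // 0: identical bytes
        System.out.println(b.compareTo(a));                 // positive: "banana" sorts after
    }
}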
From source file: org.apache.accumulo.testing.core.randomwalk.image.Write.java
License: Apache License

@Override
public void visit(State state, RandWalkEnv env, Properties props) throws Exception {
    MultiTableBatchWriter mtbw = env.getMultiTableBatchWriter();
    BatchWriter imagesBW = mtbw.getBatchWriter(state.getString("imageTableName"));
    BatchWriter indexBW = mtbw.getBatchWriter(state.getString("indexTableName"));

    String uuid = UUID.randomUUID().toString();

    Mutation m = new Mutation(new Text(uuid));

    // create a fake image between 4KB and 1MB
    int maxSize = Integer.parseInt(props.getProperty("maxSize"));
    int minSize = Integer.parseInt(props.getProperty("minSize"));

    Random rand = new Random();
    int numBytes = rand.nextInt(maxSize - minSize) + minSize;
    byte[] imageBytes = new byte[numBytes];
    rand.nextBytes(imageBytes);

    m.put(CONTENT_COLUMN_FAMILY, IMAGE_COLUMN_QUALIFIER, new Value(imageBytes));

    // store size
    m.put(META_COLUMN_FAMILY, new Text("size"), new Value(String.format("%d", numBytes).getBytes(UTF_8)));

    // store hash
    MessageDigest alg = MessageDigest.getInstance("SHA-1");
    alg.update(imageBytes);
    byte[] hash = alg.digest();
    m.put(META_COLUMN_FAMILY, SHA1_COLUMN_QUALIFIER, new Value(hash));

    // update write counts
    state.set("numWrites", state.getLong("numWrites") + 1);
    Long totalWrites = state.getLong("totalWrites") + 1;
    state.set("totalWrites", totalWrites);

    // set count
    m.put(META_COLUMN_FAMILY, COUNT_COLUMN_QUALIFIER, new Value(String.format("%d", totalWrites).getBytes(UTF_8)));

    // add mutation
    imagesBW.addMutation(m);

    // now add mutation to index
    Text row = new Text(hash);
    m = new Mutation(row);
    m.put(META_COLUMN_FAMILY, UUID_COLUMN_QUALIFIER, new Value(uuid.getBytes(UTF_8)));
    indexBW.addMutation(m);

    Text lastRow = (Text) state.get("lastIndexRow");
    if (lastRow.compareTo(row) < 0) {
        state.set("lastIndexRow", new Text(row));
    }
}
From source file: org.apache.gora.accumulo.util.SignedBinaryEncoderTest.java
License: Apache License

@Test
public void testShort() {
    short s = Short.MIN_VALUE;
    Text prev = null;
    SignedBinaryEncoder encoder = new SignedBinaryEncoder();
    while (true) {
        byte[] enc = encoder.encodeShort(s);
        assertEquals(s, encoder.decodeShort(enc));
        Text current = new Text(enc);
        if (prev != null)
            assertTrue(prev.compareTo(current) < 0);
        prev = current;
        s++;
        if (s == Short.MAX_VALUE)
            break;
    }
}
From source file: org.apache.gora.accumulo.util.SignedBinaryEncoderTest.java
License: Apache License

private void testInt(int start, int finish) {
    int i = start;
    Text prev = null;
    SignedBinaryEncoder encoder = new SignedBinaryEncoder();
    while (true) {
        byte[] enc = encoder.encodeInt(i);
        assertEquals(i, encoder.decodeInt(enc));
        Text current = new Text(enc);
        if (prev != null)
            assertTrue(prev.compareTo(current) < 0);
        prev = current;
        i++;
        if (i == finish)
            break;
    }
}
From source file: org.apache.gora.accumulo.util.SignedBinaryEncoderTest.java
License: Apache License

private void testLong(long start, long finish) {
    long l = start;
    Text prev = null;
    SignedBinaryEncoder encoder = new SignedBinaryEncoder();
    while (true) {
        byte[] enc = encoder.encodeLong(l);
        assertEquals(l, encoder.decodeLong(enc));
        Text current = new Text(enc);
        if (prev != null)
            assertTrue(prev.compareTo(current) < 0);
        prev = current;
        l++;
        if (l == finish)
            break;
    }
}
From source file: org.apache.kylin.engine.mr.steps.RowKeyDistributionCheckerMapper.java
License: Apache License

@Override
public void doMap(Text key, Text value, Context context) throws IOException, InterruptedException {
    for (Text t : keyList) {
        if (key.compareTo(t) < 0) {
            Long v = resultMap.get(t);
            long length = (long) key.getLength() + value.getLength();
            v += length;
            resultMap.put(t, v);
            break;
        }
    }
}
From source file: org.apache.rya.accumulo.mr.merge.mappers.MergeToolMapper.java
License: Apache License

/**
 * Since both Scanners will return sorted data, if the two key-values are
 * equal, then both Scanners can advance to the next comparison. If the Key
 * from Scanner1 sorts before the Key from Scanner2, then that Key doesn't
 * exist in the table from Scanner2 which means Scanner1 should advance. If
 * the Key from Scanner2 sorts before the Key from Scanner1, then that Key
 * doesn't exist in the table from Scanner1 which means Scanner2 should
 * advance.
 * @param key1 the {@link RyaStatement} from the parent instance table.
 * @param key2 the {@link RyaStatement} from the child instance table.
 * @return the {@link CompareKeysResult}.
 * @throws MutationsRejectedException
 * @throws IOException
 * @throws InterruptedException
 * @throws TripleRowResolverException
 */
private CompareKeysResult compareKeys(final RyaStatement key1, final RyaStatement key2)
        throws MutationsRejectedException, IOException, InterruptedException, TripleRowResolverException {
    log.trace("key1 = " + key1);
    log.trace("key2 = " + key2);
    if (key1 == null && key2 == null) {
        // Reached the end of the parent and child table.
        return CompareKeysResult.FINISHED;
    } else if (key1 == null) {
        // Reached the end of the parent table so add the remaining child keys if they meet the time criteria.
        final Date t2 = normalizeDate(new Date(key2.getTimestamp()), false);
        // Move on to next comparison (do nothing) or add this child key to parent
        final boolean doNothing = usesStartTime && t2.before(startTime);
        return doNothing ? CompareKeysResult.ADVANCE_CHILD : CompareKeysResult.ADVANCE_CHILD_AND_ADD;
    } else if (key2 == null) {
        // Reached the end of the child table so delete the remaining parent keys if they meet the time criteria.
        final Date t1 = normalizeDate(new Date(key1.getTimestamp()), true);
        // Move on to next comparison (do nothing) or delete this key from parent
        final boolean doNothing = usesStartTime
                && (copyToolInputTime != null
                        && (t1.before(copyToolInputTime) || (t1.after(copyToolInputTime) && t1.after(startTime)))
                        || (copyToolInputTime == null && t1.after(startTime)));
        return doNothing ? CompareKeysResult.ADVANCE_PARENT : CompareKeysResult.ADVANCE_PARENT_AND_DELETE;
    } else {
        // There are 2 keys to compare
        final Map<TABLE_LAYOUT, TripleRow> map1 = parentRyaContext.serializeTriple(key1);
        final Text row1 = new Text(map1.get(TABLE_LAYOUT.SPO).getRow());
        final Map<TABLE_LAYOUT, TripleRow> map2 = childRyaContext.serializeTriple(key2);
        final Text row2 = new Text(map2.get(TABLE_LAYOUT.SPO).getRow());
        final Date t1 = normalizeDate(new Date(key1.getTimestamp()), true);
        final Date t2 = normalizeDate(new Date(key2.getTimestamp()), false);

        if (row1.compareTo(row2) < 0) {
            // Parent key sort order was before the child key sort order
            // so it doesn't exist in the child table.
            // What does this mean? Was it added by the parent after the child was cloned? (Meaning we should leave it)
            // Or did the child delete it after it was cloned? (Meaning we should delete it)
            final boolean doNothing = usesStartTime
                    && (copyToolInputTime != null
                            && (t1.before(copyToolInputTime) || (t1.after(copyToolInputTime) && t1.after(startTime)))
                            || (copyToolInputTime == null && t1.after(startTime)));
            return doNothing ? CompareKeysResult.ADVANCE_PARENT : CompareKeysResult.ADVANCE_PARENT_AND_DELETE;
        } else if (row1.compareTo(row2) > 0) {
            // Parent key sort order was after the child key sort order
            // so it doesn't exist in the parent table.
            // What does this mean? Was it deleted by the parent after the child was cloned? (Meaning we should leave it)
            // Or did the child add it after it was cloned? (Meaning we should add it)
            final boolean doNothing = usesStartTime && t2.before(startTime);
            return doNothing ? CompareKeysResult.ADVANCE_CHILD : CompareKeysResult.ADVANCE_CHILD_AND_ADD;
        } else {
            // Rows are the same. So just check if column visibility needs to be updated and
            // move on to the next parent and child keys.
            return CompareKeysResult.ADVANCE_BOTH;
        }
    }
}
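The javadoc above describes the classic advance-the-smaller-side pattern for walking two sorted scanners in lockstep; a single Text.compareTo result decides which scanner moves. A stripped-down, hypothetical sketch of just that decision (the Advance and SortedMergeStep names are illustrative, not part of the Rya code):

import org.apache.hadoop.io.Text;

// Illustrative three-way merge decision over two sorted key streams.
enum Advance { BOTH, LEFT, RIGHT }

final class SortedMergeStep {
    static Advance decide(Text left, Text right) {
        int cmp = left.compareTo(right);
        if (cmp == 0) {
            return Advance.BOTH;   // same key on both sides
        } else if (cmp < 0) {
            return Advance.LEFT;   // key is missing from the right-hand table
        } else {
            return Advance.RIGHT;  // key is missing from the left-hand table
        }
    }
}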
From source file: org.apache.tez.runtime.library.common.sort.impl.TestIFile.java
License: Apache License

@Test(timeout = 5000)
//Test appendValue feature
public void testAppendValue() throws IOException {
    List<KVPair> data = KVDataGen.generateTestData(false, rnd.nextInt(100));
    IFile.Writer writer = new IFile.Writer(defaultConf, localFs, outputPath, Text.class, IntWritable.class,
            codec, null, null);

    Text previousKey = null;
    for (KVPair kvp : data) {
        if ((previousKey != null && previousKey.compareTo(kvp.getKey()) == 0)) {
            writer.appendValue(kvp.getvalue());
        } else {
            writer.append(kvp.getKey(), kvp.getvalue());
        }
        previousKey = kvp.getKey();
    }
    writer.close();

    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, codec);
}
From source file: org.apache.tez.runtime.library.common.sort.impl.TestIFile.java
License: Apache License

private Writer writeTestFile(IFile.Writer writer, boolean rle, boolean repeatKeys, List<KVPair> data,
        CompressionCodec codec) throws IOException {
    assertNotNull(writer);

    Text previousKey = null;
    for (KVPair kvp : data) {
        if (repeatKeys && (previousKey != null && previousKey.compareTo(kvp.getKey()) == 0)) {
            //RLE is enabled in IFile when IFile.REPEAT_KEY is set
            writer.append(IFile.REPEAT_KEY, kvp.getvalue());
        } else {
            writer.append(kvp.getKey(), kvp.getvalue());
        }
        previousKey = kvp.getKey();
    }
    writer.close();

    LOG.info("Uncompressed: " + writer.getRawLength());
    LOG.info("CompressedSize: " + writer.getCompressedLength());
    return writer;
}
From source file: org.commoncrawl.service.queryserver.query.DomainListQuery.java
License: Open Source License

@Override
protected long executeLocal(FileSystem remoteFileSystem, Configuration conf,
        DatabaseIndexV2.MasterDatabaseIndex index, EventLoop eventLoop, File tempFirDir,
        QueryRequest<DomainListQueryInfo, Text, SubDomainMetadata> requestObject) throws IOException {

    Path mergeResultsPath = new Path(
            getLocalQueryResultsPathPrefix(requestObject) + getMergedResultsFileName());

    LOG.info("Execute Local called for Query:" + getQueryId() + " MergeResultsPath is:" + mergeResultsPath);

    // get a local file system object
    FileSystem localFileSystem = FileSystem.getLocal(conf);

    //LOG.info("Executing LocalQuery - checking if MergedFile:" + mergeResultsPath + " Exists");
    // if source merged results path does not exist ...
    if (!localFileSystem.exists(mergeResultsPath)) {
        LOG.info("Execute Local for Query:" + getQueryId() + " Source MergeFile:" + mergeResultsPath
                + " Not Found. Checking for parts files");
        // collect parts ...
        Vector<Path> parts = new Vector<Path>();
        FileStatus fileStatusArray[] = remoteFileSystem
                .globStatus(new Path(getHDFSQueryResultsPath(), "part-*"));
        if (fileStatusArray.length == 0) {
            LOG.error("Execute Local for Query:" + getQueryId() + " FAILED. No Parts Files Found!");
            throw new IOException("Remote Component Part Files Not Found");
        }
        for (FileStatus part : fileStatusArray) {
            //LOG.info("Found Part:" + part);
            parts.add(part.getPath());
        }

        LOG.info("Execute Local for Query:" + getQueryId() + " Initializing Merger");
        SequenceFileSpillWriter<Text, SubDomainMetadata> mergedFileSpillWriter = new SequenceFileSpillWriter<Text, SubDomainMetadata>(
                localFileSystem, conf, mergeResultsPath, Text.class, SubDomainMetadata.class,
                new PositionBasedSequenceFileIndex.PositionBasedIndexWriter(localFileSystem,
                        PositionBasedSequenceFileIndex.getIndexNameFromBaseName(mergeResultsPath)),
                false);

        try {
            SequenceFileMerger<Text, SubDomainMetadata> merger = new SequenceFileMerger<Text, SubDomainMetadata>(
                    remoteFileSystem, conf, parts, mergedFileSpillWriter, Text.class, SubDomainMetadata.class,
                    new RawKeyValueComparator<Text, SubDomainMetadata>() {

                        DataInputBuffer key1Stream = new DataInputBuffer();
                        DataInputBuffer key2Stream = new DataInputBuffer();

                        @Override
                        public int compareRaw(byte[] key1Data, int key1Offset, int key1Length, byte[] key2Data,
                                int key2Offset, int key2Length, byte[] value1Data, int value1Offset,
                                int value1Length, byte[] value2Data, int value2Offset, int value2Length)
                                throws IOException {
                            key1Stream.reset(key1Data, key1Offset, key1Length);
                            key2Stream.reset(key2Data, key2Offset, key2Length);

                            WritableUtils.readVInt(key1Stream);
                            WritableUtils.readVInt(key2Stream);

                            return BytesWritable.Comparator.compareBytes(key1Data, key1Stream.getPosition(),
                                    key1Length - key1Stream.getPosition(), key2Data, key2Stream.getPosition(),
                                    key2Length - key2Stream.getPosition());
                        }

                        @Override
                        public int compare(Text key1, SubDomainMetadata value1, Text key2,
                                SubDomainMetadata value2) {
                            return key1.compareTo(key2);
                        }
                    });

            try {
                LOG.info("Execute Local for Query:" + getQueryId() + " Running Merger");
                merger.mergeAndSpill(null);
                LOG.info("Execute Local for Query:" + getQueryId()
                        + " Merge Successfull.. Deleting Merge Inputs");
                for (Path inputPath : parts) {
                    remoteFileSystem.delete(inputPath, false);
                }
            } catch (IOException e) {
                LOG.error("Execute Local for Query:" + getQueryId() + " Merge Failed with Exception:"
                        + CCStringUtils.stringifyException(e));
                throw e;
            } finally {
                LOG.info("** CLOSING MERGER");
                merger.close();
            }
        } finally {
            LOG.info("** FLUSHING SPILLWRITER");
            mergedFileSpillWriter.close();
        }
    }

    // now check for query specific merge file ...
    Path queryResultsPath = new Path(getLocalQueryResultsPathPrefix(requestObject)
            + getOutputFileNameBasedOnSortByField(requestObject.getClientQueryInfo().getSortByField()));

    LOG.info("Execute Local for Query:" + getQueryId() + " Checking for QueryResultsPath:" + queryResultsPath);

    if (!localFileSystem.exists(queryResultsPath)) {
        LOG.info("Exectue Local for Query:" + getQueryId() + " Results File:" + queryResultsPath
                + " does not exist. Running sort and merge process");
        LOG.info("Execute Local for Query:" + getQueryId() + " Allocating SpillWriter with output to:"
                + queryResultsPath);
        // allocate a spill writer ...
        SequenceFileSpillWriter<Text, SubDomainMetadata> sortedResultsFileSpillWriter = new SequenceFileSpillWriter<Text, SubDomainMetadata>(
                localFileSystem, conf, queryResultsPath, Text.class, SubDomainMetadata.class,
                new PositionBasedSequenceFileIndex.PositionBasedIndexWriter(localFileSystem,
                        PositionBasedSequenceFileIndex.getIndexNameFromBaseName(queryResultsPath)),
                false);

        try {
            LOG.info("Execute Local for Query:" + getQueryId() + " Allocating MergeSortSpillWriter");
            // and connect it to the merge spill writer ...
            MergeSortSpillWriter<Text, SubDomainMetadata> mergeSortSpillWriter = new MergeSortSpillWriter<Text, SubDomainMetadata>(
                    conf, sortedResultsFileSpillWriter, localFileSystem, new Path(tempFirDir.getAbsolutePath()),
                    /*
                    new RawKeyValueComparator<Text, SubDomainMetadata>() {

                        SubDomainMetadata value1 = new SubDomainMetadata();
                        SubDomainMetadata value2 = new SubDomainMetadata();

                        @Override
                        public int compare(Text key1, SubDomainMetadata value1, Text key2, SubDomainMetadata value2) {
                            return value1.getUrlCount() - value2.getUrlCount();
                        }

                        @Override
                        public int compareRaw(byte[] key1Data, int key1Offset, int key1Length,
                                byte[] key2Data, int key2Offset, int key2Length,
                                byte[] value1Data, int value1Offset, int value1Length,
                                byte[] value2Data, int value2Offset, int value2Length) throws IOException {
                            value1.clear();
                            value2.clear();
                            value1.readFields(new DataInputStream(new ByteArrayInputStream(value1Data, value1Offset, value1Length)));
                            value2.readFields(new DataInputStream(new ByteArrayInputStream(value2Data, value2Offset, value2Length)));
                            return compare(null, value1, null, value2);
                        }
                    },
                    */
                    new OptimizedKeyGeneratorAndComparator<Text, SubDomainMetadata>() {

                        @Override
                        public void generateOptimizedKeyForPair(Text key, SubDomainMetadata value,
                                org.commoncrawl.hadoop.mergeutils.OptimizedKeyGeneratorAndComparator.OptimizedKey optimizedKeyOut)
                                throws IOException {
                            optimizedKeyOut.setLongKeyValue(value.getUrlCount());
                        }

                        @Override
                        public int getGeneratedKeyType() {
                            return OptimizedKey.KEY_TYPE_LONG;
                        }
                    }, Text.class, SubDomainMetadata.class, false, null);

            try {
                // create a vector representing the single input segment
                Vector<Path> singleInputSegment = new Vector<Path>();

                LOG.info("Execute Local for Query:" + getQueryId() + " Adding MergeResultsPath:"
                        + mergeResultsPath + " as input for Merger");
                singleInputSegment.add(mergeResultsPath);

                // create a SequenceFileReader
                SequenceFileReader<Text, SubDomainMetadata> mergeSegmentReader = new SequenceFileReader<Text, SubDomainMetadata>(
                        localFileSystem, conf, singleInputSegment, mergeSortSpillWriter, Text.class,
                        SubDomainMetadata.class);

                try {
                    LOG.info("Execute Local for Query:" + getQueryId() + " calling readAndSpill");
                    mergeSegmentReader.readAndSpill();
                    LOG.info("Execute Local for Query:" + getQueryId() + " readAndSpill finished");
                } finally {
                    if (mergeSegmentReader != null) {
                        mergeSegmentReader.close();
                    }
                }
            } finally {
                if (mergeSortSpillWriter != null) {
                    mergeSortSpillWriter.close();
                }
            }
        } finally {
            if (sortedResultsFileSpillWriter != null) {
                sortedResultsFileSpillWriter.close();
            }
        }
    }

    //LOG.info("Allocating SequenceFileIndex object for DomainListQuery Id:" + getQueryId() + " with Path:" + queryResultsPath);
    PositionBasedSequenceFileIndex<Text, SubDomainMetadata> indexFile = new PositionBasedSequenceFileIndex<Text, SubDomainMetadata>(
            localFileSystem, queryResultsPath, Text.class, SubDomainMetadata.class);
    //LOG.info("SequenceFileIndex object for DomainListQuery Id:" + getQueryId() + " with Path:" + queryResultsPath + " returned record count:" + indexFile.getRecordCount());

    return indexFile.getRecordCount();
}
From source file: org.huahinframework.core.io.Key.java
License: Apache License

/**
 * {@inheritDoc}
 */
@Override
public int compareTo(Key key) {
    Text one = (Text) this.identifier();
    Text other = (Text) key.identifier();
    return one.compareTo(other);
}