List of usage examples for org.apache.hadoop.io Text getLength
@Override public int getLength()
From source file:org.apache.accumulo.shell.commands.GetSplitsCommand.java
License:Apache License
/**
 * Renders a piece of text as a string, either Base64-encoded or run through
 * {@code DefaultFormatter.appendText}.
 *
 * @param encode when {@code true} the bytes obtained from {@code TextUtil.getBytes(text)} are
 *        Base64-encoded; otherwise the text is appended to a fresh {@code StringBuilder} via
 *        {@code DefaultFormatter.appendText}
 * @param text the text to render; may be {@code null}
 * @return the rendered string, or {@code null} when {@code text} is {@code null}
 */
private static String encode(final boolean encode, final Text text) {
  if (text == null) {
    return null;
  }

  final int length = text.getLength();
  if (encode) {
    return Base64.getEncoder().encodeToString(TextUtil.getBytes(text));
  }

  final StringBuilder rendered = new StringBuilder();
  return DefaultFormatter.appendText(rendered, text, length).toString();
}
From source file:org.apache.accumulo.test.merkle.RangeSerialization.java
License:Apache License
public static Range toRange(Key key) { Text holder = new Text(); key.getRow(holder);/* ww w . j a v a 2s.co m*/ Key startKey; if (0 == holder.getLength()) { startKey = null; } else { startKey = new Key(holder); } key.getColumnQualifier(holder); Key endKey; if (0 == holder.getLength()) { endKey = null; } else { endKey = new Key(holder); } // Don't be inclusive for no bounds on a Range return new Range(startKey, startKey != null, endKey, endKey != null); }
From source file:org.apache.accumulo.test.performance.scan.CollectTabletStats.java
License:Apache License
private static void calcTabletStats(Connector conn, String table, Authorizations auths, int batchSize, KeyExtent ke, String[] columns) throws Exception { // long t1 = System.currentTimeMillis(); Scanner scanner = conn.createScanner(table, auths); scanner.setBatchSize(batchSize);//ww w. j av a 2 s .com scanner.setRange(new Range(ke.getPrevEndRow(), false, ke.getEndRow(), true)); for (String c : columns) { scanner.fetchColumnFamily(new Text(c)); } Stat rowLen = new Stat(); Stat cfLen = new Stat(); Stat cqLen = new Stat(); Stat cvLen = new Stat(); Stat valLen = new Stat(); Stat colsPerRow = new Stat(); Text lastRow = null; int colsPerRowCount = 0; for (Entry<Key, Value> entry : scanner) { Key key = entry.getKey(); Text row = key.getRow(); if (lastRow == null) { lastRow = row; } if (!lastRow.equals(row)) { colsPerRow.addStat(colsPerRowCount); lastRow = row; colsPerRowCount = 0; } colsPerRowCount++; rowLen.addStat(row.getLength()); cfLen.addStat(key.getColumnFamilyData().length()); cqLen.addStat(key.getColumnQualifierData().length()); cvLen.addStat(key.getColumnVisibilityData().length()); valLen.addStat(entry.getValue().get().length); } synchronized (System.out) { System.out.println(""); System.out.println("\tTablet " + ke.getUUID() + " statistics : "); printStat("Row length", rowLen); printStat("Column family length", cfLen); printStat("Column qualifier length", cqLen); printStat("Column visibility length", cvLen); printStat("Value length", valLen); printStat("Columns per row", colsPerRow); System.out.println(""); } }
From source file:org.apache.accumulo.tserver.tablet.Tablet.java
License:Apache License
private SplitRowSpec findSplitRow(Collection<FileRef> files) { // never split the root tablet // check if we already decided that we can never split // check to see if we're big enough to split long splitThreshold = tableConfiguration.getMemoryInBytes(Property.TABLE_SPLIT_THRESHOLD); long maxEndRow = tableConfiguration.getMemoryInBytes(Property.TABLE_MAX_END_ROW_SIZE); if (extent.isRootTablet() || estimateTabletSize() <= splitThreshold) { return null; }/*from w ww. j a v a 2 s . c om*/ // have seen a big row before, do not bother checking unless a minor compaction or map file import has occurred. if (sawBigRow) { if (timeOfLastMinCWhenBigFreakinRowWasSeen != lastMinorCompactionFinishTime || timeOfLastImportWhenBigFreakinRowWasSeen != lastMapFileImportTime) { // a minor compaction or map file import has occurred... check again sawBigRow = false; } else { // nothing changed, do not split return null; } } SortedMap<Double, Key> keys = null; try { // we should make .25 below configurable keys = FileUtil.findMidPoint(getTabletServer().getFileSystem(), getTabletServer().getConfiguration(), extent.getPrevEndRow(), extent.getEndRow(), FileUtil.toPathStrings(files), .25); } catch (IOException e) { log.error("Failed to find midpoint " + e.getMessage()); return null; } // check to see if one row takes up most of the tablet, in which case we can not split try { Text lastRow; if (extent.getEndRow() == null) { Key lastKey = (Key) FileUtil.findLastKey(getTabletServer().getFileSystem(), getTabletServer().getConfiguration(), files); lastRow = lastKey.getRow(); } else { lastRow = extent.getEndRow(); } // We expect to get a midPoint for this set of files. If we don't get one, we have a problem. 
final Key mid = keys.get(.5); if (null == mid) { throw new IllegalStateException("Could not determine midpoint for files"); } // check to see that the midPoint is not equal to the end key if (mid.compareRow(lastRow) == 0) { if (keys.firstKey() < .5) { Key candidate = keys.get(keys.firstKey()); if (candidate.getLength() > maxEndRow) { log.warn("Cannot split tablet " + extent + ", selected split point too long. Length : " + candidate.getLength()); sawBigRow = true; timeOfLastMinCWhenBigFreakinRowWasSeen = lastMinorCompactionFinishTime; timeOfLastImportWhenBigFreakinRowWasSeen = lastMapFileImportTime; return null; } if (candidate.compareRow(lastRow) != 0) { // we should use this ratio in split size estimations if (log.isTraceEnabled()) log.trace(String.format( "Splitting at %6.2f instead of .5, row at .5 is same as end row%n", keys.firstKey())); return new SplitRowSpec(keys.firstKey(), candidate.getRow()); } } log.warn("Cannot split tablet " + extent + " it contains a big row : " + lastRow); sawBigRow = true; timeOfLastMinCWhenBigFreakinRowWasSeen = lastMinorCompactionFinishTime; timeOfLastImportWhenBigFreakinRowWasSeen = lastMapFileImportTime; return null; } Text text = mid.getRow(); SortedMap<Double, Key> firstHalf = keys.headMap(.5); if (firstHalf.size() > 0) { Text beforeMid = firstHalf.get(firstHalf.lastKey()).getRow(); Text shorter = new Text(); int trunc = longestCommonLength(text, beforeMid); shorter.set(text.getBytes(), 0, Math.min(text.getLength(), trunc + 1)); text = shorter; } if (text.getLength() > maxEndRow) { log.warn("Cannot split tablet " + extent + ", selected split point too long. 
Length : " + text.getLength()); sawBigRow = true; timeOfLastMinCWhenBigFreakinRowWasSeen = lastMinorCompactionFinishTime; timeOfLastImportWhenBigFreakinRowWasSeen = lastMapFileImportTime; return null; } return new SplitRowSpec(.5, text); } catch (IOException e) { // don't split now, but check again later log.error("Failed to find lastkey " + e.getMessage()); return null; } }
From source file:org.apache.accumulo.tserver.Tablet.java
License:Apache License
private SplitRowSpec findSplitRow(Collection<FileRef> files) { // never split the root tablet // check if we already decided that we can never split // check to see if we're big enough to split long splitThreshold = acuTableConf.getMemoryInBytes(Property.TABLE_SPLIT_THRESHOLD); if (extent.isRootTablet() || estimateTabletSize() <= splitThreshold) { return null; }/*from www. ja v a2 s . c om*/ // have seen a big row before, do not bother checking unless a minor compaction or map file import has occurred. if (sawBigRow) { if (timeOfLastMinCWhenBigFreakinRowWasSeen != lastMinorCompactionFinishTime || timeOfLastImportWhenBigFreakinRowWasSeen != lastMapFileImportTime) { // a minor compaction or map file import has occurred... check again sawBigRow = false; } else { // nothing changed, do not split return null; } } SortedMap<Double, Key> keys = null; try { // we should make .25 below configurable keys = FileUtil.findMidPoint(fs, tabletServer.getSystemConfiguration(), extent.getPrevEndRow(), extent.getEndRow(), FileUtil.toPathStrings(files), .25); } catch (IOException e) { log.error("Failed to find midpoint " + e.getMessage()); return null; } // check to see if one row takes up most of the tablet, in which case we can not split try { Text lastRow; if (extent.getEndRow() == null) { Key lastKey = (Key) FileUtil.findLastKey(fs, tabletServer.getSystemConfiguration(), files); lastRow = lastKey.getRow(); } else { lastRow = extent.getEndRow(); } // check to see that the midPoint is not equal to the end key if (keys.get(.5).compareRow(lastRow) == 0) { if (keys.firstKey() < .5) { Key candidate = keys.get(keys.firstKey()); if (candidate.compareRow(lastRow) != 0) { // we should use this ratio in split size estimations if (log.isTraceEnabled()) log.trace(String.format( "Splitting at %6.2f instead of .5, row at .5 is same as end row%n", keys.firstKey())); return new SplitRowSpec(keys.firstKey(), candidate.getRow()); } } log.warn("Cannot split tablet " + extent + " it contains a big row 
: " + lastRow); sawBigRow = true; timeOfLastMinCWhenBigFreakinRowWasSeen = lastMinorCompactionFinishTime; timeOfLastImportWhenBigFreakinRowWasSeen = lastMapFileImportTime; return null; } Key mid = keys.get(.5); Text text = (mid == null) ? null : mid.getRow(); SortedMap<Double, Key> firstHalf = keys.headMap(.5); if (firstHalf.size() > 0) { Text beforeMid = firstHalf.get(firstHalf.lastKey()).getRow(); Text shorter = new Text(); int trunc = longestCommonLength(text, beforeMid); shorter.set(text.getBytes(), 0, Math.min(text.getLength(), trunc + 1)); text = shorter; } return new SplitRowSpec(.5, text); } catch (IOException e) { // don't split now, but check again later log.error("Failed to find lastkey " + e.getMessage()); return null; } }
From source file:org.apache.blur.mapreduce.lib.CsvBlurMapper.java
License:Apache License
/**
 * Turns one delimited input line into a record mutation and writes it to the context.
 *
 * <p>The line is split with {@code _splitter}. Leading fields supply the row id, record id and
 * family unless the corresponding option says to derive them instead: ids may be generated as a
 * digest of the raw line bytes, and the family may come from the path. The remaining fields
 * must match the column names defined for the family one-to-one; values for which
 * {@code handleHiveNulls} returns {@code null} are skipped.
 *
 * @param k the input key; not read by this method
 * @param value the raw input line
 * @param context the context the mutation is written to
 * @throws IOException if the family has no column definition or the field count does not match
 * @throws InterruptedException declared by the overridden method
 */
@Override
protected void map(Writable k, Text value, Context context) throws IOException, InterruptedException {
  final BlurRecord record = _mutate.getRecord();
  record.clearColumns();

  final String line = value.toString();
  final List<String> fields = toList(_splitter.split(line));

  int cursor = 0;
  boolean rowIdWasHashed = false;

  if (_autoGenerateRowIdAsHashOfData) {
    _digest.reset();
    // Only the first getLength() bytes of the array returned by getBytes() are fed to the digest.
    _digest.update(value.getBytes(), 0, value.getLength());
    record.setRowId(new BigInteger(_digest.digest()).toString(Character.MAX_RADIX));
    rowIdWasHashed = true;
  } else {
    record.setRowId(fields.get(cursor++));
  }

  if (_autoGenerateRecordIdAsHashOfData) {
    if (rowIdWasHashed) {
      // The row id already is the hash of this line; reuse it instead of hashing again.
      record.setRecordId(record.getRowId());
    } else {
      _digest.reset();
      _digest.update(value.getBytes(), 0, value.getLength());
      record.setRecordId(new BigInteger(_digest.digest()).toString(Character.MAX_RADIX));
    }
  } else {
    record.setRecordId(fields.get(cursor++));
  }

  final String family = _familyNotInFile ? _familyFromPath : fields.get(cursor++);
  record.setFamily(family);

  final List<String> columnNames = _columnNameMap.get(family);
  if (columnNames == null) {
    throw new IOException("Family [" + family + "] is missing in the definition.");
  }

  if (fields.size() - cursor != columnNames.size()) {
    // List the leading fields that were expected in the file, for the error message.
    final StringBuilder options = new StringBuilder();
    if (!_autoGenerateRowIdAsHashOfData) {
      options.append("rowid,");
    }
    if (!_autoGenerateRecordIdAsHashOfData) {
      options.append("recordid,");
    }
    if (!_familyNotInFile) {
      options.append("family,");
    }
    throw new IOException("Record [" + line + "] does not match defined record [" + options
        + getColumnNames(columnNames) + "].");
  }

  for (int i = 0; i < columnNames.size(); i++) {
    final String columnValue = handleHiveNulls(fields.get(cursor + i));
    if (columnValue == null) {
      continue;
    }
    record.addColumn(columnNames.get(i), columnValue);
    _columnCounter.increment(1);
  }

  _key.set(record.getRowId());
  _mutate.setMutateType(MUTATE_TYPE.REPLACE);
  context.write(_key, _mutate);
  _recordCounter.increment(1);
  context.progress();
}
From source file:org.apache.drill.exec.store.text.DrillTextRecordReader.java
License:Apache License
/**
 * Finds the first occurrence of {@code delimiter} in {@code text} at or after {@code start},
 * ignoring occurrences that lie inside double quotes.
 *
 * <p>Every {@code "} byte encountered from {@code start} onwards toggles the in-quotes state;
 * a delimiter only matches while that state is off. Only the first {@code text.getLength()}
 * bytes of the array returned by {@code text.getBytes()} are examined.
 *
 * @param text the text being searched
 * @param delimiter the delimiter
 * @param start the index to start searching
 * @return the index of the first unquoted occurrence of delimiter at or after {@code start},
 *         or -1 if there is none
 */
public int find(Text text, byte delimiter, int start) {
  final int end = text.getLength();
  final byte[] data = text.getBytes();

  boolean quoted = false;
  for (int i = start; i < end; i++) {
    final byte current = data[i];
    if (current == '\"') {
      quoted = !quoted;
    }
    if (!quoted && current == delimiter) {
      return i;
    }
  }
  return -1;
}
From source file:org.apache.flume.sink.hdfs.HDFSTextFormatter.java
License:Apache License
/**
 * Serializes an event as its text form followed by a single line-feed byte.
 *
 * @param e the event to serialize
 * @return a new array holding exactly the valid bytes of the record, newline included
 */
@Override
public byte[] getBytes(Event e) {
  Text record = makeText(e);

  // Append an explicit line-feed byte rather than "\n".getBytes(), which encodes with the
  // platform default charset and so is not guaranteed to yield the single byte 0x0A.
  record.append(new byte[] {'\n'}, 0, 1);

  // The array returned by Text.getBytes() can be longer than the content; copy only the
  // first getLength() bytes.
  byte[] rawBytes = record.getBytes();
  return Arrays.copyOf(rawBytes, record.getLength());
}
From source file:org.apache.fluo.core.util.ByteUtil.java
License:Apache License
/** * Convert from Hadoop Text to Bytes/*from w w w . j a v a2 s .c o m*/ */ public static Bytes toBytes(Text t) { return Bytes.of(t.getBytes(), 0, t.getLength()); }
From source file:org.apache.fluo.core.util.ByteUtil.java
License:Apache License
public static byte[] toByteArray(Text text) { byte[] bytes = text.getBytes(); if (bytes.length != text.getLength()) { bytes = new byte[text.getLength()]; System.arraycopy(text.getBytes(), 0, bytes, 0, bytes.length); }//from w ww.j a v a2s .co m return bytes; }