Example usage for org.apache.hadoop.io Text getLength

Introduction

This page collects example usages of org.apache.hadoop.io.Text#getLength() from open-source projects.

Prototype

@Override
public int getLength() 

Document

Returns the number of bytes in the byte array.
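
Because getLength() counts UTF-8 bytes rather than characters, the two differ for non-ASCII input. A minimal sketch of the distinction (not taken from the examples below):

import org.apache.hadoop.io.Text;

public class GetLengthDemo {
    public static void main(String[] args) {
        Text ascii = new Text("abc");
        Text accented = new Text("café"); // 'é' occupies 2 bytes in UTF-8

        System.out.println(ascii.getLength());            // 3
        System.out.println(accented.getLength());         // 5 bytes
        System.out.println(accented.toString().length()); // 4 characters
    }
}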

Usage

From source file: org.apache.accumulo.shell.commands.GetSplitsCommand.java

License: Apache License

private static String encode(final boolean encode, final Text text) {
    if (text == null) {
        return null;
    }
    final int length = text.getLength();
    return encode ? Base64.getEncoder().encodeToString(TextUtil.getBytes(text))
            : DefaultFormatter.appendText(new StringBuilder(), text, length).toString();
}
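
TextUtil.getBytes here trims the Text's backing array down to getLength() before encoding. A hedged sketch of why that matters, using java.util.Arrays directly instead of Accumulo's TextUtil:

import java.util.Arrays;
import java.util.Base64;
import org.apache.hadoop.io.Text;

public class TrimBeforeEncode {
    public static void main(String[] args) {
        Text text = new Text("longer-row");
        text.set(new Text("ab")); // reuse shrinks getLength(), not the backing array

        byte[] backing = text.getBytes(); // still >= 10 bytes; trailing bytes are stale
        byte[] exact = Arrays.copyOf(backing, text.getLength()); // the 2 valid bytes

        // Encoding the trimmed copy avoids leaking stale bytes into the output.
        System.out.println(Base64.getEncoder().encodeToString(exact)); // "YWI="
    }
}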

From source file: org.apache.accumulo.test.merkle.RangeSerialization.java

License: Apache License

public static Range toRange(Key key) {
    Text holder = new Text();
    key.getRow(holder);
    Key startKey;
    if (0 == holder.getLength()) {
        startKey = null;
    } else {
        startKey = new Key(holder);
    }

    key.getColumnQualifier(holder);
    Key endKey;
    if (0 == holder.getLength()) {
        endKey = null;
    } else {
        endKey = new Key(holder);
    }

    // An unbounded (null) endpoint is marked non-inclusive; a real key is inclusive
    return new Range(startKey, startKey != null, endKey, endKey != null);
}
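
The zero-length checks above use an empty Text as a sentinel for an unbounded endpoint. A hedged usage sketch of that convention, assuming the Key layout matches what the companion serializer writes:

// Row is empty, so the range gets no lower bound; the column qualifier "m"
// becomes the inclusive upper bound.
Key stored = new Key(new Text(""), new Text(""), new Text("m"));
Range range = RangeSerialization.toRange(stored); // (-inf, "m"]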

From source file: org.apache.accumulo.test.performance.scan.CollectTabletStats.java

License: Apache License

private static void calcTabletStats(Connector conn, String table, Authorizations auths, int batchSize,
        KeyExtent ke, String[] columns) throws Exception {

    // long t1 = System.currentTimeMillis();

    Scanner scanner = conn.createScanner(table, auths);
    scanner.setBatchSize(batchSize);
    scanner.setRange(new Range(ke.getPrevEndRow(), false, ke.getEndRow(), true));

    for (String c : columns) {
        scanner.fetchColumnFamily(new Text(c));
    }

    Stat rowLen = new Stat();
    Stat cfLen = new Stat();
    Stat cqLen = new Stat();
    Stat cvLen = new Stat();
    Stat valLen = new Stat();
    Stat colsPerRow = new Stat();

    Text lastRow = null;
    int colsPerRowCount = 0;

    for (Entry<Key, Value> entry : scanner) {

        Key key = entry.getKey();
        Text row = key.getRow();

        if (lastRow == null) {
            lastRow = row;
        }

        if (!lastRow.equals(row)) {
            colsPerRow.addStat(colsPerRowCount);
            lastRow = row;
            colsPerRowCount = 0;
        }

        colsPerRowCount++;

        rowLen.addStat(row.getLength());
        cfLen.addStat(key.getColumnFamilyData().length());
        cqLen.addStat(key.getColumnQualifierData().length());
        cvLen.addStat(key.getColumnVisibilityData().length());
        valLen.addStat(entry.getValue().get().length);
    }

    // record the column count for the final row, which the loop above never flushes
    if (lastRow != null) {
        colsPerRow.addStat(colsPerRowCount);
    }

    synchronized (System.out) {
        System.out.println("");
        System.out.println("\tTablet " + ke.getUUID() + " statistics : ");
        printStat("Row length", rowLen);
        printStat("Column family length", cfLen);
        printStat("Column qualifier length", cqLen);
        printStat("Column visibility length", cvLen);
        printStat("Value length", valLen);
        printStat("Columns per row", colsPerRow);
        System.out.println("");
    }

}

From source file: org.apache.accumulo.tserver.tablet.Tablet.java

License: Apache License

private SplitRowSpec findSplitRow(Collection<FileRef> files) {

    // never split the root tablet
    // check if we already decided that we can never split
    // check to see if we're big enough to split

    long splitThreshold = tableConfiguration.getMemoryInBytes(Property.TABLE_SPLIT_THRESHOLD);
    long maxEndRow = tableConfiguration.getMemoryInBytes(Property.TABLE_MAX_END_ROW_SIZE);

    if (extent.isRootTablet() || estimateTabletSize() <= splitThreshold) {
        return null;
    }

    // have seen a big row before, do not bother checking unless a minor compaction or map file import has occurred.
    if (sawBigRow) {
        if (timeOfLastMinCWhenBigFreakinRowWasSeen != lastMinorCompactionFinishTime
                || timeOfLastImportWhenBigFreakinRowWasSeen != lastMapFileImportTime) {
            // a minor compaction or map file import has occurred... check again
            sawBigRow = false;
        } else {
            // nothing changed, do not split
            return null;
        }
    }

    SortedMap<Double, Key> keys = null;

    try {
        // we should make .25 below configurable
        keys = FileUtil.findMidPoint(getTabletServer().getFileSystem(), getTabletServer().getConfiguration(),
                extent.getPrevEndRow(), extent.getEndRow(), FileUtil.toPathStrings(files), .25);
    } catch (IOException e) {
        log.error("Failed to find midpoint " + e.getMessage());
        return null;
    }

    // check to see if one row takes up most of the tablet, in which case we can not split
    try {

        Text lastRow;
        if (extent.getEndRow() == null) {
            Key lastKey = (Key) FileUtil.findLastKey(getTabletServer().getFileSystem(),
                    getTabletServer().getConfiguration(), files);
            lastRow = lastKey.getRow();
        } else {
            lastRow = extent.getEndRow();
        }

        // We expect to get a midPoint for this set of files. If we don't get one, we have a problem.
        final Key mid = keys.get(.5);
        if (null == mid) {
            throw new IllegalStateException("Could not determine midpoint for files");
        }

        // check to see that the midPoint is not equal to the end key
        if (mid.compareRow(lastRow) == 0) {
            if (keys.firstKey() < .5) {
                Key candidate = keys.get(keys.firstKey());
                if (candidate.getLength() > maxEndRow) {
                    log.warn("Cannot split tablet " + extent + ", selected split point too long.  Length :  "
                            + candidate.getLength());

                    sawBigRow = true;
                    timeOfLastMinCWhenBigFreakinRowWasSeen = lastMinorCompactionFinishTime;
                    timeOfLastImportWhenBigFreakinRowWasSeen = lastMapFileImportTime;

                    return null;
                }
                if (candidate.compareRow(lastRow) != 0) {
                    // we should use this ratio in split size estimations
                    if (log.isTraceEnabled())
                        log.trace(String.format(
                                "Splitting at %6.2f instead of .5, row at .5 is same as end row%n",
                                keys.firstKey()));
                    return new SplitRowSpec(keys.firstKey(), candidate.getRow());
                }

            }

            log.warn("Cannot split tablet " + extent + " it contains a big row : " + lastRow);

            sawBigRow = true;
            timeOfLastMinCWhenBigFreakinRowWasSeen = lastMinorCompactionFinishTime;
            timeOfLastImportWhenBigFreakinRowWasSeen = lastMapFileImportTime;

            return null;
        }

        Text text = mid.getRow();
        SortedMap<Double, Key> firstHalf = keys.headMap(.5);
        if (firstHalf.size() > 0) {
            Text beforeMid = firstHalf.get(firstHalf.lastKey()).getRow();
            Text shorter = new Text();
            int trunc = longestCommonLength(text, beforeMid);
            shorter.set(text.getBytes(), 0, Math.min(text.getLength(), trunc + 1));
            text = shorter;
        }

        if (text.getLength() > maxEndRow) {
            log.warn("Cannot split tablet " + extent + ", selected split point too long.  Length :  "
                    + text.getLength());

            sawBigRow = true;
            timeOfLastMinCWhenBigFreakinRowWasSeen = lastMinorCompactionFinishTime;
            timeOfLastImportWhenBigFreakinRowWasSeen = lastMapFileImportTime;

            return null;
        }

        return new SplitRowSpec(.5, text);
    } catch (IOException e) {
        // don't split now, but check again later
        log.error("Failed to find lastkey " + e.getMessage());
        return null;
    }

}

From source file: org.apache.accumulo.tserver.Tablet.java

License: Apache License
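
This is an earlier revision of the findSplitRow method shown above; it predates the TABLE_MAX_END_ROW_SIZE guard and the explicit null check on the midpoint key.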

private SplitRowSpec findSplitRow(Collection<FileRef> files) {

    // never split the root tablet
    // check if we already decided that we can never split
    // check to see if we're big enough to split

    long splitThreshold = acuTableConf.getMemoryInBytes(Property.TABLE_SPLIT_THRESHOLD);
    if (extent.isRootTablet() || estimateTabletSize() <= splitThreshold) {
        return null;
    }

    // have seen a big row before, do not bother checking unless a minor compaction or map file import has occurred.
    if (sawBigRow) {
        if (timeOfLastMinCWhenBigFreakinRowWasSeen != lastMinorCompactionFinishTime
                || timeOfLastImportWhenBigFreakinRowWasSeen != lastMapFileImportTime) {
            // a minor compaction or map file import has occurred... check again
            sawBigRow = false;
        } else {
            // nothing changed, do not split
            return null;
        }
    }

    SortedMap<Double, Key> keys = null;

    try {
        // we should make .25 below configurable
        keys = FileUtil.findMidPoint(fs, tabletServer.getSystemConfiguration(), extent.getPrevEndRow(),
                extent.getEndRow(), FileUtil.toPathStrings(files), .25);
    } catch (IOException e) {
        log.error("Failed to find midpoint " + e.getMessage());
        return null;
    }

    // check to see if one row takes up most of the tablet, in which case we can not split
    try {

        Text lastRow;
        if (extent.getEndRow() == null) {
            Key lastKey = (Key) FileUtil.findLastKey(fs, tabletServer.getSystemConfiguration(), files);
            lastRow = lastKey.getRow();
        } else {
            lastRow = extent.getEndRow();
        }

        // check to see that the midPoint is not equal to the end key
        if (keys.get(.5).compareRow(lastRow) == 0) {
            if (keys.firstKey() < .5) {
                Key candidate = keys.get(keys.firstKey());
                if (candidate.compareRow(lastRow) != 0) {
                    // we should use this ratio in split size estimations
                    if (log.isTraceEnabled())
                        log.trace(String.format(
                                "Splitting at %6.2f instead of .5, row at .5 is same as end row%n",
                                keys.firstKey()));
                    return new SplitRowSpec(keys.firstKey(), candidate.getRow());
                }

            }

            log.warn("Cannot split tablet " + extent + " it contains a big row : " + lastRow);

            sawBigRow = true;
            timeOfLastMinCWhenBigFreakinRowWasSeen = lastMinorCompactionFinishTime;
            timeOfLastImportWhenBigFreakinRowWasSeen = lastMapFileImportTime;

            return null;
        }
        Key mid = keys.get(.5);
        Text text = (mid == null) ? null : mid.getRow();
        SortedMap<Double, Key> firstHalf = keys.headMap(.5);
        if (firstHalf.size() > 0) {
            Text beforeMid = firstHalf.get(firstHalf.lastKey()).getRow();
            Text shorter = new Text();
            int trunc = longestCommonLength(text, beforeMid);
            shorter.set(text.getBytes(), 0, Math.min(text.getLength(), trunc + 1));
            text = shorter;
        }
        return new SplitRowSpec(.5, text);
    } catch (IOException e) {
        // don't split now, but check again later
        log.error("Failed to find lastkey " + e.getMessage());
        return null;
    }
}

From source file: org.apache.blur.mapreduce.lib.CsvBlurMapper.java

License: Apache License

@Override
protected void map(Writable k, Text value, Context context) throws IOException, InterruptedException {
    BlurRecord record = _mutate.getRecord();
    record.clearColumns();
    String str = value.toString();

    Iterable<String> split = _splitter.split(str);
    List<String> list = toList(split);

    int offset = 0;
    boolean gen = false;
    if (!_autoGenerateRowIdAsHashOfData) {
        record.setRowId(list.get(offset++));
    } else {
        _digest.reset();
        byte[] bs = value.getBytes();
        int length = value.getLength();
        _digest.update(bs, 0, length);
        record.setRowId(new BigInteger(_digest.digest()).toString(Character.MAX_RADIX));
        gen = true;
    }

    if (!_autoGenerateRecordIdAsHashOfData) {
        record.setRecordId(list.get(offset++));
    } else {
        if (gen) {
            record.setRecordId(record.getRowId());
        } else {
            _digest.reset();
            byte[] bs = value.getBytes();
            int length = value.getLength();
            _digest.update(bs, 0, length);
            record.setRecordId(new BigInteger(_digest.digest()).toString(Character.MAX_RADIX));
        }
    }
    String family;
    if (_familyNotInFile) {
        family = _familyFromPath;
    } else {
        family = list.get(offset++);
    }
    record.setFamily(family);

    List<String> columnNames = _columnNameMap.get(family);
    if (columnNames == null) {
        throw new IOException("Family [" + family + "] is missing in the definition.");
    }
    if (list.size() - offset != columnNames.size()) {

        String options = "";

        if (!_autoGenerateRowIdAsHashOfData) {
            options += "rowid,";
        }
        if (!_autoGenerateRecordIdAsHashOfData) {
            options += "recordid,";
        }
        if (!_familyNotInFile) {
            options += "family,";
        }
        String msg = "Record [" + str + "] does not match defined record [" + options
                + getColumnNames(columnNames) + "].";
        throw new IOException(msg);
    }

    for (int i = 0; i < columnNames.size(); i++) {
        String val = handleHiveNulls(list.get(i + offset));
        if (val != null) {
            record.addColumn(columnNames.get(i), val);
            _columnCounter.increment(1);
        }
    }
    _key.set(record.getRowId());
    _mutate.setMutateType(MUTATE_TYPE.REPLACE);
    context.write(_key, _mutate);
    _recordCounter.increment(1);
    context.progress();
}
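
The digest path above hashes only the first getLength() bytes of the incoming line. A self-contained sketch of that idiom, assuming SHA-1 for _digest (the mapper's actual algorithm is configured elsewhere):

import java.math.BigInteger;
import java.security.MessageDigest;
import org.apache.hadoop.io.Text;

public class HashRowIdSketch {
    public static void main(String[] args) throws Exception {
        Text value = new Text("row1,rec1,fam,colA");
        MessageDigest digest = MessageDigest.getInstance("SHA-1");

        // Hash only the valid region; value.getBytes() may be longer than getLength().
        digest.update(value.getBytes(), 0, value.getLength());
        String rowId = new BigInteger(digest.digest()).toString(Character.MAX_RADIX);
        System.out.println(rowId); // deterministic id derived from the line's bytes
    }
}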

From source file: org.apache.drill.exec.store.text.DrillTextRecordReader.java

License: Apache License

/**
 * Returns the index within the text of the first occurrence of the delimiter,
 * starting the search at the specified index. Delimiters inside double quotes
 * are ignored.
 *
 * @param  text  the text being searched
 * @param  delimiter the delimiter
 * @param  start the index at which to start searching
 * @return the index of the first occurrence of the delimiter, or -1 if not found
 */
public int find(Text text, byte delimiter, int start) {
    int len = text.getLength();
    int p = start;
    byte[] bytes = text.getBytes();
    boolean inQuotes = false;
    while (p < len) {
        if ('\"' == bytes[p]) {
            inQuotes = !inQuotes;
        }
        if (!inQuotes && bytes[p] == delimiter) {
            return p;
        }
        p++;
    }
    return -1;
}
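
A hedged usage sketch, assuming the method above is in scope: locating successive unquoted commas in a CSV line (byte offsets assume ASCII input).

Text line = new Text("a,\"b,c\",d");
int first = find(line, (byte) ',', 0);          // 1
int second = find(line, (byte) ',', first + 1); // 7; the comma inside quotes is skipped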

From source file: org.apache.flume.sink.hdfs.HDFSTextFormatter.java

License: Apache License

@Override
public byte[] getBytes(Event e) {
    Text record = makeText(e);
    record.append("\n".getBytes(), 0, 1);
    byte[] rawBytes = record.getBytes();
    return Arrays.copyOf(rawBytes, record.getLength());
}
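
Arrays.copyOf does the real work here: record.getBytes() exposes the backing array, which may be longer than the appended content, so only the first getLength() bytes are kept in the returned copy.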

From source file: org.apache.fluo.core.util.ByteUtil.java

License: Apache License

/**
 * Convert from Hadoop Text to Bytes.
 */
public static Bytes toBytes(Text t) {
    return Bytes.of(t.getBytes(), 0, t.getLength());
}

From source file: org.apache.fluo.core.util.ByteUtil.java

License: Apache License

public static byte[] toByteArray(Text text) {
    byte[] bytes = text.getBytes();
    if (bytes.length != text.getLength()) {
        bytes = new byte[text.getLength()];
        System.arraycopy(text.getBytes(), 0, bytes, 0, bytes.length);
    }
    return bytes;
}