Example usage for org.apache.hadoop.io Text getLength

List of usage examples for org.apache.hadoop.io Text getLength

Introduction

On this page you can find example usages of the getLength() method of org.apache.hadoop.io.Text.

Prototype

@Override
public int getLength() 

Source Link

Document

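Returns the number of valid bytes in the backing byte array. The array returned by getBytes() may be longer than this length, so only the first getLength() bytes should be read.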

Usage

From source file:org.apache.accumulo.examples.mapreduce.TableToFile.java

License:Apache License

/**
 * Configures and runs a map-only job that reads from Accumulo through
 * {@code AccumuloInputFormat} and writes text output to the path given in the options.
 *
 * @param args command-line arguments, parsed by {@code Opts}
 * @return 0 if the job reports success, 1 otherwise
 */
@Override
public int run(String[] args)
        throws IOException, InterruptedException, ClassNotFoundException, AccumuloSecurityException {
    Job exportJob = Job.getInstance(getConf());
    exportJob.setJobName(getClass().getSimpleName() + "_" + System.currentTimeMillis());
    exportJob.setJarByClass(getClass());
    Opts options = new Opts();
    options.parseArgs(getClass().getName(), args);

    exportJob.setInputFormatClass(AccumuloInputFormat.class);
    options.setAccumuloConfigs(exportJob);

    // Each comma-separated entry is either "family" or "family:qualifier".
    HashSet<Pair<Text, Text>> requestedColumns = new HashSet<>();
    for (String columnSpec : options.columns.split(",")) {
        int colon = columnSpec.indexOf(":");
        Text family;
        Text qualifier;
        if (colon < 0) {
            family = new Text(columnSpec);
            qualifier = null;
        } else {
            family = new Text(columnSpec.substring(0, colon));
            qualifier = new Text(columnSpec.substring(colon + 1));
        }
        // Entries with an empty family are ignored.
        if (family.getLength() <= 0) {
            continue;
        }
        requestedColumns.add(new Pair<>(family, qualifier));
    }
    if (!requestedColumns.isEmpty()) {
        AccumuloInputFormat.fetchColumns(exportJob, requestedColumns);
    }

    exportJob.setMapperClass(TTFMapper.class);
    exportJob.setMapOutputKeyClass(NullWritable.class);
    exportJob.setMapOutputValueClass(Text.class);

    // Map-only: no reduce phase.
    exportJob.setNumReduceTasks(0);

    exportJob.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(exportJob, new Path(options.output));

    exportJob.waitForCompletion(true);
    if (exportJob.isSuccessful()) {
        return 0;
    }
    return 1;
}

From source file:org.apache.accumulo.examples.simple.filedata.KeyUtil.java

License:Apache License

/**
 * Split a text object using a null byte separator into an array of strings.
 * /*w  w  w . j a v  a  2 s .co  m*/
 * @param t
 *          null-byte separated text object
 * @return an array of strings
 */
public static String[] splitNullSepText(Text t) {
    ArrayList<String> s = new ArrayList<String>();
    byte[] b = t.getBytes();
    int lastindex = 0;
    for (int i = 0; i < t.getLength(); i++) {
        if (b[i] == (byte) 0) {
            s.add(new String(b, lastindex, i - lastindex));
            lastindex = i + 1;
        }
    }
    s.add(new String(b, lastindex, t.getLength() - lastindex));
    return s.toArray(new String[s.size()]);
}

From source file:org.apache.accumulo.examples.simple.mapreduce.RowHash.java

License:Apache License

/**
 * Configures and runs a map-only job that reads one column from Accumulo through
 * {@code AccumuloInputFormat} and emits {@code Mutation}s through {@code AccumuloOutputFormat}.
 *
 * @param args command-line arguments, parsed by {@code Opts}
 * @return 0 if the job reports success, 1 otherwise
 */
@Override
public int run(String[] args) throws Exception {
    Job hashJob = JobUtil.getJob(getConf());
    hashJob.setJobName(getClass().getName());
    hashJob.setJarByClass(getClass());
    Opts options = new Opts();
    options.parseArgs(RowHash.class.getName(), args);
    hashJob.setInputFormatClass(AccumuloInputFormat.class);
    options.setAccumuloConfigs(hashJob);

    // The column option is either "family" or "family:qualifier".
    String columnSpec = options.column;
    int colon = columnSpec.indexOf(":");
    Text family;
    Text qualifier;
    if (colon < 0) {
        family = new Text(columnSpec);
        qualifier = null;
    } else {
        family = new Text(columnSpec.substring(0, colon));
        qualifier = new Text(columnSpec.substring(colon + 1));
    }
    // Only restrict the scan when a non-empty family was supplied.
    if (family.getLength() > 0) {
        Pair<Text, Text> column = new Pair<Text, Text>(family, qualifier);
        AccumuloInputFormat.fetchColumns(hashJob, Collections.singleton(column));
    }

    hashJob.setMapperClass(HashDataMapper.class);
    hashJob.setMapOutputKeyClass(Text.class);
    hashJob.setMapOutputValueClass(Mutation.class);

    // Map-only: no reduce phase.
    hashJob.setNumReduceTasks(0);

    hashJob.setOutputFormatClass(AccumuloOutputFormat.class);

    hashJob.waitForCompletion(true);
    if (hashJob.isSuccessful()) {
        return 0;
    }
    return 1;
}

From source file:org.apache.accumulo.examples.simple.mapreduce.TableToFile.java

License:Apache License

/**
 * Sets up and launches a map-only job that scans an Accumulo table via
 * {@code AccumuloInputFormat} and writes the mapper output as text files under the
 * configured output path.
 *
 * @param args command-line arguments, parsed by {@code Opts}
 * @return 0 if the job reports success, 1 otherwise
 */
@Override
public int run(String[] args)
        throws IOException, InterruptedException, ClassNotFoundException, AccumuloSecurityException {
    Job dumpJob = JobUtil.getJob(getConf());
    String jobName = getClass().getSimpleName() + "_" + System.currentTimeMillis();
    dumpJob.setJobName(jobName);
    dumpJob.setJarByClass(getClass());
    Opts parsed = new Opts();
    parsed.parseArgs(getClass().getName(), args);

    dumpJob.setInputFormatClass(AccumuloInputFormat.class);
    parsed.setAccumuloConfigs(dumpJob);

    // Translate the comma-separated "family[:qualifier]" list into column pairs.
    HashSet<Pair<Text, Text>> wanted = new HashSet<Pair<Text, Text>>();
    String[] specs = parsed.columns.split(",");
    for (int i = 0; i < specs.length; i++) {
        String spec = specs[i];
        int sep = spec.indexOf(":");
        boolean familyOnly = sep < 0;
        Text fam = new Text(familyOnly ? spec : spec.substring(0, sep));
        Text qual = familyOnly ? null : new Text(spec.substring(sep + 1));
        // An empty family means there is nothing to fetch for this entry.
        if (fam.getLength() > 0) {
            wanted.add(new Pair<Text, Text>(fam, qual));
        }
    }
    if (!wanted.isEmpty()) {
        AccumuloInputFormat.fetchColumns(dumpJob, wanted);
    }

    dumpJob.setMapperClass(TTFMapper.class);
    dumpJob.setMapOutputKeyClass(NullWritable.class);
    dumpJob.setMapOutputValueClass(Text.class);

    // No reducers: mapper output goes straight to the output format.
    dumpJob.setNumReduceTasks(0);

    dumpJob.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(dumpJob, new Path(parsed.output));

    dumpJob.waitForCompletion(true);
    int exitCode = dumpJob.isSuccessful() ? 0 : 1;
    return exitCode;
}

From source file:org.apache.accumulo.examples.wikisearch.iterator.FieldIndexIterator.java

License:Apache License

/**
 * Attempts to move this iterator forward to the position indicated by {@code jumpKey}.
 *
 * <p>The method first compares rows (this iterator's top/current row against the jump key's
 * row) and, when the rows are equal, compares the uids that {@code keyParser} extracts from
 * the two keys. Depending on the outcome it either leaves the iterator where it is, re-seeks
 * via {@code jumpSeek} with a new range, or clears {@code topKey}/{@code topValue}.
 *
 * @param jumpKey key describing the row (and uid) to jump to
 * @return {@code true} if the iterator is already ahead of the jump row and
 *         {@code canBeInNextRow()} allows it, or if a forward seek left a top key that is not
 *         past {@code parentEndRow}; the result of {@code jumpSeek} when only the uid had to
 *         advance; {@code hasTop()} when rows match and this uid is not less than the jump
 *         uid; {@code false} in every other case
 * @throws IOException presumably propagated from {@code jumpSeek} (not visible here; confirm)
 */
public boolean jump(Key jumpKey) throws IOException {
    if (log.isDebugEnabled()) {
        String pEndRow = "empty";
        if (parentEndRow != null) {
            pEndRow = parentEndRow.toString();
        }
        log.debug("jump, current range: " + range + "  parentEndRow is: " + pEndRow);

    }

    // A jump target beyond the parent's end row can never be satisfied.
    if (parentEndRow != null && jumpKey.getRow().compareTo(parentEndRow) > 0) {
        // can't go there.
        if (log.isDebugEnabled()) {
            log.debug("jumpRow: " + jumpKey.getRow() + " is greater than my parentEndRow: " + parentEndRow);
        }
        return false;
    }

    // comp < 0: this iterator is behind the jump row; comp > 0: ahead of it; comp == 0: same row.
    int comp;
    if (!this.hasTop()) {
        if (log.isDebugEnabled()) {
            log.debug("current row: " + this.currentRow);
        }

        /*
         * if I don't have a top, then I should be out of my range for my current row. Need to check parent range to see if I'm supposed to continue to next row
         * or not. Current row can be null because maybe I never found anything in this row.
         */

        if (parentEndRow != null) {
            // if jumpKey row is greater than parentEndRow, stop
            // NOTE(review): this repeats the check made at the top of the method and appears
            // unreachable unless something in between changes parentEndRow -- confirm.
            if (jumpKey.getRow().compareTo(parentEndRow) > 0) {
                if (log.isDebugEnabled()) {
                    log.debug("jumpKey row is greater than my parentEndRow, done");
                }
                return false;
            }

            // if my current row is null, I must have hit the end of the tablet
            if (currentRow == null) {
                if (log.isDebugEnabled()) {
                    log.debug("I have parentEndRow, but no current row, must have hit end of tablet, done");
                }
                return false;
            }

            // if my current row is greater than jump row stop, a seek will be
            // called to get me going again. If my row is equal, but i don't
            // have a topkey, i'm done
            if (currentRow.compareTo(jumpKey.getRow()) >= 0) {
                if (log.isDebugEnabled()) {
                    log.debug("I have parentEndRow, but topKey, and my currentRow is >= jumpRow, done");
                }
                return false;
            }

        } else { // we're allowed to go to the end of the tablet
            // if my current row is null, I must have hit the end of the tablet
            if (currentRow == null) {
                if (log.isDebugEnabled()) {
                    log.debug("no parentEndRow and current Row is null, must have hit end of tablet, done");
                }
                return false;
            }

            if (currentRow.compareTo(jumpKey.getRow()) >= 0) {
                // i'm past or equal to the jump point and have no top,
                // jumping's not going to help
                if (log.isDebugEnabled()) {
                    log.debug("no parentEndRow, no topKey, and currentRow is >= jumpRow, done");
                }
                return false;
            }
        }

        // ok, jumpKey is ahead of me I'll mark it and allow the normal
        // flow to jump there and see if I have top.
        if (log.isDebugEnabled()) {
            log.debug("no topKey, but jumpRow is ahead and I'm allowed to go to it, marking");
        }
        // Force the "behind the jump row" branch below.
        comp = -1;

    } else { // I have a topKey, I can do the normal comparisons
        if (log.isDebugEnabled()) {
            log.debug("have top, can do normal comparisons");
        }
        comp = this.topKey.getRow().compareTo(jumpKey.getRow());
    }

    // ------------------
    // compare rows
    if (comp > 0) { // my row is ahead of jump key
        if (canBeInNextRow()) {
            if (log.isDebugEnabled()) {
                log.debug("I'm ahead of jump row & it's ok.");
                log.debug("jumpRow: " + jumpKey.getRow() + " myRow: " + topKey.getRow() + " parentEndRow: "
                        + parentEndRow);
            }
            return true;
        } else {
            if (log.isDebugEnabled()) {
                log.debug("I'm ahead of jump row & can't be here, or at end of tablet.");
            }
            // Not allowed to sit in this row: drop the current top.
            topKey = null;
            topValue = null;
            return false;
        }

    } else if (comp < 0) { // a row behind jump key, need to move forward
        if (log.isDebugEnabled()) {
            String myRow = "";
            if (hasTop()) {
                myRow = topKey.getRow().toString();
            } else if (currentRow != null) {
                myRow = currentRow.toString();
            }
            log.debug("My row " + myRow + " is less than jump row: " + jumpKey.getRow() + " seeking");
        }
        // Replace the current range with one built for the jump row, then seek into it.
        range = buildRange(jumpKey.getRow());
        // this.seek(range, EMPTY_COL_FAMS, false);

        boolean success = jumpSeek(range);
        if (log.isDebugEnabled() && success) {
            log.debug("uid forced jump, found topKey: " + topKey);
        }

        if (!this.hasTop()) {
            log.debug("seeked with new row and had no top");
            topKey = null;
            topValue = null;
            return false;
        // NOTE(review): currentRow is dereferenced here without a null check; presumably
        // jumpSeek sets it whenever hasTop() is true -- confirm.
        } else if (parentEndRow != null && currentRow.compareTo(parentEndRow) > 0) {
            if (log.isDebugEnabled()) {
                log.debug("myRow: " + getTopKey().getRow() + " is past parentEndRow: " + parentEndRow);
            }
            topKey = null;
            topValue = null;
            return false;
        }
        if (log.isDebugEnabled()) {
            log.debug("jumped, valid top: " + getTopKey());
        }

        return true;

    } else { // rows are equal, check the uid!

        // keyParser is reused for both keys, so each uid is read out before the next parse.
        keyParser.parse(topKey);
        String myUid = keyParser.getUid();
        keyParser.parse(jumpKey);
        String jumpUid = keyParser.getUid();

        int ucomp = myUid.compareTo(jumpUid);
        if (log.isDebugEnabled()) {
            log.debug("topKeyUid: " + myUid + "  jumpUid: " + jumpUid + "  myUid.compareTo(jumpUid)->" + ucomp);
        }
        if (ucomp < 0) { // need to move up
            log.debug("my uid is less than jumpUid, topUid: " + myUid + "   jumpUid: " + jumpUid);

            // Keep only the part of the jump key's column qualifier that follows the first
            // NULL_BYTE. The Text returned by getColumnQualifier() is modified in place;
            // NOTE(review): whether that affects jumpKey depends on whether a copy is returned.
            Text cq = jumpKey.getColumnQualifier();
            int index = cq.find(NULL_BYTE);
            if (0 <= index) {
                cq.set(cq.getBytes(), index + 1, cq.getLength() - index - 1);
            } else {
                log.error("Expected a NULL separator in the column qualifier");
                this.topKey = null;
                this.topValue = null;
                return false;
            }

            // note my internal range stays the same, I just need to move forward
            Key startKey = new Key(topKey.getRow(), fName, new Text(fValue + NULL_BYTE + cq));
            Key endKey = new Key(topKey.getRow(), fName, new Text(fValue + ONE_BYTE));
            range = new Range(startKey, true, endKey, false);
            log.debug("Using range: " + range + " to seek");
            // source.seek(range, EMPTY_COL_FAMS, false);
            boolean success = jumpSeek(range);
            if (log.isDebugEnabled() && success) {
                log.debug("uid forced jump, found topKey: " + topKey);
            }

            return success;

        } else { // else do nothing
            // Reached when ucomp >= 0, i.e. equal uids land here as well as greater ones.
            log.debug("my uid is greater than jumpUid, topKey: " + topKey + "   jumpKey: " + jumpKey);
            log.debug("doing nothing");
        }
    }

    return hasTop();
}

From source file:org.apache.accumulo.pig.AccumuloStorage.java

License:Apache License

/**
 * Converts a tuple into a single mutation to be written to a table.
 *
 * <p>Field 0 supplies the row, field 1 the column family and field 2 the column qualifier.
 * When the tuple has more than four fields, field 3 is read as the column visibility and
 * field 4 as the value; otherwise field 3 is the value and no visibility is set.
 *
 * @param tuple the tuple to convert
 * @return a singleton collection holding the mutation built from the tuple
 */
public Collection<Mutation> getMutations(Tuple tuple) throws ExecException, IOException {
    Mutation mutation = new Mutation(StorageUtils.objToText(tuple.get(0)));
    Text family = StorageUtils.objToText(tuple.get(1));
    Text qualifier = StorageUtils.objToText(tuple.get(2));

    boolean carriesVisibility = tuple.size() > 4;
    if (!carriesVisibility) {
        Value value = new Value(StorageUtils.objToBytes(tuple.get(3)));
        mutation.put(family, qualifier, value);
    } else {
        Text visibility = StorageUtils.objToText(tuple.get(3));
        Value value = new Value(StorageUtils.objToBytes(tuple.get(4)));
        // An empty visibility is treated the same as no visibility at all.
        if (visibility.getLength() != 0) {
            mutation.put(family, qualifier, new ColumnVisibility(visibility), value);
        } else {
            mutation.put(family, qualifier, value);
        }
    }

    return Collections.singleton(mutation);
}

From source file:org.apache.accumulo.server.tabletserver.Tablet.java

License:Apache License

/**
 * Chooses the row at which this tablet should be split, if it should be split at all.
 *
 * <p>Returns {@code null} (no split) when the tablet is the root tablet, when its estimated
 * size does not exceed the configured split threshold, when a previously seen "big row" is
 * still in effect, when a single row spans the midpoint through the end of the tablet, or
 * when reading the files fails with an {@link IOException}.
 *
 * @param files the files to examine when looking for the midpoint and last key
 * @return the split ratio and row to split at, or {@code null} if the tablet should not be
 *         split now
 */
private SplitRowSpec findSplitRow(Collection<FileRef> files) {

    // never split the root tablet
    // check if we already decided that we can never split
    // check to see if we're big enough to split

    long splitThreshold = acuTableConf.getMemoryInBytes(Property.TABLE_SPLIT_THRESHOLD);
    if (extent.isRootTablet() || estimateTabletSize() <= splitThreshold) {
        return null;
    }

    // have seen a big row before, do not bother checking unless a minor compaction or map file import has occurred.
    if (sawBigRow) {
        if (timeOfLastMinCWhenBigFreakinRowWasSeen != lastMinorCompactionFinishTime
                || timeOfLastImportWhenBigFreakinRowWasSeen != lastMapFileImportTime) {
            // a minor compaction or map file import has occurred... check again
            sawBigRow = false;
        } else {
            // nothing changed, do not split
            return null;
        }
    }

    // Keys found by FileUtil.findMidPoint, keyed by a Double; the code below looks up the
    // entry at .5 and the entries below .5 (presumably fractions of the tablet's data -- confirm).
    SortedMap<Double, Key> keys = null;

    try {
        // we should make .25 below configurable
        keys = FileUtil.findMidPoint(fs, tabletServer.getSystemConfiguration(), extent.getPrevEndRow(),
                extent.getEndRow(), files, .25);
    } catch (IOException e) {
        log.error("Failed to find midpoint " + e.getMessage());
        return null;
    }

    // check to see if one row takes up most of the tablet, in which case we can not split
    try {

        // Last row of the tablet: the extent's end row, or the row of the last key in the
        // files when the extent has no end row.
        Text lastRow;
        if (extent.getEndRow() == null) {
            Key lastKey = (Key) FileUtil.findLastKey(fs, tabletServer.getSystemConfiguration(), files);
            lastRow = lastKey.getRow();
        } else {
            lastRow = extent.getEndRow();
        }

        // check to see that the midPoint is not equal to the end key
        // NOTE(review): keys.get(.5) is dereferenced here without a null check, although the
        // code further down guards against a null midpoint -- confirm whether it can be null.
        if (keys.get(.5).compareRow(lastRow) == 0) {
            if (keys.firstKey() < .5) {
                // Fall back to the smallest recorded point if it lies in a different row.
                Key candidate = keys.get(keys.firstKey());
                if (candidate.compareRow(lastRow) != 0) {
                    // we should use this ratio in split size estimations
                    if (log.isTraceEnabled())
                        log.trace(String.format(
                                "Splitting at %6.2f instead of .5, row at .5 is same as end row%n",
                                keys.firstKey()));
                    return new SplitRowSpec(keys.firstKey(), candidate.getRow());
                }

            }

            log.warn("Cannot split tablet " + extent + " it contains a big row : " + lastRow);

            // Remember when the big row was seen so the check is skipped until a minor
            // compaction or map file import changes these timestamps.
            sawBigRow = true;
            timeOfLastMinCWhenBigFreakinRowWasSeen = lastMinorCompactionFinishTime;
            timeOfLastImportWhenBigFreakinRowWasSeen = lastMapFileImportTime;

            return null;
        }
        Key mid = keys.get(.5);
        Text text = (mid == null) ? null : mid.getRow();
        SortedMap<Double, Key> firstHalf = keys.headMap(.5);
        if (firstHalf.size() > 0) {
            // Shorten the split row to the bytes it shares with the row of the closest key
            // before the midpoint, plus one more byte (capped at the row's own length).
            // NOTE(review): text would be null here if mid were null -- see the note above.
            Text beforeMid = firstHalf.get(firstHalf.lastKey()).getRow();
            Text shorter = new Text();
            int trunc = longestCommonLength(text, beforeMid);
            shorter.set(text.getBytes(), 0, Math.min(text.getLength(), trunc + 1));
            text = shorter;
        }
        return new SplitRowSpec(.5, text);
    } catch (IOException e) {
        // don't split now, but check again later
        log.error("Failed to find lastkey " + e.getMessage());
        return null;
    }
}

From source file:org.apache.accumulo.server.tabletserver.Tablet.java

License:Apache License

/**
 * Counts how many leading bytes two texts have in common.
 *
 * @param text first text to compare
 * @param beforeMid second text to compare
 * @return the length, in bytes, of the longest shared prefix of the two texts
 */
private static int longestCommonLength(Text text, Text beforeMid) {
    int matched = 0;
    // Stop at the end of the shorter text or at the first differing byte.
    for (; matched < text.getLength() && matched < beforeMid.getLength(); matched++) {
        if (text.getBytes()[matched] != beforeMid.getBytes()[matched]) {
            break;
        }
    }
    return matched;
}

From source file:org.apache.accumulo.server.test.performance.scan.CollectTabletStats.java

License:Apache License

/**
 * Scans one tablet's range and prints length statistics for its rows, column families,
 * column qualifiers, column visibilities and values, plus the number of columns per row.
 *
 * @param conn connector used to create the scanner
 * @param table name of the table to scan
 * @param auths authorizations to scan with
 * @param batchSize scanner batch size
 * @param ke extent of the tablet; the scan covers (prevEndRow, endRow]
 * @param columns column families to fetch
 * @throws Exception if creating or running the scan fails
 */
private static void calcTabletStats(Connector conn, String table, String[] auths, int batchSize, KeyExtent ke,
        String[] columns) throws Exception {

    Scanner scanner = conn.createScanner(table, new Authorizations(auths));
    scanner.setBatchSize(batchSize);
    scanner.setRange(new Range(ke.getPrevEndRow(), false, ke.getEndRow(), true));

    for (String c : columns) {
        scanner.fetchColumnFamily(new Text(c));
    }

    Stat rowLen = new Stat();
    Stat cfLen = new Stat();
    Stat cqLen = new Stat();
    Stat cvLen = new Stat();
    Stat valLen = new Stat();
    Stat colsPerRow = new Stat();

    Text lastRow = null;
    int colsPerRowCount = 0;

    for (Entry<Key, Value> entry : scanner) {

        Key key = entry.getKey();
        Text row = key.getRow();

        if (lastRow == null) {
            lastRow = row;
        }

        // A row change closes out the previous row's column count.
        if (!lastRow.equals(row)) {
            colsPerRow.addStat(colsPerRowCount);
            lastRow = row;
            colsPerRowCount = 0;
        }

        colsPerRowCount++;

        rowLen.addStat(row.getLength());
        cfLen.addStat(key.getColumnFamilyData().length());
        cqLen.addStat(key.getColumnQualifierData().length());
        cvLen.addStat(key.getColumnVisibilityData().length());
        valLen.addStat(entry.getValue().get().length);
    }

    // The loop only records a row's count when the next row begins, so the final row
    // (or the only row) would otherwise be missing from the statistic.
    if (lastRow != null) {
        colsPerRow.addStat(colsPerRowCount);
    }

    // Hold the System.out monitor so the whole report is printed as one uninterrupted group.
    synchronized (System.out) {
        System.out.println("");
        System.out.println("\tTablet " + ke.getUUID() + " statistics : ");
        printStat("Row length", rowLen);
        printStat("Column family length", cfLen);
        printStat("Column qualifier length", cqLen);
        printStat("Column visibility length", cvLen);
        printStat("Value length", valLen);
        printStat("Columns per row", colsPerRow);
        System.out.println("");
    }

}

From source file:org.apache.accumulo.server.util.VerifyTabletAssignments.java

License:Apache License

/**
 * Issues a multi-scan against one tablet server with a small range for each tablet it is
 * supposed to host, passing every continued scan result to {@code checkFailures}.
 *
 * <p>The client obtained from {@code ThriftUtil} is returned in a {@code finally} block so
 * it is not leaked when one of the scan calls throws.
 *
 * @param context client context used to obtain the client and RPC credentials
 * @param entry the tablet server's address and the extents assigned to it
 * @param failures collection handed to {@code checkFailures} to record failed extents
 */
private static void checkTabletServer(ClientContext context, Entry<HostAndPort, List<KeyExtent>> entry,
        HashSet<KeyExtent> failures) throws ThriftSecurityException, TException, NoSuchScanIDException {
    TabletClientService.Iface client = ThriftUtil.getTServerClient(entry.getKey(), context);
    try {
        Map<TKeyExtent, List<TRange>> batch = new TreeMap<>();

        for (KeyExtent keyExtent : entry.getValue()) {
            Text row = keyExtent.getEndRow();
            Text row2 = null;

            if (row == null) {
                // No end row: build a range just after the previous end row, or around a
                // fixed placeholder row when the extent has no previous end row either.
                row = keyExtent.getPrevEndRow();

                if (row != null) {
                    row = new Text(row);
                    row.append(new byte[] { 'a' }, 0, 1);
                } else {
                    row = new Text("1234567890");
                }

                row2 = new Text(row);
                row2.append(new byte[] { '!' }, 0, 1);
            } else {
                // Work on copies so the extent's own end row is not modified.
                row = new Text(row);
                row2 = new Text(row);

                // Decrement the last byte of the start row so the range [row, row2) ends
                // just before the tablet's end row.
                row.getBytes()[row.getLength() - 1] = (byte) (row.getBytes()[row.getLength() - 1] - 1);
            }

            Range r = new Range(row, true, row2, false);
            batch.put(keyExtent.toThrift(), Collections.singletonList(r.toThrift()));
        }
        TInfo tinfo = Tracer.traceInfo();
        Map<String, Map<String, String>> emptyMapSMapSS = Collections.emptyMap();
        List<IterInfo> emptyListIterInfo = Collections.emptyList();
        List<TColumn> emptyListColumn = Collections.emptyList();
        InitialMultiScan is = client.startMultiScan(tinfo, context.rpcCreds(), batch, emptyListColumn,
                emptyListIterInfo, emptyMapSMapSS, Authorizations.EMPTY.getAuthorizationsBB(), false, null, 0L,
                null, null);
        if (is.result.more) {
            MultiScanResult result = client.continueMultiScan(tinfo, is.scanID);
            checkFailures(entry.getKey(), failures, result);

            while (result.more) {
                result = client.continueMultiScan(tinfo, is.scanID);
                checkFailures(entry.getKey(), failures, result);
            }
        }

        client.closeMultiScan(tinfo, is.scanID);
    } finally {
        // Always hand the client back, even when a scan call above throws.
        ThriftUtil.returnClient((TServiceClient) client);
    }
}