Example usage for org.apache.hadoop.io Text getBytes

List of usage examples for org.apache.hadoop.io Text getBytes

Introduction

This page collects usage examples for the method org.apache.hadoop.io.Text.getBytes().

Prototype

@Override
public byte[] getBytes() 

Source Link

Document

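Returns the raw backing byte array. Only the first getLength() bytes are valid; the array may be longer than that, so callers must always bound their reads with getLength().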

Usage

From source file:org.apache.accumulo.core.util.TextUtil.java

License:Apache License

/**
 * Wraps the valid portion of a {@link Text}'s backing array in a {@link ByteBuffer}.
 *
 * @param text the text to wrap; may be {@code null}
 * @return a buffer over bytes {@code [0, text.getLength())} of the backing array, or
 *         {@code null} when {@code text} is {@code null}
 */
public static ByteBuffer getByteBuffer(Text text) {
    // The buffer shares the Text's backing array (no copy), limited to its valid length.
    return (text == null) ? null : ByteBuffer.wrap(text.getBytes(), 0, text.getLength());
}

From source file:org.apache.accumulo.core.util.TextUtil.java

License:Apache License

/**
 * Returns {@code text} cut down to at most {@code maxLen} bytes, followed by a
 * {@code "... TRUNCATED"} marker, or {@code text} itself when it is already short enough.
 *
 * @param text   the text to truncate; must not be {@code null}
 * @param maxLen the maximum number of bytes of {@code text} to keep
 * @return the original instance when {@code text.getLength() <= maxLen}; otherwise a new
 *         {@link Text} holding the first {@code maxLen} bytes plus the marker
 */
public static Text truncate(Text text, int maxLen) {
    if (text.getLength() <= maxLen) {
        return text;
    }

    Text newText = new Text();
    // NOTE(review): the cut is byte-based, so it can split a multi-byte UTF-8 sequence.
    newText.append(text.getBytes(), 0, maxLen);

    // Append by encoded byte count rather than by char count, so the marker stays
    // intact even if it is ever changed to contain non-ASCII characters.
    byte[] suffix = "... TRUNCATED".getBytes(UTF_8);
    newText.append(suffix, 0, suffix.length);
    return newText;
}

From source file:org.apache.accumulo.core.util.TextUtilTest.java

License:Apache License

/**
 * co/*from   w w  w . j  av  a 2  s.c o  m*/
 */
public void testGetBytes() {
    String longMessage = "This is some text";
    Text longMessageText = new Text(longMessage);
    String smallerMessage = "a";
    Text smallerMessageText = new Text(smallerMessage);
    Text someText = new Text(longMessage);
    assertTrue(someText.equals(longMessageText));
    someText.set(smallerMessageText);
    assertTrue(someText.getLength() != someText.getBytes().length);
    assertTrue(TextUtil.getBytes(someText).length == smallerMessage.length());
    assertTrue((new Text(TextUtil.getBytes(someText))).equals(smallerMessageText));
}

From source file:org.apache.accumulo.examples.dirlist.QueryUtil.java

License:Apache License

/**
 * Returns either the {@link #DIR_COLF} or a decoded string version of the colf.
 *
 * @param colf/*from ww w . java2  s  .c  o  m*/
 *          the column family
 */
public static String getType(Text colf) {
    if (colf.equals(DIR_COLF))
        return colf.toString() + ":";
    return Long.toString(Ingest.encoder.decode(colf.getBytes())) + ":";
}

From source file:org.apache.accumulo.examples.filedata.KeyUtil.java

License:Apache License

/**
 * Split a text object using a null byte separator into an array of strings.
 *
 * @param t/*w  w w.j  av a  2s  .  c o  m*/
 *          null-byte separated text object
 * @return an array of strings
 */
public static String[] splitNullSepText(Text t) {
    ArrayList<String> s = new ArrayList<>();
    byte[] b = t.getBytes();
    int lastindex = 0;
    for (int i = 0; i < t.getLength(); i++) {
        if (b[i] == (byte) 0) {
            s.add(new String(b, lastindex, i - lastindex));
            lastindex = i + 1;
        }
    }
    s.add(new String(b, lastindex, t.getLength() - lastindex));
    return s.toArray(new String[s.size()]);
}

From source file:org.apache.accumulo.examples.simple.filedata.KeyUtil.java

License:Apache License

/**
 * Split a text object using a null byte separator into an array of strings.
 *
 * <p>Only the first {@code t.getLength()} bytes of the backing array are examined. Each
 * segment is decoded as UTF-8, the encoding {@link Text} stores its content in, rather
 * than with the platform default charset.
 *
 * @param t
 *          null-byte separated text object
 * @return an array of strings; always at least one element (the text after the last
 *         separator, possibly empty)
 */
public static String[] splitNullSepText(Text t) {
    java.nio.charset.Charset utf8 = java.nio.charset.Charset.forName("UTF-8");
    ArrayList<String> s = new ArrayList<String>();
    byte[] b = t.getBytes();
    int length = t.getLength();
    int lastindex = 0;
    for (int i = 0; i < length; i++) {
        if (b[i] == (byte) 0) {
            s.add(new String(b, lastindex, i - lastindex, utf8));
            lastindex = i + 1;
        }
    }
    // Trailing segment after the final separator (or the whole text when there is none).
    s.add(new String(b, lastindex, length - lastindex, utf8));
    return s.toArray(new String[s.size()]);
}

From source file:org.apache.accumulo.examples.wikisearch.ingest.WikipediaMapper.java

License:Apache License

/**
 * Parses one input record into an {@code Article} and, when the article belongs to this
 * mapper's group, writes the document, in-partition index, global index, global reverse
 * index and metadata mutations for it.
 *
 * <p>Records that cannot be parsed increment the {@code wikipedia / invalid articles}
 * counter. Articles belonging to another group are skipped without writing anything.
 *
 * @param key     the input key (not read by this method)
 * @param value   the raw article text; only its first {@code getLength()} bytes are parsed
 * @param context the task context used to emit mutations, counters and progress
 * @throws IOException          declared by the mapper contract
 * @throws InterruptedException declared by the mapper contract
 */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    // Text.getBytes() returns the whole backing array, which can be longer than the valid
    // data (e.g. when the Text instance is re-used for a shorter record). Bound the stream
    // to getLength() so stale trailing bytes are never fed to the extractor.
    Article article = extractor.extract(new InputStreamReader(
            new ByteArrayInputStream(value.getBytes(), 0, value.getLength()), UTF8));
    String NULL_BYTE = "\u0000";
    String colfPrefix = language + NULL_BYTE;
    String indexPrefix = "fi" + NULL_BYTE;
    if (article != null) {
        int groupId = WikipediaMapper.getPartitionId(article, numGroups);
        if (groupId != myGroup) {
            return;
        }
        Text partitionId = new Text(Integer.toString(WikipediaMapper.getPartitionId(article, numPartitions)));

        // Create the mutations for the document.
        // Row is partition id, colf is language0articleid, colq is fieldName\0fieldValue
        Mutation m = new Mutation(partitionId);
        for (Entry<String, Object> entry : article.getFieldValues().entrySet()) {
            m.put(colfPrefix + article.getId(), entry.getKey() + NULL_BYTE + entry.getValue().toString(), cv,
                    article.getTimestamp(), NULL_VALUE);
            // Create mutations for the metadata table.
            // metadataSent remembers which keys were already written, so each is emitted once.
            String metadataKey = entry.getKey() + METADATA_EVENT_COLUMN_FAMILY + language;
            if (!metadataSent.contains(metadataKey)) {
                Mutation mm = new Mutation(entry.getKey());
                mm.put(METADATA_EVENT_COLUMN_FAMILY, language, cv, article.getTimestamp(), NULL_VALUE);
                context.write(metadataTableName, mm);
                metadataSent.add(metadataKey);
            }
        }

        // Tokenize the content
        Set<String> tokens = getTokens(article);

        // We are going to put the fields to be indexed into a multimap. This allows us to iterate
        // over the entire set once.
        Multimap<String, String> indexFields = HashMultimap.create();
        // Add the normalized field values
        LcNoDiacriticsNormalizer normalizer = new LcNoDiacriticsNormalizer();
        for (Entry<String, String> index : article.getNormalizedFieldValues().entrySet()) {
            indexFields.put(index.getKey(), index.getValue());
        }
        // Add the tokens
        for (String token : tokens) {
            indexFields.put(TOKENS_FIELD_NAME, normalizer.normalizeFieldValue("", token));
        }

        for (Entry<String, String> index : indexFields.entries()) {
            // Create mutations for the in partition index
            // Row is partition id, colf is 'fi'\0fieldName, colq is fieldValue\0language\0article id
            m.put(indexPrefix + index.getKey(), index.getValue() + NULL_BYTE + colfPrefix + article.getId(), cv,
                    article.getTimestamp(), NULL_VALUE);

            // Create mutations for the global index
            // Create a UID object for the Value
            Builder uidBuilder = Uid.List.newBuilder();
            uidBuilder.setIGNORE(false);
            uidBuilder.setCOUNT(1);
            uidBuilder.addUID(Integer.toString(article.getId()));
            Uid.List uidList = uidBuilder.build();
            Value val = new Value(uidList.toByteArray());

            // Create mutations for the global index
            // Row is field value, colf is field name, colq is partitionid\0language, value is Uid.List
            // object
            Mutation gm = new Mutation(index.getValue());
            gm.put(index.getKey(), partitionId + NULL_BYTE + language, cv, article.getTimestamp(), val);
            context.write(indexTableName, gm);

            // Create mutations for the global reverse index
            Mutation grm = new Mutation(StringUtils.reverse(index.getValue()));
            grm.put(index.getKey(), partitionId + NULL_BYTE + language, cv, article.getTimestamp(), val);
            context.write(reverseIndexTableName, grm);

            // Create mutations for the metadata table.
            String metadataKey = index.getKey() + METADATA_INDEX_COLUMN_FAMILY + language;
            if (!metadataSent.contains(metadataKey)) {
                Mutation mm = new Mutation(index.getKey());
                mm.put(METADATA_INDEX_COLUMN_FAMILY,
                        language + NULL_BYTE + LcNoDiacriticsNormalizer.class.getName(), cv,
                        article.getTimestamp(), NULL_VALUE);
                context.write(metadataTableName, mm);
                metadataSent.add(metadataKey);
            }
        }
        // Add the entire text to the document section of the table.
        // row is the partition, colf is 'd', colq is language\0articleid, value is the Base64
        // encoded document text.
        // NOTE(review): an earlier comment described the value as GZIP'd, but no compression
        // happens here. getBytes() is also called without a charset; if getText() returns a
        // String this uses the platform default — confirm and pin to UTF-8 if so.
        m.put(DOCUMENT_COLUMN_FAMILY, colfPrefix + article.getId(), cv, article.getTimestamp(),
                new Value(Base64.encodeBase64(article.getText().getBytes())));
        context.write(tablename, m);

    } else {
        context.getCounter("wikipedia", "invalid articles").increment(1);
    }
    context.progress();
}

From source file:org.apache.accumulo.examples.wikisearch.ingest.WikipediaPartitioner.java

License:Apache License

/**
 * Parses one input record into an {@code Article} and, when it belongs to this mapper's
 * group, emits it keyed by the configured language.
 *
 * <p>Records that cannot be parsed increment the {@code wikipedia / invalid articles}
 * counter and report progress. Articles belonging to another group are skipped.
 *
 * @param key     the input key (not read by this method)
 * @param value   the raw article text; only its first {@code getLength()} bytes are parsed
 * @param context the task context used to emit output, counters and progress
 * @throws IOException          declared by the mapper contract
 * @throws InterruptedException declared by the mapper contract
 */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    // Text.getBytes() returns the whole backing array, which can be longer than the valid
    // data; bound the stream to getLength() so stale trailing bytes are never parsed.
    Article article = extractor.extract(new InputStreamReader(
            new ByteArrayInputStream(value.getBytes(), 0, value.getLength()), UTF8));
    if (article != null) {
        int groupId = WikipediaMapper.getPartitionId(article, numGroups);
        if (groupId != myGroup) {
            return;
        }
        context.write(new Text(language), article);
    } else {
        context.getCounter("wikipedia", "invalid articles").increment(1);
        context.progress();
    }
}

From source file:org.apache.accumulo.examples.wikisearch.iterator.FieldIndexIterator.java

License:Apache License

/**
 * Attempts to move this iterator forward to the position described by {@code jumpKey}.
 *
 * <p>The method first rejects jump rows beyond {@code parentEndRow}. It then derives a row
 * comparison {@code comp}: the current top key's row against the jump row, or {@code -1}
 * when there is no top key but {@code currentRow} is strictly before the jump row. Based
 * on {@code comp}:
 * <ul>
 * <li>{@code comp > 0} (already ahead of the jump row): returns {@code canBeInNextRow()},
 * clearing the top key/value when that is {@code false};</li>
 * <li>{@code comp < 0} (behind): builds a new range from the jump row via
 * {@code buildRange}, seeks with {@code jumpSeek}, and returns {@code true} only when a
 * top key was found and {@code currentRow} is not past {@code parentEndRow};</li>
 * <li>{@code comp == 0} (same row): compares the UIDs parsed from both keys; seeks forward
 * within the row only when this iterator's UID is smaller, otherwise leaves state
 * untouched and returns {@code hasTop()}.</li>
 * </ul>
 *
 * @param jumpKey the key to jump to; its row and column qualifier are read
 * @return {@code true} when the iterator ends on a key accepted by the branch taken (see
 *         above); {@code false} when the jump target is out of bounds, no key was found, or
 *         the column qualifier lacks the expected NULL separator
 * @throws IOException declared by the signature; presumably propagated from
 *         {@code jumpSeek} — confirm
 */
public boolean jump(Key jumpKey) throws IOException {
    if (log.isDebugEnabled()) {
        String pEndRow = "empty";
        if (parentEndRow != null) {
            pEndRow = parentEndRow.toString();
        }
        log.debug("jump, current range: " + range + "  parentEndRow is: " + pEndRow);

    }

    if (parentEndRow != null && jumpKey.getRow().compareTo(parentEndRow) > 0) {
        // can't go there.
        if (log.isDebugEnabled()) {
            log.debug("jumpRow: " + jumpKey.getRow() + " is greater than my parentEndRow: " + parentEndRow);
        }
        return false;
    }

    // Result of comparing "my" row with the jump row; drives the three-way branch below.
    int comp;
    if (!this.hasTop()) {
        if (log.isDebugEnabled()) {
            log.debug("current row: " + this.currentRow);
        }

        /*
         * if I don't have a top, then I should be out of my range for my current row. Need to check parent range to see if I'm supposed to continue to next row
         * or not. Current row can be null because maybe I never found anything in this row.
         */

        if (parentEndRow != null) {
            // if jumpKey row is greater than parentEndRow, stop
            if (jumpKey.getRow().compareTo(parentEndRow) > 0) {
                if (log.isDebugEnabled()) {
                    log.debug("jumpKey row is greater than my parentEndRow, done");
                }
                return false;
            }

            // if my current row is null, I must have hit the end of the tablet
            if (currentRow == null) {
                if (log.isDebugEnabled()) {
                    log.debug("I have parentEndRow, but no current row, must have hit end of tablet, done");
                }
                return false;
            }

            // if my current row is greater than jump row stop, a seek will be
            // called to get me going again. If my row is equal, but i don't
            // have a topkey, i'm done
            if (currentRow.compareTo(jumpKey.getRow()) >= 0) {
                if (log.isDebugEnabled()) {
                    log.debug("I have parentEndRow, but topKey, and my currentRow is >= jumpRow, done");
                }
                return false;
            }

        } else { // we're allowed to go to the end of the tablet
            // if my current row is null, I must have hit the end of the tablet
            if (currentRow == null) {
                if (log.isDebugEnabled()) {
                    log.debug("no parentEndRow and current Row is null, must have hit end of tablet, done");
                }
                return false;
            }

            if (currentRow.compareTo(jumpKey.getRow()) >= 0) {
                // i'm past or equal to the jump point and have no top,
                // jumping's not going to help
                if (log.isDebugEnabled()) {
                    log.debug("no parentEndRow, no topKey, and currentRow is >= jumpRow, done");
                }
                return false;
            }
        }

        // ok, jumpKey is ahead of me I'll mark it and allow the normal
        // flow to jump there and see if I have top.
        if (log.isDebugEnabled()) {
            log.debug("no topKey, but jumpRow is ahead and I'm allowed to go to it, marking");
        }
        // Force the "behind the jump key" branch below.
        comp = -1;

    } else { // I have a topKey, I can do the normal comparisons
        if (log.isDebugEnabled()) {
            log.debug("have top, can do normal comparisons");
        }
        comp = this.topKey.getRow().compareTo(jumpKey.getRow());
    }

    // ------------------
    // compare rows
    if (comp > 0) { // my row is ahead of jump key
        if (canBeInNextRow()) {
            if (log.isDebugEnabled()) {
                log.debug("I'm ahead of jump row & it's ok.");
                log.debug("jumpRow: " + jumpKey.getRow() + " myRow: " + topKey.getRow() + " parentEndRow: "
                        + parentEndRow);
            }
            return true;
        } else {
            if (log.isDebugEnabled()) {
                log.debug("I'm ahead of jump row & can't be here, or at end of tablet.");
            }
            topKey = null;
            topValue = null;
            return false;
        }

    } else if (comp < 0) { // a row behind jump key, need to move forward
        if (log.isDebugEnabled()) {
            String myRow = "";
            if (hasTop()) {
                myRow = topKey.getRow().toString();
            } else if (currentRow != null) {
                myRow = currentRow.toString();
            }
            log.debug("My row " + myRow + " is less than jump row: " + jumpKey.getRow() + " seeking");
        }
        // Replace the internal range with one built from the jump row, then seek into it.
        range = buildRange(jumpKey.getRow());
        // this.seek(range, EMPTY_COL_FAMS, false);

        boolean success = jumpSeek(range);
        if (log.isDebugEnabled() && success) {
            log.debug("uid forced jump, found topKey: " + topKey);
        }

        if (!this.hasTop()) {
            log.debug("seeked with new row and had no top");
            topKey = null;
            topValue = null;
            return false;
        } else if (parentEndRow != null && currentRow.compareTo(parentEndRow) > 0) {
            // NOTE(review): assumes jumpSeek leaves currentRow non-null whenever hasTop() is
            // true — confirm, otherwise this comparison can throw.
            if (log.isDebugEnabled()) {
                log.debug("myRow: " + getTopKey().getRow() + " is past parentEndRow: " + parentEndRow);
            }
            topKey = null;
            topValue = null;
            return false;
        }
        if (log.isDebugEnabled()) {
            log.debug("jumped, valid top: " + getTopKey());
        }

        return true;

    } else { // rows are equal, check the uid!

        // keyParser is re-used for both keys, so each UID is read right after its parse call.
        keyParser.parse(topKey);
        String myUid = keyParser.getUid();
        keyParser.parse(jumpKey);
        String jumpUid = keyParser.getUid();

        int ucomp = myUid.compareTo(jumpUid);
        if (log.isDebugEnabled()) {
            log.debug("topKeyUid: " + myUid + "  jumpUid: " + jumpUid + "  myUid.compareTo(jumpUid)->" + ucomp);
        }
        if (ucomp < 0) { // need to move up
            log.debug("my uid is less than jumpUid, topUid: " + myUid + "   jumpUid: " + jumpUid);

            // Keep only the part of the jump key's column qualifier after the first NULL byte.
            // getBytes() is bounded here by getLength(), so only valid bytes are copied.
            Text cq = jumpKey.getColumnQualifier();
            int index = cq.find(NULL_BYTE);
            if (0 <= index) {
                cq.set(cq.getBytes(), index + 1, cq.getLength() - index - 1);
            } else {
                log.error("Expected a NULL separator in the column qualifier");
                this.topKey = null;
                this.topValue = null;
                return false;
            }

            // note my internal range stays the same, I just need to move forward
            Key startKey = new Key(topKey.getRow(), fName, new Text(fValue + NULL_BYTE + cq));
            Key endKey = new Key(topKey.getRow(), fName, new Text(fValue + ONE_BYTE));
            range = new Range(startKey, true, endKey, false);
            log.debug("Using range: " + range + " to seek");
            // source.seek(range, EMPTY_COL_FAMS, false);
            boolean success = jumpSeek(range);
            if (log.isDebugEnabled() && success) {
                log.debug("uid forced jump, found topKey: " + topKey);
            }

            return success;

        } else { // else do nothing
            log.debug("my uid is greater than jumpUid, topKey: " + topKey + "   jumpKey: " + jumpKey);
            log.debug("doing nothing");
        }
    }

    // Only reached from the equal-row branch when this iterator's UID is >= the jump UID.
    return hasTop();
}

From source file:org.apache.accumulo.pig.AccumuloWholeRowStorage.java

License:Apache License

/**
 * Builds a five-field tuple describing one column: family, qualifier, visibility,
 * timestamp and value, in that order.
 *
 * <p>{@link Text#getBytes()} exposes the whole backing array, which may be longer than the
 * valid data, so each {@code Text} is copied down to its {@code getLength()} bytes before
 * being wrapped. This keeps stale trailing bytes out of the tuple.
 *
 * @param colfam  the column family
 * @param colqual the column qualifier
 * @param colvis  the column visibility
 * @param ts      the timestamp
 * @param val     the cell value
 * @return a new tuple of {@code (colfam, colqual, colvis, ts, val)}
 * @throws IOException declared by the signature; presumably from {@code Tuple.set} — confirm
 */
private Tuple columnToTuple(Text colfam, Text colqual, Text colvis, long ts, Value val) throws IOException {
    Tuple tuple = TupleFactory.getInstance().newTuple(5);
    tuple.set(0, new DataByteArray(java.util.Arrays.copyOf(colfam.getBytes(), colfam.getLength())));
    tuple.set(1, new DataByteArray(java.util.Arrays.copyOf(colqual.getBytes(), colqual.getLength())));
    tuple.set(2, new DataByteArray(java.util.Arrays.copyOf(colvis.getBytes(), colvis.getLength())));
    tuple.set(3, Long.valueOf(ts));
    tuple.set(4, new DataByteArray(val.get()));
    return tuple;
}