Example usage for org.apache.hadoop.io Text find

List of usage examples for org.apache.hadoop.io Text find

Introduction

This page collects example usages of the find method of org.apache.hadoop.io.Text.

Prototype

public int find(String what) 

Source Link

Usage

From source file:fi.tkk.ics.hadoop.bam.cli.plugins.chipster.SummarySort.java

License:Open Source License

/**
 * Advances the wrapped line reader by one record and sets {@code key} from the
 * first two tab-separated fields of that line, both of which are parsed as
 * decimal integers.
 *
 * @return {@code false} if the wrapped reader has no further record,
 *         {@code true} once {@code key} has been updated
 * @throws IOException if reading or decoding the line fails
 * @throws CharacterCodingException if a field is not valid UTF-8
 */
@Override
public boolean nextKeyValue() throws IOException, CharacterCodingException {
    if (!lineRR.nextKeyValue()) {
        return false;
    }

    final Text record = getCurrentValue();

    // Field one: every byte before the first tab.
    final int firstTab = record.find("\t");
    final int rid = Integer.parseInt(Text.decode(record.getBytes(), 0, firstTab));

    // Field two: the bytes strictly between the first and the second tab.
    final int secondTab = record.find("\t", firstTab + 1);
    final int fieldStart = firstTab + 1;
    final int fieldLength = secondTab - fieldStart;
    final int pos = Integer.parseInt(Text.decode(record.getBytes(), fieldStart, fieldLength));

    key.set(BAMRecordReader.getKey0(rid, pos));
    return true;
}

From source file:io.aos.hdfs.StringTextComparisonTest.java

License:Apache License

/**
 * Checks that {@code Text} positions are byte offsets into the UTF-8 encoding:
 * the four characters of the sample occupy 1, 2, 3 and 4 bytes, so they start
 * at offsets 0, 1, 3 and 6 of a 10-byte buffer.
 */
@Test
public void text() {

    final String[] characters = { "\u0041", "\u00DF", "\u6771", "\uD801\uDC00" };
    final int[] byteOffsets = { 0, 1, 3, 6 };
    final int[] codePoints = { 0x0041, 0x00DF, 0x6771, 0x10400 };

    Text sample = new Text("\u0041\u00DF\u6771\uD801\uDC00");
    assertThat(sample.getLength(), is(10));

    // find() reports the byte offset at which each character starts.
    for (int i = 0; i < characters.length; i++) {
        assertThat(sample.find(characters[i]), is(byteOffsets[i]));
    }

    // charAt() takes a byte offset and yields the full code point found there.
    for (int i = 0; i < byteOffsets.length; i++) {
        assertThat(sample.charAt(byteOffsets[i]), is(codePoints[i]));
    }
}

From source file:io.aos.hdfs.TextTest.java

License:Apache License

/**
 * Exercises both {@code Text.find} overloads on the word "hadoop": substring
 * match, first match, match from a start offset, and the -1 result for a miss.
 */
@Test
public void find() throws IOException {
    // vv TextTest-Find
    Text word = new Text("hadoop");

    int substringAt = word.find("do");
    assertThat("Find a substring", substringAt, is(2));

    int firstOAt = word.find("o");
    assertThat("Finds first 'o'", firstOAt, is(3));

    int laterOAt = word.find("o", 4);
    assertThat("Finds 'o' from position 4 or later", laterOAt, is(4));

    int missingAt = word.find("pig");
    assertThat("No match", missingAt, is(-1));
    // ^^ TextTest-Find
}

From source file:io.aos.hdfs.TextTest.java

License:Apache License

/**
 * Contrasts {@code String} (indexed by UTF-16 code unit) with {@code Text}
 * (indexed by UTF-8 byte offset) on a sample whose last character is a
 * supplementary code point, U+10400, stored as a surrogate pair.
 */
@Test
public void withSupplementaryCharacters() throws IOException {

    // --- java.lang.String: five UTF-16 code units, ten UTF-8 bytes ---
    String s = "\u0041\u00DF\u6771\uD801\uDC00";
    assertThat(s.length(), is(5));
    assertThat(s.getBytes("UTF-8").length, is(10));

    final char[] codeUnits = { '\u0041', '\u00DF', '\u6771', '\uD801', '\uDC00' };

    // Each code unit is found at its own array position ...
    for (int i = 0; i < codeUnits.length; i++) {
        assertThat(s.indexOf(codeUnits[i]), is(i));
    }
    // ... and charAt returns that same code unit, surrogates included.
    for (int i = 0; i < codeUnits.length; i++) {
        assertThat(s.charAt(i), is(codeUnits[i]));
    }

    // --- org.apache.hadoop.io.Text: whole code points at byte offsets ---
    Text t = new Text("\u0041\u00DF\u6771\uD801\uDC00");

    assertThat(serializeToString(t), is("0a41c39fe69db1f0909080"));

    final String[] characters = { "\u0041", "\u00DF", "\u6771", "\uD801\uDC00" };
    final int[] codePoints = { 0x0041, 0x00DF, 0x6771, 0x10400 };
    for (int i = 0; i < characters.length; i++) {
        assertThat(t.charAt(t.find(characters[i])), is(codePoints[i]));
    }

}

From source file:it.uniroma1.hadoop.pagerank.job1.PageRankJob1Mapper.java

License:Open Source License

@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

    /* Job#1 mapper will simply parse a line of the input graph creating a map with key-value(s) pairs.
     * Input format is the following (separator is TAB):
     * /*from w  ww  .  j  a  v a 2  s. c  om*/
     *     <nodeA>    <nodeB>
     * 
     * which denotes an edge going from <nodeA> to <nodeB>.
     * We would need to skip comment lines (denoted by the # characters at the beginning of the line).
     * We will also collect all the distinct nodes in our graph: this is needed to compute the initial 
     * pagerank value in Job #1 reducer and also in later jobs.
     */

    if (value.charAt(0) != '#') {

        int tabIndex = value.find("\t");
        String nodeA = Text.decode(value.getBytes(), 0, tabIndex);
        String nodeB = Text.decode(value.getBytes(), tabIndex + 1, value.getLength() - (tabIndex + 1));
        context.write(new Text(nodeA), new Text(nodeB));

        // add the current source node to the node list so we can 
        // compute the total amount of nodes of our graph in Job#2
        PageRank.NODES.add(nodeA);
        // also add the target node to the same list: we may have a target node 
        // with no outlinks (so it will never be parsed as source)
        PageRank.NODES.add(nodeB);

    }

}

From source file:it.uniroma1.hadoop.pagerank.job2.PageRankJob2Mapper.java

License:Open Source License

@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

    /* PageRank calculation algorithm (mapper)
     * Input file format (separator is TAB):
     * /*from ww  w. j  a va  2 s  .c  o m*/
     *     <title>    <page-rank>    <link1>,<link2>,<link3>,<link4>,... ,<linkN>
     * 
     * Output has 2 kind of records:
     * One record composed by the collection of links of each page:
     *     
     *     <title>   |<link1>,<link2>,<link3>,<link4>, ... , <linkN>
     *     
     * Another record composed by the linked page, the page rank of the source page 
     * and the total amount of out links of the source page:
     *  
     *     <link>    <page-rank>    <total-links>
     */

    int tIdx1 = value.find("\t");
    int tIdx2 = value.find("\t", tIdx1 + 1);

    // extract tokens from the current line
    String page = Text.decode(value.getBytes(), 0, tIdx1);
    String pageRank = Text.decode(value.getBytes(), tIdx1 + 1, tIdx2 - (tIdx1 + 1));
    String links = Text.decode(value.getBytes(), tIdx2 + 1, value.getLength() - (tIdx2 + 1));

    String[] allOtherPages = links.split(",");
    for (String otherPage : allOtherPages) {
        Text pageRankWithTotalLinks = new Text(pageRank + "\t" + allOtherPages.length);
        context.write(new Text(otherPage), pageRankWithTotalLinks);
    }

    // put the original links so the reducer is able to produce the correct output
    context.write(new Text(page), new Text(PageRank.LINKS_SEPARATOR + links));

}

From source file:it.uniroma1.hadoop.pagerank.job3.PageRankJob3Mapper.java

License:Open Source License

@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

    /* Rank Ordering (mapper only)
     * Input file format (separator is TAB):
     * /*from  w  w w  .j  a  v  a2  s  . c o  m*/
     *     <title>    <page-rank>    <link1>,<link2>,<link3>,<link4>,... ,<linkN>
     * 
     * This is a simple job which does the ordering of our documents according to the computed pagerank.
     * We will map the pagerank (key) to its value (page) and Hadoop will do the sorting on keys for us.
     * There is no need to implement a reducer: the mapping and sorting is enough for our purpose.
     */

    int tIdx1 = value.find("\t");
    int tIdx2 = value.find("\t", tIdx1 + 1);

    // extract tokens from the current line
    String page = Text.decode(value.getBytes(), 0, tIdx1);
    float pageRank = Float.parseFloat(Text.decode(value.getBytes(), tIdx1 + 1, tIdx2 - (tIdx1 + 1)));

    context.write(new DoubleWritable(pageRank), new Text(page));

}

From source file:org.apache.accumulo.core.iterators.user.IndexedDocIterator.java

License:Apache License

/**
 * Extracts the bytes that lie strictly between the first and the third NUL
 * byte of a key's column qualifier.
 *
 * @param key key whose column qualifier is examined
 * @return a new {@code Text} holding that byte range
 * @throws IllegalArgumentException if the qualifier has fewer than three NUL
 *         bytes, or if copying the range fails with an array-index error
 */
public static Text parseDocID(Key key) {
    final Text qualifier = key.getColumnQualifier();

    // Find the three separators in turn; each search resumes just past the previous hit.
    final int[] zeroAt = new int[3];
    int searchFrom = 0;
    for (int i = 0; i < zeroAt.length; i++) {
        zeroAt[i] = qualifier.find("\0", searchFrom);
        if (zeroAt[i] < 0) {
            throw new IllegalArgumentException("bad docid: " + key.toString());
        }
        searchFrom = zeroAt[i] + 1;
    }

    final Text docID = new Text();
    final int start = zeroAt[0] + 1;
    final int length = zeroAt[2] - 1 - zeroAt[0];
    try {
        docID.set(qualifier.getBytes(), start, length);
    } catch (ArrayIndexOutOfBoundsException e) {
        throw new IllegalArgumentException("bad indices for docid: " + key.toString() + " " + zeroAt[0]
                + " " + zeroAt[1] + " " + zeroAt[2]);
    }
    return docID;
}

From source file:org.apache.accumulo.core.iterators.user.IndexedDocIterator.java

License:Apache License

/**
 * Returns the term portion of a key: the column-qualifier bytes that precede
 * the first NUL byte.
 *
 * @param key key to read the term from
 * @return the term, or a {@code Text} holding only U+FFFD when the comparison
 *         of {@code indexColf} against the key's column family is negative
 */
@Override
protected Text getTerm(Key key) {
    final byte[] familyBytes = key.getColumnFamily().getBytes();
    final boolean pastIndexFamily = indexColf.compareTo(familyBytes, 0, indexColf.getLength()) < 0;
    if (pastIndexFamily) {
        // Beyond the index column family: hand back a sentinel term that is
        // meant to sort lexicographically after any real term.
        return new Text("\uFFFD");
    }

    final Text qualifier = key.getColumnQualifier();
    final int termLength = qualifier.find("\0");
    final Text term = new Text();
    term.set(qualifier.getBytes(), 0, termLength);
    return term;
}

From source file:org.apache.accumulo.examples.wikisearch.iterator.FieldIndexIterator.java

License:Apache License

/**
 * Tries to move this iterator forward so that it is positioned at or beyond {@code jumpKey}.
 * <p>
 * The method first compares rows (using {@code topKey}'s row when there is a top, otherwise
 * {@code currentRow}) and then takes one of three paths:
 * <ul>
 * <li>already ahead of the jump row: nothing is sought; the result depends on {@code canBeInNextRow()};</li>
 * <li>behind the jump row: a new range is built for the jump row and {@code jumpSeek} is called;</li>
 * <li>same row: the uids parsed from {@code topKey} and {@code jumpKey} are compared, and a seek is
 * issued only when this iterator's uid is smaller.</li>
 * </ul>
 * On several failure paths {@code topKey} and {@code topValue} are reset to {@code null}.
 * In the same-row path, {@code set} is called on the {@code Text} returned by
 * {@code jumpKey.getColumnQualifier()} to drop everything up to and including the first
 * {@code NULL_BYTE}.
 *
 * @param jumpKey the key to jump to; its row, and in the same-row case its uid and column
 *        qualifier, are used
 * @return {@code true} if the iterator is considered validly positioned after the call,
 *         {@code false} if it cannot or need not move (see the individual branches)
 * @throws IOException declared by this method; presumably raised by the underlying seek -- verify
 */
public boolean jump(Key jumpKey) throws IOException {
    if (log.isDebugEnabled()) {
        String pEndRow = "empty";
        if (parentEndRow != null) {
            pEndRow = parentEndRow.toString();
        }
        log.debug("jump, current range: " + range + "  parentEndRow is: " + pEndRow);

    }

    if (parentEndRow != null && jumpKey.getRow().compareTo(parentEndRow) > 0) {
        // can't go there.
        if (log.isDebugEnabled()) {
            log.debug("jumpRow: " + jumpKey.getRow() + " is greater than my parentEndRow: " + parentEndRow);
        }
        return false;
    }

    // Result of comparing this iterator's row with the jump row:
    // > 0 ahead of the jump row, < 0 behind it, 0 same row.
    int comp;
    if (!this.hasTop()) {
        if (log.isDebugEnabled()) {
            log.debug("current row: " + this.currentRow);
        }

        /*
         * if I don't have a top, then I should be out of my range for my current row. Need to check parent range to see if I'm supposed to continue to next row
         * or not. Current row can be null because maybe I never found anything in this row.
         */

        if (parentEndRow != null) {
            // if jumpKey row is greater than parentEndRow, stop
            if (jumpKey.getRow().compareTo(parentEndRow) > 0) {
                if (log.isDebugEnabled()) {
                    log.debug("jumpKey row is greater than my parentEndRow, done");
                }
                return false;
            }

            // if my current row is null, I must have hit the end of the tablet
            if (currentRow == null) {
                if (log.isDebugEnabled()) {
                    log.debug("I have parentEndRow, but no current row, must have hit end of tablet, done");
                }
                return false;
            }

            // if my current row is greater than jump row stop, a seek will be
            // called to get me going again. If my row is equal, but i don't
            // have a topkey, i'm done
            if (currentRow.compareTo(jumpKey.getRow()) >= 0) {
                if (log.isDebugEnabled()) {
                    log.debug("I have parentEndRow, but topKey, and my currentRow is >= jumpRow, done");
                }
                return false;
            }

        } else { // we're allowed to go to the end of the tablet
            // if my current row is null, I must have hit the end of the tablet
            if (currentRow == null) {
                if (log.isDebugEnabled()) {
                    log.debug("no parentEndRow and current Row is null, must have hit end of tablet, done");
                }
                return false;
            }

            if (currentRow.compareTo(jumpKey.getRow()) >= 0) {
                // i'm past or equal to the jump point and have no top,
                // jumping's not going to help
                if (log.isDebugEnabled()) {
                    log.debug("no parentEndRow, no topKey, and currentRow is >= jumpRow, done");
                }
                return false;
            }
        }

        // ok, jumpKey is ahead of me I'll mark it and allow the normal
        // flow to jump there and see if I have top.
        if (log.isDebugEnabled()) {
            log.debug("no topKey, but jumpRow is ahead and I'm allowed to go to it, marking");
        }
        // Force the "behind the jump row" branch below.
        comp = -1;

    } else { // I have a topKey, I can do the normal comparisons
        if (log.isDebugEnabled()) {
            log.debug("have top, can do normal comparisons");
        }
        comp = this.topKey.getRow().compareTo(jumpKey.getRow());
    }

    // ------------------
    // compare rows
    if (comp > 0) { // my row is ahead of jump key
        if (canBeInNextRow()) {
            if (log.isDebugEnabled()) {
                log.debug("I'm ahead of jump row & it's ok.");
                log.debug("jumpRow: " + jumpKey.getRow() + " myRow: " + topKey.getRow() + " parentEndRow: "
                        + parentEndRow);
            }
            return true;
        } else {
            if (log.isDebugEnabled()) {
                log.debug("I'm ahead of jump row & can't be here, or at end of tablet.");
            }
            topKey = null;
            topValue = null;
            return false;
        }

    } else if (comp < 0) { // a row behind jump key, need to move forward
        if (log.isDebugEnabled()) {
            String myRow = "";
            if (hasTop()) {
                myRow = topKey.getRow().toString();
            } else if (currentRow != null) {
                myRow = currentRow.toString();
            }
            log.debug("My row " + myRow + " is less than jump row: " + jumpKey.getRow() + " seeking");
        }
        range = buildRange(jumpKey.getRow());
        // this.seek(range, EMPTY_COL_FAMS, false);

        boolean success = jumpSeek(range);
        if (log.isDebugEnabled() && success) {
            log.debug("uid forced jump, found topKey: " + topKey);
        }

        if (!this.hasTop()) {
            log.debug("seeked with new row and had no top");
            topKey = null;
            topValue = null;
            return false;
        } else if (parentEndRow != null && currentRow.compareTo(parentEndRow) > 0) {
            if (log.isDebugEnabled()) {
                log.debug("myRow: " + getTopKey().getRow() + " is past parentEndRow: " + parentEndRow);
            }
            topKey = null;
            topValue = null;
            return false;
        }
        if (log.isDebugEnabled()) {
            log.debug("jumped, valid top: " + getTopKey());
        }

        return true;

    } else { // rows are equal, check the uid!

        // keyParser is reused for both keys, so each uid is read right after its parse call.
        keyParser.parse(topKey);
        String myUid = keyParser.getUid();
        keyParser.parse(jumpKey);
        String jumpUid = keyParser.getUid();

        int ucomp = myUid.compareTo(jumpUid);
        if (log.isDebugEnabled()) {
            log.debug("topKeyUid: " + myUid + "  jumpUid: " + jumpUid + "  myUid.compareTo(jumpUid)->" + ucomp);
        }
        if (ucomp < 0) { // need to move up
            log.debug("my uid is less than jumpUid, topUid: " + myUid + "   jumpUid: " + jumpUid);

            // Keep only the part of the jump key's column qualifier that follows the first NULL_BYTE.
            Text cq = jumpKey.getColumnQualifier();
            int index = cq.find(NULL_BYTE);
            if (0 <= index) {
                cq.set(cq.getBytes(), index + 1, cq.getLength() - index - 1);
            } else {
                log.error("Expected a NULL separator in the column qualifier");
                this.topKey = null;
                this.topValue = null;
                return false;
            }

            // note my internal range stays the same, I just need to move forward
            Key startKey = new Key(topKey.getRow(), fName, new Text(fValue + NULL_BYTE + cq));
            Key endKey = new Key(topKey.getRow(), fName, new Text(fValue + ONE_BYTE));
            range = new Range(startKey, true, endKey, false);
            log.debug("Using range: " + range + " to seek");
            // source.seek(range, EMPTY_COL_FAMS, false);
            boolean success = jumpSeek(range);
            if (log.isDebugEnabled() && success) {
                log.debug("uid forced jump, found topKey: " + topKey);
            }

            return success;

        } else { // else do nothing
            log.debug("my uid is greater than jumpUid, topKey: " + topKey + "   jumpKey: " + jumpKey);
            log.debug("doing nothing");
        }
    }

    return hasTop();
}