Example usage for org.apache.hadoop.io.Text find

List of usage examples for org.apache.hadoop.io.Text find

Introduction

On this page you can find example usages of the find method of org.apache.hadoop.io.Text.

Prototype

public int find(String what) 

Source Link

Usage

From source file:com.lovelysystems.hive.udf.UnescapeXMLUDF.java

License:Apache License

/**
 * Copies the given text into the reusable {@code res} holder and returns it.
 *
 * @param s the input text; may be {@code null}
 * @return {@code null} when {@code s} is {@code null}, otherwise {@code res}
 *         after it has been set from {@code s}
 */
public Text evaluate(final Text s) {
    if (s == null) {
        return null;
    }

    final boolean containsAmpersand = s.find("&") != -1;
    if (containsAmpersand) {
        // NOTE(review): this branch only round-trips the value through a String;
        // no unescaping call is visible here -- confirm whether one is missing.
        res.set(s.toString());
    } else {
        // No '&' present: copy the input as-is.
        res.set(s);
    }
    return res;
}

From source file:com.pagerankcalculator.calculation.PageRankCalculationMapper.java

/**
 * Parses a record of the form {@code userID <TAB> pageRank <TAB> followingIDs}
 * and emits:
 * <ul>
 *   <li>for every entry of the following list, the pair
 *       {@code (followingID, pageRank <TAB> numberOfFollowingIDs)};</li>
 *   <li>once, the pair {@code (userID, FOLLOWING_LIST_TAG + followingIDs)}.</li>
 * </ul>
 *
 * @param key     unused by this method
 * @param value   the input record; presumably always contains two tabs -- a
 *                missing tab is not handled here (TODO confirm against the input format)
 * @param context sink for the emitted pairs
 * @throws IOException          if decoding the record or writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

    int tabIdx1 = value.find("\t");
    int tabIdx2 = value.find("\t", tabIdx1 + 1);

    // Lengths are derived from getLength() rather than the array size of getBytes().
    String userId = Text.decode(value.getBytes(), 0, tabIdx1);
    String pageRank = Text.decode(value.getBytes(), tabIdx1 + 1, tabIdx2 - (tabIdx1 + 1));
    String csvFollowingIds = Text.decode(value.getBytes(), tabIdx2 + 1, value.getLength() - (tabIdx2 + 1));

    String[] followingIds = csvFollowingIds.split(TwitterPageRank.FOLLOWING_LIST_DELIMETER);

    // The emitted value is the same for every following ID, so build it once.
    String pageRankWithTotalFollowing = pageRank + "\t" + followingIds.length;
    for (String followingId : followingIds) {
        context.write(new Text(followingId), new Text(pageRankWithTotalFollowing));
    }

    context.write(new Text(userId), new Text(TwitterPageRank.FOLLOWING_LIST_TAG + csvFollowingIds));
}

From source file:com.pagerankcalculator.graphparsing.GraphParsingMapper.java

/**
 * Splits the record at its first tab and emits the two halves swapped:
 * the text after the tab becomes the key, the text before it the value.
 *
 * <p>{@code userID} and {@code followerID} are not declared in this method;
 * presumably they are fields of the enclosing mapper (TODO confirm).
 *
 * @param key     unused by this method
 * @param value   the input record
 * @param context sink for the emitted pair
 * @throws IOException          if decoding the record or writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    final int separatorPos = value.find("\t");
    final int followerStart = separatorPos + 1;
    final int followerLength = value.getLength() - followerStart;

    userID = Text.decode(value.getBytes(), 0, separatorPos);
    followerID = Text.decode(value.getBytes(), followerStart, followerLength);

    final Text outKey = new Text(followerID);
    final Text outValue = new Text(userID);
    context.write(outKey, outValue);
}

From source file:com.pagerankcalculator.ordering.PageRankSortingMapper.java

/**
 * Reads the first two tab-separated fields of the record (user name and
 * page rank) and emits {@code (pageRank, username)}. Anything after the
 * second tab is not read.
 *
 * @param key     unused by this method
 * @param value   the input record; presumably always contains two tabs -- a
 *                missing tab is not handled here (TODO confirm against the input format)
 * @param context sink for the emitted pair
 * @throws IOException           if decoding the record or writing to the context fails
 * @throws InterruptedException  if writing to the context is interrupted
 * @throws NumberFormatException if the second field is not a parsable double
 */
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    int tabIdx1 = value.find("\t");
    int tabIdx2 = value.find("\t", tabIdx1 + 1);

    String username = Text.decode(value.getBytes(), 0, tabIdx1);

    // Double.parseDouble replaces the deprecated new Double(String) and avoids boxing.
    double pageRank = Double.parseDouble(Text.decode(value.getBytes(), tabIdx1 + 1, tabIdx2 - (tabIdx1 + 1)));

    context.write(new DoubleWritable(pageRank), new Text(username));
}

From source file:cosmos.impl.IndexToMultimapRecord.java

License:Apache License

/**
 * Extracts the part of the key's column qualifier that follows the first
 * occurrence of {@code Defaults.NULL_BYTE_STR} and passes it, as the
 * document id, to {@code sorts.contents(id, docId)}.
 *
 * @param input the entry whose key is inspected; the value is not read
 * @return the result of {@code sorts.contents(id, docId)}
 * @throws RuntimeException if the column qualifier does not contain the
 *         separator, or wrapping any checked exception raised while
 *         decoding or looking up the contents
 */
@Override
public MultimapRecord apply(Entry<Key, Value> input) {
    final Key key = input.getKey();
    final Text qualifier = key.getColumnQualifier();

    final int separatorPos = qualifier.find(Defaults.NULL_BYTE_STR);
    if (separatorPos == -1) {
        throw new RuntimeException("Was provided unexpected Key: " + key);
    }

    // Skips one byte past the match; assumes the separator is a single byte -- TODO confirm.
    final int docIdStart = separatorPos + 1;
    final int docIdLength = qualifier.getLength() - docIdStart;

    try {
        final String docId = Text.decode(qualifier.getBytes(), docIdStart, docIdLength);
        return sorts.contents(id, docId);
    } catch (TableNotFoundException tableMissing) {
        throw new RuntimeException(tableMissing);
    } catch (UnexpectedStateException badState) {
        throw new RuntimeException(badState);
    } catch (CharacterCodingException undecodable) {
        throw new RuntimeException(undecodable);
    }
}

From source file:crunch.MaxTemperature.java

License:Apache License

@Test
    public void text() {
        // Four characters whose find() positions are 0, 1, 3 and 6 in a Text of length 10.
        final Text sample = new Text("\u0041\u00DF\u6771\uD801\uDC00");
        assertThat(sample.getLength(), is(10));

        final String[] needles = { "\u0041", "\u00DF", "\u6771", "\uD801\uDC00" };
        final int[] expectedOffsets = { 0, 1, 3, 6 };
        final int[] expectedCodePoints = { 0x0041, 0x00DF, 0x6771, 0x10400 };

        // find() must report each character at its expected position.
        for (int i = 0; i < needles.length; i++) {
            assertThat(sample.find(needles[i]), is(expectedOffsets[i]));
        }

        // charAt() at those positions must yield the matching code point.
        for (int i = 0; i < expectedOffsets.length; i++) {
            assertThat(sample.charAt(expectedOffsets[i]), is(expectedCodePoints[i]));
        }
    }

From source file:crunch.MaxTemperature.java

License:Apache License

@Test
    public void find() throws IOException {
        // vv TextTest-Find
        Text word = new Text("hadoop");

        int substringPos = word.find("do");
        assertThat("Find a substring", substringPos, is(2));

        int firstOPos = word.find("o");
        assertThat("Finds first 'o'", firstOPos, is(3));

        int laterOPos = word.find("o", 4);
        assertThat("Finds 'o' from position 4 or later", laterOPos, is(4));

        int missingPos = word.find("pig");
        assertThat("No match", missingPos, is(-1));
        // ^^ TextTest-Find
    }

From source file:crunch.MaxTemperature.java

License:Apache License

@Test
    public void withSupplementaryCharacters() throws IOException {
        // Same content viewed two ways: as a java.lang.String (indexed by char)
        // and as a Text (indexed by the positions find() returns).
        final String s = "\u0041\u00DF\u6771\uD801\uDC00";
        assertThat(s.length(), is(5));
        assertThat(s.getBytes("UTF-8").length, is(10));

        // String view: the last character occupies two chars (a surrogate pair).
        final char[] expectedChars = { '\u0041', '\u00DF', '\u6771', '\uD801', '\uDC00' };
        for (int i = 0; i < expectedChars.length; i++) {
            assertThat(s.indexOf(expectedChars[i]), is(i));
        }
        for (int i = 0; i < expectedChars.length; i++) {
            assertThat(s.charAt(i), is(expectedChars[i]));
        }

        final Text t = new Text(s);
        assertThat(serializeToString(t), is("0a41c39fe69db1f0909080"));

        // Text view: charAt() at the position find() reports yields the whole code point.
        final String[] needles = { "\u0041", "\u00DF", "\u6771", "\uD801\uDC00" };
        final int[] expectedCodePoints = { 0x0041, 0x00DF, 0x6771, 0x10400 };
        for (int i = 0; i < needles.length; i++) {
            assertThat(t.charAt(t.find(needles[i])), is(expectedCodePoints[i]));
        }
    }

From source file:edu.uci.ics.fuzzyjoin.hadoop.recordpairs.MapBasicJoin.java

License:Apache License

public void map(Object unused, Text record, OutputCollector<IntTripleWritable, Text> output, Reporter reporter)
        throws IOException {
    String recordString = record.toString();
    if (record.find("" + FuzzyJoinConfig.RECORD_SEPARATOR) >= 0) {
        /*/*from  w  ww  .  j  a  va2s .  c o m*/
         * VALUE1: RID:Record
         * 
         * KEY2: 0/1 (0: Relation R, 1: Relation S), RID, 0
         * 
         * VALUE2: Record
         */
        String valueSplit[] = recordString.split(FuzzyJoinConfig.RECORD_SEPARATOR_REGEX);

        int relation = 0;
        if (reporter.getInputSplit().toString().contains(suffixSecond)) {
            relation = 1;
        }

        outputKey.set(relation, Integer.valueOf(valueSplit[FuzzyJoinConfig.RECORD_KEY]), 0);
        outputValue.set(record);
        output.collect(outputKey, outputValue);
    } else {
        /*
         * VALUE1: "RID-R RID-S Similarity"
         * 
         * KEY2: 0/1 (0: Relation R, 1: Relation S), RID, 1
         * 
         * VALUE2: "RIDOther Similarity"
         */
        String valueSplit[] = recordString.split(FuzzyJoinConfig.RIDPAIRS_SEPARATOR_REGEX);

        outputKey.set(0, Integer.parseInt(valueSplit[0]), 1);
        outputValue.set(valueSplit[1] + FuzzyJoinConfig.RIDPAIRS_SEPARATOR + valueSplit[2]);
        output.collect(outputKey, outputValue);

        outputKey.set(1, Integer.parseInt(valueSplit[1]), 1);
        outputValue.set(valueSplit[0] + FuzzyJoinConfig.RIDPAIRS_SEPARATOR + valueSplit[2]);
        output.collect(outputKey, outputValue);
    }
}

From source file:edu.uci.ics.fuzzyjoin.hadoop.recordpairs.MapBasicSelfJoin.java

License:Apache License

public void map(Object unused, Text inputValue, OutputCollector<IntPairWritable, Text> output,
        Reporter reporter) throws IOException {
    String recordString = inputValue.toString();
    if (inputValue.find("" + FuzzyJoinConfig.RECORD_SEPARATOR) >= 0) {
        /*/*from w ww.  j  a va  2 s .c om*/
         * VALUE1: RID:Record
         * 
         * KEY2: RID, 0
         * 
         * VALUE2: Record
         */
        String valueSplit[] = recordString.split(FuzzyJoinConfig.RECORD_SEPARATOR_REGEX);
        outputKey.set(Integer.valueOf(valueSplit[FuzzyJoinConfig.RECORD_KEY]), 0);
        outputValue.set(inputValue);
        output.collect(outputKey, outputValue);
    } else {
        /*
         * VALUE1: "RID1 RID2 Similarity"
         * 
         * KEY2: RID1, 1 and RID2, 1
         * 
         * VALUE2: "RID2 Similarity" and "RID1 Similarity"
         */
        String valueSplit[] = recordString.split(FuzzyJoinConfig.RIDPAIRS_SEPARATOR_REGEX);

        outputKey.set(Integer.parseInt(valueSplit[0]), 1);
        outputValue.set(valueSplit[1] + FuzzyJoinConfig.RIDPAIRS_SEPARATOR + valueSplit[2]);
        output.collect(outputKey, outputValue);

        outputKey.set(Integer.parseInt(valueSplit[1]), 1);
        outputValue.set(valueSplit[0] + FuzzyJoinConfig.RIDPAIRS_SEPARATOR + valueSplit[2]);
        output.collect(outputKey, outputValue);
    }
}