Example usage for org.apache.hadoop.io Text toString

List of usage examples for org.apache.hadoop.io Text toString

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text.toString().

Prototype

@Override
public String toString() 

Source Link

Document

Convert text back to string

Usage

From source file:com.bark.hadoop.lab3.RedLinkMapper.java

/**
 * Emits a page-existence marker and inlink pairs for one page record.
 *
 * <p>The record is parsed as XML and the character data of its {@code title} and {@code text}
 * elements is collected. The mapper then writes {@code (title, "!")} and, for every link
 * returned by {@code findLinks}, {@code (linkTarget, title)} unless the link points back at
 * the page itself. If the record cannot be parsed as XML the error is logged and nothing is
 * written for it.
 *
 * @param key     input key (not used)
 * @param value   one page record as XML text
 * @param context sink for the emitted pairs
 * @throws IOException          if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    // A single-element nowiki tag makes the XML stream parser stop, so remove it first.
    String fixed = value.toString().replaceAll("<nowiki />|&lt;nowiki /&gt;", "");
    try {
        StringBuilder titleChars = new StringBuilder();
        StringBuilder textChars = new StringBuilder();

        // The parser receives raw bytes, so encode with a fixed charset instead of the
        // platform default, which may differ from node to node.
        XMLStreamReader reader = XMLInputFactory.newInstance().createXMLStreamReader(
                new ByteArrayInputStream(fixed.getBytes(java.nio.charset.StandardCharsets.UTF_8)));
        try {
            String currentElement = "";
            while (reader.hasNext()) {
                int code = reader.next();
                switch (code) {
                case START_ELEMENT:
                    currentElement = reader.getLocalName();
                    break;
                case CHARACTERS:
                    if (currentElement.equalsIgnoreCase("title")) {
                        titleChars.append(reader.getText());
                    } else if (currentElement.equalsIgnoreCase("text")) {
                        textChars.append(reader.getText());
                    }
                    break;
                default:
                    break;
                }
            }
        } finally {
            // Release the reader even when parsing fails part-way through.
            reader.close();
        }

        // At this point the title and text data are complete.
        String title = titleChars.toString().trim().replaceAll(" ", "_");
        String textData = textChars.toString();

        // Find type 1 links, e.g. [[some text]], and type 2 links, e.g. [[a|b]].
        ArrayList<String> myLinks = new ArrayList<>();
        try {
            myLinks = findLinks(textData);
        } catch (Exception e) {
            Logger.getLogger(RedLinkMapper.class.getName()).log(Level.SEVERE, e.getMessage(), e);
        }

        // For every title that exists, write the title and "!".
        context.write(new Text(title), new Text("!"));

        for (String link : myLinks) {
            // Write (link, title) pairs (inlinks); multiple writes are ok.
            String target = link.replaceAll(" ", "_");
            // Keep only the part before the first '|'. indexOf is used instead of
            // split("\\|")[0] because split returns an empty array for a link made up
            // solely of '|' characters, which made the [0] access throw.
            int pipe = target.indexOf('|');
            if (pipe >= 0) {
                target = target.substring(0, pipe);
            }
            if (!title.equals(target)) {
                context.write(new Text(target), new Text(title));
            }
        }
    } catch (XMLStreamException ex) {
        Logger.getLogger(RedLinkMapper.class.getName()).log(Level.SEVERE, ex.toString(), ex);
    }
}

From source file:com.bark.hadoop.lab3.RedLinkReducer.java

/**
 * Writes the distinct values of a key, but only when one of them is the "!" marker.
 *
 * <p>A "!" value means a (page, "!") pair was seen for this key, i.e. the page exists and
 * links to it are not red links. Keys without the marker produce no output.
 *
 * @param key     the key being reduced
 * @param values  all values grouped under {@code key}
 * @param context sink for the emitted pairs
 * @throws IOException          if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    // A set collapses duplicate values.
    HashSet<String> distinctValues = new HashSet<>();
    boolean pageExists = false;

    for (Text current : values) {
        String trimmed = current.toString().trim();
        if (trimmed.equalsIgnoreCase("!")) {
            pageExists = true;
        }
        distinctValues.add(trimmed);
    }

    // No marker seen: the key is treated as a red link and ignored.
    if (!pageExists) {
        return;
    }

    for (String distinct : distinctValues) {
        context.write(key, new Text(distinct));
    }
}

From source file:com.bark.hadoop.lab3.SortMapper.java

/**
 * Intermediate files identify page ranks with a "_!0.0000.." token. Compiled once instead of
 * once per record.
 * NOTE(review): the '.' in the expression is unescaped and therefore matches any character,
 * not only a decimal point - confirm whether "\\." was intended.
 */
private static final Pattern PAGE_RANK_PATTERN = Pattern.compile("(_!\\d+.\\S+)");

/**
 * Emits {@code (pageRank, firstColumn)} for records whose page rank is at least 5/N.
 *
 * <p>The page rank is taken from the first "_!" token found in the record, with the "_!"
 * prefix stripped; records without such a token are treated as having page rank 0. N is read
 * from the job configuration under the key "N" and defaults to 0, in which case the threshold
 * is positive infinity and nothing is emitted.
 *
 * @param key     input key (not used)
 * @param value   one tab-separated record
 * @param context sink for the emitted pairs; also supplies the configuration
 * @throws IOException          if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    // Decode the record once and reuse it for both matching and splitting.
    String record = value.toString();

    double pageRank = 0;
    Matcher rankMatcher = PAGE_RANK_PATTERN.matcher(record);
    if (rankMatcher.find()) {
        // Drop the leading "_!" before parsing the number.
        pageRank = Double.parseDouble(rankMatcher.group(1).substring(2));
    }

    // Ignore cases with page ranks below 5/N.
    double minThreshold = 5d / (context.getConfiguration().getInt("N", 0));
    if (pageRank >= minThreshold) {
        context.write(new DoubleWritable(pageRank), new Text(record.split("\t")[0]));
    }
}

From source file:com.basho.riak.hadoop.RiakRecordWriter.java

License:Apache License

/**
 * Stores {@code value} at the location formed from {@code ns} and the string form of
 * {@code key}, using the default store options.
 *
 * @param key   key whose string form names the location
 * @param value object to store
 * @throws IOException          wrapping any {@link ExecutionException} raised by the store
 * @throws InterruptedException declared by the interface; propagated if raised by the client
 */
@Override
public void write(Text key, V value) throws IOException, InterruptedException {
    try {
        Location target = new Location(ns, key.toString());

        // Store object with default options; the response is not needed.
        StoreValue storeOperation = new StoreValue.Builder(value).withLocation(target).build();
        client.execute(storeOperation);
    } catch (ExecutionException failure) {
        throw new IOException(failure);
    }
}

From source file:com.bizosys.hsearch.kv.impl.bytescooker.IndexField.java

License:Apache License

/**
 * Adds every {@code <containerKey><separator><value>} line to this index and returns the
 * serialized result.
 *
 * <p>Each non-null entry is split at the first {@code KVIndexer.FIELD_SEPARATOR}; the part
 * before it is parsed as an integer container key and passed to {@code add} together with the
 * remainder. A line without a separator has no key and fails the integer parse.
 *
 * @param values the lines to index; {@code null} entries are skipped
 * @return the result of {@code getBytes()}, or {@code null} if no line was processed
 * @throws IOException if a container key is not a valid integer; the original
 *                     {@link NumberFormatException} is attached as the cause
 */
public byte[] index(Iterable<Text> values) throws IOException {

    boolean hasValue = false;
    // Kept outside the loop so the error message can report the offending input.
    String line = null;
    String currentF = null;

    try {
        for (Text text : values) {
            if (null == text) {
                continue;
            }

            line = text.toString();

            String containerKeyText = null;
            String fieldValue = null;
            int index = line.indexOf(KVIndexer.FIELD_SEPARATOR);
            if (index >= 0) {
                containerKeyText = line.substring(0, index);
                if (index <= line.length() - 1) {
                    fieldValue = line.substring(index + 1);
                }
            }

            currentF = containerKeyText;
            int containerKey = Integer.parseInt(currentF);
            hasValue = true;
            add(containerKey, fieldValue);
        }
    } catch (NumberFormatException ex) {
        // Attach the cause instead of printing the stack trace and discarding it.
        throw new IOException("Unable to parse number - [" + currentF + "] for input " + line
                + " with line sep :" + KVIndexer.FIELD_SEPARATOR + " because " + ex.getMessage(), ex);
    }

    return hasValue ? getBytes() : null;
}

From source file:com.bizosys.hsearch.kv.indexing.KVMapperFile.java

License:Apache License

/**
 * Splits one input line into fields and hands them to {@code kBase.map}.
 *
 * <p>On the first call with {@code isSkipHeader} set, the flag is cleared and the record is
 * dropped if its key is 0. The shared {@code result} array is sized from the field count of
 * the first line that reaches the split and is reused for every later line.
 *
 * @param key     input key; compared against 0 for header skipping
 * @param value   one line of field-separated text
 * @param context passed through to {@code kBase.map}
 * @throws IOException          declared by the mapper contract
 * @throws InterruptedException declared by the mapper contract
 */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

    if (isSkipHeader) {
        isSkipHeader = false;
        if (key.get() == 0) {
            return;
        }
    }

    final String line = value.toString();

    if (result == null) {
        // Probe split: only the number of fields is needed to size the reusable array.
        ArrayList<String> probe = new ArrayList<String>();
        LineReaderUtil.fastSplit(probe, line, KVIndexer.FIELD_SEPARATOR);
        result = new String[probe.size()];
    }

    // Clear leftovers from the previous line before refilling.
    Arrays.fill(result, null);
    LineReaderUtil.fastSplit(result, line, KVIndexer.FIELD_SEPARATOR);

    kBase.map(result, context);
}

From source file:com.bizosys.hsearch.kv.indexing.KVMapperHFile.java

License:Apache License

/**
 * Wraps one (row key, value bytes) pair in a {@code KeyValue} and writes it to the context.
 *
 * <p>Failures are reported on standard error together with the JVM memory figures and do not
 * propagate; the record is dropped.
 *
 * @param key     row key
 * @param value   cell payload; its bytes are copied
 * @param context sink for the emitted pair
 */
@Override
protected void map(Text key, ImmutableBytesWritable value, Context context) {

    try {

        String rowKey = key.toString();
        byte[] data = value.copyBytes();
        // Encode with a fixed charset so row keys do not depend on the platform default.
        hKey.set(rowKey.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        KeyValue kv = new KeyValue(hKey.get(), familyName, qualifier, data);
        context.write(hKey, kv);

    } catch (Exception e) {

        if (e instanceof InterruptedException) {
            // The exception is swallowed here, so preserve the interrupt for the caller.
            Thread.currentThread().interrupt();
        }

        // "x / 1024 * 1024" evaluates left to right and yields (roughly) bytes again;
        // divide by the product to actually report megabytes.
        final long bytesPerMb = 1024L * 1024L;
        Runtime runtime = Runtime.getRuntime();
        System.err.println(
                "Error in processing for row key : " + key.toString() + "\t and value size " + value.getLength()
                        + "\n Memory total:max:free(MB) " + runtime.totalMemory() / bytesPerMb
                        + " : " + runtime.maxMemory() / bytesPerMb + " : "
                        + runtime.freeMemory() / bytesPerMb + " : "
                        + "\n Cause: " + e);
    }

}

From source file:com.bizosys.hsearch.kv.indexing.KVMapperLocal.java

License:Apache License

/**
 * Splits one input line into fields and hands them to {@code kBase.map}.
 *
 * <p>The shared {@code result} array is sized from the field count of the first line seen and
 * is reused for every later line.
 *
 * @param key     input key (not used)
 * @param value   one line of field-separated text
 * @param context passed through to {@code kBase.map}
 * @throws IOException          declared by the mapper contract
 * @throws InterruptedException declared by the mapper contract
 */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

    final String line = value.toString();

    if (result == null) {
        // Probe split: only the number of fields is needed to size the reusable array.
        ArrayList<String> probe = new ArrayList<String>();
        LineReaderUtil.fastSplit(probe, line, KVIndexer.FIELD_SEPARATOR);
        result = new String[probe.size()];
    }

    // Clear leftovers from the previous line before refilling.
    Arrays.fill(result, null);
    LineReaderUtil.fastSplit(result, line, KVIndexer.FIELD_SEPARATOR);

    kBase.map(result, context);
}

From source file:com.bizosys.hsearch.kv.indexing.KVMapperMapFile.java

License:Apache License

/**
 * Splits the value into rows, splits each row into fields and hands the fields to
 * {@code kBase.map}.
 *
 * <p>The shared {@code result} array is sized from the field count of the first row seen and
 * is reused for every later row.
 *
 * @param key     input key (not used)
 * @param value   text containing rows separated by {@code LINE_SEPARATOR}
 * @param context passed through to {@code kBase.map}
 * @throws IOException          declared by the mapper contract
 * @throws InterruptedException declared by the mapper contract
 */
@Override
protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {

    // No record is actually skipped here; the flag is only cleared.
    if (isSkipHeader) {
        isSkipHeader = false;
    }

    List<String> rows = new ArrayList<String>();
    LineReaderUtil.fastSplit(rows, value.toString(), LINE_SEPARATOR);

    for (String currentRow : rows) {

        if (result == null) {
            // Probe split: only the number of fields is needed to size the reusable array.
            ArrayList<String> probe = new ArrayList<String>();
            LineReaderUtil.fastSplit(probe, currentRow, KVIndexer.FIELD_SEPARATOR);
            result = new String[probe.size()];
        }

        // Clear leftovers from the previous row before refilling.
        Arrays.fill(result, null);
        LineReaderUtil.fastSplit(result, currentRow, KVIndexer.FIELD_SEPARATOR);

        kBase.map(result, context);
    }
}

From source file:com.bizosys.hsearch.kv.indexing.KVReducerBase.java

License:Apache License

/**
 * Serializes the values of one key with the cooker selected by {@code dataTypeChar}.
 *
 * <p>Dispatch: 't' string, 'e' text (bitset when the field is repeatable, set otherwise),
 * 'i' integer, 'f' float, 'd' double, 'l' long, 's' short, 'b' boolean, 'c' byte. Any other
 * character collects the values as strings and serializes them with
 * {@code SortedBytesString}.
 *
 * <p>For 'e', a key longer than one character that ends in '*' enables "skip single" mode
 * and has its last two characters removed in place, so the caller's builder is modified.
 *
 * @param key          key being reduced; may be shortened in place for type 'e'
 * @param values       values grouped under the key
 * @param existingData previously cooked bytes, passed through to the cooker
 * @param fld          field settings; when {@code null} the name is {@code null} and all
 *                     flags are {@code false}
 * @param dataTypeChar type selector
 * @return the bytes produced by the selected cooker
 * @throws IOException if the selected cooker fails
 */
public byte[] cookBytes(StringBuilder key, Iterable<Text> values, byte[] existingData, Field fld,
        char dataTypeChar) throws IOException {

    final boolean hasField = (null != fld);
    final String fieldName = hasField ? fld.name : null;
    final boolean compressed = hasField && fld.isCompressed;
    final boolean repeatable = hasField && fld.isRepeatable;
    final boolean analyzed = hasField && fld.isAnalyzed;

    switch (dataTypeChar) {

    case 't':
        return IndexFieldString.cook(values, existingData, repeatable, compressed);

    case 'e': {
        // Skip multi phrases which are only sighted once.
        final int keyLen = key.length();
        final boolean skipSingle = (keyLen > 1) && (key.charAt(keyLen - 1) == '*');
        if (skipSingle) {
            key.delete(keyLen - 2, keyLen);
        }

        if (repeatable) {
            return indexTextBitset(skipSingle, existingData, values, analyzed, fieldName, compressed);
        }
        return indexTextSet(skipSingle, existingData, values, analyzed, fieldName);
    }

    case 'i':
        return IndexFieldInteger.cook(values, existingData, repeatable, compressed);

    case 'f':
        return IndexFieldFloat.cook(values, existingData, repeatable, compressed);

    case 'd':
        return IndexFieldDouble.cook(values, existingData, repeatable, compressed);

    case 'l':
        return IndexFieldLong.cook(values, existingData, repeatable, compressed);

    case 's':
        return IndexFieldShort.cook(values, existingData, repeatable, compressed);

    case 'b':
        return IndexFieldBoolean.cook(values, existingData, repeatable, compressed);

    case 'c':
        return IndexFieldByte.cook(values, existingData, repeatable, compressed);

    default: {
        // Unknown type: keep the raw string form of every value.
        List<String> mergeKeys = new ArrayList<String>();
        for (Text mergeKey : values) {
            mergeKeys.add(mergeKey.toString());
        }
        return SortedBytesString.getInstance().toBytes(mergeKeys);
    }
    }
}