Example usage for org.apache.hadoop.io Text Text

List of usage examples for the org.apache.hadoop.io.Text constructor

Introduction

On this page you can find example usages of the org.apache.hadoop.io.Text constructor.

Prototype

public Text(byte[] utf8) 

Source Link

Document

Construct from a byte array.

Usage

From source file:br.ufrj.nce.recureco.distributedindex.indexer.IndexerReduce.java

License:Open Source License

/**
 * Joins the distinct, non-blank values received for {@code key} into a single
 * comma-separated string and emits it under the same key.
 *
 * <p>Values are written in first-seen order and exactly as received (they are not trimmed);
 * a value is skipped only when it is blank or when an identical value was already written.
 *
 * @param key      key shared by all values
 * @param values   values grouped under {@code key}
 * @param output   collector that receives one (key, joined values) pair
 * @param reporter not used by this method
 * @throws IOException if the collector fails
 */
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

    StringBuilder sb = new StringBuilder();

    // Exact-match de-duplication. A substring test against the joined output would wrongly
    // drop "doc1" once "doc10" had been appended, and it rescans the whole buffer per value.
    java.util.Set<String> seen = new java.util.HashSet<String>();

    while (values.hasNext()) {
        String newDocument = values.next().toString();

        if (newDocument.trim().length() == 0 || !seen.add(newDocument)) {
            continue;
        }

        // Only non-blank values are appended, so a non-empty buffer means "not the first".
        if (sb.length() > 0) {
            sb.append(",");
        }
        sb.append(newDocument);
    }
    output.collect(key, new Text(sb.toString()));
}

From source file:brickhouse.udf.dcache.DistributedMapUDF.java

License:Apache License

/**
 * Loads key/value pairs from {@code mapFilename} into {@code map}.
 *
 * <p>Paths ending in {@code "crc"} are ignored. A directory is walked recursively. Any other
 * file is read line by line: each line is deserialized with the SerDe returned by
 * {@code getLineSerde()}, and the first two struct fields are converted to Java objects using
 * the inspectors of the {@code "key"} and {@code "value"} fields.
 *
 * @param map         destination map; later entries overwrite earlier ones with the same key
 * @param mapFilename file or directory to load
 * @throws IOException    if a file cannot be read or a directory cannot be listed
 * @throws SerDeException if a line cannot be deserialized
 */
private void addValues(HashMap<Object, Object> map, String mapFilename) throws IOException, SerDeException {
    if (mapFilename.endsWith("crc")) {
        LOG.info(" Ignoring CRC file " + mapFilename);
        return;
    }

    File mapFile = new File(mapFilename);
    if (mapFile.isDirectory()) {
        String[] subFiles = mapFile.list();
        if (subFiles == null) {
            // File.list() returns null on an I/O error; report it instead of failing with an NPE.
            throw new IOException("Unable to list directory " + mapFilename);
        }
        for (String subFile : subFiles) {
            LOG.info("Checking recursively " + subFile);
            addValues(map, mapFilename + "/" + subFile);
        }
        return;
    }

    BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(mapFile)));
    try {
        SerDe lazy = getLineSerde();
        StructObjectInspector lineInsp = (StructObjectInspector) lazy.getObjectInspector();
        StructField keyRef = lineInsp.getStructFieldRef("key");
        StructField valueRef = lineInsp.getStructFieldRef("value");

        String line;
        while ((line = reader.readLine()) != null) {
            Writable lineText = new Text(line);
            Object lineObj = lazy.deserialize(lineText);
            List<Object> objList = lineInsp.getStructFieldsDataAsList(lineObj);
            Object key = ((PrimitiveObjectInspector) keyRef.getFieldObjectInspector())
                    .getPrimitiveJavaObject(objList.get(0));
            Object val = ((PrimitiveObjectInspector) valueRef.getFieldObjectInspector())
                    .getPrimitiveJavaObject(objList.get(1));
            map.put(key, val);
        }
    } finally {
        // Release the file handle even when deserialization fails part-way through.
        reader.close();
    }
}

From source file:brickhouse.udf.sketch.Md5.java

License:Apache License

/**
 * Returns the MD5 digest of {@code s} as a 32-character lowercase hexadecimal string.
 *
 * @param s text to hash; may be {@code null}
 * @return the hexadecimal digest, or {@code null} when {@code s} is {@code null}
 * @throws IllegalStateException if the JVM provides no MD5 implementation
 */
public Text evaluate(final Text s) {
    if (s == null) {
        return null;
    }

    MessageDigest md;
    try {
        md = MessageDigest.getInstance("MD5");
    } catch (NoSuchAlgorithmException nsae) {
        // Surface the failure to the caller instead of terminating the whole JVM.
        throw new IllegalStateException("Cannot find digest algorithm", nsae);
    }

    // Hash the UTF-8 bytes that Text already holds rather than re-encoding the string with
    // the platform default charset, which would make the digest depend on JVM settings.
    // Only the first getLength() bytes of the backing array are valid.
    md.update(s.getBytes(), 0, s.getLength());
    byte[] md5hash = md.digest();

    StringBuilder builder = new StringBuilder();
    for (byte b : md5hash) {
        // Adding 0x100 and dropping the leading digit yields exactly two hex digits per byte.
        builder.append(Integer.toString((b & 0xff) + 0x100, 16).substring(1));
    }
    return new Text(builder.toString());
}

From source file:BU.MET.CS755.SpeciesGraphBuilderMapper.java

/**
 * Parses the XML page held in {@code key} and emits one (title, outlinks) record for it.
 *
 * <p>Pages whose text contains neither {@code "== Taxonavigation =="} nor
 * {@code "==Taxonavigation=="} are skipped. The title is taken from the last {@code <title>}
 * element that has text, with {@code ':'} replaced by {@code '_'}. Outlinks come from
 * {@code GetOutlinks} applied to the text of the last non-empty {@code <text>} element; each
 * link has spaces replaced by {@code '_'} and newlines removed, and is written with a leading
 * space. Nothing is emitted when no title could be determined.
 *
 * @param key      XML of one page
 * @param value    not used by this method
 * @param output   collector receiving (title, outlinks) as {@code Text} pairs
 * @param reporter receives the title as its status
 * @throws IOException if the collector fails
 */
public void map(Text key, Text value, OutputCollector output, Reporter reporter) throws IOException {
    String inputString = key.toString();

    // Cheap textual check first, so pages without the marker are never parsed as XML.
    if ((inputString.indexOf("== Taxonavigation ==") == -1)
            && (inputString.indexOf("==Taxonavigation==") == -1)) {
        return;
    }

    String title = null;
    ArrayList<String> outlinks = null;

    try {
        DocumentBuilder docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        Document document = docBuilder.parse(new InputSource(new StringReader(inputString)));

        // Parse the species name from the title node(s).
        NodeList theTitle = document.getElementsByTagName("title");
        for (int i = 0; i < theTitle.getLength(); i++) {
            Node nodeVal = theTitle.item(i).getFirstChild();
            // An empty <title/> has no child; skip it instead of aborting the whole page.
            if (nodeVal != null && nodeVal.getNodeValue() != null) {
                title = nodeVal.getNodeValue().replace(":", "_");
            }
        }

        // Get the sub-species list from <text>.
        NodeList theText = document.getElementsByTagName("text");
        for (int i = 0; i < theText.getLength(); i++) {
            Node nodeVal = theText.item(i).getFirstChild();
            if (nodeVal != null) {
                outlinks = GetOutlinks(nodeVal.getNodeValue());
            }
        }
    } catch (Exception e) {
        // Best effort: a page that fails to parse is reported and then handled with
        // whatever was extracted before the failure.
        e.printStackTrace();
    }

    if (title == null || title.length() == 0) {
        return;
    }
    reporter.setStatus(title);

    StringBuilder links = new StringBuilder();
    if (outlinks != null) {
        for (String link : outlinks) {
            // Newlines are stripped per link here, so the joined value never contains one.
            links.append(" ").append(link.replace(" ", "_").replace("\n", ""));
        }
    }

    output.collect(new Text(title), new Text(links.toString()));
}

From source file:BU.MET.CS755.SpeciesGraphBuilderReducer.java

/**
 * Concatenates all values for {@code key}, each preceded by a space, and emits
 * {@code "<number of values>:<concatenated values>"} under the same key.
 *
 * @param key      key shared by all values; also reported as the task status
 * @param values   values grouped under {@code key}
 * @param output   collector receiving the single combined record
 * @param reporter receives the key as its status
 * @throws IOException if the collector fails
 */
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    reporter.setStatus(key.toString());

    // A StringBuilder avoids re-copying the accumulated text on every value.
    StringBuilder joined = new StringBuilder();
    int count = 0;

    while (values.hasNext()) {
        // Values are appended unmodified. An earlier replaceAll(" ", "_") call here discarded
        // its result; it is removed rather than assigned so the emitted text stays the same.
        joined.append(" ").append(values.next().toString());
        count += 1;
    }

    output.collect(key, new Text(count + ":" + joined));
}

From source file:BU.MET.CS755.SpeciesIterMapper2.java

/**
 * Splits one rank record into the contributions consumed by the reducer.
 *
 * <p>The text before the first {@code ':'} is the header, from which the page title (first
 * field) and rank (last field) are read; the text after it is a space-separated outlink list.
 * A record without {@code ':'} only yields {@code (title, "oldrank=<rank>")}. Otherwise the
 * method emits, in order: {@code 0.85 * rank / outlinkCount} for every non-empty outlink,
 * then for the title {@code (1 - 0.85) / totalLinks}, the outlink list prefixed with a space,
 * and {@code "oldrank=<rank>"}. A rank that cannot be parsed is treated as {@code 1.0}.
 * {@code totalLinks} is read from a member defined outside this method.
 *
 * @param key      not used by this method
 * @param value    the record; must be a {@code Text}
 * @param output   collector receiving {@code Text} pairs
 * @param reporter not used by this method
 * @throws IOException if the collector fails
 */
public void map(WritableComparable key, Writable value, OutputCollector output, Reporter reporter)
        throws IOException {
    String record = ((Text) value).toString();
    int colonAt = record.indexOf(":");

    // Header: the whole record when there is no ':', else the trimmed text before it.
    String header = (colonAt == -1) ? record : record.substring(0, colonAt).trim();

    // Split on tab, falling back to a single space when the tab split yields nothing.
    String[] fields = header.split("\t");
    if (fields.length == 0) {
        fields = header.split(" ");
    }
    if (fields.length == 0) {
        return;
    }

    String title = fields[0].trim();
    String rankText = fields[fields.length - 1].trim();

    if (colonAt == -1) {
        output.collect(new Text(title), new Text("oldrank=" + rankText));
        return;
    }

    double currentRank;
    try {
        currentRank = Double.parseDouble(rankText);
    } catch (NumberFormatException e) {
        currentRank = 1.0;
    }

    // Everything after the ':' is the outlink list.
    String linkData = record.substring(colonAt + 1);
    String[] targets = linkData.split(" ");

    int linkCount = 0;
    for (String target : targets) {
        if (target.length() > 0) {
            linkCount++;
        }
    }
    if (targets.length == 0) {
        linkCount = 1;
    }

    // Dampened share of this page's rank handed to each outlink.
    Text share = new Text(Double.toString(.85 * currentRank / linkCount));
    for (String target : targets) {
        if (target.length() > 0) {
            output.collect(new Text(target), share);
        }
    }

    // The page's own damping term, followed by its outlinks for the next iteration.
    double selfRank = (1 - 0.85) / totalLinks;
    output.collect(new Text(title), new Text(Double.toString(selfRank)));
    output.collect(new Text(title), new Text(" " + linkData));

    // Previous rank, tagged so the reducer can compare it with the new one.
    output.collect(new Text(title), new Text("oldrank=" + Double.toString(currentRank)));
}

From source file:BU.MET.CS755.SpeciesIterReducer2.java

/**
 * Sums the rank contributions for one page, emits the new rank (with the page's outlinks when
 * present) and flags the page as not yet converged when the rank moved by more than 0.009.
 *
 * <p>Every call increments the {@code TOTAL_LINKS} counter; when {@code iterationNumber}
 * (a member defined outside this method) equals 1 nothing else is done. Each value is
 * interpreted as follows:
 * <ul>
 *   <li>{@code "<number>:<rest>"}: the number is added to the rank, the previous rank is set to
 *       the running total and {@code <rest>} becomes the outlinks;</li>
 *   <li>otherwise, a value containing a space becomes the outlinks;</li>
 *   <li>otherwise, a value containing {@code "oldrank"} supplies the previous rank;</li>
 *   <li>anything else is parsed as a number and added to the rank.</li>
 * </ul>
 *
 * @param key      page key, written back unchanged
 * @param values   contributions for the page; each must be a {@code Text}
 * @param output   collector receiving (key, new rank[:outlinks])
 * @param reporter used for the {@code TOTAL_LINKS} and {@code ITERATIONS_NEEDED} counters
 * @throws IOException if the collector fails
 */
public void reduce(WritableComparable key, Iterator values, OutputCollector output, Reporter reporter)
        throws IOException {
    // Counting links
    reporter.incrCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.TOTAL_LINKS, 1L);

    if (iterationNumber == 1) {
        return;
    }

    double newRank = 0;
    double previousRank = 0;
    String linkList = "";

    while (values.hasNext()) {
        String entry = ((Text) values.next()).toString();
        int colonAt = entry.indexOf(":");

        if (colonAt > -1) {
            try {
                newRank += Double.parseDouble(entry.substring(0, colonAt));
                previousRank = newRank;
                linkList = entry.substring(colonAt + 1);
                continue;
            } catch (NumberFormatException ignored) {
                // Prefix is not a number: classify the entry with the checks below instead.
            }
        }

        int rankTagAt = entry.indexOf("oldrank");
        if (entry.indexOf(" ") > -1) {
            linkList = entry;
        } else if (rankTagAt > -1) {
            // Skip the 8 characters of "oldrank=" to reach the number.
            previousRank = Double.parseDouble(entry.substring(rankTagAt + 8));
        } else {
            newRank += Double.parseDouble(entry);
        }
    }

    // Output the new page rank, followed by the outlinks when there are any.
    String rankText = Double.toString(newRank);
    String toEmit = (linkList.length() > 0) ? rankText + ":" + linkList : rankText;
    output.collect(key, new Text(toEmit));

    // A change beyond two decimal places means the rank has not converged yet; tell the
    // driver another iteration is needed by bumping the counter.
    double delta = previousRank - newRank;
    if (Math.abs(delta) > 0.009) {
        Counter needed = reporter
                .getCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.ITERATIONS_NEEDED);

        if (needed != null) {
            reporter.incrCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.ITERATIONS_NEEDED, 1L);
        }
    }
}

From source file:BU.MET.CS755.SpeciesViewerMapper.java

/**
 * Emits {@code (-rank * 1000, title)} for one rank record.
 *
 * <p>The header is the text before the first {@code ':'}, or the whole record when there is
 * none. The title is its first field and the rank its last field. A rank that cannot be
 * parsed counts as {@code 0.0}.
 *
 * @param key      not used by this method
 * @param value    the record; must be a {@code Text}
 * @param output   collector receiving a {@code FloatWritable} key and a {@code Text} title
 * @param reporter not used by this method
 * @throws IOException if the collector fails
 */
public void map(WritableComparable key, Writable value, OutputCollector output, Reporter reporter)
        throws IOException {
    String record = ((Text) value).toString();

    // Without a ':' (can happen if there are no outlinks) the header is the whole record.
    int colonAt = record.indexOf(":");
    int headerEnd = (colonAt == -1) ? record.length() : colonAt;
    String header = record.substring(0, headerEnd).trim();

    // Split on tab, falling back to a single space when the tab split yields nothing.
    String[] fields = header.split("\t");
    if (fields.length == 0) {
        fields = header.split(" ");
    }
    if (fields.length == 0) {
        return;
    }

    String title = fields[0].trim();
    String rankText = fields[fields.length - 1].trim();

    double rank;
    try {
        rank = Double.parseDouble(rankText);
    } catch (NumberFormatException e) {
        rank = 0.0;
    }

    // The rank is negated and narrowed to float before scaling, as the emitted key.
    float sortKey = ((float) -rank) * 1000;
    output.collect(new FloatWritable(sortKey), new Text(title));
}

From source file:car.Classification.java

License:Open Source License

/**
 * Builds a summary of how the records in {@code content} are distributed over four classes.
 *
 * <p>{@code content} is split on whitespace; for each token, the text after its last comma is
 * passed to {@code getClassIndex} to pick the counter to increment. Percentages use integer
 * division. When {@code content} holds no tokens every class is reported as 0%.
 *
 * @param path    label placed at the start of the summary
 * @param content whitespace-separated records
 * @return {@code path} followed by one {@code class=<name>:<percent>%} entry per class
 */
public static Text getClassPercentages(String path, String content) {
    int[] classes = new int[4];

    StringTokenizer lineTok = new StringTokenizer(content);
    while (lineTok.hasMoreTokens()) {
        String record = lineTok.nextToken();
        // The class label is whatever follows the last comma of the record.
        classes[getClassIndex(record.substring(record.lastIndexOf(',') + 1))] += 1;
    }

    int total = classes[0] + classes[1] + classes[2] + classes[3];
    // With no records all counters are 0; dividing by 1 reports 0% instead of throwing
    // ArithmeticException on a division by zero.
    int divisor = Math.max(total, 1);

    return new Text(path + "-class=" + CLASS_UNACC + ":" + classes[0] * 100 / divisor + "%, class=" + CLASS_ACC
            + ":" + classes[1] * 100 / divisor + "%, class=" + CLASS_GOOD + ":" + classes[2] * 100 / divisor
            + "%, class=" + CLASS_VGOOD + ":" + classes[3] * 100 / divisor + "%");
}

From source file:cascading.hive.ORCFile.java

License:Apache License

/**
 * Copies every field of {@code tuple} into {@code struct}, wrapping each value in the
 * writable that corresponds to the declared type of its column.
 *
 * <p>The column type is looked up in {@code typeMapping} by the lower-cased entry of
 * {@code types}; types without a dedicated case are written as {@code Text}. A {@code null}
 * tuple value is written as {@code null} without consulting the type.
 *
 * @param tuple  source values, one per entry of {@code types}
 * @param struct destination whose fields are set by position
 */
private void tuple2Struct(Tuple tuple, OrcStruct struct) {
    for (int i = 0; i < types.length; i++) {
        Object value;
        if (tuple.getObject(i) == null) {
            value = null;
        } else {
            // Locale.ROOT keeps the lookup key stable regardless of the JVM default locale
            // (a Turkish locale, for instance, lower-cases 'I' to a dotless 'ı').
            switch (typeMapping.get(types[i].toLowerCase(java.util.Locale.ROOT))) {
            case INT:
                value = new IntWritable(tuple.getInteger(i));
                break;
            case BOOLEAN:
                value = new BooleanWritable(tuple.getBoolean(i));
                break;
            case TINYINT:
                value = new ByteWritable(Byte.valueOf(tuple.getString(i)));
                break;
            case SMALLINT:
                value = new ShortWritable(tuple.getShort(i));
                break;
            case BIGINT:
                value = new LongWritable(tuple.getLong(i));
                break;
            case FLOAT:
                value = new FloatWritable(tuple.getFloat(i));
                break;
            case DOUBLE:
                value = new DoubleWritable(tuple.getDouble(i));
                break;
            case BIGDECIMAL:
                value = new HiveDecimalWritable(HiveDecimal.create((new BigDecimal(tuple.getString(i)))));
                break;
            case STRING:
            default:
                value = new Text(tuple.getString(i));
                break;
            }
        }
        struct.setFieldValue(i, value);
    }
}