Example usage for org.apache.hadoop.io Text Text

Introduction

On this page you can find example usages of the Text constructor of org.apache.hadoop.io.Text.

Prototype

public Text(byte[] utf8)

Source Link

Document

Construct from a byte array.

Usage

From source file:br.ufrj.nce.recureco.distributedindex.indexer.IndexerReduce.java

License:Open Source License

/**
 * Emits, for {@code key}, a single comma-separated value built from the distinct,
 * non-blank values received for that key, in first-seen order.
 *
 * <p>Values are compared for exact equality. (The previous implementation tested
 * whether the joined output <em>contained</em> the new value, which wrongly dropped
 * any value that happened to be a substring of the text accumulated so far, e.g.
 * "doc1" after "doc10".)
 *
 * @param key      the key being reduced; passed through unchanged
 * @param values   the values grouped under {@code key}
 * @param output   receives exactly one {@code (key, joinedValues)} pair
 * @param reporter unused
 * @throws IOException if the collector fails
 */
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

    // Exact-match membership check; also avoids re-scanning the growing output for every value.
    java.util.Set<String> seen = new java.util.HashSet<String>();
    StringBuilder sb = new StringBuilder();

    while (values.hasNext()) {

        String newDocument = values.next().toString();

        // Blank values are skipped; note the value is appended untrimmed, as before.
        if (newDocument == null || newDocument.trim().length() == 0) {
            continue;
        }

        // add() returns false when the value was already emitted.
        if (!seen.add(newDocument)) {
            continue;
        }

        // Every appended value is non-empty, so a non-empty buffer means "not the first one".
        if (sb.length() > 0) {
            sb.append(",");
        }
        sb.append(newDocument);
    }
    output.collect(key, new Text(sb.toString()));
}

From source file:brickhouse.udf.dcache.DistributedMapUDF.java

License:Apache License

/**
 * Populates {@code map} with the key/value pairs found in {@code mapFilename}.
 *
 * <p>Paths ending in "crc" are ignored. Directories are walked recursively. Every line of a
 * regular file is deserialized with the SerDe returned by {@code getLineSerde()} and stored
 * as one map entry.
 *
 * @param map         destination for the decoded entries; existing keys are overwritten
 * @param mapFilename path of a file or directory on the local file system
 * @throws IOException    if the file cannot be read or a directory cannot be listed
 * @throws SerDeException if a line cannot be deserialized
 */
private void addValues(HashMap<Object, Object> map, String mapFilename) throws IOException, SerDeException {
    if (mapFilename.endsWith("crc")) {
        LOG.info(" Ignoring CRC file " + mapFilename);
        return;
    }

    File mapFile = new File(mapFilename);
    if (mapFile.isDirectory()) {
        String[] subFiles = mapFile.list();
        // File.list() returns null (rather than throwing) when the directory cannot be read.
        if (subFiles == null) {
            throw new IOException("Unable to list directory " + mapFilename);
        }
        for (String subFile : subFiles) {
            LOG.info("Checking recursively " + subFile);
            addValues(map, mapFilename + "/" + subFile);
        }
        return;
    }

    SerDe lazy = getLineSerde();
    StructObjectInspector lineInsp = (StructObjectInspector) lazy.getObjectInspector();
    StructField keyRef = lineInsp.getStructFieldRef("key");
    StructField valueRef = lineInsp.getStructFieldRef("value");

    // NOTE(review): the reader uses the platform default charset, as before — confirm that is intended.
    BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(mapFile)));
    try {
        String line;
        while ((line = reader.readLine()) != null) {
            Writable lineText = new Text(line);
            Object lineObj = lazy.deserialize(lineText);
            List<Object> objList = lineInsp.getStructFieldsDataAsList(lineObj);
            // Assumes "key" is struct field 0 and "value" is struct field 1 — TODO confirm against getLineSerde().
            Object key = ((PrimitiveObjectInspector) keyRef.getFieldObjectInspector())
                    .getPrimitiveJavaObject(objList.get(0));
            Object val = ((PrimitiveObjectInspector) valueRef.getFieldObjectInspector())
                    .getPrimitiveJavaObject(objList.get(1));
            map.put(key, val);
        }
    } finally {
        // Previously the reader (and its file handle) was never closed.
        reader.close();
    }
}

From source file:brickhouse.udf.sketch.Md5.java

License:Apache License

/**
 * Computes the MD5 digest of a string and returns it as 32 lowercase hexadecimal characters.
 *
 * <p>The string is always encoded as UTF-8 before hashing, so the result does not depend on
 * the JVM's default charset.
 *
 * @param s the text to hash; may be {@code null}
 * @return the hex-encoded digest, or {@code null} when {@code s} is {@code null}
 * @throws IllegalStateException if the JVM provides no MD5 implementation
 */
public Text evaluate(final Text s) {
    if (s == null) {
        return null;
    }

    final MessageDigest md;
    try {
        md = MessageDigest.getInstance("MD5");
    } catch (NoSuchAlgorithmException nsae) {
        // Fail this call instead of terminating the whole JVM with System.exit.
        throw new IllegalStateException("Cannot find digest algorithm", nsae);
    }

    // Explicit charset: getBytes() without one uses the platform default.
    md.update(s.toString().getBytes(java.nio.charset.Charset.forName("UTF-8")));
    byte[] md5hash = md.digest();

    StringBuilder builder = new StringBuilder(md5hash.length * 2);
    for (byte b : md5hash) {
        // Adding 0x100 forces three hex digits; dropping the first leaves a zero-padded pair.
        builder.append(Integer.toString((b & 0xff) + 0x100, 16).substring(1));
    }
    return new Text(builder.toString());
}

From source file:BU.MET.CS755.SpeciesGraphBuilderMapper.java

/**
 * Parses one XML page held in {@code key} and emits {@code (title, outlinks)}.
 *
 * <p>Pages that contain neither "== Taxonavigation ==" nor "==Taxonavigation==" are skipped.
 * The title is the text of the last {@code <title>} element with ':' replaced by '_'. The
 * outlinks come from {@code GetOutlinks} applied to the text of the last non-empty
 * {@code <text>} element; each link has spaces replaced by '_' and newlines removed, and is
 * written preceded by a single space.
 *
 * @param key      the complete XML of the page
 * @param value    unused
 * @param output   receives at most one {@code (Text title, Text links)} pair
 * @param reporter its status is set to the page title
 * @throws IOException if the collector fails
 */
public void map(Text key, Text value, OutputCollector output, Reporter reporter) throws IOException {
    String inputString = key.toString();

    // Look for taxonavigation marker. Done before parsing so unmarked pages cost no XML parse;
    // such pages never produced output.
    if ((inputString.indexOf("== Taxonavigation ==") == -1)
            && (inputString.indexOf("==Taxonavigation==") == -1)) {
        return;
    }

    String title = null;
    ArrayList<String> outlinks = null;

    //Get the DOM Builder Factory
    // NOTE(review): the factory is used with default settings; if pages can come from an
    // untrusted source, consider disabling DTDs/external entities.
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

    try {
        //Get the DOM Builder
        DocumentBuilder builder = factory.newDocumentBuilder();

        //Load and Parse the XML document
        //document contains the complete XML as a Tree.
        Document document = builder.parse(new InputSource(new StringReader(inputString)));

        // Parse the species name from the title node.
        NodeList theTitle = document.getElementsByTagName("title");
        for (int i = 0; i < theTitle.getLength(); i++) {
            Node nodeVal = theTitle.item(i).getFirstChild();
            // An empty <title/> has no child; skip it rather than relying on a caught NPE.
            String rawTitle = (nodeVal == null) ? null : nodeVal.getNodeValue();
            if (rawTitle != null) {
                title = rawTitle.replace(":", "_");
            }
        }

        // Get the sub-species list from <text>
        NodeList theText = document.getElementsByTagName("text");
        for (int i = 0; i < theText.getLength(); i++) {
            Node nodeVal = theText.item(i).getFirstChild();

            if (nodeVal != null) {
                outlinks = GetOutlinks(nodeVal.getNodeValue());
            }
        }
    } catch (Exception e) {
        // Best effort: a page that fails to parse is reported and then skipped or emitted
        // with whatever was extracted before the failure.
        e.printStackTrace();
    }

    if (title == null || title.length() == 0) {
        return;
    }
    reporter.setStatus(title);

    StringBuilder links = new StringBuilder();

    if (outlinks != null) {
        for (String link : outlinks) {
            // Newlines are stripped per link here, so the joined string never contains one.
            links.append(" ");
            links.append(link.replace(" ", "_").replace("\n", ""));
        }
    }

    output.collect(new Text(title), new Text(links.toString()));
}

From source file:BU.MET.CS755.SpeciesGraphBuilderReducer.java

/**
 * Concatenates all values for {@code key} and emits them prefixed with their count, in the
 * form {@code "<count>: <value1> <value2> ..."} (each value is preceded by one space).
 *
 * @param key      the key being reduced; also used as the reporter status
 * @param values   the values grouped under {@code key}
 * @param output   receives exactly one {@code (key, "<count>:<values>")} pair
 * @param reporter its status is set to the key
 * @throws IOException if the collector fails
 */
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    reporter.setStatus(key.toString());

    StringBuilder pages = new StringBuilder();
    int count = 0;

    while (values.hasNext()) {
        // Values are appended verbatim. The former page.replaceAll(" ", "_") call discarded its
        // result (Strings are immutable) and so never had any effect; it has been removed rather
        // than "fixed", because rewriting spaces here would change the emitted value.
        pages.append(" ").append(values.next().toString());
        count += 1;
    }

    output.collect(key, new Text(count + ":" + pages));
}

From source file:BU.MET.CS755.SpeciesIterMapper2.java

/**
 * Distributes the rank of one page over its outlinks.
 *
 * <p>Input lines without a ':' only yield {@code (title, "oldrank=" + lastField)}. For lines
 * with a ':', the part before it is split into title (first field) and rank (last field), the
 * part after it is the space-separated outlink list. The method then emits:
 * <ul>
 *   <li>{@code (outlink, 0.85 * rank / numberOfOutlinks)} for every non-empty outlink,</li>
 *   <li>{@code (title, (1 - 0.85) / totalLinks)},</li>
 *   <li>{@code (title, " " + outlinkList)},</li>
 *   <li>{@code (title, "oldrank=" + rank)}.</li>
 * </ul>
 * A rank that does not parse as a double is treated as 1.0.
 *
 * @param key      unused
 * @param value    the input line; must be a {@code Text}
 * @param output   receives the pairs described above
 * @param reporter unused
 * @throws IOException if the collector fails
 */
public void map(WritableComparable key, Writable value, OutputCollector output, Reporter reporter)
        throws IOException {
    final String line = ((Text) value).toString();
    final int colonPos = line.indexOf(":");

    if (colonPos == -1) {
        // No outlink section: just forward the rank found on the line.
        String[] fields = line.split("\t");
        if (fields.length == 0) {
            fields = line.split(" ");
            if (fields.length == 0) {
                return;
            }
        }

        output.collect(new Text(fields[0].trim()), new Text("oldrank=" + fields[fields.length - 1].trim()));
        return;
    }

    // Title and rank live before the colon (tab separated, with a space-split fallback).
    final String head = line.substring(0, colonPos).trim();
    String[] fields = head.split("\t");
    if (fields.length == 0) {
        fields = head.split(" ");
        if (fields.length == 0) {
            return;
        }
    }

    final String pagetitle = fields[0].trim();
    final String pagerank = fields[fields.length - 1].trim();

    double currScore;
    try {
        currScore = Double.parseDouble(pagerank);
    } catch (NumberFormatException e) {
        currScore = 1.0;
    }

    // Everything after the colon is the outlink list.
    final String outlinkData = line.substring(colonPos + 1);
    final String[] pages = outlinkData.split(" ");

    int numoutlinks = 0;
    for (String page : pages) {
        if (!page.isEmpty()) {
            numoutlinks++;
        }
    }
    if (pages.length == 0) {
        numoutlinks = 1;
    }

    // Each outlink receives an equal share of the dampened rank.
    final Text share = new Text(Double.toString(.85 * currScore / numoutlinks));
    for (String page : pages) {
        if (!page.isEmpty()) {
            output.collect(new Text(page), share);
        }
    }

    // The page itself gets the damping constant spread over all links ...
    final double selfRank = (1 - 0.85) / totalLinks;
    output.collect(new Text(pagetitle), new Text(Double.toString(selfRank)));

    // ... its own outlink list, so the reducer can rebuild the line ...
    output.collect(new Text(pagetitle), new Text(" " + outlinkData));

    // ... and its current rank, prefixed with "oldrank=", for the reducer to compare against.
    output.collect(new Text(pagetitle), new Text("oldrank=" + Double.toString(currScore)));
}

From source file:BU.MET.CS755.SpeciesIterReducer2.java

/**
 * Sums the rank contributions received for a page and emits its new rank.
 *
 * <p>Every call increments the TOTAL_LINKS counter; when {@code iterationNumber} is 1 nothing
 * else happens. Otherwise each value is classified, in this order:
 * <ol>
 *   <li>contains ':' and the text before it parses as a double: that number is added to the
 *       score, the old score is set to the running score, and the text after the ':' becomes
 *       the outlink list;</li>
 *   <li>contains a space: the whole value becomes the outlink list;</li>
 *   <li>contains "oldrank": the text starting 8 characters after it is the old score;</li>
 *   <li>anything else is parsed as a double and added to the score.</li>
 * </ol>
 * The result is emitted as {@code "<score>:<outlinks>"}, or just {@code "<score>"} when there
 * are no outlinks. If the score moved by more than 0.009 the ITERATIONS_NEEDED counter is
 * incremented.
 *
 * @param key      the page; passed through unchanged
 * @param values   the {@code Text} values grouped under {@code key}
 * @param output   receives one {@code (key, Text)} pair
 * @param reporter used for the two counters
 * @throws IOException if the collector fails
 */
public void reduce(WritableComparable key, Iterator values, OutputCollector output, Reporter reporter)
        throws IOException {
    // Counting links
    reporter.incrCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.TOTAL_LINKS, 1L);

    if (iterationNumber == 1) {
        return;
    }

    double score = 0;
    double oldScore = 0;
    String outLinks = "";

    while (values.hasNext()) {
        final String curr = ((Text) values.next()).toString();
        final int colon = curr.indexOf(":");

        if (colon > -1) {
            try {
                score += Double.parseDouble(curr.substring(0, colon));
                oldScore = score;
                outLinks = curr.substring(colon + 1);
                continue;
            } catch (NumberFormatException notAScore) {
                // Not "<score>:<links>"; classify the value with the checks below instead.
            }
        }

        if (curr.indexOf(" ") > -1) {
            outLinks = curr;
        } else {
            final int oldrank = curr.indexOf("oldrank");
            if (oldrank > -1) {
                // Skips the 8 characters of "oldrank=".
                oldScore = Double.parseDouble(curr.substring(oldrank + 8));
            } else {
                score += Double.parseDouble(curr);
            }
        }
    }

    // Output the new page rank
    String toEmit = Double.toString(score);
    if (outLinks.length() > 0) {
        toEmit = toEmit + ":" + outLinks;
    }
    output.collect(key, new Text(toEmit));

    // A change of more than 0.009 in either direction means the rank has not converged yet;
    // tell the driver another iteration is needed.
    final double delta = oldScore - score;
    if (Math.abs(delta) > 0.009) {
        Counter iterationsNeeded = reporter
                .getCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.ITERATIONS_NEEDED);

        if (iterationsNeeded != null) {
            reporter.incrCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.ITERATIONS_NEEDED, 1L);
        }
    }
}

From source file:BU.MET.CS755.SpeciesViewerMapper.java

/**
 * Emits {@code (-rank * 1000, title)} for one input line so that pages can be ordered by rank.
 *
 * <p>Only the part of the line before the first ':' is used (the whole line when there is no
 * ':'). Its first field is the title and its last field the rank; a rank that does not parse
 * as a double counts as 0.0.
 *
 * @param key      unused
 * @param value    the input line; must be a {@code Text}
 * @param output   receives one {@code (FloatWritable, Text)} pair
 * @param reporter unused
 * @throws IOException if the collector fails
 */
public void map(WritableComparable key, Writable value, OutputCollector output, Reporter reporter)
        throws IOException {
    final String line = ((Text) value).toString();
    final int colonPos = line.indexOf(":");

    // No colon can happen if the page has no outlinks; then the whole line is title + rank.
    final String head = (colonPos == -1 ? line : line.substring(0, colonPos)).trim();

    // Title and rank are tab separated, with a space-split fallback.
    String[] fields = head.split("\t");
    if (fields.length == 0) {
        fields = head.split(" ");
        if (fields.length == 0) {
            return;
        }
    }

    final String pagetitle = fields[0].trim();
    final String pagerank = fields[fields.length - 1].trim();

    double currScore;
    try {
        currScore = Double.parseDouble(pagerank);
    } catch (NumberFormatException e) {
        currScore = 0.0;
    }

    // The cast applies to -currScore only; the multiplication is then done in float.
    final float sortKey = ((float) -currScore) * 1000;
    output.collect(new FloatWritable(sortKey), new Text(pagetitle));
}

From source file:car.Classification.java

License:Open Source License

/**
 * Counts how many records of {@code content} fall into each of the four classes and formats
 * the distribution as whole-number percentages.
 *
 * <p>{@code content} is split on whitespace; the text after the last ',' of each token is
 * mapped to a class slot by {@code getClassIndex}. Percentages use integer division, so they
 * are truncated and need not add up to 100. When {@code content} holds no records every class
 * is reported as 0% (previously this case failed with a division by zero).
 *
 * @param path    label placed at the start of the result
 * @param content whitespace-separated records
 * @return {@code "<path>-class=<name>:<p>%, class=<name>:<p>%, ..."} for the four classes
 */
public static Text getClassPercentages(String path, String content) {
    int[] classes = new int[4];

    StringTokenizer lineTok = new StringTokenizer(content);
    while (lineTok.hasMoreTokens()) {
        String record = lineTok.nextToken();
        // The class label is whatever follows the record's last comma.
        classes[getClassIndex(record.substring(record.lastIndexOf(',') + 1))] += 1;
    }

    int total = classes[0] + classes[1] + classes[2] + classes[3];
    // With no records all counts are 0, so dividing by 1 yields the correct 0% everywhere.
    int divisor = (total == 0) ? 1 : total;

    return new Text(path + "-class=" + CLASS_UNACC + ":" + classes[0] * 100 / divisor + "%, class=" + CLASS_ACC
            + ":" + classes[1] * 100 / divisor + "%, class=" + CLASS_GOOD + ":" + classes[2] * 100 / divisor
            + "%, class=" + CLASS_VGOOD + ":" + classes[3] * 100 / divisor + "%");
}

From source file:cascading.hive.ORCFile.java

License:Apache License

/**
 * Copies every field of {@code tuple} into {@code struct}, wrapping each value in the writable
 * that matches the column's declared type.
 *
 * <p>The column type is looked up in {@code typeMapping} by the lower-cased entry of
 * {@code types}. A tuple field whose object is {@code null} is stored as {@code null}.
 * STRING, and any type without its own case, is stored as {@code Text}.
 *
 * @param tuple  source of the field values, read by position
 * @param struct destination; field {@code i} receives the converted value of tuple position {@code i}
 */
private void tuple2Struct(Tuple tuple, OrcStruct struct) {
    for (int col = 0; col < types.length; col++) {
        final Object field;
        switch (typeMapping.get(types[col].toLowerCase())) {
        case INT:
            if (tuple.getObject(col) == null) {
                field = null;
            } else {
                field = new IntWritable(tuple.getInteger(col));
            }
            break;
        case BOOLEAN:
            if (tuple.getObject(col) == null) {
                field = null;
            } else {
                field = new BooleanWritable(tuple.getBoolean(col));
            }
            break;
        case TINYINT:
            if (tuple.getObject(col) == null) {
                field = null;
            } else {
                // Read via the string form and parsed as a byte.
                field = new ByteWritable(Byte.valueOf(tuple.getString(col)));
            }
            break;
        case SMALLINT:
            if (tuple.getObject(col) == null) {
                field = null;
            } else {
                field = new ShortWritable(tuple.getShort(col));
            }
            break;
        case BIGINT:
            if (tuple.getObject(col) == null) {
                field = null;
            } else {
                field = new LongWritable(tuple.getLong(col));
            }
            break;
        case FLOAT:
            if (tuple.getObject(col) == null) {
                field = null;
            } else {
                field = new FloatWritable(tuple.getFloat(col));
            }
            break;
        case DOUBLE:
            if (tuple.getObject(col) == null) {
                field = null;
            } else {
                field = new DoubleWritable(tuple.getDouble(col));
            }
            break;
        case BIGDECIMAL:
            if (tuple.getObject(col) == null) {
                field = null;
            } else {
                field = new HiveDecimalWritable(HiveDecimal.create((new BigDecimal(tuple.getString(col)))));
            }
            break;
        case STRING:
        default:
            if (tuple.getObject(col) == null) {
                field = null;
            } else {
                field = new Text(tuple.getString(col));
            }
        }
        struct.setFieldValue(col, field);
    }
}