Example usage for org.apache.hadoop.io Text toString

List of usage examples for org.apache.hadoop.io Text toString


On this page you can find example usages of org.apache.hadoop.io.Text.toString().


public String toString() 

Source Link


Convert text back to string


From source file:br.com.lassal.nqueens.grid.mapreduce.NQueenIncrementalCounterResultReducer.java

/**
 * Aggregates the values received for one key into a single solution count.
 *
 * <p>Each value is read as a string. Values equal to
 * {@code NQueenIncrementalCounterResultMapper.PARTIAL_SOLUTION_ID} are counted as
 * still-open partial solutions; every other value is parsed as a decimal
 * {@link BigInteger} and added to the running total. The total is written as
 * {@code key=total;} and, if any partial-solution markers were seen, a second
 * record reporting how many is written as well. Both records use a
 * {@code NullWritable} output key.
 *
 * <p>NOTE(review): the closing braces of the else/for/if blocks and of the method
 * are not present in this listing; the indentation suggests the writes happen
 * after the loop.
 *
 * @param key     the key whose values are being aggregated
 * @param values  marker strings or decimal counts
 * @param context job context used to emit the result records
 * @throws java.io.IOException  declared by the signature (source not visible in this snippet)
 * @throws InterruptedException declared by the signature (source not visible in this snippet)
 */
protected void reduce(Text key, Iterable<Text> values, Context context)
        throws java.io.IOException, InterruptedException {
    BigInteger partialCount = BigInteger.ZERO;
    long numPartialSolutions = 0;

    for (Text item : values) {
        String value = item.toString();

        // The MAX_VALUE guard keeps the counter from overflowing. NOTE(review): once the
        // guard fails, a marker value falls through to the BigInteger parse below.
        if (NQueenIncrementalCounterResultMapper.PARTIAL_SOLUTION_ID.equals(value)
                && numPartialSolutions < Long.MAX_VALUE) {
            numPartialSolutions++;
        } else {
            // Anything that is not the marker must be a decimal integer;
            // new BigInteger(String) throws NumberFormatException otherwise.
            BigInteger solutionCount = new BigInteger(value);
            partialCount = partialCount.add(solutionCount);

    context.write(NullWritable.get(), new Text(key.toString() + "=" + partialCount.toString() + ";"));

    // NOTE(review): the message below looks like Portuguese text that lost its accented
    // characters somewhere (e.g. "SOLUES") - confirm against the original source.
    if (numPartialSolutions > 0) {
        context.write(NullWritable.get(), new Text(
                key.toString() + " NO FINALIZADO - EXISTEM " + numPartialSolutions + " SOLUES ABERTAS."));

From source file:br.com.lassal.nqueens.grid.mapreduce.NQueenPartialShotMapper.java

/**
 * Expands one partial N-Queens placement into candidate placements for the next
 * columns.
 *
 * <p>The input line is parsed into an {@code NQueenPartialShot}. Depending on
 * {@code record.getNumColToRun(SliceSize)} the method enumerates one to four
 * additional values ({@code a}..{@code d}), each ranging over
 * {@code [0, record.getColumnSize())}, and emits {@code predicado} followed by the
 * comma-separated values for every combination that passes the filter. The filter
 * rejects combinations where two values are equal or differ by exactly their
 * column distance (presumably same-row and diagonal attacks), and on the final
 * run it additionally asks {@code isPositionInvalid} with the existing prefix.
 *
 * <p>NOTE(review): most closing braces are missing from this listing, so the exact
 * nesting has to be inferred from indentation.
 *
 * @param key     input key; not read by the visible code
 * @param value   serialized partial placement
 * @param context job context used to emit the expanded placements
 * @throws java.io.IOException  declared by the signature (source not visible in this snippet)
 * @throws InterruptedException declared by the signature (source not visible in this snippet)
 */
protected void map(LongWritable key, Text value, Context context)
        throws java.io.IOException, InterruptedException {

    NQueenPartialShot record = new NQueenPartialShot(value.toString());

    //   String rowSize = context.getConfiguration().get(NQueenPartialShotMapper.NQueenRowSize_PROP);
    String predicado = record.getNewPrefix();
    // predicado = predicado!= null ? predicado + "," : (rowSize + ":");

    int numIter = record.getNumColToRun(SliceSize);

    boolean isFinalRun = record.isFinalRun(SliceSize);

    // On non-final runs with a prefix of at least two slices, validate the prefix
    // up front and drop the whole record if it is already invalid.
    if (!isFinalRun && record.hasPrefix() && record.getPrefix().length >= (SliceSize * 2)) {
        if (this.isPositionInvalid(record.getPrefix())) {
            return; // skip this line
        }

    for (int a = 0; a < record.getColumnSize(); a++) {
        if (numIter > 1) {
            for (int b = 0; b < record.getColumnSize(); b++) {
                if (numIter > 2) {
                    for (int c = 0; c < record.getColumnSize(); c++) {
                        if (numIter > 3) {
                            for (int d = 0; d < record.getColumnSize(); d++) {
                                // Four new columns: reject equal values and values whose
                                // difference equals the column distance (1, 2 or 3).
                                if (!(a == b || a == c || a == d || b == c || b == d || c == d || a == (b + 1)
                                        || a == (b - 1) || a == (c + 2) || a == (c - 2) || a == (d + 3)
                                        || a == (d - 3) || b == (c + 1) || b == (c - 1) || b == (d + 2)
                                        || b == (d - 2) || c == (d + 1) || c == (d - 1) || (isFinalRun
                                                && this.isPositionInvalid(record.getPrefix(), a, b, c, d)))) {
                                    nqueenShot.set(String.format("%s%s,%s,%s,%s", predicado, a, b, c, d));
                                    context.write(nqueenShot, nullValue);
                        } else {
                            // Three new columns.
                            if (!(a == b || a == c || b == c || a == (b + 1) || a == (b - 1) || a == (c + 2)
                                    || a == (c - 2) || b == (c + 1) || b == (c - 1)
                                    || isFinalRun && this.isPositionInvalid(record.getPrefix(), a, b, c))) {
                                nqueenShot.set(String.format("%s%s,%s,%s", predicado, a, b, c));
                                context.write(nqueenShot, nullValue);
                } else {
                    // Two new columns.
                    if (!(a == b || a == (b - 1) || a == (b + 1)
                            || isFinalRun && this.isPositionInvalid(record.getPrefix(), a, b))) {
                        nqueenShot.set(String.format("%s%s,%s", predicado, a, b));
                        context.write(nqueenShot, nullValue);
        } else {
            // One new column.
            // NOTE(review): unlike the branches above, this condition is not negated, so a
            // value is emitted only when it is the final run AND the position is reported
            // invalid - confirm whether a "!(...)" was intended here.
            if (isFinalRun && this.isPositionInvalid(record.getPrefix(), a)) {
                nqueenShot.set(predicado + a);
                context.write(nqueenShot, nullValue);


From source file:br.ufrj.nce.recureco.distributedindex.indexer.IndexerMap.java

License:Open Source License

public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

    FileSplit fileSplit = (FileSplit) reporter.getInputSplit();
    String filename = fileSplit.getPath().getName();

    List<String> tokenizedLine = lineTokenizer.tokenize(value.toString());

    for (String auxWord : tokenizedLine) {
        output.collect(new Text(auxWord), new Text(filename));
    }//from w w  w  . jav a  2 s.c  o m

From source file:brickhouse.udf.sketch.Md5.java

License:Apache License

public Text evaluate(final Text s) {
    if (s == null) {
        return null;
    }//from w ww  .java  2 s.c  om
    try {
        MessageDigest md = MessageDigest.getInstance("MD5");
        byte[] md5hash = md.digest();
        StringBuilder builder = new StringBuilder();
        for (byte b : md5hash) {
            builder.append(Integer.toString((b & 0xff) + 0x100, 16).substring(1));
        return new Text(builder.toString());
    } catch (NoSuchAlgorithmException nsae) {
        System.out.println("Cannot find digest algorithm");
    return null;

From source file:BU.MET.CS755.SpeciesGraphBuilderMapper.java

/**
 * Parses an XML page held in {@code key} and emits its title together with a
 * space-separated list built from the page's outlinks.
 *
 * <p>The XML is parsed into a DOM tree; the text of each {@code <title>} element
 * (with ':' replaced by '_') becomes the output key, and the first child of each
 * {@code <text>} element is passed to {@code GetOutlinks}.
 *
 * <p>NOTE(review): this listing is missing several block bodies and closing braces
 * (the Taxonavigation check, the catch block, the title check), so some of the
 * control flow cannot be determined from here.
 *
 * @param key      the XML document as text
 * @param value    not read by the visible code
 * @param output   collector receiving the (title, outlinks) pair
 * @param reporter not used by the visible code
 * @throws IOException declared by the signature; presumably raised by the collector
 */
public void map(Text key, Text value, OutputCollector output, Reporter reporter) throws IOException {
    String title = null;
    String inputString;
    ArrayList<String> outlinks = null;

    //Get the DOM Builder Factory
    // NOTE(review): the factory is used with default settings; if the input is untrusted,
    // consider disabling DTDs/external entities (XXE).
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

    try {
        //Get the DOM Builder
        DocumentBuilder builder = factory.newDocumentBuilder();

        //Load and Parse the XML document
        //document contains the complete XML as a Tree.
        inputString = key.toString();
        InputSource is = new InputSource(new StringReader(key.toString()));
        Document document = builder.parse(is);

        // Look for taxonavigation marker.
        // NOTE(review): the body of this check is not present in this listing.
        if ((inputString.indexOf("== Taxonavigation ==") == -1)
                && (inputString.indexOf("==Taxonavigation==") == -1)) {

        // Get the title node
        // (nodeList is not referenced again in the visible code.)
        NodeList nodeList = document.getDocumentElement().getChildNodes();
        NodeList theTitle = document.getElementsByTagName("title");

        // Parse the species name from the title node.
        // If several <title> elements exist, the last one wins.
        for (int i = 0; i < theTitle.getLength(); i++) {
            Node theNode = theTitle.item(i);
            Node nodeVal = theNode.getFirstChild();
            title = nodeVal.getNodeValue();
            title = title.replace(":", "_");

        // Get the sub-species list from <text>
        NodeList theText = document.getElementsByTagName("text");

        for (int i = 0; i < theText.getLength(); i++) {
            Node theNode = theText.item(i);
            Node nodeVal = theNode.getFirstChild();

            if (nodeVal != null) {
                outlinks = GetOutlinks(nodeVal.getNodeValue());
    // NOTE(review): no handling is visible for this catch-all; a parse failure would
    // leave title and outlinks at null.
    } catch (Exception e) {

    // NOTE(review): both branches of this check are empty in this listing.
    if (title != null && title.length() > 0) {
    } else {

    StringBuilder builder = new StringBuilder();

    if (outlinks != null) {
        for (String link : outlinks) {
            link = link.replace(" ", "_");
            link = link.replace("\n", "");
            // NOTE(review): only the separator is appended here; the statement that
            // appends the link itself is not visible in this listing.
            builder.append(" ");

    // remove any newlines
    // NOTE(review): String.replace returns a new string and the result is discarded,
    // so this statement has no effect on builder.
    if (builder.toString().contains("\n")) {
        builder.toString().replace("\n", "");

    // NOTE(review): title may still be null at this point - confirm that a null title
    // is filtered out before this call.
    output.collect(new Text(title), new Text(builder.toString()));

From source file:BU.MET.CS755.SpeciesGraphBuilderReducer.java

public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    String toWrite = "";
    int count = 0;

    while (values.hasNext()) {
        String page = ((Text) values.next()).toString();
        page.replaceAll(" ", "_");
        toWrite += " " + page;
        count += 1;//  w ww.  j  av  a 2  s . c  om

    IntWritable i = new IntWritable(count);
    String num = (i).toString();
    toWrite = num + ":" + toWrite;

    output.collect(key, new Text(toWrite));

From source file:cascading.tap.hadoop.ZipInputFormatTest.java

License:Open Source License

/**
 * Checks that {@code ZipInputFormat} splits cover a zip file completely and
 * without overlap.
 *
 * <p>For a randomly stepped number of entries, a zip file is built in memory and
 * written to {@code test.zip} under {@code workDir}. The file is then read back
 * through every split returned by {@code getSplits}; each record is parsed as an
 * integer and tracked in a {@link BitSet}. The test fails if a value is seen
 * twice ("key in multiple partitions.") or if the number of distinct values
 * differs from {@code length} ("some keys in no partition.").
 *
 * <p>NOTE(review): this listing is missing the statements that write the zip
 * entries, copy the bytes to the output stream, mark bits as seen, increment
 * {@code count} and close the reader, as well as most closing braces.
 *
 * @throws Exception on any I/O or assertion failure
 */
public void testSplits() throws Exception {
    JobConf job = new JobConf();
    FileSystem currentFs = FileSystem.get(job);

    Path file = new Path(workDir, "test.zip");

    Reporter reporter = Reporter.NULL;

    // The seed is logged so that a failing random run can be reproduced.
    int seed = new Random().nextInt();
    LOG.info("seed = " + seed);
    Random random = new Random(seed);
    FileInputFormat.setInputPaths(job, file);

    // Try several entry counts, advancing by a random step of 1..MAX_ENTRIES/10.
    for (int entries = 1; entries < MAX_ENTRIES; entries += random.nextInt(MAX_ENTRIES / 10) + 1) {
        ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
        ZipOutputStream zos = new ZipOutputStream(byteArrayOutputStream);
        long length = 0;

        LOG.debug("creating; zip file with entries = " + entries);

        // for each entry in the zip file
        for (int entryCounter = 0; entryCounter < entries; entryCounter++) {
            // construct zip entries splitting MAX_LENGTH between entries
            long entryLength = MAX_LENGTH / entries;
            ZipEntry zipEntry = new ZipEntry("/entry" + entryCounter + ".txt");

            // length runs over this entry's share of the value range; the loop body
            // (presumably writing the value into the entry) is not visible here.
            for (length = entryCounter * entryLength; length < (entryCounter + 1) * entryLength; length++) {



        // Replace any file left over from a previous iteration.
        currentFs.delete(file, true);

        OutputStream outputStream = currentFs.create(file);


        ZipInputFormat format = new ZipInputFormat();
        LongWritable key = new LongWritable();
        Text value = new Text();
        InputSplit[] splits = format.getSplits(job, 100);

        // One bit per expected value; used to detect duplicates and gaps.
        BitSet bits = new BitSet((int) length);
        for (int j = 0; j < splits.length; j++) {
            LOG.debug("split[" + j + "]= " + splits[j]);
            RecordReader<LongWritable, Text> reader = format.getRecordReader(splits[j], job, reporter);

            try {
                int count = 0;

                while (reader.next(key, value)) {
                    int v = Integer.parseInt(value.toString());
                    LOG.debug("read " + v);

                    // Log the details before the assertion below fails the test.
                    if (bits.get(v))
                        LOG.warn("conflict with " + v + " in split " + j + " at position " + reader.getPos());

                    assertFalse("key in multiple partitions.", bits.get(v));

                LOG.debug("splits[" + j + "]=" + splits[j] + " count=" + count);
            } finally {

        assertEquals("some keys in no partition.", length, bits.cardinality());

From source file:cc.slda.AnnotateDocuments.java

License:Apache License

public static Map<Integer, String> importParameter(SequenceFile.Reader sequenceFileReader) throws IOException {
    Map<Integer, String> hashMap = new HashMap<Integer, String>();

    IntWritable intWritable = new IntWritable();
    Text text = new Text();
    while (sequenceFileReader.next(intWritable, text)) {
        hashMap.put(intWritable.get(), text.toString());
    }//from  ww  w  .j a  va 2 s  . com

    return hashMap;

From source file:cc.slda.DisplayTopic.java

License:Apache License

/**
 * Command-line entry point that prints the top-ranked terms of each topic.
 *
 * <p>Parses the options for the beta file, the term index file and the number of
 * terms to display, loads the term index (int id to term string) from a sequence
 * file, then iterates over the beta sequence file and prints, per topic, the
 * terms with the largest values.
 *
 * <p>NOTE(review): in this listing the option-builder statements are cut in half,
 * and several block bodies and closing braces (help handling, the
 * NumberFormatException handler, the finally block) are missing.
 *
 * @param args command-line arguments
 * @return 0 on the path visible in this listing
 * @throws Exception on I/O failure or invalid paths
 */
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(Settings.HELP_OPTION, false, "print the help message");
    // NOTE(review): the next three lines are the tails of option-builder chains whose
    // beginnings are not present in this listing.
            .withDescription("input beta file").create(Settings.INPUT_OPTION));
            .withDescription("term index file").create(ParseCorpus.INDEX));
            .withDescription("display top terms only (default - 10)").create(TOP_DISPLAY_OPTION));

    String betaString = null;
    String indexString = null;
    int topDisplay = TOP_DISPLAY;

    CommandLineParser parser = new GnuParser();
    HelpFormatter formatter = new HelpFormatter();
    try {
        CommandLine line = parser.parse(options, args);

        // NOTE(review): help is printed under ParseCorpus' class name rather than
        // DisplayTopic's - confirm this is intended.
        if (line.hasOption(Settings.HELP_OPTION)) {
            formatter.printHelp(ParseCorpus.class.getName(), options);

        // The beta file and the index file are both mandatory.
        if (line.hasOption(Settings.INPUT_OPTION)) {
            betaString = line.getOptionValue(Settings.INPUT_OPTION);
        } else {
            throw new ParseException("Parsing failed due to " + Settings.INPUT_OPTION + " not initialized...");

        if (line.hasOption(ParseCorpus.INDEX)) {
            indexString = line.getOptionValue(ParseCorpus.INDEX);
        } else {
            throw new ParseException("Parsing failed due to " + ParseCorpus.INDEX + " not initialized...");

        // Optional override of the number of terms shown per topic.
        if (line.hasOption(TOP_DISPLAY_OPTION)) {
            topDisplay = Integer.parseInt(line.getOptionValue(TOP_DISPLAY_OPTION));
    } catch (ParseException pe) {
        formatter.printHelp(ParseCorpus.class.getName(), options);
    } catch (NumberFormatException nfe) {

    JobConf conf = new JobConf(DisplayTopic.class);
    FileSystem fs = FileSystem.get(conf);

    // Both inputs must exist and be regular files.
    Path indexPath = new Path(indexString);
    Preconditions.checkArgument(fs.exists(indexPath) && fs.isFile(indexPath), "Invalid index path...");

    Path betaPath = new Path(betaString);
    Preconditions.checkArgument(fs.exists(betaPath) && fs.isFile(betaPath), "Invalid beta path...");

    SequenceFile.Reader sequenceFileReader = null;
    try {
        // Load the term index: integer id -> term string.
        IntWritable intWritable = new IntWritable();
        Text text = new Text();
        Map<Integer, String> termIndex = new HashMap<Integer, String>();
        sequenceFileReader = new SequenceFile.Reader(fs, indexPath, conf);
        while (sequenceFileReader.next(intWritable, text)) {
            termIndex.put(intWritable.get(), text.toString());

        PairOfIntFloat pairOfIntFloat = new PairOfIntFloat();
        // HMapIFW hmap = new HMapIFW();
        HMapIDW hmap = new HMapIDW();
        TreeMap<Double, Integer> treeMap = new TreeMap<Double, Integer>();
        // NOTE(review): the variable is reassigned here and no close() of the index
        // reader is visible in this listing - confirm the first reader is closed.
        sequenceFileReader = new SequenceFile.Reader(fs, betaPath, conf);
        while (sequenceFileReader.next(pairOfIntFloat, hmap)) {

                    "Top ranked " + topDisplay + " terms for Topic " + pairOfIntFloat.getLeftElement());

            // Values are stored negated so that the TreeMap's ascending key order
            // yields the largest values first.
            Iterator<Integer> itr1 = hmap.keySet().iterator();
            int temp1 = 0;
            while (itr1.hasNext()) {
                temp1 = itr1.next();
                treeMap.put(-hmap.get(temp1), temp1);
                // The body that handles the size overflow is not visible in this listing.
                if (treeMap.size() > topDisplay) {

            // Print each retained term with its value (negated back to the original sign).
            Iterator<Double> itr2 = treeMap.keySet().iterator();
            double temp2 = 0;
            while (itr2.hasNext()) {
                temp2 = itr2.next();
                if (termIndex.containsKey(treeMap.get(temp2))) {
                    System.out.println(termIndex.get(treeMap.get(temp2)) + "\t\t" + -temp2);
                } else {
                    System.out.println("How embarrassing! Term index not found...");
    } finally {

    return 0;

From source file:cereal.impl.ProtobufMessageMapping.java

License:Apache License

public void update(Iterable<Entry<Key, Value>> iter, InstanceOrBuilder<T> obj) {
    checkNotNull(iter, "Iterable was null");
    checkNotNull(obj, "InstanceOrBuilder was null");
    checkArgument(Type.BUILDER == obj.getType(), "Expected argument to be a builder");

    final GeneratedMessage.Builder<?> builder = (GeneratedMessage.Builder<?>) obj.get();
    final List<Entry<Key, Value>> leftoverFields = new LinkedList<>();

    for (Entry<Key, Value> entry : iter) {
        String fieldName = entry.getKey().getColumnQualifier().toString();

        int index = fieldName.indexOf(PERIOD);
        if (0 <= index) {
            leftoverFields.add(entry);//  w w w  .  j  a  v  a 2  s.co  m

        // Find the FieldDescriptor from the Key
        for (FieldDescriptor fieldDesc : builder.getDescriptorForType().getFields()) {
            if (fieldDesc.isRepeated()) {
                int offset = fieldName.lastIndexOf(DOLLAR);
                if (offset < 0) {
                    throw new RuntimeException(
                            "Could not find offset of separator for repeated field count in " + fieldName);
                fieldName = fieldName.substring(0, offset);
            if (fieldName.equals(fieldDesc.getName())) {
                Value value = entry.getValue();
                switch (fieldDesc.getJavaType()) {
                case INT:
                    Integer intVal = Integer.parseInt(value.toString());
                    if (fieldDesc.isRepeated()) {
                        builder.addRepeatedField(fieldDesc, intVal);
                    } else {
                        builder.setField(fieldDesc, intVal);
                case LONG:
                    Long longVal = Long.parseLong(value.toString());
                    if (fieldDesc.isRepeated()) {
                        builder.addRepeatedField(fieldDesc, longVal);
                    } else {
                        builder.setField(fieldDesc, longVal);
                case FLOAT:
                    Float floatVal = Float.parseFloat(value.toString());
                    if (fieldDesc.isRepeated()) {
                        builder.addRepeatedField(fieldDesc, floatVal);
                    } else {
                        builder.setField(fieldDesc, floatVal);
                case DOUBLE:
                    Double doubleVal = Double.parseDouble(value.toString());
                    if (fieldDesc.isRepeated()) {
                        builder.addRepeatedField(fieldDesc, doubleVal);
                    } else {
                        builder.setField(fieldDesc, doubleVal);
                case BOOLEAN:
                    Boolean booleanVal = Boolean.parseBoolean(value.toString());
                    if (fieldDesc.isRepeated()) {
                        builder.addRepeatedField(fieldDesc, booleanVal);
                    } else {
                        builder.setField(fieldDesc, booleanVal);
                case STRING:
                    String strVal = value.toString();
                    if (fieldDesc.isRepeated()) {
                        builder.addRepeatedField(fieldDesc, strVal);
                    } else {
                        builder.setField(fieldDesc, strVal);
                case BYTE_STRING:
                    ByteString byteStrVal = ByteString.copyFrom(entry.getValue().get());
                    if (fieldDesc.isRepeated()) {
                        builder.addRepeatedField(fieldDesc, byteStrVal);
                    } else {
                        builder.setField(fieldDesc, byteStrVal);
                    log.warn("Ignoring unknown serialized type {}", fieldDesc.getJavaType());

    // All primitives in object should be filled out.
    // Make sure nested messages get filled out too.

    if (!leftoverFields.isEmpty()) {
        for (FieldDescriptor fieldDesc : builder.getDescriptorForType().getFields()) {
            if (JavaType.MESSAGE == fieldDesc.getJavaType()) {
                // For each Key-Value pair which have this prefix as the fieldname (column qualifier)
                final String fieldName = fieldDesc.getName();
                final String singularPrefix = fieldName + PERIOD, repeatedPrefix = fieldName + DOLLAR;

                log.debug("Extracting Key-Value pairs for {}", fieldDesc.getName());

                // Use a TreeMap to ensure the correct repetition order is preserved
                Map<Integer, List<Entry<Key, Value>>> fieldsForNestedMessage = new TreeMap<>();

                final Text _holder = new Text();
                Iterator<Entry<Key, Value>> leftoverFieldsIter = leftoverFields.iterator();
                while (leftoverFieldsIter.hasNext()) {
                    final Entry<Key, Value> entry = leftoverFieldsIter.next();
                    final Key key = entry.getKey();

                    String colqual = _holder.toString();
                    if (colqual.startsWith(singularPrefix)) {
                        // Make a copy of the original Key, stripping the prefix off of the qualifier
                        Key copy = new Key(key.getRow(), key.getColumnFamily(),
                                new Text(colqual.substring(singularPrefix.length())), key.getColumnVisibility(),

                        List<Entry<Key, Value>> kvPairs = fieldsForNestedMessage.get(-1);
                        if (null == kvPairs) {
                            kvPairs = new LinkedList<>();
                            fieldsForNestedMessage.put(-1, kvPairs);
                        kvPairs.add(Maps.immutableEntry(copy, entry.getValue()));

                        // Remove it from the list as we should never have to reread this one again
                    } else if (colqual.startsWith(repeatedPrefix)) {
                        // Make a copy of the original Key, stripping the prefix off of the qualifier
                        int index = colqual.indexOf(PERIOD, repeatedPrefix.length());
                        if (0 > index) {
                            throw new RuntimeException("Could not find period after dollar sign: " + colqual);

                        Integer repetition = Integer
                                .parseInt(colqual.substring(repeatedPrefix.length(), index));

                        Key copy = new Key(key.getRow(), key.getColumnFamily(),
                                new Text(colqual.substring(index + 1)), key.getColumnVisibility(),

                        List<Entry<Key, Value>> kvPairs = fieldsForNestedMessage.get(repetition);
                        if (null == kvPairs) {
                            kvPairs = new LinkedList<>();
                            fieldsForNestedMessage.put(repetition, kvPairs);
                        kvPairs.add(Maps.immutableEntry(copy, entry.getValue()));

                        // Remove it from the list as we should never have to reread this one again

                if (!fieldsForNestedMessage.isEmpty()) {
                    // We have keys, pass them down to the nested message
                    String nestedMsgClzName = getClassName(fieldDesc);

                    log.debug("Found {} Key-Value pairs for {}. Reconstituting the message.",
                            fieldsForNestedMessage.size(), nestedMsgClzName);

                    try {
                        // Get the class, builder and InstanceOrBuilder for the nested message
                        Class<GeneratedMessage> msgClz = (Class<GeneratedMessage>) Class
                        Method newBuilderMethod = msgClz.getMethod("newBuilder");

                        for (Entry<Integer, List<Entry<Key, Value>>> pairsPerRepetition : fieldsForNestedMessage
                                .entrySet()) {
                            Message.Builder subBuilder = (Message.Builder) newBuilderMethod.invoke(null);
                            InstanceOrBuilder<GeneratedMessage> subIob = new InstanceOrBuilderImpl<>(subBuilder,

                            // Get the mapping from the registry
                            ProtobufMessageMapping<GeneratedMessage> subMapping = (ProtobufMessageMapping<GeneratedMessage>) registry

                            // Invoke update on the mapping with the subset of Key-Values
                            subMapping.update(pairsPerRepetition.getValue(), subIob);

                            // Set the result on the top-level obj
                            if (fieldDesc.isRepeated()) {
                                builder.addRepeatedField(fieldDesc, subBuilder.build());
                            } else {
                                builder.setField(fieldDesc, subBuilder.build());
                    } catch (Exception e) {
                        throw new RuntimeException(e);
                // No fields for the sub message, therefore it's empty
                log.debug("Found no Key-Value pairs for {}", fieldName);
            // Not a message, so we can ignore it

        if (!leftoverFields.isEmpty()) {
            log.warn("Found {} leftover Key-Value pairs that were not consumed", leftoverFields.size());