Example usage for org.apache.hadoop.io Text toString

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text#toString().

Prototype

@Override
public String toString() 

Document

Converts the Text back to a String, decoding its underlying UTF-8 bytes.
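
A minimal, self-contained sketch of the round trip between String and Text (standard Hadoop API only; the class name is illustrative):

import org.apache.hadoop.io.Text;

public class TextToStringExample {
    public static void main(String[] args) {
        Text text = new Text("hello hadoop"); // String -> Text (encoded as UTF-8)
        String s = text.toString();           // Text -> String (decodes the UTF-8 bytes)
        System.out.println(s);                // prints: hello hadoop
    }
}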

Usage

From source file:TestString.java

License:Apache License

@Test
public void testTextSubstring() throws Exception {
    Text text = new Text("string");
    Text text1 = new Text();
    Text text2 = new Text();

    long start = System.nanoTime();
    for (int i = 0; i < 100000000; i++) {
        text1.set(text.getBytes(), 0, 2);
        text2.set(text.getBytes(), 3, text.getLength() - 3);
    }
    long end = System.nanoTime();
    System.out.println("TestTextSubString");
    System.out.println("text1: " + text1.toString());
    System.out.println("text2: " + text2.toString());
    System.out.println("Elapsed Time: " + (end - start) / 1000000000f + " seconds.");
}
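
Note for the loop above: Text#getBytes() returns the backing array, which may be longer than the logical content, so offsets must stay within getLength(), as the test does. When a String is the goal, Text.decode can convert a byte range directly; a minimal sketch (assumes the offsets fall on character boundaries, which holds for this ASCII input):

import java.nio.charset.CharacterCodingException;
import org.apache.hadoop.io.Text;

public class TextDecodeExample {
    public static void main(String[] args) throws CharacterCodingException {
        Text text = new Text("string");
        // Text.decode validates the range as UTF-8 and returns a String
        String head = Text.decode(text.getBytes(), 0, 2);                    // "st"
        String tail = Text.decode(text.getBytes(), 3, text.getLength() - 3); // "ing"
        System.out.println(head + " / " + tail);
    }
}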

From source file:PageRankIterationReducerTest.java

public void test1() {
    Text key = new Text("testKey");
    PageRankFollower pageRankFollower = new PageRankFollower(new Text(key.toString()), 1.0, 0,
            new ArrayList<Text>());
    List<PageRankFollower> values = new ArrayList<PageRankFollower>();
    values.add(pageRankFollower);

    PageRankFollower result = (new PageRankIterationReducer()).calculatePageRank(key, values);
    System.out.println("result: " + result);

}

From source file:AnagramReducer.java

License:Apache License

public void reduce(Text anagramKey, Iterator<Text> anagramValues, OutputCollector<Text, Text> results,
        Reporter reporter) throws IOException {
    String output = "";
    while (anagramValues.hasNext()) {
        Text anagam = anagramValues.next();
        output = output + anagam.toString() + "~";
    }/*from  ww  w. j  a v  a 2s  .com*/
    StringTokenizer outputTokenizer = new StringTokenizer(output, "~");
    /*      if(outputTokenizer.countTokens()>=2)
          {*/
    output = output.replace("~", ",");
    outputKey.set(anagramKey.toString() + "(" + outputTokenizer.countTokens() + "): ");
    outputValue.set(output);
    results.collect(outputKey, outputValue);
    //}
}
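
For reference, the concatenate-then-replace pattern above can be written in one pass with a StringBuilder; a hedged drop-in sketch for the same reduce body (same assumed fields; unlike the original it does not emit a trailing comma):

    StringBuilder joined = new StringBuilder();
    int count = 0;
    while (anagramValues.hasNext()) {
        if (count++ > 0) {
            joined.append(',');
        }
        joined.append(anagramValues.next().toString());
    }
    outputKey.set(anagramKey.toString() + "(" + count + "): ");
    outputValue.set(joined.toString());
    results.collect(outputKey, outputValue);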

From source file:SampleUdf.java

License:Apache License

public Text evaluate(final Text s, Text sleepTime) throws InterruptedException {

    Long time = 180 * 1000L; // default sleep: 180 seconds

    if (sleepTime != null) {
        time = Long.parseLong(sleepTime.toString()) * 1000L;
    }

    System.out.println("Sleep Time : " + time);

    Thread.sleep(time);

    if (s == null) {
        return null;
    }

    return new Text(s.toString().toLowerCase());
}

From source file:$.ExampleBulkImporter.java

License:Apache License

/** {@inheritDoc} */
@Override
public void produce(LongWritable filePos, Text value, KijiTableContext context) throws IOException {
    final String[] split = value.toString().split(":");
    final String rowKey = split[0];
    final int integerValue = Integer.parseInt(split[1]);

    final EntityId eid = context.getEntityId(rowKey);
    context.put(eid, "primitives", "int", integerValue);
}
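
The split above assumes every line is well-formed "rowKey:intValue"; Integer.parseInt will throw on anything else and fail the task. A hedged defensive variant of the parsing step (whether to skip or fail on malformed lines is an assumption about the desired behavior):

    final String[] split = value.toString().split(":", 2);
    if (split.length != 2) {
        return; // skip malformed lines
    }
    final String rowKey = split[0];
    final int integerValue;
    try {
        integerValue = Integer.parseInt(split[1]);
    } catch (NumberFormatException nfe) {
        return; // skip lines whose value is not an integer
    }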

From source file:$package.SparkPageRankProgram.java

License:Apache License

@Override
public void run(SparkContext sc) {
    LOG.info("Processing backlinkURLs data");
    JavaPairRDD<LongWritable, Text> backlinkURLs = sc.readFromStream("backlinkURLStream", Text.class);
    int iterationCount = getIterationCount(sc);

    LOG.info("Grouping data by key");
    // Grouping backlinks by unique URL in key
    JavaPairRDD<String, Iterable<String>> links = backlinkURLs.values()
            .mapToPair(new PairFunction<Text, String, String>() {
                @Override
                public Tuple2<String, String> call(Text s) {
                    String[] parts = SPACES.split(s.toString());
                    return new Tuple2<>(parts[0], parts[1]);
                }
            }).distinct().groupByKey().cache();

    // Initialize default rank for each key URL
    JavaPairRDD<String, Double> ranks = links.mapValues(new Function<Iterable<String>, Double>() {
        @Override
        public Double call(Iterable<String> rs) {
            return 1.0;
        }
    });
    // Calculates and updates URL ranks continuously using PageRank algorithm.
    for (int current = 0; current < iterationCount; current++) {
        LOG.debug("Processing data with PageRank algorithm. Iteration {}/{}", current + 1, (iterationCount));
        // Calculates URL contributions to the rank of other URLs.
        JavaPairRDD<String, Double> contribs = links.join(ranks).values()
                .flatMapToPair(new PairFlatMapFunction<Tuple2<Iterable<String>, Double>, String, Double>() {
                    @Override
                    public Iterable<Tuple2<String, Double>> call(Tuple2<Iterable<String>, Double> s) {
                        LOG.debug("Processing {} with rank {}", s._1(), s._2());
                        int urlCount = Iterables.size(s._1());
                        List<Tuple2<String, Double>> results = new ArrayList<>();
                        for (String n : s._1()) {
                            results.add(new Tuple2<>(n, s._2() / urlCount));
                        }
                        return results;
                    }
                });
        // Re-calculates URL ranks based on backlink contributions.
        ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() {
            @Override
            public Double call(Double sum) {
                return 0.15 + sum * 0.85;
            }
        });
    }

    LOG.info("Writing ranks data");

    final ServiceDiscoverer discoveryServiceContext = sc.getServiceDiscoverer();
    final Metrics sparkMetrics = sc.getMetrics();
    JavaPairRDD<byte[], Integer> ranksRaw = ranks
            .mapToPair(new PairFunction<Tuple2<String, Double>, byte[], Integer>() {
                @Override
                public Tuple2<byte[], Integer> call(Tuple2<String, Double> tuple) throws Exception {
                    LOG.debug("URL {} has rank {}", Arrays.toString(tuple._1().getBytes(Charsets.UTF_8)),
                            tuple._2());
                    URL serviceURL = discoveryServiceContext
                            .getServiceURL(SparkPageRankApp.GOOGLE_TYPE_PR_SERVICE_NAME);
                    if (serviceURL == null) {
                        throw new RuntimeException(
                                "Failed to discover service: " + SparkPageRankApp.GOOGLE_TYPE_PR_SERVICE_NAME);
                    }
                    try {
                        URLConnection connection = new URL(serviceURL,
                                String.format("transform/%s", tuple._2().toString())).openConnection();
                        try (BufferedReader reader = new BufferedReader(
                                new InputStreamReader(connection.getInputStream(), Charsets.UTF_8))) {
                            int pr = Integer.parseInt(reader.readLine());
                            if (pr == POPULAR_PAGE_THRESHOLD) {
                                sparkMetrics.count(POPULAR_PAGES, 1);
                            } else if (pr <= UNPOPULAR_PAGE_THRESHOLD) {
                                sparkMetrics.count(UNPOPULAR_PAGES, 1);
                            } else {
                                sparkMetrics.count(REGULAR_PAGES, 1);
                            }
                            return new Tuple2<>(tuple._1().getBytes(Charsets.UTF_8), pr);
                        }
                    } catch (Exception e) {
                        LOG.warn("Failed to read the Stream for service {}",
                                SparkPageRankApp.GOOGLE_TYPE_PR_SERVICE_NAME, e);
                        throw Throwables.propagate(e);
                    }
                }
            });

    // Store calculated results in output Dataset.
    // All calculated results are stored in one row.
    // Each result, the calculated URL rank based on backlink contributions, is an entry of the row.
    // The value of the entry is the URL rank.
    sc.writeToDataset(ranksRaw, "ranks", byte[].class, Integer.class);

    LOG.info("PageRanks successfuly computed and written to \"ranks\" dataset");
}

From source file:adept.mapreduce.AdeptMapper.java

License:Apache License

public void map(Text key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

    try { // xmlserializer is assumed to be an XMLSerializer field on the mapper class
        HltContentContainer hltcontentcontainer = (HltContentContainer) xmlserializer
                .deserializeString(value.toString(), HltContentContainer.class);
        hltcontentcontainer = doProcess(hltcontentcontainer);

        String serializedHltContainer = xmlserializer.serializeAsString(hltcontentcontainer);
        serializedHltContainer = serializedHltContainer.replaceAll("\\r\\n", " ");
        serializedHltContainer = serializedHltContainer.replaceAll("\\n", " ");
        output.collect(key, new Text(serializedHltContainer));
    } catch (Exception e) {
        System.out.println("Exception thrown in map function: " + e.getLocalizedMessage());
    }

}

From source file:adept.mapreduce.PreprocessingJob.java

License:Apache License

public void map(Text key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    HltContentContainer hltcontentcontainer = new HltContentContainer();
    Document doc = DocumentMaker.getInstance().createDefaultDocument(key.toString(), null, null, null, null,
            value.toString(), hltcontentcontainer);

    // Sentence segmentation. For now, consider all text as a single sentence.
    List<Sentence> sentences = new ArrayList<Sentence>();
    sentences.addAll(
            OpenNLPSentenceSegmenter.getInstance().getSentences(doc.getValue(), doc.getDefaultTokenStream()));
    hltcontentcontainer.setSentences(sentences);

    XMLSerializer xmlserializer = new XMLSerializer(SerializationType.XML);
    String serializedHltContainer = xmlserializer.serializeAsString(hltcontentcontainer);
    serializedHltContainer = serializedHltContainer.replaceAll("\r\n", " ");
    serializedHltContainer = serializedHltContainer.replaceAll("\n", " ");
    output.collect(key, new Text(serializedHltContainer));
}

From source file:AllLab_Skeleton.Lab1.WordCount_Mapper.java

public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
    StringTokenizer itr = new StringTokenizer(value.toString());
    //System.out.println("Content on line " + key.toString() + " :- " + value.toString() );
    while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());/* ww  w . j a v a2  s . com*/
        context.write(word, one);
    }
}
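
The mapper above relies on two fields the snippet does not show; a minimal sketch of the declarations it presumably uses (standard WordCount convention, names taken from the snippet):

    private static final IntWritable one = new IntWritable(1);
    private final Text word = new Text();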

From source file:AllLab_Skeleton.Lab2.Lab2Mapper.java

@Override
public void map(Object key, Text values, Context context) {
    if (values.toString().length() > 0) {
        try {
            String[] value = values.toString().split("\t");
            CompositeKeyWritable cw = new CompositeKeyWritable(value[6], value[3]);
            context.write(cw, NullWritable.get());
        } catch (IOException | InterruptedException ex) {
            Logger.getLogger(Lab2Mapper.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
}