Example usage for org.apache.hadoop.io Text Text

List of usage examples for the org.apache.hadoop.io.Text(byte[]) constructor

Introduction

On this page you can find example usages of the org.apache.hadoop.io.Text(byte[]) constructor.

Prototype

public Text(byte[] utf8) 

Source Link

Document

Construct from a byte array.

Usage

From source file:azkaban.security.HadoopSecurityManager_H_2_0.java

License:Apache License

private Text getMRTokenRenewerInternal(JobConf jobConf) throws IOException {
    // Adapted from Oozie: resolve the token renewer principal for the RM
    // (or the JT on Hadoop 1.x, which lacks token renew/cancel support but
    // whose principal is resolved the same way).
    final String servicePrincipal = jobConf.get(RM_PRINCIPAL, jobConf.get(JT_PRINCIPAL));
    if (servicePrincipal == null) {
        // Security is disabled; fall back to the default renewer.
        return DEFAULT_RENEWER;
    }

    // Locate the service address: YARN RM first, then the two JT keys.
    String target = jobConf.get(HADOOP_YARN_RM, jobConf.get(HADOOP_JOB_TRACKER_2));
    if (target == null) {
        target = jobConf.get(HADOOP_JOB_TRACKER);
    }

    // Substitute the service host into the principal pattern.
    final String host = NetUtils.createSocketAddr(target).getHostName();
    return new Text(SecurityUtil.getServerPrincipal(servicePrincipal, host));
}

From source file:azkaban.viewer.hdfs.JsonSequenceFileViewer.java

License:Apache License

public Set<Capability> getCapabilities(AzkabanSequenceFileReader.Reader reader) {
    // A sequence file is JSON-viewable only when both schema entries are
    // present in its metadata.
    final Text keySchema = reader.getMetadata().get(new Text("key.schema"));
    final Text valueSchema = reader.getMetadata().get(new Text("value.schema"));
    if (keySchema == null || valueSchema == null) {
        return EnumSet.noneOf(Capability.class);
    }
    return EnumSet.of(Capability.READ);
}

From source file:azkaban.viewer.hdfs.JsonSequenceFileViewer.java

License:Apache License

public void displaySequenceFile(AzkabanSequenceFileReader.Reader reader, PrintWriter output, int startLine,
        int endLine) throws IOException {
    // Renders records [startLine, endLine] (1-indexed) of a JSON-schema'd
    // sequence file, one "key\t=>\tvalue" line per record.
    if (logger.isDebugEnabled()) {
        logger.debug("display json file");
    }

    BytesWritable keyWritable = new BytesWritable();
    BytesWritable valueWritable = new BytesWritable();
    Text keySchema = reader.getMetadata().get(new Text("key.schema"));
    Text valueSchema = reader.getMetadata().get(new Text("value.schema"));

    // Metadata lookups can return null (see getCapabilities); previously this
    // would NPE on keySchema.toString() for files without the schema entries.
    if (keySchema == null || valueSchema == null) {
        output.write("File is missing key.schema/value.schema metadata.\n");
        output.flush();
        return;
    }

    JsonTypeSerializer keySerializer = new JsonTypeSerializer(keySchema.toString());
    JsonTypeSerializer valueSerializer = new JsonTypeSerializer(valueSchema.toString());

    // Skip records before the requested start line.
    for (int i = 1; i < startLine; i++) {
        reader.next(keyWritable, valueWritable);
    }

    // Output the requested range, stopping early at end of file.
    for (int i = startLine; i <= endLine; i++) {
        boolean readSomething = reader.next(keyWritable, valueWritable);
        if (!readSomething) {
            break;
        }
        // copyBytes() returns exactly getLength() bytes; getBytes() returns
        // the whole backing buffer, which may carry stale trailing bytes from
        // a previous (longer) record into the serializer.
        output.write(safeToString(keySerializer.toObject(keyWritable.copyBytes())));
        output.write("\t=>\t");
        output.write(safeToString(valueSerializer.toObject(valueWritable.copyBytes())));
        output.write("\n");
        output.flush();
    }
}

From source file:azkaban.webapp.servlet.hdfsviewer.JsonSequenceFileViewer.java

License:Apache License

public Set<Capability> getCapabilities(AzkabanSequenceFileReader.Reader reader) {
    // JSON viewing requires both schema entries in the file metadata; any
    // exception while reading metadata likely means this is not a JSON file.
    Text keySchema = null;
    Text valueSchema = null;
    try {
        keySchema = reader.getMetadata().get(new Text("key.schema"));
        valueSchema = reader.getMetadata().get(new Text("value.schema"));
    } catch (Exception e) {
        logger.error("can't get schema. may not be json file");
    }

    return (keySchema != null && valueSchema != null)
            ? EnumSet.of(Capability.READ)
            : EnumSet.noneOf(Capability.class);
}

From source file:babel.prep.corpus.CorpusGenMapper.java

License:Apache License

@Override
public void map(Text url, Page page, OutputCollector<Text, Page> output, Reporter reporter) throws IOException {
    // Re-key each page by its detected language, or the NO_LANG sentinel
    // when no language was detected.
    Language lang = page.getLanguage();
    String langKey = (lang == null) ? NO_LANG : lang.toString();
    output.collect(new Text(langKey), page);
}

From source file:babel.prep.datedcorpus.DatedCorpusGenMapper.java

License:Apache License

@Override
public void map(Text url, Page page, OutputCollector<Text, PageVersion> output, Reporter reporter)
        throws IOException {
    // Emits each dated, non-empty page version keyed by
    // "<language><DATE_LANG_SEP><modificationTime>" and updates the corpus
    // statistics counters.
    Language lang = page.getLanguage();
    if (lang == null) {
        return; // only collect pages with a detected language
    }
    String langStr = lang.toString();

    for (PageVersion ver : page.pageVersions()) {
        // For Testing: modTime = ver.getFetchTime();
        Long modTime = ver.getModificationTime();
        String content = ver.getContent();

        // Only collect versions that carry both a modification time and content.
        // (The original wrapped the key in a redundant `new String(...)` and
        // re-read getContent() several times; both removed.)
        if (modTime != null && modTime != 0 && content != null && content.length() > 0) {
            output.collect(new Text(langStr + DATE_LANG_SEP + modTime), ver);

            DatedCorpusGenerator.Stats.incLangPageVerCount(langStr);
            DatedCorpusGenerator.Stats.incLangWordCount(langStr, content.split("\\s").length);
        }
    }
}

From source file:babel.prep.datedcorpus.DatedCorpusGenReducer.java

License:Apache License

public void reduce(Text key, Iterator<PageVersion> values, OutputCollector<Text, Text> output,
        Reporter reporter) throws IOException {
    // Concatenates the distinct (trimmed) version contents for this key,
    // separated by blank lines, and emits the result as a single Text value.
    StringBuilder strBld = new StringBuilder();
    HashSet<String> seenVers = new HashSet<String>();

    while (values.hasNext()) {
        String content = values.next().getContent().trim();

        // Set.add returns false for duplicates — replaces the original
        // contains()+add() double lookup.
        if (seenVers.add(content)) {
            // Chained appends instead of concatenating inside append().
            strBld.append(content).append("\n\n");
        }
    }

    output.collect(key, new Text(strBld.toString()));
}

From source file:batch.BatchScan2Html.java

License:Apache License

public static BatchScanner getConfiguredBatchScanner() throws TableNotFoundException {
    // Scan the whole table: a single unbounded (-inf, +inf) range.
    Collection<Range> ranges = new ArrayList<Range>(Arrays.asList(new Range((Key) null, (Key) null)));
    BatchScanner bscan = connector.createBatchScanner(opts.tableName, opts.auths, bsOpts.scanThreads);
    bscan.setTimeout(bsOpts.scanTimeout, TimeUnit.MILLISECONDS);
    // TODO add JCommander options for ranges and columns
    bscan.setRanges(ranges);
    bscan.fetchColumnFamily(new Text("TEXT"));
    // Fixed: the original created a pointless empty anonymous subclass of
    // IteratorSetting (`new IteratorSetting(...) {}`); a plain instance
    // configures the scan iterator identically.
    bscan.addScanIterator(new IteratorSetting(15, "GlobalIndexUidCombiner",
            "org.apache.accumulo.examples.wikisearch.iterator.GlobalIndexUidCombiner"));
    return bscan;
}

From source file:be.uantwerpen.adrem.bigfim.AprioriPhaseMapper.java

License:Apache License

private void recReport(Context context, StringBuilder builder, ItemSetTrie trie)
        throws IOException, InterruptedException {
    // Depth-first walk of the itemset trie. Leaves with non-zero support are
    // written as (prefix, "id support"); internal nodes extend the prefix and
    // recurse. `builder` holds the space-separated item prefix for this depth.
    int length = builder.length();
    for (Entry<Integer, ItemSetTrie> entry : trie.children.entrySet()) {
        ItemSetTrie recTrie = entry.getValue();
        if (recTrie.children.isEmpty()) {
            int support = ((SupportCountItemsetTrie) recTrie).support;
            if (support != 0) {
                // Drop the trailing separator space from the prefix.
                Text key = new Text(builder.substring(0, Math.max(0, builder.length() - 1)));
                Text value = new Text(recTrie.id + " " + support);
                context.write(key, value);
            }
        } else {
            // Chained appends instead of concatenating inside append().
            builder.append(recTrie.id).append(' ');
            recReport(context, builder, recTrie);
        }
        // Restore the prefix before visiting the next sibling (invariant:
        // builder is unchanged across each loop iteration).
        builder.setLength(length);
    }
}

From source file:be.uantwerpen.adrem.bigfim.AprioriPhaseMapperTest.java

License:Apache License

@Test
public void phase_1_With_Input() throws Exception {
    // Exercises the full mapper lifecycle: feed every input transaction
    // through map(), then check that cleanup() writes the expected
    // "<item id> <support>" records to the context.
    AprioriPhaseMapper.Context ctx = createMock(Mapper.Context.class);

    // Record the expected writes on the mock; order matters, since the
    // expectations are registered before replay().
    ctx.write(new Text(""), new Text("1 5"));
    ctx.write(new Text(""), new Text("2 3"));
    ctx.write(new Text(""), new Text("3 6"));
    ctx.write(new Text(""), new Text("4 5"));
    ctx.write(new Text(""), new Text("5 4"));

    // Switch the mock from recording to verification mode.
    EasyMock.replay(ctx);

    // Phase-1 mapper (itemsets of length 1) starting from an empty count trie.
    AprioriPhaseMapper mapper = createMapper(1, create_Count_Trie_Empty());

    for (int i = 0; i < data.length; i++) {
        mapper.map(new LongWritable(i), new Text(data[i]), ctx);
    }

    mapper.cleanup(ctx);

    // Fails if any expected write is missing or an unexpected write occurred.
    EasyMock.verify(ctx);
}