List of usage examples for org.apache.hadoop.io.Text
public Text(byte[] utf8)
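Most of the excerpts below build a Text from a String rather than from raw bytes, so here is a minimal sketch of the byte[] overload itself. The class name and sample string are illustrative only and do not come from any of the listed projects.

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.Text;

public class TextFromBytesExample {
  public static void main(String[] args) {
    // Text(byte[] utf8) expects bytes that are already UTF-8 encoded.
    byte[] utf8 = "héllo hadoop".getBytes(StandardCharsets.UTF_8);
    Text text = new Text(utf8);

    // toString() decodes the stored bytes back to a Java String.
    System.out.println(text.toString());  // héllo hadoop
    // getLength() is the byte length of the UTF-8 payload, not the character count.
    System.out.println(text.getLength()); // 13 for this string ("é" takes two bytes)
  }
}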
From source file:azkaban.security.HadoopSecurityManager_H_2_0.java
License:Apache License
private Text getMRTokenRenewerInternal(JobConf jobConf) throws IOException {
    // Taken from Oozie
    //
    // Getting renewer correctly for JT principal also though JT in hadoop
    // 1.x does not have support for renewing/cancelling tokens
    String servicePrincipal = jobConf.get(RM_PRINCIPAL, jobConf.get(JT_PRINCIPAL));
    Text renewer;
    if (servicePrincipal != null) {
      String target = jobConf.get(HADOOP_YARN_RM, jobConf.get(HADOOP_JOB_TRACKER_2));
      if (target == null) {
        target = jobConf.get(HADOOP_JOB_TRACKER);
      }

      String addr = NetUtils.createSocketAddr(target).getHostName();
      renewer = new Text(SecurityUtil.getServerPrincipal(servicePrincipal, addr));
    } else {
      // No security
      renewer = DEFAULT_RENEWER;
    }

    return renewer;
  }
From source file:azkaban.viewer.hdfs.JsonSequenceFileViewer.java
License:Apache License
public Set<Capability> getCapabilities(AzkabanSequenceFileReader.Reader reader) {
    Text keySchema = reader.getMetadata().get(new Text("key.schema"));
    Text valueSchema = reader.getMetadata().get(new Text("value.schema"));

    if (keySchema != null && valueSchema != null) {
      return EnumSet.of(Capability.READ);
    }
    return EnumSet.noneOf(Capability.class);
  }
From source file:azkaban.viewer.hdfs.JsonSequenceFileViewer.java
License:Apache License
public void displaySequenceFile(AzkabanSequenceFileReader.Reader reader, PrintWriter output,
      int startLine, int endLine) throws IOException {
    if (logger.isDebugEnabled()) {
      logger.debug("display json file");
    }

    BytesWritable keyWritable = new BytesWritable();
    BytesWritable valueWritable = new BytesWritable();
    Text keySchema = reader.getMetadata().get(new Text("key.schema"));
    Text valueSchema = reader.getMetadata().get(new Text("value.schema"));

    JsonTypeSerializer keySerializer = new JsonTypeSerializer(keySchema.toString());
    JsonTypeSerializer valueSerializer = new JsonTypeSerializer(valueSchema.toString());

    // skip lines before the start line
    for (int i = 1; i < startLine; i++) {
      reader.next(keyWritable, valueWritable);
    }

    // now actually output lines
    for (int i = startLine; i <= endLine; i++) {
      boolean readSomething = reader.next(keyWritable, valueWritable);
      if (!readSomething) {
        break;
      }
      output.write(safeToString(keySerializer.toObject(keyWritable.getBytes())));
      output.write("\t=>\t");
      output.write(safeToString(valueSerializer.toObject(valueWritable.getBytes())));
      output.write("\n");
      output.flush();
    }
  }
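The two viewer snippets above read their schemas from the SequenceFile's metadata block via Text keys. For context, here is a minimal sketch of how such metadata might be attached when a file is written; the output path and schema strings are made up, and it assumes the standard org.apache.hadoop.io.SequenceFile.Writer option API rather than anything Azkaban-specific.

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class WriteSchemaMetadataSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // SequenceFile metadata is a simple Text -> Text map stored in the file header.
    SequenceFile.Metadata metadata = new SequenceFile.Metadata();
    metadata.set(new Text("key.schema"), new Text("\"string\""));   // hypothetical schema
    metadata.set(new Text("value.schema"), new Text("\"string\"")); // hypothetical schema

    try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(new Path("/tmp/example.seq")),
        SequenceFile.Writer.keyClass(BytesWritable.class),
        SequenceFile.Writer.valueClass(BytesWritable.class),
        SequenceFile.Writer.metadata(metadata))) {
      writer.append(new BytesWritable("k".getBytes(StandardCharsets.UTF_8)),
          new BytesWritable("v".getBytes(StandardCharsets.UTF_8)));
    }
  }
}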
From source file:azkaban.webapp.servlet.hdfsviewer.JsonSequenceFileViewer.java
License:Apache License
public Set<Capability> getCapabilities(AzkabanSequenceFileReader.Reader reader) {
    Text keySchema = null;
    Text valueSchema = null;
    try {
      keySchema = reader.getMetadata().get(new Text("key.schema"));
      valueSchema = reader.getMetadata().get(new Text("value.schema"));
    } catch (Exception e) {
      logger.error("can't get schema. may not be json file");
    }

    if (keySchema != null && valueSchema != null) {
      return EnumSet.of(Capability.READ);
    }
    return EnumSet.noneOf(Capability.class);
  }
From source file:babel.prep.corpus.CorpusGenMapper.java
License:Apache License
@Override
  public void map(Text url, Page page, OutputCollector<Text, Page> output, Reporter reporter)
      throws IOException {
    Language lang = page.getLanguage();
    output.collect(new Text((lang == null) ? NO_LANG : lang.toString()), page);
  }
From source file:babel.prep.datedcorpus.DatedCorpusGenMapper.java
License:Apache License
@Override
  public void map(Text url, Page page, OutputCollector<Text, PageVersion> output, Reporter reporter)
      throws IOException {
    // Map to language and date
    Language lang = page.getLanguage();
    String content;

    if (lang != null) // && isBBCEnglish(page))
    {
      Long modTime;

      // Only collect pages with language and date
      for (PageVersion ver : page.pageVersions()) {
        // For Testing: modTime = ver.getFetchTime();
        modTime = ver.getModificationTime();
        content = ver.getContent();

        if (modTime != null && modTime != 0 && content != null && content.length() > 0) {
          output.collect(new Text(lang.toString() + DATE_LANG_SEP + modTime.toString()), ver);

          DatedCorpusGenerator.Stats.incLangPageVerCount(lang.toString());
          DatedCorpusGenerator.Stats.incLangWordCount(lang.toString(), content.split("\\s").length);
        }
      }
    }
  }
From source file:babel.prep.datedcorpus.DatedCorpusGenReducer.java
License:Apache License
public void reduce(Text key, Iterator<PageVersion> values, OutputCollector<Text, Text> output,
      Reporter reporter) throws IOException {
    StringBuilder strBld = new StringBuilder();
    HashSet<String> seenVers = new HashSet<String>();
    String content;

    while (values.hasNext()) {
      content = values.next().getContent().trim();

      if (!seenVers.contains(content)) {
        seenVers.add(content);
        strBld.append(content + "\n\n");
      }
    }

    output.collect(key, new Text(strBld.toString()));
  }
From source file:batch.BatchScan2Html.java
License:Apache License
public static BatchScanner getConfiguredBatchScanner() throws TableNotFoundException {
    Collection<Range> ranges = new ArrayList<Range>(Arrays.asList(new Range((Key) null, (Key) null)));
    //Iterable<Entry<Key,Value>>
    BatchScanner bscan = connector.createBatchScanner(opts.tableName, opts.auths, bsOpts.scanThreads);
    bscan.setTimeout(bsOpts.scanTimeout, TimeUnit.MILLISECONDS);
    //TODO add JCommander options for ranges and columns
    bscan.setRanges(ranges);
    bscan.fetchColumnFamily(new Text("TEXT"));
    bscan.addScanIterator(new IteratorSetting(15, "GlobalIndexUidCombiner",
        "org.apache.accumulo.examples.wikisearch.iterator.GlobalIndexUidCombiner") {
    });
    //bscan.fetchColumn();
    return bscan;
  }
From source file:be.uantwerpen.adrem.bigfim.AprioriPhaseMapper.java
License:Apache License
private void recReport(Context context, StringBuilder builder, ItemSetTrie trie)
      throws IOException, InterruptedException {
    int length = builder.length();
    for (Entry<Integer, ItemSetTrie> entry : trie.children.entrySet()) {
      ItemSetTrie recTrie = entry.getValue();
      if (recTrie.children.isEmpty()) {
        int support = ((SupportCountItemsetTrie) recTrie).support;
        if (support != 0) {
          Text key = new Text(builder.substring(0, Math.max(0, builder.length() - 1)));
          Text value = new Text(recTrie.id + " " + support);
          context.write(key, value);
        }
      } else {
        builder.append(recTrie.id + " ");
        recReport(context, builder, recTrie);
      }
      builder.setLength(length);
    }
  }
From source file:be.uantwerpen.adrem.bigfim.AprioriPhaseMapperTest.java
License:Apache License
@Test
  public void phase_1_With_Input() throws Exception {
    AprioriPhaseMapper.Context ctx = createMock(Mapper.Context.class);
    ctx.write(new Text(""), new Text("1 5"));
    ctx.write(new Text(""), new Text("2 3"));
    ctx.write(new Text(""), new Text("3 6"));
    ctx.write(new Text(""), new Text("4 5"));
    ctx.write(new Text(""), new Text("5 4"));
    EasyMock.replay(ctx);

    AprioriPhaseMapper mapper = createMapper(1, create_Count_Trie_Empty());

    for (int i = 0; i < data.length; i++) {
      mapper.map(new LongWritable(i), new Text(data[i]), ctx);
    }
    mapper.cleanup(ctx);

    EasyMock.verify(ctx);
  }