List of usage examples for the constructor of org.apache.hadoop.io.Text
public Text(byte[] utf8)
From source file:hadoop_serialize.java
License:Apache License
public static void main(String[] args) throws java.io.IOException { //System.err.println("Writing byte stream to stdout"); DataOutputStream os = new DataOutputStream(System.out); //System.err.println("Writing a sequence of numbers"); //System.err.println("WritableUtils.writeVInt: 42, 4242, 424242, 42424242, -42"); WritableUtils.writeVInt(os, 42);/*w w w . ja v a 2 s . c om*/ WritableUtils.writeVInt(os, 4242); WritableUtils.writeVInt(os, 424242); WritableUtils.writeVInt(os, 42424242); WritableUtils.writeVInt(os, -42); //System.err.println("WritableUtils.writeVLong 42, 424242, 4242424242"); WritableUtils.writeVLong(os, 42L); WritableUtils.writeVLong(os, 424242L); WritableUtils.writeVLong(os, 4242424242L); // //System.err.println("WritableUtils.writeString \"hello world\""); WritableUtils.writeString(os, "hello world"); WritableUtils.writeString(os, "oggi \u00e8 gioved\u00ec"); // This file contains: writeVInt of 42, 4242, 424242, 42424242, -42; writeVLong of 42, 424242, 4242424242; 2 writeString calls //System.err.println("Text.write \"I'm a Text object\""); Text t = new Text("\u00e0 Text object"); t.write(os); os.close(); }
From source file:$package.TextFileSetSink.java
License:Apache License
@Override public void transform(StructuredRecord input, Emitter<KeyValue<NullWritable, Text>> emitter) throws Exception { StringBuilder joinedFields = new StringBuilder(); Iterator<Schema.Field> fieldIter = input.getSchema().getFields().iterator(); if (!fieldIter.hasNext()) { // shouldn't happen return;// w w w .j av a 2 s. c om } Object val = input.get(fieldIter.next().getName()); if (val != null) { joinedFields.append(val); } while (fieldIter.hasNext()) { String fieldName = fieldIter.next().getName(); joinedFields.append(config.fieldSeparator); val = input.get(fieldName); if (val != null) { joinedFields.append(val); } } emitter.emit(new KeyValue<>(NullWritable.get(), new Text(joinedFields.toString()))); }
From source file:accumulo.balancer.GroupBalancer.java
License:Apache License
public GroupBalancer(String tableId) { this.tableId = tableId; this.textTableId = new Text(tableId); }
From source file:accumulo.RowsWithoutColumnIterator.java
License:Apache License
@Override public void init(SortedKeyValueIterator<Key, Value> source, Map<String, String> options, IteratorEnvironment env) throws IOException { super.init(source, options, env); // Get the columns whose rows that contain them we don't want to return if (options.containsKey(COLUMNS_TO_IGNORE)) { String columnsToIgnoreValue = options.get(COLUMNS_TO_IGNORE); Iterable<String> splitColumns = Splitter.on(',').split(columnsToIgnoreValue); for (String splitColumn : splitColumns) { columnsToIgnore.add(new Text(splitColumn)); }// ww w .j a v a 2s . co m } }
From source file:adept.mapreduce.AdeptMapper.java
License:Apache License
public void map(Text key, Text value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { try {/*from w ww .ja va2 s. co m*/ HltContentContainer hltcontentcontainer = (HltContentContainer) xmlserializer .deserializeString(value.toString(), HltContentContainer.class); hltcontentcontainer = doProcess(hltcontentcontainer); String serializedHltContainer = xmlserializer.serializeAsString(hltcontentcontainer); serializedHltContainer = serializedHltContainer.replaceAll("\\r\\n", " "); serializedHltContainer = serializedHltContainer.replaceAll("\\n", " "); output.collect(key, new Text(serializedHltContainer)); } catch (Exception e) { //System.out.println(e.getMessage()); System.out.println("Exception thrown in map function: " + e.getLocalizedMessage()); } }
From source file:adept.mapreduce.PreprocessingJob.java
License:Apache License
public void map(Text key, Text value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { HltContentContainer hltcontentcontainer = new HltContentContainer(); Document doc = DocumentMaker.getInstance().createDefaultDocument(key.toString(), null, null, null, null, value.toString(), hltcontentcontainer); // sentence segmentation.For now, consider all text as a single sentence. List<Sentence> sentences = new ArrayList<Sentence>(); sentences.addAll(//from www . j a v a 2 s . co m OpenNLPSentenceSegmenter.getInstance().getSentences(doc.getValue(), doc.getDefaultTokenStream())); hltcontentcontainer.setSentences(sentences); XMLSerializer xmlserializer = new XMLSerializer(SerializationType.XML); String serializedHltContainer = xmlserializer.serializeAsString(hltcontentcontainer); serializedHltContainer = serializedHltContainer.replaceAll("\r\n", " "); serializedHltContainer = serializedHltContainer.replaceAll("\n", " "); output.collect(key, new Text(serializedHltContainer)); }
From source file:AllLab_Skeleton.Lab1.WordCount_Reducer.java
/**
 * Writes a single record with the key {@code "Count : "} and the current value of
 * {@code count}.
 *
 * @param context the context the record is written to
 * @throws IOException          if the write fails
 * @throws InterruptedException if the write is interrupted
 */
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
    final Text label = new Text("Count : ");
    final IntWritable total = new IntWritable(count);
    context.write(label, total);
}
From source file:alluxio.client.hadoop.AccumulatingReducer.java
License:Apache License
/** * This method accumulates values based on their type. * * @param key the type of values//w w w . ja va2s .com * @param values the values to accumulates * @param output collect the result of accumulating * @param reporter to report progress and update status information */ public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { String field = key.toString(); reporter.setStatus("starting " + field + " ::host = " + mHostname); // concatenate strings if (field.startsWith(VALUE_TYPE_STRING)) { StringBuilder sSum = new StringBuilder(); while (values.hasNext()) { sSum.append(values.next().toString()).append(";"); } output.collect(key, new Text(sSum.toString())); reporter.setStatus("finished " + field + " ::host = " + mHostname); return; } // sum long values if (field.startsWith(VALUE_TYPE_FLOAT)) { float fSum = 0; while (values.hasNext()) { fSum += Float.parseFloat(values.next().toString()); } output.collect(key, new Text(String.valueOf(fSum))); reporter.setStatus("finished " + field + " ::host = " + mHostname); return; } // sum long values if (field.startsWith(VALUE_TYPE_LONG)) { long lSum = 0; while (values.hasNext()) { lSum += Long.parseLong(values.next().toString()); } output.collect(key, new Text(String.valueOf(lSum))); } reporter.setStatus("finished " + field + " ::host = " + mHostname); }
From source file:alluxio.client.hadoop.DFSIOIntegrationTest.java
License:Apache License
@SuppressWarnings("deprecation") private void createControlFile(org.apache.hadoop.fs.FileSystem fs, long nrBytes, // in bytes int nrFiles) throws IOException { LOG.info("creating control file: " + nrBytes + " bytes, " + nrFiles + " files"); Path controlDir = getControlDir(mConfig); if (!fs.exists(controlDir)) { fs.delete(controlDir, true);// w ww. jav a 2s.co m for (int i = 0; i < nrFiles; i++) { String name = getFileName(i); Path controlFile = new Path(controlDir, "in_file_" + name); SequenceFile.Writer writer = null; try { writer = SequenceFile.createWriter(fs, mConfig, controlFile, Text.class, LongWritable.class, CompressionType.NONE); writer.append(new Text(name), new LongWritable(nrBytes)); } catch (Exception e) { throw new IOException(e.getLocalizedMessage()); } finally { if (writer != null) { writer.close(); } writer = null; } } } LOG.info("created control files for: " + nrFiles + " files"); }
From source file:alluxio.hadoop.fs.AccumulatingReducer.java
License:Apache License
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { String field = key.toString(); reporter.setStatus("starting " + field + " ::host = " + mHostname); // concatenate strings if (field.startsWith(VALUE_TYPE_STRING)) { StringBuffer sSum = new StringBuffer(); while (values.hasNext()) { sSum.append(values.next().toString()).append(";"); }/*from w w w . ja va 2 s. c om*/ output.collect(key, new Text(sSum.toString())); reporter.setStatus("finished " + field + " ::host = " + mHostname); return; } // sum long values if (field.startsWith(VALUE_TYPE_FLOAT)) { float fSum = 0; while (values.hasNext()) { fSum += Float.parseFloat(values.next().toString()); } output.collect(key, new Text(String.valueOf(fSum))); reporter.setStatus("finished " + field + " ::host = " + mHostname); return; } // sum long values if (field.startsWith(VALUE_TYPE_LONG)) { long lSum = 0; while (values.hasNext()) { lSum += Long.parseLong(values.next().toString()); } output.collect(key, new Text(String.valueOf(lSum))); } reporter.setStatus("finished " + field + " ::host = " + mHostname); }