List of usage examples for org.apache.hadoop.io MapWritable put
@Override
public Writable put(Writable key, Writable value)
From source file:org.wonderbee.elasticsearch.hive.ElasticSearchSerDe.java
License:Apache License
/**
 * Recursively converts an arbitrary object into the matching Hadoop {@link Writable}.
 *
 * <ul>
 *   <li>String, Long, Integer, Double, Float and Boolean map to Text, LongWritable,
 *       IntWritable, DoubleWritable, FloatWritable and BooleanWritable respectively.</li>
 *   <li>A Map becomes a MapWritable: each key is rendered with {@code toString()} into a
 *       Text, each value is converted recursively.</li>
 *   <li>A non-empty List becomes an ArrayWritable whose declared value class is the class
 *       of the first converted element; every element is converted recursively.</li>
 *   <li>Everything else (including {@code null}, an empty List and unsupported types)
 *       becomes {@link NullWritable}.</li>
 * </ul>
 *
 * @param thing the object to convert; may be {@code null}
 * @return the writable representation, never {@code null}
 */
private Writable toWritable(Object thing) {
    if (thing instanceof String) {
        return new Text((String) thing);
    }
    if (thing instanceof Long) {
        return new LongWritable((Long) thing);
    }
    if (thing instanceof Integer) {
        return new IntWritable((Integer) thing);
    }
    if (thing instanceof Double) {
        return new DoubleWritable((Double) thing);
    }
    if (thing instanceof Float) {
        return new FloatWritable((Float) thing);
    }
    if (thing instanceof Boolean) {
        return new BooleanWritable((Boolean) thing);
    }
    if (thing instanceof Map) {
        // Wildcard view: keys are only ever used through toString(), so there is no need
        // to (unsafely) assume they are Strings.
        MapWritable result = new MapWritable();
        for (Map.Entry<?, ?> entry : ((Map<?, ?>) thing).entrySet()) {
            result.put(new Text(entry.getKey().toString()), toWritable(entry.getValue()));
        }
        return result;
    }
    if (thing instanceof List) {
        List<?> list = (List<?>) thing;
        if (!list.isEmpty()) {
            Writable[] converted = new Writable[list.size()];
            for (int i = 0; i < converted.length; i++) {
                converted[i] = toWritable(list.get(i));
            }
            // Reuse the already-converted first element instead of converting it twice.
            return new ArrayWritable(converted[0].getClass(), converted);
        }
        // An empty list falls through to NullWritable, as there is no element to take
        // the value class from.
    }
    return NullWritable.get();
}
From source file:smile.wide.AttributeValueHistogramMapper.java
License:Apache License
@SuppressWarnings("unchecked") @Override//from w ww . ja v a 2 s. c o m public void map(LongWritable offsetkey, Text value, Context context) { if (initializing_) { conf_ = context.getConfiguration(); fileReaderClass_ = conf_.get("xdata.bayesnets.datasetreader.class"); fileReaderFilter_ = conf_.get("xdata.bayesnets.datasetreader.filter"); columnNames_ = conf_.get("xdata.bayesnets.datasetreader.variablenames").split(","); assertEquals(columnNames_.length, fileReaderFilter_.split(",").length); try { Object r = Class.forName(fileReaderClass_).newInstance(); reader_ = (DataSetReader<Integer, String>) r; } catch (InstantiationException e) { s_logger.error("Instantiation exception for DataSetReader " + fileReaderClass_); e.printStackTrace(); System.exit(1); } catch (IllegalAccessException e) { s_logger.error("IllegalAccess exception for DataSetReader " + fileReaderClass_); e.printStackTrace(); System.exit(1); } catch (ClassNotFoundException e) { s_logger.error("ClassDefNotFoundException for DataSetReader " + fileReaderClass_); e.printStackTrace(); System.exit(1); } catch (ClassCastException e) { s_logger.error("ClassCastException for DataSetReader " + fileReaderClass_); e.printStackTrace(); System.exit(1); } reader_.setFilter(fileReaderFilter_); reader_.setInstanceIDColumn(1); // doesn't matter, won't use initializing_ = false; } // we're initialized Instance<Integer, String> inst = reader_.parseLine(value.toString()); String[] vals = inst.getValue(); try { for (int i = 0; i < vals.length; ++i) { MapWritable mw = new MapWritable(); mw.put(new Text(vals[i]), new IntWritable(1)); context.write(new Text(columnNames_[i]), mw); } } catch (IOException e) { s_logger.error("I/O exception writing the map output"); e.printStackTrace(); } catch (InterruptedException e) { s_logger.error("Interrupted writing the map output"); e.printStackTrace(); } catch (NullPointerException e) { s_logger.error("Null pointer, probably unexpected data"); s_logger.error("Instance ID = " + 
inst.getID()); for (int i = 0; i < inst.getValue().length; ++i) { s_logger.error("Attribute_" + i + " = " + inst.getValue()[i]); } ; } }
From source file:smile.wide.AttributeValueHistogramReducer.java
License:Apache License
@Override public void reduce(Text key, Iterable<MapWritable> values, Context context) throws IOException, InterruptedException { // Let's have a map and internally collect them int maps = 0; int vals = 0; HashMap<Text, Integer> myMap = new HashMap<Text, Integer>(); for (MapWritable m : values) { maps++;//w ww. j ava 2 s. co m for (Writable valName : m.keySet()) { Text val = (Text) valName; Integer count = ((IntWritable) (m.get(valName))).get(); if (myMap.containsKey(val)) { myMap.put(val, myMap.get(val) + count); } else { myMap.put(val, count); vals++; } } } s_logger.debug("Reducer/combiner got " + maps + " maps, with a total of " + vals + " distinct values for attribute `" + key + "`"); // now output // key is key // value is myMap as MapWritable<Text, IntWritable> MapWritable output = new MapWritable(); for (Text t : myMap.keySet()) { s_logger.debug("Outputting count " + myMap.get(t) + " for attribute " + t); output.put(t, new IntWritable(myMap.get(t))); } context.write(key, output); }
From source file:uk.ac.cam.eng.extraction.hadoop.util.ExtractorDataLoader.java
License:Apache License
/** * Loads word aligned parallel text to HDFS. * /* w w w . j a v a 2s. c o m*/ * @param sourceTextFile The source text file, gzipped, with one sentence * per line, same number of lines as targetTextFile. * @param targetTextFile The target text file, gzipped, with one sentence * per line, same number of lines as sourceTextFile. * @param wordAlignmentFile The word alignment file, gzipped, one alignment * per line in Berkeley format ("0-0<SPACE>1-2, etc.", zero-based source * index on the left), same number of lines as sourceTextFile. * @param provenanceFile The provenance file, gzipped, one set of * provenances per line with format "prov1<SPACE>prov2, etc.", same number * of lines as sourceTextFile. * @param hdfsName * @throws IOException */ public void loadTrainingData2Hdfs(String sourceTextFile, String targetTextFile, String wordAlignmentFile, String provenanceFile, String hdfsName) throws FileNotFoundException, IOException { try (BufferedReader src = new BufferedReader( new InputStreamReader(new GZIPInputStream(new FileInputStream(sourceTextFile)))); BufferedReader trg = new BufferedReader( new InputStreamReader(new GZIPInputStream(new FileInputStream(targetTextFile)))); BufferedReader align = new BufferedReader( new InputStreamReader(new GZIPInputStream(new FileInputStream(wordAlignmentFile)))); BufferedReader prov = new BufferedReader( new InputStreamReader(new GZIPInputStream(new FileInputStream(provenanceFile))))) { String srcLine = null, trgLine = null, alignLine = null, provLine = null; Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); Path path = new Path(hdfsName); try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, MapWritable.class, TextArrayWritable.class)) { Text sourceSentenceText = new Text(); Text targetSentenceText = new Text(); Text alignmentText = new Text(); Text[] array = new Text[3]; array[0] = sourceSentenceText; array[1] = targetSentenceText; array[2] = alignmentText; TextArrayWritable 
arrayWritable = new TextArrayWritable(); // metadata: provenance, e.g. genre, collection, training // instance // id, doc id, etc. MapWritable metadata = new MapWritable(); while ((srcLine = src.readLine()) != null && (trgLine = trg.readLine()) != null && (alignLine = align.readLine()) != null && (provLine = prov.readLine()) != null) { metadata.clear(); String[] provenances = provLine.split("\\s+"); for (String provenance : provenances) { metadata.put(new Text(provenance), NullWritable.get()); } sourceSentenceText.set(srcLine); targetSentenceText.set(trgLine); // note, alignLine can be the empty string alignmentText.set(alignLine); arrayWritable.set(array); writer.append(metadata, arrayWritable); } } } }