List of usage examples for org.apache.hadoop.io.MapWritable.clear()
@Override public void clear()
From source file:org.apache.giraph.types.ops.MapTypeOps.java
License:Apache License
/**
 * Replaces the contents of {@code to} with the entries of {@code from}.
 *
 * <p>When both arguments are the same instance the map is left untouched;
 * clearing it first would otherwise discard the very entries that are about
 * to be copied.
 *
 * @param to   map that is cleared and then refilled
 * @param from map whose entries are copied into {@code to}
 */
@Override
public void set(MapWritable to, MapWritable from) {
    if (to == from) {
        // Self-assignment: clear() followed by putAll() would empty the map.
        return;
    }
    to.clear();
    to.putAll(from);
}
From source file:org.vilcek.hive.kv.KVHiveRecordReader.java
License:Apache License
/**
 * Advances to the next record of {@code iter} and fills the supplied key and value.
 *
 * <p>The key receives the current value of the record counter {@code cnt}, which is
 * incremented afterwards. The value map is cleared and then refilled with:
 * <ul>
 *   <li>one entry per major-path component, keyed by the label at the same index in
 *       {@code majorKeyLabelsArray}; components equal to {@code SERIALIZED_NULL} are skipped;</li>
 *   <li>the record payload, stored under every minor-path component, or under the key
 *       {@code "value"} when the minor path is empty. A payload whose UTF-8 text equals
 *       {@code SERIALIZED_NULL} is skipped. Avro payloads are rendered as JSON text when a
 *       binding is available; if that conversion fails the raw bytes are stored instead.</li>
 * </ul>
 *
 * @param k key object to populate with the record counter
 * @param v value map to clear and populate with the record's fields
 * @return {@code true} if a record was read, {@code false} when the iterator is exhausted
 * @throws IOException declared by the overridden method signature
 */
@Override
public boolean next(LongWritable k, MapWritable v) throws IOException {
    if (!iter.hasNext()) {
        return false;
    }

    current = iter.next();
    k.set(cnt);
    v.clear();

    List<String> majorKeysList = current.getKey().getMajorPath();
    List<String> minorKeysList = current.getKey().getMinorPath();

    // Only as many components as there are labels AND path entries can be mapped;
    // bounding the loop avoids relying on an out-of-bounds exception for a short path.
    int labelledComponents = Math.min(majorKeyLabelsArray.length, majorKeysList.size());
    for (int i = 0; i < labelledComponents; i++) {
        String component = majorKeysList.get(i);
        if (!component.equals(SERIALIZED_NULL)) {
            v.put(new Text(majorKeyLabelsArray[i]), new Text(component));
        }
    }

    byte[] payload = current.getValue().getValue();
    // Compare the decoded text: toString() on a byte[] yields "[B@<hash>" and never matches.
    String payloadText = new String(payload, java.nio.charset.StandardCharsets.UTF_8);
    if (!payloadText.equals(SERIALIZED_NULL)) {
        if (Format.AVRO == current.getValue().getFormat() && binding != null) {
            try {
                JsonRecord object = binding.toObject(current.getValue());
                JsonNode jsonNode = object.getJsonNode();
                payload = jsonNode.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8);
            } catch (Exception ignored) {
                // Best effort: if the Avro payload cannot be rendered as JSON,
                // fall back to storing the raw bytes.
            }
        }
        if (minorKeysList.isEmpty()) {
            v.put(new Text("value"), new Text(payload));
        } else {
            for (String minorKey : minorKeysList) {
                v.put(new Text(minorKey), new Text(payload));
            }
        }
    }

    cnt++;
    return true;
}
From source file:uk.ac.cam.eng.extraction.hadoop.util.ExtractorDataLoader.java
License:Apache License
/** * Loads word aligned parallel text to HDFS. * /*from ww w.j a v a2s . c o m*/ * @param sourceTextFile The source text file, gzipped, with one sentence * per line, same number of lines as targetTextFile. * @param targetTextFile The target text file, gzipped, with one sentence * per line, same number of lines as sourceTextFile. * @param wordAlignmentFile The word alignment file, gzipped, one alignment * per line in Berkeley format ("0-0<SPACE>1-2, etc.", zero-based source * index on the left), same number of lines as sourceTextFile. * @param provenanceFile The provenance file, gzipped, one set of * provenances per line with format "prov1<SPACE>prov2, etc.", same number * of lines as sourceTextFile. * @param hdfsName * @throws IOException */ public void loadTrainingData2Hdfs(String sourceTextFile, String targetTextFile, String wordAlignmentFile, String provenanceFile, String hdfsName) throws FileNotFoundException, IOException { try (BufferedReader src = new BufferedReader( new InputStreamReader(new GZIPInputStream(new FileInputStream(sourceTextFile)))); BufferedReader trg = new BufferedReader( new InputStreamReader(new GZIPInputStream(new FileInputStream(targetTextFile)))); BufferedReader align = new BufferedReader( new InputStreamReader(new GZIPInputStream(new FileInputStream(wordAlignmentFile)))); BufferedReader prov = new BufferedReader( new InputStreamReader(new GZIPInputStream(new FileInputStream(provenanceFile))))) { String srcLine = null, trgLine = null, alignLine = null, provLine = null; Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); Path path = new Path(hdfsName); try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, MapWritable.class, TextArrayWritable.class)) { Text sourceSentenceText = new Text(); Text targetSentenceText = new Text(); Text alignmentText = new Text(); Text[] array = new Text[3]; array[0] = sourceSentenceText; array[1] = targetSentenceText; array[2] = alignmentText; TextArrayWritable 
arrayWritable = new TextArrayWritable(); // metadata: provenance, e.g. genre, collection, training // instance // id, doc id, etc. MapWritable metadata = new MapWritable(); while ((srcLine = src.readLine()) != null && (trgLine = trg.readLine()) != null && (alignLine = align.readLine()) != null && (provLine = prov.readLine()) != null) { metadata.clear(); String[] provenances = provLine.split("\\s+"); for (String provenance : provenances) { metadata.put(new Text(provenance), NullWritable.get()); } sourceSentenceText.set(srcLine); targetSentenceText.set(trgLine); // note, alignLine can be the empty string alignmentText.set(alignLine); arrayWritable.set(array); writer.append(metadata, arrayWritable); } } } }