Example usage of org.apache.hadoop.io.MapWritable.clear()

Introduction

This page collects example usages of the clear() method of org.apache.hadoop.io.MapWritable.

Prototype

@Override
    public void clear()

Source Link

Usage

From source file:org.apache.giraph.types.ops.MapTypeOps.java

License:Apache License

/**
 * Replaces the contents of {@code to} with the entries of {@code from}.
 *
 * <p>When both arguments are the same instance the call is a no-op:
 * clearing the target first would also empty the source, leaving the map
 * empty instead of unchanged.
 *
 * @param to the map to overwrite
 * @param from the map whose entries are put into {@code to}
 */
@Override
public void set(MapWritable to, MapWritable from) {
    if (to == from) {
        // Self-assignment: the map already holds exactly the requested entries.
        return;
    }
    to.clear();
    to.putAll(from);
}

From source file:org.vilcek.hive.kv.KVHiveRecordReader.java

License:Apache License

/**
 * Reads the next record from {@code iter} into the supplied key and value.
 *
 * <p>The key is set to the current value of {@code cnt}, which is then
 * incremented. The map is cleared and refilled with:
 * <ul>
 *   <li>one entry per major-key label, paired positionally with the
 *       components of the record's major path (components equal to
 *       {@code SERIALIZED_NULL} are skipped);</li>
 *   <li>the record value, stored under {@code "value"} when the minor path
 *       is empty, or under every minor-path component otherwise. Nothing is
 *       stored when the value equals {@code SERIALIZED_NULL}.</li>
 * </ul>
 * For AVRO-formatted values, when a binding is available, the value is
 * replaced by the UTF-8 bytes of its JSON rendering; if that conversion fails
 * the raw bytes are used instead.
 *
 * @param k receives the record counter
 * @param v receives the key components and the value of the record
 * @return {@code true} if a record was read, {@code false} if the iterator
 *         is exhausted
 * @throws IOException if the value bytes cannot be decoded for the null check
 */
@Override
public boolean next(LongWritable k, MapWritable v) throws IOException {
    if (!iter.hasNext()) {
        return false;
    }

    current = iter.next();
    k.set(cnt);
    v.clear();

    List<String> majorKeysList = current.getKey().getMajorPath();
    List<String> minorKeysList = current.getKey().getMinorPath();

    // Only pair labels with path components that actually exist. List.get
    // signals a short path with IndexOutOfBoundsException, which the former
    // catch of ArrayIndexOutOfBoundsException did not reliably cover.
    int pairedCount = Math.min(majorKeyLabelsArray.length, majorKeysList.size());
    for (int i = 0; i < pairedCount; i++) {
        String component = majorKeysList.get(i);
        if (!component.equals(SERIALIZED_NULL)) {
            v.put(new Text(majorKeyLabelsArray[i]), new Text(component));
        }
    }

    byte[] payload = current.getValue().getValue();
    // Decode the bytes before comparing: toString() on a byte[] returns the
    // array's identity string and can never equal the null marker.
    if (!new String(payload, "UTF8").equals(SERIALIZED_NULL)) {
        if (Format.AVRO == current.getValue().getFormat() && binding != null) {
            try {
                JsonRecord object = binding.toObject(current.getValue());
                JsonNode jsonNode = object.getJsonNode();
                payload = jsonNode.toString().getBytes("UTF8");
            } catch (Throwable ignored) {
                // Best effort: if the Avro value cannot be rendered as JSON,
                // fall back to the raw bytes read above.
            }
        }
        if (minorKeysList.isEmpty()) {
            v.put(new Text("value"), new Text(payload));
        } else {
            for (String minorKey : minorKeysList) {
                v.put(new Text(minorKey), new Text(payload));
            }
        }
    }

    cnt++;
    return true;
}

From source file:uk.ac.cam.eng.extraction.hadoop.util.ExtractorDataLoader.java

License:Apache License

/**
 * Loads word aligned parallel text to HDFS.
 * /*from  ww  w.j  a v a2s . c  o  m*/
 * @param sourceTextFile The source text file, gzipped, with one sentence
 * per line, same number of lines as targetTextFile.
 * @param targetTextFile The target text file, gzipped, with one sentence
 * per line, same number of lines as sourceTextFile.
 * @param wordAlignmentFile The word alignment file, gzipped, one alignment
 * per line in Berkeley format ("0-0<SPACE>1-2, etc.", zero-based source
 * index on the left), same number of lines as sourceTextFile.
 * @param provenanceFile The provenance file, gzipped, one set of
 * provenances per line with format "prov1<SPACE>prov2, etc.", same number
 * of lines as sourceTextFile.
 * @param hdfsName
 * @throws IOException
 */
public void loadTrainingData2Hdfs(String sourceTextFile, String targetTextFile, String wordAlignmentFile,
        String provenanceFile, String hdfsName) throws FileNotFoundException, IOException {

    try (BufferedReader src = new BufferedReader(
            new InputStreamReader(new GZIPInputStream(new FileInputStream(sourceTextFile))));
            BufferedReader trg = new BufferedReader(
                    new InputStreamReader(new GZIPInputStream(new FileInputStream(targetTextFile))));
            BufferedReader align = new BufferedReader(
                    new InputStreamReader(new GZIPInputStream(new FileInputStream(wordAlignmentFile))));
            BufferedReader prov = new BufferedReader(
                    new InputStreamReader(new GZIPInputStream(new FileInputStream(provenanceFile))))) {

        String srcLine = null, trgLine = null, alignLine = null, provLine = null;
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path(hdfsName);
        try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, MapWritable.class,
                TextArrayWritable.class)) {
            Text sourceSentenceText = new Text();
            Text targetSentenceText = new Text();
            Text alignmentText = new Text();
            Text[] array = new Text[3];
            array[0] = sourceSentenceText;
            array[1] = targetSentenceText;
            array[2] = alignmentText;
            TextArrayWritable arrayWritable = new TextArrayWritable();
            // metadata: provenance, e.g. genre, collection, training
            // instance
            // id, doc id, etc.
            MapWritable metadata = new MapWritable();

            while ((srcLine = src.readLine()) != null && (trgLine = trg.readLine()) != null
                    && (alignLine = align.readLine()) != null && (provLine = prov.readLine()) != null) {
                metadata.clear();
                String[] provenances = provLine.split("\\s+");
                for (String provenance : provenances) {
                    metadata.put(new Text(provenance), NullWritable.get());
                }
                sourceSentenceText.set(srcLine);
                targetSentenceText.set(trgLine);
                // note, alignLine can be the empty string
                alignmentText.set(alignLine);
                arrayWritable.set(array);
                writer.append(metadata, arrayWritable);
            }
        }
    }
}