Example usage for org.apache.hadoop.io MapWritable get

Introduction

This page collects example usages of the get method of org.apache.hadoop.io.MapWritable, drawn from open-source projects.

Prototype

@Override
public Writable get(Object key)
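
Before the project examples, here is a minimal, hypothetical sketch (not taken from any project below) showing the basics: get takes the same Writable key used with put — typically a Text — matches it by equals()/hashCode(), and returns null when the key is absent.

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class MapWritableGetExample {
    public static void main(String[] args) {
        MapWritable map = new MapWritable();
        map.put(new Text("count"), new IntWritable(42));

        // A freshly allocated Text with the same bytes matches the stored key.
        Writable value = map.get(new Text("count"));
        System.out.println(((IntWritable) value).get()); // prints 42

        // An absent key yields null, so guard before casting or calling toString().
        Writable missing = map.get(new Text("absent"));
        System.out.println(missing == null); // prints true
    }
}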

Usage

From source file: org.elasticsearch.hadoop.integration.crunch.writable.CrunchReadWriteTest.java

License: Apache License

private void testReadFromES() {

    MRPipeline pipeline = new MRPipeline(CrunchReadWriteTest.class);

    Iterable<Artist> artists = pipeline
            .read(new ESSource.Builder<MapWritable>("radio/artists/_search?q=me*", MapWritable.class)
                    .setPort(9500).build())
            .parallelDo(new MapFn<MapWritable, Artist>() {
                @Override
                public Artist map(MapWritable input) {
                    String name = input.get(new Text("name")).toString();
                    String url = input.get(new Text("url")).toString();
                    String picture = input.get(new Text("picture")).toString();
                    return new Artist(name, url, picture);
                }
            }, records(Artist.class)).materialize();

    assertEquals(15, Lists.newArrayList(artists).size());
}
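
Note that, as with any java.util.Map, get returns null when a key is absent, so the chained toString() calls above will throw a NullPointerException for a hit that lacks one of the fields.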

From source file: org.freeeed.mr.FreeEedReducer.java

License: Apache License

protected void processMap(MapWritable value) throws IOException, InterruptedException {
    columnMetadata.reinit();
    ++outputFileCount;
    DocumentMetadata allMetadata = getAllMetadata(value);
    Metadata standardMetadata = getStandardMetadata(allMetadata, outputFileCount);
    columnMetadata.addMetadata(standardMetadata);
    columnMetadata.addMetadata(allMetadata);
    // documents other than the first one in this loop are either duplicates or attachments
    if (first) {
        masterOutputFileCount = outputFileCount;
    } else {
        if (allMetadata.hasParent()) {
            columnMetadata.addMetadataValue(DocumentMetadataKeys.ATTACHMENT_PARENT,
                    UPIFormat.format(masterOutputFileCount));
        } else {
            columnMetadata.addMetadataValue(DocumentMetadataKeys.MASTER_DUPLICATE,
                    UPIFormat.format(masterOutputFileCount));
        }
    }

    //String uniqueId = allMetadata.getUniqueId();

    String originalFileName = new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName();
    // add the text to the text folder
    String documentText = allMetadata.get(DocumentMetadataKeys.DOCUMENT_TEXT);
    String textEntryName = ParameterProcessing.TEXT + "/" + UPIFormat.format(outputFileCount) + "_"
            + originalFileName + ".txt";
    if (documentText != null) { // guard the text itself; textEntryName can never be null
        zipFileWriter.addTextFile(textEntryName, documentText);
    }
    columnMetadata.addMetadataValue(DocumentMetadataKeys.LINK_TEXT, textEntryName);
    // add the native file to the native folder
    String nativeEntryName = ParameterProcessing.NATIVE + "/" + UPIFormat.format(outputFileCount) + "_"
            + originalFileName;
    BytesWritable bytesWritable = (BytesWritable) value.get(new Text(ParameterProcessing.NATIVE));
    if (bytesWritable != null) { // some large exception files are not passed
        zipFileWriter.addBinaryFile(nativeEntryName, bytesWritable.getBytes(), bytesWritable.getLength());
        logger.trace("Processing file: {}", nativeEntryName);
    }
    columnMetadata.addMetadataValue(DocumentMetadataKeys.LINK_NATIVE, nativeEntryName);
    // add the pdf made from native to the PDF folder
    String pdfNativeEntryName = ParameterProcessing.PDF_FOLDER + "/" + UPIFormat.format(outputFileCount) + "_"
            + new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName() + ".pdf";
    BytesWritable pdfBytesWritable = (BytesWritable) value.get(new Text(ParameterProcessing.NATIVE_AS_PDF));
    if (pdfBytesWritable != null) {
        zipFileWriter.addBinaryFile(pdfNativeEntryName, pdfBytesWritable.getBytes(),
                pdfBytesWritable.getLength());
        logger.trace("Processing file: {}", pdfNativeEntryName);
    }

    processHtmlContent(value, allMetadata, UPIFormat.format(outputFileCount));

    // add exception to the exception folder
    String exception = allMetadata.get(DocumentMetadataKeys.PROCESSING_EXCEPTION);
    if (exception != null) {
        String exceptionEntryName = "exception/" + UPIFormat.format(outputFileCount) + "_"
                + new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName();
        if (bytesWritable != null) {
            zipFileWriter.addBinaryFile(exceptionEntryName, bytesWritable.getBytes(),
                    bytesWritable.getLength());
        }
        columnMetadata.addMetadataValue(DocumentMetadataKeys.LINK_EXCEPTION, exceptionEntryName);
    }
    // write this all to the reduce map
    //context.write(new Text(outputKey), new Text(columnMetadata.delimiterSeparatedValues()));
    // drop the key altogether, because it messes up the format - but put it in the value
    // TODO use NullWritable
    if (OsUtil.isNix()) {
        context.write(null, new Text(columnMetadata.delimiterSeparatedValues()));
    }
    // prepare for the next file with the same key, if there is any
    first = false;
}
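
Writing a null key, as done in the OsUtil.isNix() branch above, relies on the output format tolerating it; the TODO in the code notes NullWritable as the cleaner alternative.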

From source file: org.freeeed.mr.FreeEedReducer.java

License: Apache License

private void processHtmlContent(MapWritable value, Metadata allMetadata, String uniqueId) throws IOException {
    BytesWritable htmlBytesWritable = (BytesWritable) value
            .get(new Text(ParameterProcessing.NATIVE_AS_HTML_NAME));
    if (htmlBytesWritable != null) {
        String htmlNativeEntryName = ParameterProcessing.HTML_FOLDER + "/" + uniqueId + "_"
                + new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName() + ".html";
        zipFileWriter.addBinaryFile(htmlNativeEntryName, htmlBytesWritable.getBytes(),
                htmlBytesWritable.getLength());
        logger.trace("Processing file: {}", htmlNativeEntryName);
    }

    // get the list with other files part of the html output
    Text htmlFiles = (Text) value.get(new Text(ParameterProcessing.NATIVE_AS_HTML));
    if (htmlFiles != null) {
        String fileNames = htmlFiles.toString();
        String[] fileNamesArr = fileNames.split(",");
        for (String fileName : fileNamesArr) {
            String entry = ParameterProcessing.HTML_FOLDER + "/" + fileName;

            BytesWritable imageBytesWritable = (BytesWritable) value
                    .get(new Text(ParameterProcessing.NATIVE_AS_HTML + "_" + fileName));
            if (imageBytesWritable != null) {
                zipFileWriter.addBinaryFile(entry, imageBytesWritable.getBytes(),
                        imageBytesWritable.getLength());
                logger.trace("Processing file: {}", entry);
            }
        }
    }
}

From source file: org.freeeed.mr.FreeEedReducer.java

License: Apache License

private DocumentMetadata getAllMetadata(MapWritable map) {
    DocumentMetadata metadata = new DocumentMetadata();
    Set<Writable> set = map.keySet();
    Iterator<Writable> iter = set.iterator();
    while (iter.hasNext()) {
        String name = iter.next().toString();
        if (!ParameterProcessing.NATIVE.equals(name) && !ParameterProcessing.NATIVE_AS_PDF.equals(name)
                && !name.startsWith(ParameterProcessing.NATIVE_AS_HTML)) { // all metadata but native - which is bytes!
            Text value = (Text) map.get(new Text(name));
            metadata.set(name, value.toString());
        }
    }
    return metadata;
}
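
A design note on the example above: the loop converts every key to a String and then queries the map again with a freshly allocated Text. A minimal sketch of an equivalent rewrite (assuming the same ParameterProcessing and DocumentMetadata types as the example) that iterates entrySet() instead, saving one lookup and one allocation per key:

private DocumentMetadata getAllMetadata(MapWritable map) {
    DocumentMetadata metadata = new DocumentMetadata();
    for (java.util.Map.Entry<Writable, Writable> entry : map.entrySet()) {
        String name = entry.getKey().toString();
        // skip the binary entries; everything else is Text metadata
        if (!ParameterProcessing.NATIVE.equals(name) && !ParameterProcessing.NATIVE_AS_PDF.equals(name)
                && !name.startsWith(ParameterProcessing.NATIVE_AS_HTML)) {
            metadata.set(name, entry.getValue().toString());
        }
    }
    return metadata;
}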

From source file: org.freeeed.mr.MetadataWriter.java

License: Apache License

public void processMap(MapWritable value) throws IOException {
    columnMetadata.reinit();

    DocumentMetadata allMetadata = getAllMetadata(value);

    Metadata standardMetadata = getStandardMetadata(allMetadata);
    columnMetadata.addMetadata(standardMetadata);
    columnMetadata.addMetadata(allMetadata);

    // TODO deal with attachments
    if (allMetadata.hasParent()) {
        columnMetadata.addMetadataValue(DocumentMetadataKeys.ATTACHMENT_PARENT,
                ParameterProcessing.UPIFormat.format(masterOutputFileCount));
    }

    //String uniqueId = allMetadata.getUniqueId();
    String originalFileName = new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName();
    // add the text to the text folder
    String documentText = allMetadata.get(DocumentMetadataKeys.DOCUMENT_TEXT);
    String textEntryName = ParameterProcessing.TEXT + "/" + allMetadata.getUniqueId() + "_" + originalFileName
            + ".txt";
    if (documentText != null) { // guard the text itself; textEntryName can never be null
        zipFileWriter.addTextFile(textEntryName, documentText);
    }
    columnMetadata.addMetadataValue(DocumentMetadata.TEXT_LINK(), textEntryName);
    // add the native file to the native folder
    String nativeEntryName = ParameterProcessing.NATIVE + "/" + allMetadata.getUniqueId() + "_"
            + originalFileName;
    BytesWritable bytesWritable = (BytesWritable) value.get(new Text(ParameterProcessing.NATIVE));
    if (bytesWritable != null) { // some large exception files are not passed
        zipFileWriter.addBinaryFile(nativeEntryName, bytesWritable.getBytes(), bytesWritable.getLength());
        LOGGER.trace("Processing file: {}", nativeEntryName);
    }
    columnMetadata.addMetadataValue(DocumentMetadataKeys.LINK_NATIVE, nativeEntryName);
    // add the pdf made from native to the PDF folder
    String pdfNativeEntryName = ParameterProcessing.PDF_FOLDER + "/" + allMetadata.getUniqueId() + "_"
            + new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName() + ".pdf";
    BytesWritable pdfBytesWritable = (BytesWritable) value.get(new Text(ParameterProcessing.NATIVE_AS_PDF));
    if (pdfBytesWritable != null) {
        zipFileWriter.addBinaryFile(pdfNativeEntryName, pdfBytesWritable.getBytes(),
                pdfBytesWritable.getLength());
        LOGGER.trace("Processing file: {}", pdfNativeEntryName);
    }

    processHtmlContent(value, allMetadata, allMetadata.getUniqueId());

    // add exception to the exception folder
    String exception = allMetadata.get(DocumentMetadataKeys.PROCESSING_EXCEPTION);
    if (exception != null) {
        String exceptionEntryName = "exception/" + allMetadata.getUniqueId() + "_"
                + new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName();
        if (bytesWritable != null) {
            zipFileWriter.addBinaryFile(exceptionEntryName, bytesWritable.getBytes(),
                    bytesWritable.getLength());
        }
        columnMetadata.addMetadataValue(DocumentMetadataKeys.LINK_EXCEPTION, exceptionEntryName);
    }
    appendMetadata(columnMetadata.delimiterSeparatedValues());
    // prepare for the next file with the same key, if there is any
    first = false;
}

From source file: org.freeeed.mr.MetadataWriter.java

License: Apache License

private void processHtmlContent(MapWritable value, Metadata allMetadata, String uniqueId) throws IOException {
    BytesWritable htmlBytesWritable = (BytesWritable) value
            .get(new Text(ParameterProcessing.NATIVE_AS_HTML_NAME));
    if (htmlBytesWritable != null) {
        String htmlNativeEntryName = ParameterProcessing.HTML_FOLDER + "/" + uniqueId + "_"
                + new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName() + ".html";
        zipFileWriter.addBinaryFile(htmlNativeEntryName, htmlBytesWritable.getBytes(),
                htmlBytesWritable.getLength());
        LOGGER.trace("Processing file: {}", htmlNativeEntryName);

        // get the list with other files part of the html output
        Text htmlFiles = (Text) value.get(new Text(ParameterProcessing.NATIVE_AS_HTML));
        if (htmlFiles != null) {
            String fileNames = htmlFiles.toString();
            String[] fileNamesArr = fileNames.split(",");
            for (String fileName : fileNamesArr) {
                String entry = ParameterProcessing.HTML_FOLDER + "/" + fileName;

                BytesWritable imageBytesWritable = (BytesWritable) value
                        .get(new Text(ParameterProcessing.NATIVE_AS_HTML + "/" + fileName));
                if (imageBytesWritable != null) {
                    zipFileWriter.addBinaryFile(entry, imageBytesWritable.getBytes(),
                            imageBytesWritable.getLength());
                    LOGGER.trace("Processing file: {}", entry);
                }
            }
        }
    }
}
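
Note the lookup key here is built with a "/" separator (NATIVE_AS_HTML + "/" + fileName), whereas the FreeEedReducer variant above uses "_". Since MapWritable.get matches on exact Text equality, the key string must be byte-for-byte identical to what the map phase stored.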

From source file: org.huahinframework.core.util.ObjectUtilTest.java

License: Apache License

@Test
public void testPrimitive2HadoopIOMap() {
    Map<String, Integer> o = new HashMap<String, Integer>();
    MapWritable m = new MapWritable();

    o.put("0", 0);
    m.put(new Text("0"), new IntWritable(0));

    o.put("1", 1);
    m.put(new Text("1"), new IntWritable(1));

    HadoopObject ho = ObjectUtil.primitive2Hadoop(o);
    assertEquals(ObjectUtil.MAP, ho.getType());
    assertEquals(MapWritable.class, ho.getObject().getClass());

    MapWritable mw = (MapWritable) ho.getObject();
    if (mw.size() != m.size()) {
        fail("map not equals size: " + mw.size() + " != " + m.size());
    }

    for (Entry<Writable, Writable> entry : m.entrySet()) {
        if (mw.get(entry.getKey()) == null) {
            fail("map key not found");
        }

        assertEquals(mw.get(entry.getKey()), entry.getValue());
    }
}

From source file: org.rad.qa.reduce.QuoteAnalyzerReducer.java

License: Open Source License

@Override
public void reduce(Text key, Iterable<MapWritable> values, Context context)
        throws IOException, InterruptedException {
    Iterator<MapWritable> writables = values.iterator();
    MapWritable mw;
    double open = 0.0, close = 0.0, low = 0.0, high = 0.0, volume = 0.0;
    int size = 0;

    while (writables.hasNext()) {
        size++;
        mw = writables.next();
        open += ((DoubleWritable) mw.get(QuoteAnalyzerConstants.OPEN)).get();
        high += ((DoubleWritable) mw.get(QuoteAnalyzerConstants.HIGH)).get();
        low += ((DoubleWritable) mw.get(QuoteAnalyzerConstants.LOW)).get();
        close += ((DoubleWritable) mw.get(QuoteAnalyzerConstants.CLOSE)).get();
        volume += ((DoubleWritable) mw.get(QuoteAnalyzerConstants.VOLUME)).get();
    }

    MapWritable result = new QuoteAnalyzerOutput(key.toString());
    result.put(QuoteAnalyzerConstants.OPEN, new DoubleWritable(open / size));
    result.put(QuoteAnalyzerConstants.HIGH, new DoubleWritable(high / size));
    result.put(QuoteAnalyzerConstants.LOW, new DoubleWritable(low / size));
    result.put(QuoteAnalyzerConstants.CLOSE, new DoubleWritable(close / size));
    result.put(QuoteAnalyzerConstants.VOLUME, new DoubleWritable(volume / size));

    context.write(key, result);
}
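
The casts in the loop assume each incoming MapWritable carries all five keys; if get returned null for a missing key, the subsequent .get() call would throw a NullPointerException. The division by size is safe because Hadoop invokes reduce only for keys that have at least one value.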

From source file: org.vilcek.hive.kv.KVHiveSerDe.java

License: Apache License

@Override
public Object deserialize(Writable wrtbl) throws SerDeException {
    MapWritable input = (MapWritable) wrtbl;
    Text t = new Text();
    row.clear();
    for (int i = 0; i < fieldCount; i++) {
        t.set(majorMinorKeys.get(i));
        Writable value = input.get(t);
        if (value != null && !NullWritable.get().equals(value)) {
            row.add(value.toString());
        } else {
            row.add(null);
        }
    }
    return row;
}
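
Reusing a single Text instance and resetting it with t.set(...) for each column avoids allocating a new key object per lookup; this works because MapWritable.get compares keys by equals()/hashCode() rather than by identity.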

From source file: org.vroyer.hive.solr.SolrSerDe.java

License: Open Source License

/**
 * Returns a row as a List<Object> from the provided MapWritable.
 */
@Override
public Object deserialize(Writable wr) throws SerDeException {
    if (!(wr instanceof MapWritable)) {
        throw new SerDeException("Expected MapWritable, received " + wr.getClass().getName());
    }

    final MapWritable input = (MapWritable) wr;
    final Text key = new Text();
    row.clear();

    for (int i = 0; i < colNames.size(); i++) {
        key.set(colNames.get(i));
        final Writable value = input.get(key);
        if (value != null && !NullWritable.get().equals(value)) {
            //parse as double to avoid NumberFormatException...
            //TODO:need more test,especially for type 'bigint'
            String hiveType = colTypes.get(i).getTypeName();
            log.debug(" value=" + value + " type=" + value.getClass().getName() + " hiveType=" + hiveType);
            if (HIVE_TYPE_INT.equalsIgnoreCase(hiveType)) {
                row.add(Double.valueOf(value.toString()).intValue());
            } else if (SolrSerDe.HIVE_TYPE_SMALLINT.equalsIgnoreCase(hiveType)) {
                row.add(Double.valueOf(value.toString()).shortValue());
            } else if (SolrSerDe.HIVE_TYPE_TINYINT.equalsIgnoreCase(hiveType)) {
                row.add(Double.valueOf(value.toString()).byteValue());
            } else if (SolrSerDe.HIVE_TYPE_BIGINT.equalsIgnoreCase(hiveType)) {
                row.add(Long.valueOf(value.toString()));
            } else if (SolrSerDe.HIVE_TYPE_BOOLEAN.equalsIgnoreCase(hiveType)) {
                row.add(Boolean.valueOf(value.toString()));
            } else if (SolrSerDe.HIVE_TYPE_FLOAT.equalsIgnoreCase(hiveType)) {
                row.add(Double.valueOf(value.toString()).floatValue());
            } else if (SolrSerDe.HIVE_TYPE_DOUBLE.equalsIgnoreCase(hiveType)) {
                row.add(Double.valueOf(value.toString()));
            } else if (SolrSerDe.HIVE_TYPE_TIMESTAMP.equalsIgnoreCase(hiveType)) {
                row.add(((org.apache.hadoop.hive.serde2.io.TimestampWritable) value).getTimestamp());
            } else {
                row.add(value.toString());
            }
        } else {
            row.add(null);
        }
    }
    return row;
}
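
Routing each numeric column through Double.valueOf before narrowing, as the in-code comment suggests, tolerates values such as "1.0" arriving for integral Hive types, at the cost of silently truncating any fractional part.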