List of usage examples for org.apache.hadoop.io.MapWritable.get()
@Override
public Writable get(Object key)
From source file:org.elasticsearch.hadoop.integration.crunch.writable.CrunchReadWriteTest.java
License:Apache License
/**
 * Runs the query {@code radio/artists/_search?q=me*} through a Crunch {@link MRPipeline}
 * (ES source on port 9500), converts every returned {@link MapWritable} into an
 * {@link Artist} and asserts that exactly 15 artists are materialized.
 */
private void testReadFromES() {
    MRPipeline pipeline = new MRPipeline(CrunchReadWriteTest.class);

    // Conversion step: the "name", "url" and "picture" entries of each hit become an Artist.
    MapFn<MapWritable, Artist> toArtist = new MapFn<MapWritable, Artist>() {
        @Override
        public Artist map(MapWritable input) {
            return new Artist(
                    input.get(new Text("name")).toString(),
                    input.get(new Text("url")).toString(),
                    input.get(new Text("picture")).toString());
        }
    };

    Iterable<Artist> materialized = pipeline
            .read(new ESSource.Builder<MapWritable>("radio/artists/_search?q=me*", MapWritable.class)
                    .setPort(9500)
                    .build())
            .parallelDo(toArtist, records(Artist.class))
            .materialize();

    assertEquals(15, Lists.newArrayList(materialized).size());
}
From source file:org.freeeed.mr.FreeEedReducer.java
License:Apache License
protected void processMap(MapWritable value) throws IOException, InterruptedException { columnMetadata.reinit();/* ww w . ja va2 s . c o m*/ ++outputFileCount; DocumentMetadata allMetadata = getAllMetadata(value); Metadata standardMetadata = getStandardMetadata(allMetadata, outputFileCount); columnMetadata.addMetadata(standardMetadata); columnMetadata.addMetadata(allMetadata); // documents other than the first one in this loop are either duplicates or attachments if (first) { masterOutputFileCount = outputFileCount; } else { if (allMetadata.hasParent()) { columnMetadata.addMetadataValue(DocumentMetadataKeys.ATTACHMENT_PARENT, UPIFormat.format(masterOutputFileCount)); } else { columnMetadata.addMetadataValue(DocumentMetadataKeys.MASTER_DUPLICATE, UPIFormat.format(masterOutputFileCount)); } } //String uniqueId = allMetadata.getUniqueId(); String originalFileName = new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName(); // add the text to the text folder String documentText = allMetadata.get(DocumentMetadataKeys.DOCUMENT_TEXT); String textEntryName = ParameterProcessing.TEXT + "/" + UPIFormat.format(outputFileCount) + "_" + originalFileName + ".txt"; if (textEntryName != null) { zipFileWriter.addTextFile(textEntryName, documentText); } columnMetadata.addMetadataValue(DocumentMetadataKeys.LINK_TEXT, textEntryName); // add the native file to the native folder String nativeEntryName = ParameterProcessing.NATIVE + "/" + UPIFormat.format(outputFileCount) + "_" + originalFileName; BytesWritable bytesWritable = (BytesWritable) value.get(new Text(ParameterProcessing.NATIVE)); if (bytesWritable != null) { // some large exception files are not passed zipFileWriter.addBinaryFile(nativeEntryName, bytesWritable.getBytes(), bytesWritable.getLength()); logger.trace("Processing file: {}", nativeEntryName); } columnMetadata.addMetadataValue(DocumentMetadataKeys.LINK_NATIVE, nativeEntryName); // add the pdf made from native to the PDF folder String 
pdfNativeEntryName = ParameterProcessing.PDF_FOLDER + "/" + UPIFormat.format(outputFileCount) + "_" + new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName() + ".pdf"; BytesWritable pdfBytesWritable = (BytesWritable) value.get(new Text(ParameterProcessing.NATIVE_AS_PDF)); if (pdfBytesWritable != null) { zipFileWriter.addBinaryFile(pdfNativeEntryName, pdfBytesWritable.getBytes(), pdfBytesWritable.getLength()); logger.trace("Processing file: {}", pdfNativeEntryName); } processHtmlContent(value, allMetadata, UPIFormat.format(outputFileCount)); // add exception to the exception folder String exception = allMetadata.get(DocumentMetadataKeys.PROCESSING_EXCEPTION); if (exception != null) { String exceptionEntryName = "exception/" + UPIFormat.format(outputFileCount) + "_" + new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName(); if (bytesWritable != null) { zipFileWriter.addBinaryFile(exceptionEntryName, bytesWritable.getBytes(), bytesWritable.getLength()); } columnMetadata.addMetadataValue(DocumentMetadataKeys.LINK_EXCEPTION, exceptionEntryName); } // write this all to the reduce map //context.write(new Text(outputKey), new Text(columnMetadata.delimiterSeparatedValues())); // drop the key altogether, because it messes up the format - but put it in the value // TODO use NullWritable if (OsUtil.isNix()) { context.write(null, new Text(columnMetadata.delimiterSeparatedValues())); } // prepare for the next file with the same key, if there is any first = false; }
From source file:org.freeeed.mr.FreeEedReducer.java
License:Apache License
private void processHtmlContent(MapWritable value, Metadata allMetadata, String uniqueId) throws IOException { BytesWritable htmlBytesWritable = (BytesWritable) value .get(new Text(ParameterProcessing.NATIVE_AS_HTML_NAME)); if (htmlBytesWritable != null) { String htmlNativeEntryName = ParameterProcessing.HTML_FOLDER + "/" + uniqueId + "_" + new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName() + ".html"; zipFileWriter.addBinaryFile(htmlNativeEntryName, htmlBytesWritable.getBytes(), htmlBytesWritable.getLength()); logger.trace("Processing file: {}", htmlNativeEntryName); }/*from www . j a va 2 s . c o m*/ // get the list with other files part of the html output Text htmlFiles = (Text) value.get(new Text(ParameterProcessing.NATIVE_AS_HTML)); if (htmlFiles != null) { String fileNames = htmlFiles.toString(); String[] fileNamesArr = fileNames.split(","); for (String fileName : fileNamesArr) { String entry = ParameterProcessing.HTML_FOLDER + "/" + fileName; BytesWritable imageBytesWritable = (BytesWritable) value .get(new Text(ParameterProcessing.NATIVE_AS_HTML + "_" + fileName)); if (imageBytesWritable != null) { zipFileWriter.addBinaryFile(entry, imageBytesWritable.getBytes(), imageBytesWritable.getLength()); logger.trace("Processing file: {}", entry); } } } }
From source file:org.freeeed.mr.FreeEedReducer.java
License:Apache License
private DocumentMetadata getAllMetadata(MapWritable map) { DocumentMetadata metadata = new DocumentMetadata(); Set<Writable> set = map.keySet(); Iterator<Writable> iter = set.iterator(); while (iter.hasNext()) { String name = iter.next().toString(); if (!ParameterProcessing.NATIVE.equals(name) && !ParameterProcessing.NATIVE_AS_PDF.equals(name) && !name.startsWith(ParameterProcessing.NATIVE_AS_HTML)) { // all metadata but native - which is bytes! Text value = (Text) map.get(new Text(name)); metadata.set(name, value.toString()); }/* w w w .j ava 2 s.c om*/ } return metadata; }
From source file:org.freeeed.mr.MetadataWriter.java
License:Apache License
public void processMap(MapWritable value) throws IOException { columnMetadata.reinit();// w w w. ja v a2s . c o m DocumentMetadata allMetadata = getAllMetadata(value); Metadata standardMetadata = getStandardMetadata(allMetadata); columnMetadata.addMetadata(standardMetadata); columnMetadata.addMetadata(allMetadata); // TODO deal with attachments if (allMetadata.hasParent()) { columnMetadata.addMetadataValue(DocumentMetadataKeys.ATTACHMENT_PARENT, ParameterProcessing.UPIFormat.format(masterOutputFileCount)); } //String uniqueId = allMetadata.getUniqueId(); String originalFileName = new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName(); // add the text to the text folder String documentText = allMetadata.get(DocumentMetadataKeys.DOCUMENT_TEXT); String textEntryName = ParameterProcessing.TEXT + "/" + allMetadata.getUniqueId() + "_" + originalFileName + ".txt"; if (textEntryName != null) { zipFileWriter.addTextFile(textEntryName, documentText); } columnMetadata.addMetadataValue(DocumentMetadata.TEXT_LINK(), textEntryName); // add the native file to the native folder String nativeEntryName = ParameterProcessing.NATIVE + "/" + allMetadata.getUniqueId() + "_" + originalFileName; BytesWritable bytesWritable = (BytesWritable) value.get(new Text(ParameterProcessing.NATIVE)); if (bytesWritable != null) { // some large exception files are not passed zipFileWriter.addBinaryFile(nativeEntryName, bytesWritable.getBytes(), bytesWritable.getLength()); LOGGER.trace("Processing file: {}", nativeEntryName); } columnMetadata.addMetadataValue(DocumentMetadataKeys.LINK_NATIVE, nativeEntryName); // add the pdf made from native to the PDF folder String pdfNativeEntryName = ParameterProcessing.PDF_FOLDER + "/" + allMetadata.getUniqueId() + "_" + new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName() + ".pdf"; BytesWritable pdfBytesWritable = (BytesWritable) value.get(new Text(ParameterProcessing.NATIVE_AS_PDF)); if (pdfBytesWritable != 
null) { zipFileWriter.addBinaryFile(pdfNativeEntryName, pdfBytesWritable.getBytes(), pdfBytesWritable.getLength()); LOGGER.trace("Processing file: {}", pdfNativeEntryName); } processHtmlContent(value, allMetadata, allMetadata.getUniqueId()); // add exception to the exception folder String exception = allMetadata.get(DocumentMetadataKeys.PROCESSING_EXCEPTION); if (exception != null) { String exceptionEntryName = "exception/" + allMetadata.getUniqueId() + "_" + new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName(); if (bytesWritable != null) { zipFileWriter.addBinaryFile(exceptionEntryName, bytesWritable.getBytes(), bytesWritable.getLength()); } columnMetadata.addMetadataValue(DocumentMetadataKeys.LINK_EXCEPTION, exceptionEntryName); } appendMetadata(columnMetadata.delimiterSeparatedValues()); // prepare for the next file with the same key, if there is any first = false; }
From source file:org.freeeed.mr.MetadataWriter.java
License:Apache License
private void processHtmlContent(MapWritable value, Metadata allMetadata, String uniqueId) throws IOException { BytesWritable htmlBytesWritable = (BytesWritable) value .get(new Text(ParameterProcessing.NATIVE_AS_HTML_NAME)); if (htmlBytesWritable != null) { String htmlNativeEntryName = ParameterProcessing.HTML_FOLDER + "/" + uniqueId + "_" + new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName() + ".html"; zipFileWriter.addBinaryFile(htmlNativeEntryName, htmlBytesWritable.getBytes(), htmlBytesWritable.getLength()); LOGGER.trace("Processing file: {}", htmlNativeEntryName); // get the list with other files part of the html output Text htmlFiles = (Text) value.get(new Text(ParameterProcessing.NATIVE_AS_HTML)); if (htmlFiles != null) { String fileNames = htmlFiles.toString(); String[] fileNamesArr = fileNames.split(","); for (String fileName : fileNamesArr) { String entry = ParameterProcessing.HTML_FOLDER + "/" + fileName; BytesWritable imageBytesWritable = (BytesWritable) value .get(new Text(ParameterProcessing.NATIVE_AS_HTML + "/" + fileName)); if (imageBytesWritable != null) { zipFileWriter.addBinaryFile(entry, imageBytesWritable.getBytes(), imageBytesWritable.getLength()); LOGGER.trace("Processing file: {}", entry); }/*w ww . j av a 2 s. c o m*/ } } } }
From source file:org.huahinframework.core.util.ObjectUtilTest.java
License:Apache License
/**
 * Verifies that {@code ObjectUtil.primitive2Hadoop} turns a {@code Map<String, Integer>} into
 * a {@link MapWritable} of the same size whose {@code Text} keys map to equal
 * {@code IntWritable} values.
 */
@Test
public void testPrimitive2HadoopIOMap() {
    Map<String, Integer> o = new HashMap<String, Integer>();
    MapWritable m = new MapWritable();

    o.put("0", 0);
    m.put(new Text("0"), new IntWritable(0));

    o.put("1", 1);
    m.put(new Text("1"), new IntWritable(1));

    HadoopObject ho = ObjectUtil.primitive2Hadoop(o);
    assertEquals(ObjectUtil.MAP, ho.getType());
    assertEquals(MapWritable.class, ho.getObject().getClass());

    MapWritable mw = (MapWritable) ho.getObject();
    if (mw.size() != m.size()) {
        fail("map not equals size: " + mw.size() + " != " + m.size());
    }

    for (Entry<Writable, Writable> entry : m.entrySet()) {
        Writable actual = mw.get(entry.getKey());
        if (actual == null) {
            fail("map key not found");
        }
        // Expected value first, actual second, so a failure message reads the right way round.
        assertEquals(entry.getValue(), actual);
    }
}
From source file:org.rad.qa.reduce.QuoteAnalyzerReducer.java
License:Open Source License
@Override public void reduce(Text key, Iterable<MapWritable> values, Context context) throws IOException, InterruptedException { Iterator<MapWritable> writables = values.iterator(); MapWritable mw; double open = 0.0, close = 0.0, low = 0.0, high = 0.0, volume = 0.0; int size = 0; while (writables.hasNext()) { size++;//ww w .java2 s . co m mw = writables.next(); open += ((DoubleWritable) mw.get(QuoteAnalyzerConstants.OPEN)).get(); high += ((DoubleWritable) mw.get(QuoteAnalyzerConstants.HIGH)).get(); low += ((DoubleWritable) mw.get(QuoteAnalyzerConstants.LOW)).get(); close += ((DoubleWritable) mw.get(QuoteAnalyzerConstants.CLOSE)).get(); volume += ((DoubleWritable) mw.get(QuoteAnalyzerConstants.VOLUME)).get(); } MapWritable result = new QuoteAnalyzerOutput(key.toString()); result.put(QuoteAnalyzerConstants.OPEN, new DoubleWritable(open / size)); result.put(QuoteAnalyzerConstants.HIGH, new DoubleWritable(high / size)); result.put(QuoteAnalyzerConstants.LOW, new DoubleWritable(low / size)); result.put(QuoteAnalyzerConstants.CLOSE, new DoubleWritable(close / size)); result.put(QuoteAnalyzerConstants.VOLUME, new DoubleWritable(volume / size)); context.write(key, result); }
From source file:org.vilcek.hive.kv.KVHiveSerDe.java
License:Apache License
/**
 * Builds a row from the provided {@link MapWritable}: one element per configured key, in the
 * order of {@code majorMinorKeys}.
 *
 * <p>An element is the string form of the mapped value, or {@code null} when the key is
 * missing or mapped to {@code NullWritable}. The shared {@code row} instance is cleared and
 * reused on every call.
 *
 * @param wrtbl the record to deserialize; must be a {@code MapWritable}
 * @return the reused {@code row} holding the extracted values
 * @throws SerDeException if {@code wrtbl} is {@code null} or not a {@code MapWritable}
 */
@Override
public Object deserialize(Writable wrtbl) throws SerDeException {
    // Report bad input through the declared exception instead of a ClassCastException / NPE.
    if (!(wrtbl instanceof MapWritable)) {
        throw new SerDeException("Expected MapWritable, received "
                + (wrtbl == null ? "null" : wrtbl.getClass().getName()));
    }
    MapWritable input = (MapWritable) wrtbl;

    // A single Text instance is reused as the lookup key for every field.
    Text lookupKey = new Text();
    row.clear();
    for (int i = 0; i < fieldCount; i++) {
        lookupKey.set(majorMinorKeys.get(i));
        Writable value = input.get(lookupKey);
        if (value != null && !NullWritable.get().equals(value)) {
            row.add(value.toString());
        } else {
            row.add(null);
        }
    }
    return row;
}
From source file:org.vroyer.hive.solr.SolrSerDe.java
License:Open Source License
/** * returns a Row as a List<Object> from the provided MapWritable. *//*from www . j a v a 2s .c o m*/ @Override public Object deserialize(Writable wr) throws SerDeException { if (!(wr instanceof MapWritable)) { throw new SerDeException("Expected MapWritable, received " + wr.getClass().getName()); } final MapWritable input = (MapWritable) wr; final Text key = new Text(); row.clear(); for (int i = 0; i < colNames.size(); i++) { key.set(colNames.get(i)); final Writable value = input.get(key); if (value != null && !NullWritable.get().equals(value)) { //parse as double to avoid NumberFormatException... //TODO:need more test,especially for type 'bigint' String hiveType = colTypes.get(i).getTypeName(); log.debug(" value=" + value + " type=" + value.getClass().getName() + " hiveType=" + hiveType); if (HIVE_TYPE_INT.equalsIgnoreCase(hiveType)) { row.add(Double.valueOf(value.toString()).intValue()); } else if (SolrSerDe.HIVE_TYPE_SMALLINT.equalsIgnoreCase(hiveType)) { row.add(Double.valueOf(value.toString()).shortValue()); } else if (SolrSerDe.HIVE_TYPE_TINYINT.equalsIgnoreCase(hiveType)) { row.add(Double.valueOf(value.toString()).byteValue()); } else if (SolrSerDe.HIVE_TYPE_BIGINT.equalsIgnoreCase(hiveType)) { row.add(Long.valueOf(value.toString())); } else if (SolrSerDe.HIVE_TYPE_BOOLEAN.equalsIgnoreCase(hiveType)) { row.add(Boolean.valueOf(value.toString())); } else if (SolrSerDe.HIVE_TYPE_FLOAT.equalsIgnoreCase(hiveType)) { row.add(Double.valueOf(value.toString()).floatValue()); } else if (SolrSerDe.HIVE_TYPE_DOUBLE.equalsIgnoreCase(hiveType)) { row.add(Double.valueOf(value.toString())); } else if (SolrSerDe.HIVE_TYPE_TIMESTAMP.equalsIgnoreCase(hiveType)) { row.add(((org.apache.hadoop.hive.serde2.io.TimestampWritable) value).getTimestamp()); } else { row.add(value.toString()); } } else { row.add(null); } } return row; }