Usage examples for org.apache.hadoop.io.MapWritable.get(Object key)
@Override
public Writable get(Object key)
From source file:com.csiro.hadoop.WritableTest.java
public static void main(String[] args) { System.out.println("*** Primitive Writable ***"); BooleanWritable bool1 = new BooleanWritable(true); ByteWritable byte1 = new ByteWritable((byte) 3); System.out.printf("Boolean:%s Byte:%d\n", bool1, byte1.get()); IntWritable int1 = new IntWritable(5); IntWritable int2 = new IntWritable(17); System.out.printf("I1:%d I2:%d\n", int1.get(), int2.get()); int1.set(int2.get()); System.out.printf("I1:%d I2:%d\n", int1.get(), int2.get()); Integer int3 = new Integer(23); int1.set(int3); System.out.printf("I1:%d I2:%d\n", int1.get(), int2.get()); System.out.println("*** Array Writable ***"); ArrayWritable a = new ArrayWritable(IntWritable.class); a.set(new IntWritable[] { new IntWritable(1), new IntWritable(3), new IntWritable(5) }); IntWritable[] values = (IntWritable[]) a.get(); for (IntWritable i : values) { System.out.println(i);//from w w w . ja v a2 s .com } IntArrayWritable ia = new IntArrayWritable(); ia.set(new IntWritable[] { new IntWritable(1), new IntWritable(3), new IntWritable(5) }); IntWritable[] ivalues = (IntWritable[]) ia.get(); ia.set((new LongWritable[] { new LongWritable(10001) })); System.out.println("*** Map Writables ***"); MapWritable m = new MapWritable(); IntWritable key1 = new IntWritable(5); NullWritable value1 = NullWritable.get(); m.put(key1, value1); System.out.println(m.containsKey(key1)); System.out.println(m.get(key1)); m.put(new LongWritable(100000000), key1); Set<Writable> keys = m.keySet(); for (Writable k : keys) System.out.println(k.getClass()); }
From source file:com.dasasian.chok.lucene.integration.LuceneClientTest.java
License:Apache License
/**
 * Searches the deployed test index and checks that the details of every hit
 * contain both a "path" and a "category" entry.
 *
 * @throws Exception if deployment, search or detail retrieval fails
 */
@Test
public void testGetDetails() throws Exception {
    miniCluster.deployIndex(LuceneTestResources.INDEX1, 1);
    LuceneClient client = new LuceneClient(miniCluster.createInteractionProtocol());
    try {
        final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse("content: the");
        final Hits hits = client.search(query, null, 10);
        assertNotNull(hits);
        assertEquals(10, hits.getHits().size());
        for (final Hit hit : hits.getHits()) {
            final MapWritable details = client.getDetails(hit);
            final Set<Writable> keySet = details.keySet();
            assertFalse(keySet.isEmpty());
            assertNotNull(details.get(new Text("path")));
            assertNotNull(details.get(new Text("category")));
        }
    } finally {
        // release the client even when an assertion above fails
        client.close();
    }
}
From source file:com.dasasian.chok.lucene.integration.LuceneClientTest.java
License:Apache License
/**
 * Requests details restricted to the "path" field and checks that "path" is
 * present while "category" is absent for every hit.
 *
 * @throws Exception if deployment, search or detail retrieval fails
 */
@Test
public void testGetDetailsWithFieldNames() throws Exception {
    miniCluster.deployIndex(LuceneTestResources.INDEX1, 1);
    LuceneClient client = new LuceneClient(miniCluster.createInteractionProtocol());
    try {
        final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse("content: the");
        final Hits hits = client.search(query, null, 10);
        assertNotNull(hits);
        assertEquals(10, hits.getHits().size());
        for (final Hit hit : hits.getHits()) {
            final MapWritable details = client.getDetails(hit, new String[] { "path" });
            final Set<Writable> keySet = details.keySet();
            assertFalse(keySet.isEmpty());
            assertNotNull(details.get(new Text("path")));
            // "category" was not requested, so it must not be returned
            assertNull(details.get(new Text("category")));
        }
    } finally {
        // release the client even when an assertion above fails
        client.close();
    }
}
From source file:com.dasasian.chok.lucene.integration.LuceneClientTest.java
License:Apache License
/**
 * Builds a one-document index holding a stored binary field and an unstored
 * text field, deploys it, and checks that the hit details expose exactly the
 * binary field as a {@code BytesWritable} with the original bytes.
 *
 * @throws Exception if index creation, deployment or search fails
 */
@Test
public void testGetBinaryDetails() throws Exception {
    File index = temporaryFolder.newFolder("indexWithBinaryData");
    File indexShard = new File(index, "binaryShard");
    if (!indexShard.mkdirs()) {
        throw new RuntimeException("Unable to create directory " + indexShard.getAbsolutePath());
    }

    String textFieldName = "textField";
    String binaryFieldName = "binaryField";
    String textFieldContent = "sample text";
    byte[] bytesFieldContent = new byte[] { 1, 2, 3 };

    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(indexShard),
            new StandardAnalyzer(Version.LUCENE_30), true, MaxFieldLength.UNLIMITED);
    Document document = new Document();
    document.add(new Field(binaryFieldName, bytesFieldContent, Store.YES));
    document.add(new Field(textFieldName, textFieldContent, Store.NO, Index.ANALYZED));
    indexWriter.addDocument(document);
    indexWriter.close(true);

    DeployClient deployClient = new DeployClient(miniCluster.getProtocol());
    IndexState indexState = deployClient.addIndex(index.getName(), index.getAbsolutePath(), 1).joinDeployment();
    assertEquals(IndexState.DEPLOYED, indexState);

    LuceneClient client = new LuceneClient(miniCluster.createInteractionProtocol());
    try {
        final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer())
                .parse(textFieldName + ": " + textFieldContent);
        final Hits hits = client.search(query, new String[] { index.getName() }, 10);
        assertNotNull(hits);
        assertEquals(1, hits.getHits().size());

        final Hit hit = hits.getHits().get(0);
        final MapWritable details = client.getDetails(hit);
        final Set<Writable> keySet = details.keySet();
        // only the stored (binary) field is expected in the details
        assertEquals(1, keySet.size());

        final Writable writable = details.get(new Text(binaryFieldName));
        assertNotNull(writable);
        assertThat(writable, instanceOf(BytesWritable.class));
        BytesWritable bytesWritable = (BytesWritable) writable;
        // getBytes() returns the full backing array, so shrink it to the valid length first
        bytesWritable.setCapacity(bytesWritable.getLength());
        assertArrayEquals(bytesFieldContent, bytesWritable.getBytes());
    } finally {
        // release the client even when an assertion above fails
        client.close();
    }
}
From source file:com.dasasian.chok.lucene.integration.LuceneClientTest.java
License:Apache License
@Test public void testGetDetailsConcurrently() throws Exception { miniCluster.deployIndex(LuceneTestResources.INDEX1, 1); LuceneClient client = new LuceneClient(miniCluster.createInteractionProtocol()); final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse("content: the"); final Hits hits = client.search(query, null, 10); assertNotNull(hits);//from w w w .j a v a 2 s.c om assertEquals(10, hits.getHits().size()); List<MapWritable> detailList = client.getDetails(hits.getHits()); assertEquals(hits.getHits().size(), detailList.size()); for (int i = 0; i < detailList.size(); i++) { final MapWritable details1 = client.getDetails(hits.getHits().get(i)); final MapWritable details2 = detailList.get(i); assertEquals(details1.entrySet(), details2.entrySet()); final Set<Writable> keySet = details2.keySet(); assertFalse(keySet.isEmpty()); final Writable writable = details2.get(new Text("path")); assertNotNull(writable); } client.close(); }
From source file:com.dasasian.chok.lucene.integration.LuceneClientTest.java
License:Apache License
@Test public void testFilteredSearch() throws Exception { // write and deploy test index File filterIndex = temporaryFolder.newFolder("filterIndex"); File filterShard = new File(filterIndex, "filterShard"); String textFieldName = "textField"; String filterFieldName = "filterField"; IndexWriter indexWriter = new IndexWriter(FSDirectory.open(filterShard), new StandardAnalyzer(Version.LUCENE_30), true, MaxFieldLength.UNLIMITED); for (int i = 0; i < 100; i++) { Document document = new Document(); document.add(new Field(textFieldName, "sample " + i, Store.YES, Index.NOT_ANALYZED)); document.add(new Field(filterFieldName, "" + (i % 10), Store.YES, Index.NOT_ANALYZED)); indexWriter.addDocument(document); }//from www .j av a 2 s. c om indexWriter.close(true); DeployClient deployClient = new DeployClient(miniCluster.createInteractionProtocol()); IndexState indexState = deployClient.addIndex(filterIndex.getName(), filterIndex.getAbsolutePath(), 1) .joinDeployment(); assertEquals(IndexState.DEPLOYED, indexState); // build filter for terms in set {i | (i % 10) == 3}. 
LuceneClient client = new LuceneClient(miniCluster.getZkConfiguration()); TermQuery filterQuery = new TermQuery(new Term(filterFieldName, "3")); QueryWrapperFilter filter = new QueryWrapperFilter(filterQuery); final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()) .parse(textFieldName + ":" + "sample*3"); final Hits hits = client.search(query, new String[] { filterIndex.getName() }, 100, null, filter); assertNotNull(hits); List<Hit> hitsList = hits.getHits(); for (final Hit hit : hitsList) { writeToLog(hit); } assertEquals(10, hits.size()); assertEquals(10, hitsList.size()); // check that returned results conform to the filter for (final Hit hit : hitsList) { MapWritable mw = client.getDetails(hit); Text text = (Text) mw.get(new Text("textField")); assertNotNull(text); String[] parts = text.toString().split(" "); assertTrue(parts.length == 2); int num = Integer.valueOf(parts[1]); assertTrue((num % 10) == 3); } client.close(); }
From source file:com.digitalpebble.behemoth.DocumentFilter.java
License:Apache License
/** Returns true if the document can be kept, false otherwise **/ public boolean keep(BehemothDocument input) { // filter if null if (input == null) return false; // check length content if (input.getContent() != null && maxContentLength != -1) { if (input.getContent().length > maxContentLength) return false; }// w w w . j av a 2 s .co m // check on the URL if (URLRegex != null) { if (input.getUrl() == null) return false; boolean match = URLRegex.matcher(input.getUrl()).matches(); if (!match) return false; } // check on the MimeType if (MimetypeRegex != null) { if (input.getContentType() == null) return false; boolean match = MimetypeRegex.matcher(input.getContentType()).matches(); if (!match) return false; } MapWritable metadata = input.getMetadata(); // no rules at all -> fine! if (KVpatterns.size() == 0) return true; // document MUST have a certain value to be kept if (metadata == null || metadata.isEmpty()) { if (!negativeMode) return false; else return true; } boolean hasMatch = false; // find common keys between filters and content of doc boolean matchesAll = true; Iterator<String> kiter = KVpatterns.keySet().iterator(); while (kiter.hasNext()) { String k = kiter.next(); String regex = KVpatterns.get(k); // see if we have a metadata for that key Writable value = metadata.get(new Text(k)); if (value == null) { matchesAll = false; continue; } if (value.toString().matches(regex)) { hasMatch = true; } else matchesAll = false; } boolean successMatching = false; if (medataMode.equalsIgnoreCase("AND")) { if (matchesAll) successMatching = true; } else if (hasMatch) successMatching = true; if (successMatching) { return (!negativeMode); } // no negative rule matching if (negativeMode) return true; // no positive rule matching return false; }
From source file:com.mapred.JoinReducer.java
/**
 * Joins the first value of the group (treated as the department record) with
 * every following value. For each following map, its values are written as a
 * comma-separated list followed by the department name and a ';'.
 *
 * <p>The output key is a running index starting at 1 for each group.
 * NOTE(review): this relies on the department record arriving first —
 * presumably guaranteed by the job's sort/grouping setup; confirm there.
 *
 * @param key     the group key (not read by this method)
 * @param values  the maps of the group; nothing is emitted when empty
 * @param context the reducer context that receives the joined lines
 * @throws IOException          if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
protected void reduce(DepDatePair key, Iterable<MapWritable> values, Context context)
        throws IOException, InterruptedException {
    Iterator<MapWritable> valuesIterator = values.iterator();
    if (!valuesIterator.hasNext()) {
        return;
    }

    this.index = 1L;
    MapWritable depMap = valuesIterator.next();

    // Check for a missing entry BEFORE calling toString(); the original
    // null-checked the string afterwards, so a missing name threw NPE.
    Object rawName = depMap.get(MRUtils.DEPARTMENT_NAME);
    String name = (rawName == null) ? null : rawName.toString();
    this.departmentName = (name == null) ? "DEP#NAME#ERROR" : name;

    while (valuesIterator.hasNext()) {
        MapWritable map = valuesIterator.next();
        // iterate the values directly rather than looking each key up again
        for (Object value : map.values()) {
            this.builder.append(value);
            this.builder.append(",");
        }
        this.builder.append(this.departmentName);
        this.builder.append(";");
        this.outputValue.set(this.builder.toString());
        context.write(new LongWritable(this.index++), outputValue);
        // reset the shared builder for the next record
        this.builder.setLength(0);
    }
}
From source file:com.redgate.hadoop.hive.azuretables.AzureTablesSerDe.java
License:Apache License
@Override public Object deserialize(final Writable wr) throws SerDeException { if (!(wr instanceof MapWritable)) { throw new SerDeException("Expected MapWritable, received " + wr.getClass().getName()); }// w w w. j a v a 2 s . com final MapWritable input = (MapWritable) wr; final Text t = new Text(); row.clear(); for (int i = 0; i < fieldCount; i++) { t.set(columnNames.get(i)); final Writable value = input.get(t); if (value != null && !NullWritable.get().equals(value)) { row.add(value.toString()); } else { row.add(null); } } return row; }
From source file:com.shmsoft.dmass.main.Reduce.java
License:Apache License
/**
 * Records one document: adds its metadata to {@code columnMetadata} and
 * writes its text, native file, PDF rendition and (if any) exception file
 * into the zip through {@code zipFileWriter}.
 *
 * <p>Entry names have the form {@code <folder>/<formatted count>_<original file name>}
 * plus a suffix for text (".txt") and PDF (".pdf").
 *
 * @param value the map for the document; the native and PDF bytes are read
 *              from it under the {@code ParameterProcessing} keys
 * @throws IOException if writing to the zip fails
 */
protected void processMap(MapWritable value) throws IOException {
    Metadata allMetadata = getAllMetadata(value);
    Metadata standardMetadata = getStandardMetadata(allMetadata, outputFileCount);
    columnMetadata.addMetadata(standardMetadata);
    columnMetadata.addMetadata(allMetadata);
    if (!isMaster) {
        columnMetadata.addMetadataValue(DocumentMetadataKeys.MASTER_DUPLICATE,
                UPIFormat.format(outputFileCount));
    }

    // computed once and reused for every entry name below
    String originalFileName = new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName();
    String entrySuffix = UPIFormat.format(outputFileCount) + "_" + originalFileName;

    // add the text to the text folder
    String documentText = allMetadata.get(DocumentMetadataKeys.DOCUMENT_TEXT);
    String textEntryName = ParameterProcessing.TEXT + "/" + entrySuffix + ".txt";
    // The former "textEntryName != null" guard was always true (a concatenation
    // is never null), so the call is unconditional, exactly as before.
    // NOTE(review): documentText may be null here — confirm addTextFile copes with that.
    zipFileWriter.addTextFile(textEntryName, documentText);
    columnMetadata.addMetadataValue(DocumentMetadataKeys.LINK_TEXT, textEntryName);

    // add the native file to the native folder
    String nativeEntryName = ParameterProcessing.NATIVE + "/" + entrySuffix;
    BytesWritable bytesWritable = (BytesWritable) value.get(new Text(ParameterProcessing.NATIVE));
    if (bytesWritable != null) { // some large exception files are not passed
        // pass getLength() explicitly: the array from getBytes() can be longer than the data
        zipFileWriter.addBinaryFile(nativeEntryName, bytesWritable.getBytes(), bytesWritable.getLength());
        History.appendToHistory(nativeEntryName);
    }
    columnMetadata.addMetadataValue(DocumentMetadataKeys.LINK_NATIVE, nativeEntryName);

    // add the pdf made from native to the PDF folder
    String pdfNativeEntryName = ParameterProcessing.PDF_FOLDER + "/" + entrySuffix + ".pdf";
    BytesWritable pdfBytesWritable = (BytesWritable) value.get(new Text(ParameterProcessing.NATIVE_AS_PDF));
    if (pdfBytesWritable != null) {
        zipFileWriter.addBinaryFile(pdfNativeEntryName, pdfBytesWritable.getBytes(),
                pdfBytesWritable.getLength());
        History.appendToHistory(pdfNativeEntryName);
    }

    // add exception to the exception folder
    String exception = allMetadata.get(DocumentMetadataKeys.PROCESSING_EXCEPTION);
    if (exception != null) {
        String exceptionEntryName = "exception/" + entrySuffix;
        // the native bytes (when present) are stored again under the exception folder
        if (bytesWritable != null) {
            zipFileWriter.addBinaryFile(exceptionEntryName, bytesWritable.getBytes(),
                    bytesWritable.getLength());
        }
        columnMetadata.addMetadataValue(DocumentMetadataKeys.LINK_EXCEPTION, exceptionEntryName);
    }
}