Usage examples for the org.apache.hadoop.io.BytesWritable constructor BytesWritable(byte[])
public BytesWritable(byte[] bytes)
From source file:ZipFileRecordReader.java
License:Apache License
/** * This is where the magic happens, each ZipEntry is decompressed and * readied for the Mapper. The contents of each file is held *in memory* * in a BytesWritable object.//from ww w.j a v a 2 s. c om * * If the ZipFileInputFormat has been set to Lenient (not the default), * certain exceptions will be gracefully ignored to prevent a larger job * from failing. */ @Override public boolean nextKeyValue() throws IOException, InterruptedException { ZipEntry entry = null; try { entry = zip.getNextEntry(); } catch (ZipException e) { if (ZipFileInputFormat.getLenient() == false) throw e; } // Sanity check if (entry == null) { isFinished = true; return false; } // Filename currentKey = new Text(entry.getName()); // Read the file contents ByteArrayOutputStream bos = new ByteArrayOutputStream(); byte[] temp = new byte[8192]; while (true) { int bytesRead = 0; try { bytesRead = zip.read(temp, 0, 8192); } catch (EOFException e) { if (ZipFileInputFormat.getLenient() == false) throw e; return false; } if (bytesRead > 0) bos.write(temp, 0, bytesRead); else break; } zip.closeEntry(); // Uncompressed contents currentValue = new BytesWritable(bos.toByteArray()); return true; }
From source file:alluxio.hadoop.mapreduce.KeyValueRecordReader.java
License:Apache License
@Override public synchronized boolean nextKeyValue() throws IOException { if (!mKeyValuePairIterator.hasNext()) { return false; }/* w ww . java 2 s. c om*/ KeyValuePair pair; try { pair = mKeyValuePairIterator.next(); } catch (AlluxioException e) { throw new IOException(e); } // TODO(cc): Implement a ByteBufferInputStream which is backed by a ByteBuffer so we could // benefit from zero-copy. mCurrentKey.set(new BytesWritable(BufferUtils.newByteArrayFromByteBuffer(pair.getKey()))); mCurrentValue.set(new BytesWritable(BufferUtils.newByteArrayFromByteBuffer(pair.getValue()))); mNumVisitedKeyValuePairs++; return true; }
From source file:bme.iclef.hadoop.file2seq.TarToSeqFile.java
License:Apache License
/**
 * Performs the conversion: every non-directory entry of the input tar stream
 * is appended to the output sequence file, keyed by the entry name with the
 * entry's bytes as the value.
 *
 * Both streams are closed when the method exits, whether normally or by
 * exception.
 *
 * @throws Exception if opening, reading, writing or closing either file fails
 */
public void execute() throws Exception {
    TarInputStream input = null;
    SequenceFile.Writer output = null;
    try {
        input = openInputFile();
        output = openOutputFile();
        TarEntry entry;
        while ((entry = input.getNextEntry()) != null) {
            if (entry.isDirectory()) {
                continue;
            }
            String filename = entry.getName();
            byte[] data = TarToSeqFile.getBytes(input, entry.getSize());

            Text key = new Text(filename);
            BytesWritable value = new BytesWritable(data);
            output.append(key, value);
        }
    } finally {
        // Nested try/finally: a failure while closing the input must not
        // prevent the output writer from being closed.
        try {
            if (input != null) {
                input.close();
            }
        } finally {
            if (output != null) {
                output.close();
            }
        }
    }
}
From source file:boa.datagen.scm.AbstractCommit.java
License:Apache License
private Builder processChangeFile(String path, boolean parse, Writer astWriter, String revKey, String keyDelim) {//from w ww . j a va2 s . c o m final ChangedFile.Builder fb = ChangedFile.newBuilder(); fb.setName(path); fb.setKind(FileKind.OTHER); final String lowerPath = path.toLowerCase(); if (lowerPath.endsWith(".txt")) fb.setKind(FileKind.TEXT); else if (lowerPath.endsWith(".xml")) fb.setKind(FileKind.XML); else if (lowerPath.endsWith(".jar") || lowerPath.endsWith(".class")) fb.setKind(FileKind.BINARY); else if (lowerPath.endsWith(".java") && parse) { final String content = getFileContents(path); fb.setKind(FileKind.SOURCE_JAVA_JLS2); if (!parseJavaFile(path, fb, content, JavaCore.VERSION_1_4, AST.JLS2, false, astWriter, revKey + keyDelim + path)) { if (debug) System.err.println("Found JLS2 parse error in: revision " + id + ": file " + path); fb.setKind(FileKind.SOURCE_JAVA_JLS3); if (!parseJavaFile(path, fb, content, JavaCore.VERSION_1_5, AST.JLS3, false, astWriter, revKey + keyDelim + path)) { if (debug) System.err.println("Found JLS3 parse error in: revision " + id + ": file " + path); fb.setKind(FileKind.SOURCE_JAVA_JLS4); if (!parseJavaFile(path, fb, content, JavaCore.VERSION_1_7, AST.JLS4, false, astWriter, revKey + keyDelim + path)) { if (debug) System.err.println("Found JLS4 parse error in: revision " + id + ": file " + path); //fb.setContent(content); fb.setKind(FileKind.SOURCE_JAVA_ERROR); try { astWriter.append(new Text(revKey + keyDelim + fb.getName()), new BytesWritable(ASTRoot.newBuilder().build().toByteArray())); } catch (IOException e) { e.printStackTrace(); } } else if (debug) System.err.println("Accepted JLS4: revision " + id + ": file " + path); } else if (debug) System.err.println("Accepted JLS3: revision " + id + ": file " + path); } else if (debug) System.err.println("Accepted JLS2: revision " + id + ": file " + path); } fb.setKey(revKey); return fb; }
From source file:boa.datagen.scm.AbstractCommit.java
License:Apache License
private boolean parseJavaScriptFile(final String path, final ChangedFile.Builder fb, final String content, final String compliance, final int astLevel, final boolean storeOnError, Writer astWriter, String key) { try {/*from ww w . ja v a 2s.c om*/ //System.out.println("parsing=" + (++count) + "\t" + path); final org.eclipse.wst.jsdt.core.dom.ASTParser parser = org.eclipse.wst.jsdt.core.dom.ASTParser .newParser(astLevel); parser.setKind(ASTParser.K_COMPILATION_UNIT); parser.setResolveBindings(true); parser.setSource(content.toCharArray()); final Map options = JavaCore.getOptions(); JavaCore.setComplianceOptions(compliance, options); parser.setCompilerOptions(options); JavaScriptUnit cu; try { cu = (JavaScriptUnit) parser.createAST(null); } catch (java.lang.IllegalArgumentException ex) { return false; } final JavaScriptErrorCheckVisitor errorCheck = new JavaScriptErrorCheckVisitor(); cu.accept(errorCheck); if (!errorCheck.hasError || storeOnError) { final ASTRoot.Builder ast = ASTRoot.newBuilder(); // final CommentsRoot.Builder comments = // CommentsRoot.newBuilder(); final JavaScriptVisitor visitor = new JavaScriptVisitor(content); try { ast.addNamespaces(visitor.getNamespaces(cu)); // for (final String s : visitor.getImports()) // ast.addImports(s); /* * for (final Comment c : visitor.getComments()) * comments.addComments(c); */ } catch (final UnsupportedOperationException e) { return false; } catch (final Exception e) { if (debug) System.err.println("Error visiting: " + path); //e.printStackTrace(); return false; } if (astWriter != null) { try { // System.out.println("writing=" + count + "\t" + path); astWriter.append(new Text(key), new BytesWritable(ast.build().toByteArray())); } catch (IOException e) { e.printStackTrace(); } } else fb.setAst(ast); // fb.setComments(comments); } return !errorCheck.hasError; } catch (final Exception e) { e.printStackTrace(); return false; } }
From source file:boa.datagen.scm.AbstractCommit.java
License:Apache License
private boolean parseJavaFile(final String path, final ChangedFile.Builder fb, final String content, final String compliance, final int astLevel, final boolean storeOnError, Writer astWriter, String key) { try {//www . ja va 2s .c o m final ASTParser parser = ASTParser.newParser(astLevel); parser.setKind(ASTParser.K_COMPILATION_UNIT); parser.setResolveBindings(true); parser.setSource(content.toCharArray()); final Map options = JavaCore.getOptions(); JavaCore.setComplianceOptions(compliance, options); parser.setCompilerOptions(options); final CompilationUnit cu = (CompilationUnit) parser.createAST(null); final JavaErrorCheckVisitor errorCheck = new JavaErrorCheckVisitor(); cu.accept(errorCheck); if (!errorCheck.hasError || storeOnError) { final ASTRoot.Builder ast = ASTRoot.newBuilder(); //final CommentsRoot.Builder comments = CommentsRoot.newBuilder(); final JavaVisitor visitor = new JavaVisitor(content, connector.nameIndices); try { ast.addNamespaces(visitor.getNamespaces(cu)); for (final String s : visitor.getImports()) ast.addImports(s); /*for (final Comment c : visitor.getComments()) comments.addComments(c);*/ } catch (final UnsupportedOperationException e) { return false; } catch (final Exception e) { if (debug) System.err.println("Error visiting: " + path); e.printStackTrace(); return false; } if (astWriter != null) { try { astWriter.append(new Text(key), new BytesWritable(ast.build().toByteArray())); } catch (IOException e) { e.printStackTrace(); } } else fb.setAst(ast); //fb.setComments(comments); } return !errorCheck.hasError; } catch (final Exception e) { e.printStackTrace(); return false; } }
From source file:ca.sparkera.adapters.mapreduce.MainframeVBRecordReader.java
License:Apache License
@Override public synchronized boolean nextKeyValue() throws IOException { LOG.debug("VLR nextKey value started: pos" + pos + "leninit" + recordLength); byte[] lengthByte = new byte[4]; int offset = 0; int numBytesRead = 0; if (numBytesRemainingInSplit > 0) { int numBytesToRead = 4; while (numBytesToRead > 0) { byte[] tempByte = new byte[4]; numBytesRead = inputStream.read(tempByte, offset, numBytesToRead); if (numBytesRead == -1) { // EOF return false; }/* w ww .j a v a 2s . c om*/ for (int i = 0; i < numBytesRead; i++) { lengthByte[4 - numBytesToRead + i] = tempByte[i]; } numBytesToRead -= numBytesRead; } if (numBytesToRead == 0) { pos += numBytesRead; recordLength = hex2decimal(lengthByte) - 4; numBytesRemainingInSplit -= numBytesRead; } else { throw new IOException("Error Reading RDW at pos = " + pos); } } if (key == null) { key = new LongWritable(); } if (value == null) { value = new BytesWritable(new byte[recordLength]); } LOG.debug("VLR nextKey record length" + recordLength + ":pos:" + pos); boolean dataRead = false; value.setSize(recordLength); byte[] record = value.getBytes(); if (numBytesRemainingInSplit > 0) { key.set(pos); int numBytesToRead = recordLength; while (numBytesToRead > 0) { numBytesRead = inputStream.read(record, offset, numBytesToRead); if (numBytesRead == -1) { // EOF break; } offset += numBytesRead; numBytesToRead -= numBytesRead; } numBytesRead = recordLength - numBytesToRead; pos += numBytesRead; if (numBytesRead > 0) { dataRead = true; if (numBytesRead >= recordLength) { if (!isCompressedInput) { numBytesRemainingInSplit -= numBytesRead; } } else { dataRead = false; throw new IOException( "Partial record(length = " + numBytesRead + ") found at the end of split."); } } else { numBytesRemainingInSplit = 0L; // End of input. } } return dataRead; }
From source file:cascading.avro.AvroSchemeTest.java
License:Apache License
/**
 * Writes two tuples through an AvroScheme tap, copies them with a flow to a
 * second Avro tap, then reads the copy back and checks the first entry's
 * field values and that the second entry's union field is null.
 */
@Test
public void testRoundTrip() throws Exception {
    final Schema schema = new Schema.Parser().parse(getClass().getResourceAsStream("test1.avsc"));
    final Fields fields = new Fields("aBoolean", "anInt", "aLong", "aDouble", "aFloat", "aBytes", "aFixed",
            "aNull", "aString", "aList", "aMap", "aUnion");

    final String root = tempDir.getRoot().toString();
    String in = root + "/testRoundTrip/in";
    String out = root + "/testRoundTrip/out";

    Tap inputTap = new Lfs(new AvroScheme(schema), in, SinkMode.REPLACE);
    TupleEntryCollector collector = inputTap.openForWrite(new HadoopFlowProcess());

    Map<String, Integer> aMap = new HashMap<String, Integer>();
    aMap.put("one", 1);
    aMap.put("two", 2);

    List<Integer> aList = new ArrayList<Integer>();
    aList.add(0);
    aList.add(1);

    BytesWritable fixedBytes = new BytesWritable(
            new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 });
    BytesWritable plainBytes = new BytesWritable(new byte[] { 1, 2, 3 });

    // First record: every field populated, union holds an int.
    Tuple firstTuple = new Tuple(false, 1, 2L, 3.0, 4.0F, plainBytes, fixedBytes, null, "test-string", aList,
            aMap, 5);
    collector.add(new TupleEntry(fields, firstTuple));

    // Second record: empty bytes field and a null union.
    Tuple secondTuple = new Tuple(false, 1, 2L, 3.0, 4.0F, new BytesWritable(new byte[0]),
            new BytesWritable(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6 }), null,
            "other string", aList, aMap, null);
    collector.add(new TupleEntry(fields, secondTuple));
    collector.close();

    Pipe copyPipe = new Pipe("tuples to avro");
    Tap outputTap = new Lfs(new AvroScheme(schema), out);
    Flow copyFlow = new HadoopFlowConnector().connect(inputTap, outputTap, copyPipe);
    copyFlow.complete();

    // Now read it back in, and verify that the data/types match up.
    Tap readTap = new Lfs(new AvroScheme(schema), out);
    TupleEntryIterator entries = readTap.openForRead(new HadoopFlowProcess());

    assertTrue(entries.hasNext());
    final TupleEntry first = entries.next();

    assertEquals(false, first.getBoolean(0));
    assertEquals(1, first.getInteger(1));
    assertEquals(2L, first.getLong(2));
    assertEquals(3.0, first.getDouble(3), 0.01);
    assertEquals(4.0F, first.getFloat(4), 0.01);
    assertEquals(plainBytes, first.getObject(5));
    assertEquals(fixedBytes, first.getObject(6));
    assertEquals("test-string", first.getString(8));
    assertEquals("0", ((List) first.getObject(9)).get(0).toString());
    assertEquals(1, ((Map) first.getObject(10)).get("one"));

    assertTrue(entries.hasNext());
    final TupleEntry second = entries.next();

    assertNull(second.get("aUnion"));
}
From source file:cascading.avro.AvroToCascading.java
License:Apache License
/**
 * Wraps the bytes of an Avro fixed value in a BytesWritable.
 *
 * @param obj    the value to convert; cast to Fixed
 * @param schema not used by this method
 * @return a BytesWritable built from the fixed value's bytes
 */
protected static Object fromAvroFixed(Object obj, Schema schema) {
    return new BytesWritable(((Fixed) obj).bytes());
}
From source file:cascading.avro.AvroToCascading.java
License:Apache License
/**
 * Copies the readable content of an Avro bytes value into a BytesWritable.
 *
 * Only the bytes between the buffer's position and limit are copied.
 * ByteBuffer.array() is deliberately not used: it exposes the entire backing
 * array regardless of position, limit and array offset, and it throws for
 * read-only and direct buffers.
 *
 * @param val the buffer holding the bytes; its position and limit are left unchanged
 * @return a BytesWritable holding a copy of the buffer's remaining bytes
 */
protected static BytesWritable fromAvroBytes(ByteBuffer val) {
    // Read through a duplicate so the caller's position is not advanced.
    ByteBuffer view = val.duplicate();
    byte[] bytes = new byte[view.remaining()];
    view.get(bytes);
    return new BytesWritable(bytes);
}