Example usage for org.apache.hadoop.io BytesWritable BytesWritable

List of usage examples for org.apache.hadoop.io BytesWritable BytesWritable

Introduction

On this page you can find example usages of the BytesWritable(byte[] bytes) constructor of org.apache.hadoop.io.BytesWritable.

Prototype

public BytesWritable(byte[] bytes) 

Source Link

Document

Create a BytesWritable using the byte array as the initial value.

Usage

From source file:ZipFileRecordReader.java

License:Apache License

/**
 * Advances to the next entry of the zip stream and loads it as the current
 * key/value pair: the key is the entry name and the value is the entry's
 * complete uncompressed content, buffered in memory in a BytesWritable.
 *
 * When ZipFileInputFormat reports lenient mode, a ZipException raised while
 * locating the next entry is handled like the end of the archive, and an
 * EOFException raised while reading an entry's content makes this call
 * return false; in non-lenient mode both exceptions propagate to the caller.
 *
 * @return true if a new key/value pair was loaded, false otherwise
 */
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    ZipEntry nextEntry = null;
    try {
        nextEntry = zip.getNextEntry();
    } catch (ZipException zipError) {
        if (!ZipFileInputFormat.getLenient()) {
            throw zipError;
        }
    }

    // No entry available (end of archive, or a tolerated ZipException).
    if (nextEntry == null) {
        isFinished = true;
        return false;
    }

    // The entry name becomes the key.
    currentKey = new Text(nextEntry.getName());

    // Drain the entry's content into memory, one chunk at a time.
    final ByteArrayOutputStream contents = new ByteArrayOutputStream();
    final byte[] chunk = new byte[8192];
    int count;
    do {
        try {
            count = zip.read(chunk, 0, chunk.length);
        } catch (EOFException truncated) {
            if (!ZipFileInputFormat.getLenient()) {
                throw truncated;
            }
            return false;
        }
        if (count > 0) {
            contents.write(chunk, 0, count);
        }
    } while (count > 0);
    zip.closeEntry();

    // The uncompressed bytes become the value.
    currentValue = new BytesWritable(contents.toByteArray());
    return true;
}

From source file:alluxio.hadoop.mapreduce.KeyValueRecordReader.java

License:Apache License

/**
 * Moves to the next key-value pair of the underlying iterator and copies its key and value
 * into the current key and current value holders.
 *
 * @return true if a pair was loaded, false if the iterator has no more pairs
 * @throws IOException wrapping any AlluxioException raised while fetching the pair
 */
@Override
public synchronized boolean nextKeyValue() throws IOException {
    if (mKeyValuePairIterator.hasNext()) {
        final KeyValuePair next;
        try {
            next = mKeyValuePairIterator.next();
        } catch (AlluxioException cause) {
            throw new IOException(cause);
        }

        // TODO(cc): Implement a ByteBufferInputStream which is backed by a ByteBuffer so we could
        // benefit from zero-copy.
        final byte[] keyBytes = BufferUtils.newByteArrayFromByteBuffer(next.getKey());
        mCurrentKey.set(new BytesWritable(keyBytes));
        final byte[] valueBytes = BufferUtils.newByteArrayFromByteBuffer(next.getValue());
        mCurrentValue.set(new BytesWritable(valueBytes));
        mNumVisitedKeyValuePairs++;
        return true;
    }
    return false;
}

From source file:bme.iclef.hadoop.file2seq.TarToSeqFile.java

License:Apache License

/**
 * Performs the conversion: every non-directory entry of the input tar stream is appended to
 * the output sequence file as a (Text entry name, BytesWritable entry content) record.
 *
 * Both streams are closed when the method exits, whether normally or by exception. The
 * output writer is closed even if closing the input stream itself fails.
 *
 * @throws Exception if opening, reading, appending or closing fails
 */
public void execute() throws Exception {
    TarInputStream input = null;
    SequenceFile.Writer output = null;
    try {
        input = openInputFile();
        output = openOutputFile();
        TarEntry entry;
        while ((entry = input.getNextEntry()) != null) {
            if (entry.isDirectory()) {
                continue;
            }
            String filename = entry.getName();
            byte[] data = TarToSeqFile.getBytes(input, entry.getSize());

            Text key = new Text(filename);
            BytesWritable value = new BytesWritable(data);
            output.append(key, value);
        }
    } finally {
        // Nested finally: a failure in input.close() must not prevent the writer from
        // being closed, otherwise the output file handle would leak.
        try {
            if (input != null) {
                input.close();
            }
        } finally {
            if (output != null) {
                output.close();
            }
        }
    }
}

From source file:boa.datagen.scm.AbstractCommit.java

License:Apache License

/**
 * Builds the ChangedFile record for one path, classifying it by file extension.
 *
 * Paths ending in .txt, .xml, .jar or .class (case-insensitively) get the TEXT, XML or
 * BINARY kind. A .java path is parsed only when {@code parse} is true: the file is tried
 * with JLS2, then JLS3, then JLS4, stopping at the first level that parseJavaFile accepts.
 * If all three fail, the kind is set to SOURCE_JAVA_ERROR and an empty ASTRoot is appended
 * to {@code astWriter}. Everything else keeps the OTHER kind.
 *
 * @param path      path of the changed file
 * @param parse     whether .java files should be parsed
 * @param astWriter writer that receives the serialized ASTs
 * @param revKey    key stored on the builder and used as the prefix of the AST key
 * @param keyDelim  delimiter placed between {@code revKey} and the file name in the AST key
 * @return the populated builder
 */
private Builder processChangeFile(String path, boolean parse, Writer astWriter, String revKey,
        String keyDelim) {
    final ChangedFile.Builder fileBuilder = ChangedFile.newBuilder();
    fileBuilder.setName(path);
    fileBuilder.setKind(FileKind.OTHER);

    final String lowerPath = path.toLowerCase();
    if (lowerPath.endsWith(".txt")) {
        fileBuilder.setKind(FileKind.TEXT);
    } else if (lowerPath.endsWith(".xml")) {
        fileBuilder.setKind(FileKind.XML);
    } else if (lowerPath.endsWith(".jar") || lowerPath.endsWith(".class")) {
        fileBuilder.setKind(FileKind.BINARY);
    } else if (lowerPath.endsWith(".java") && parse) {
        final String source = getFileContents(path);
        // Key under which a successfully parsed AST is written.
        final String astKey = revKey + keyDelim + path;

        fileBuilder.setKind(FileKind.SOURCE_JAVA_JLS2);
        if (parseJavaFile(path, fileBuilder, source, JavaCore.VERSION_1_4, AST.JLS2, false, astWriter,
                astKey)) {
            if (debug) {
                System.err.println("Accepted JLS2: revision " + id + ": file " + path);
            }
        } else {
            if (debug) {
                System.err.println("Found JLS2 parse error in: revision " + id + ": file " + path);
            }

            fileBuilder.setKind(FileKind.SOURCE_JAVA_JLS3);
            if (parseJavaFile(path, fileBuilder, source, JavaCore.VERSION_1_5, AST.JLS3, false, astWriter,
                    astKey)) {
                if (debug) {
                    System.err.println("Accepted JLS3: revision " + id + ": file " + path);
                }
            } else {
                if (debug) {
                    System.err.println("Found JLS3 parse error in: revision " + id + ": file " + path);
                }

                fileBuilder.setKind(FileKind.SOURCE_JAVA_JLS4);
                if (parseJavaFile(path, fileBuilder, source, JavaCore.VERSION_1_7, AST.JLS4, false,
                        astWriter, astKey)) {
                    if (debug) {
                        System.err.println("Accepted JLS4: revision " + id + ": file " + path);
                    }
                } else {
                    if (debug) {
                        System.err.println("Found JLS4 parse error in: revision " + id + ": file " + path);
                    }

                    // No language level accepted the file: record it with an empty AST.
                    fileBuilder.setKind(FileKind.SOURCE_JAVA_ERROR);
                    try {
                        final BytesWritable emptyAst = new BytesWritable(
                                ASTRoot.newBuilder().build().toByteArray());
                        astWriter.append(new Text(revKey + keyDelim + fileBuilder.getName()), emptyAst);
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }
        }
    }
    fileBuilder.setKey(revKey);

    return fileBuilder;
}

From source file:boa.datagen.scm.AbstractCommit.java

License:Apache License

/**
 * Parses JavaScript source at the given AST level and, when the result is usable, stores
 * the resulting AST.
 *
 * The AST is stored when the error-check visitor reports no error, or when
 * {@code storeOnError} is true. It is appended to {@code astWriter} under {@code key} if a
 * writer is given, otherwise it is set on {@code fb}.
 *
 * @param path         file path, used only in the debug message
 * @param fb           builder that receives the AST when {@code astWriter} is null
 * @param content      source text to parse
 * @param compliance   compliance level applied to the compiler options
 * @param astLevel     AST level passed to the parser factory
 * @param storeOnError whether to store the AST even if the error check fails
 * @param astWriter    destination for the serialized AST, or null
 * @param key          key under which the AST is appended to {@code astWriter}
 * @return true only if the source parsed without error and no exception occurred
 */
private boolean parseJavaScriptFile(final String path, final ChangedFile.Builder fb, final String content,
        final String compliance, final int astLevel, final boolean storeOnError, Writer astWriter, String key) {
    try {
        final org.eclipse.wst.jsdt.core.dom.ASTParser jsParser =
                org.eclipse.wst.jsdt.core.dom.ASTParser.newParser(astLevel);
        jsParser.setKind(ASTParser.K_COMPILATION_UNIT);
        jsParser.setResolveBindings(true);
        jsParser.setSource(content.toCharArray());

        final Map compilerOptions = JavaCore.getOptions();
        JavaCore.setComplianceOptions(compliance, compilerOptions);
        jsParser.setCompilerOptions(compilerOptions);

        final JavaScriptUnit unit;
        try {
            unit = (JavaScriptUnit) jsParser.createAST(null);
        } catch (java.lang.IllegalArgumentException rejected) {
            return false;
        }

        final JavaScriptErrorCheckVisitor checker = new JavaScriptErrorCheckVisitor();
        unit.accept(checker);

        // Erroneous source is only kept when the caller explicitly asks for it.
        if (checker.hasError && !storeOnError) {
            return false;
        }

        final ASTRoot.Builder root = ASTRoot.newBuilder();
        final JavaScriptVisitor converter = new JavaScriptVisitor(content);
        try {
            root.addNamespaces(converter.getNamespaces(unit));
        } catch (final UnsupportedOperationException unsupported) {
            return false;
        } catch (final Exception failure) {
            if (debug) {
                System.err.println("Error visiting: " + path);
            }
            return false;
        }

        if (astWriter == null) {
            fb.setAst(root);
        } else {
            try {
                astWriter.append(new Text(key), new BytesWritable(root.build().toByteArray()));
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        return !checker.hasError;
    } catch (final Exception e) {
        e.printStackTrace();
        return false;
    }
}

From source file:boa.datagen.scm.AbstractCommit.java

License:Apache License

/**
 * Parses Java source at the given AST level and, when the result is usable, stores the
 * resulting AST (namespaces plus imports).
 *
 * The AST is stored when the error-check visitor reports no error, or when
 * {@code storeOnError} is true. It is appended to {@code astWriter} under {@code key} if a
 * writer is given, otherwise it is set on {@code fb}.
 *
 * @param path         file path, used only in the debug message
 * @param fb           builder that receives the AST when {@code astWriter} is null
 * @param content      source text to parse
 * @param compliance   compliance level applied to the compiler options
 * @param astLevel     AST level passed to the parser factory
 * @param storeOnError whether to store the AST even if the error check fails
 * @param astWriter    destination for the serialized AST, or null
 * @param key          key under which the AST is appended to {@code astWriter}
 * @return true only if the source parsed without error and no exception occurred
 */
private boolean parseJavaFile(final String path, final ChangedFile.Builder fb, final String content,
        final String compliance, final int astLevel, final boolean storeOnError, Writer astWriter, String key) {
    try {
        final ASTParser javaParser = ASTParser.newParser(astLevel);
        javaParser.setKind(ASTParser.K_COMPILATION_UNIT);
        javaParser.setResolveBindings(true);
        javaParser.setSource(content.toCharArray());

        final Map compilerOptions = JavaCore.getOptions();
        JavaCore.setComplianceOptions(compliance, compilerOptions);
        javaParser.setCompilerOptions(compilerOptions);

        final CompilationUnit unit = (CompilationUnit) javaParser.createAST(null);

        final JavaErrorCheckVisitor checker = new JavaErrorCheckVisitor();
        unit.accept(checker);

        // Erroneous source is only kept when the caller explicitly asks for it.
        if (checker.hasError && !storeOnError) {
            return false;
        }

        final ASTRoot.Builder root = ASTRoot.newBuilder();
        final JavaVisitor converter = new JavaVisitor(content, connector.nameIndices);
        try {
            root.addNamespaces(converter.getNamespaces(unit));
            for (final String importName : converter.getImports()) {
                root.addImports(importName);
            }
        } catch (final UnsupportedOperationException unsupported) {
            return false;
        } catch (final Exception failure) {
            if (debug) {
                System.err.println("Error visiting: " + path);
            }
            failure.printStackTrace();
            return false;
        }

        if (astWriter == null) {
            fb.setAst(root);
        } else {
            try {
                astWriter.append(new Text(key), new BytesWritable(root.build().toByteArray()));
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        return !checker.hasError;
    } catch (final Exception e) {
        e.printStackTrace();
        return false;
    }
}

From source file:ca.sparkera.adapters.mapreduce.MainframeVBRecordReader.java

License:Apache License

/**
 * Reads the next variable-length record: first a 4-byte length header, then the record body.
 *
 * The header is decoded with hex2decimal and 4 is subtracted to obtain the body length
 * (presumably because the stored length counts the header itself; confirm against the
 * record format). The key is set to the stream position at which the body starts and the
 * value holds the body bytes.
 *
 * The position and the remaining-bytes counter are advanced by the full 4 header bytes,
 * however many read() calls were needed to collect them.
 *
 * @return true if a complete record body was read; false at end of input or when no bytes
 *         remain in the split
 * @throws IOException if the stream fails, or if the input ends in the middle of a record
 *         body
 */
@Override
public synchronized boolean nextKeyValue() throws IOException {
    LOG.debug("VLR nextKey value started: pos" + pos + "leninit" + recordLength);

    final int headerLength = 4;
    byte[] lengthByte = new byte[headerLength];
    int offset = 0;
    int numBytesRead = 0;
    if (numBytesRemainingInSplit > 0) {
        // read() may deliver fewer bytes than requested, so loop until the header is complete.
        int headerBytesRead = 0;
        while (headerBytesRead < headerLength) {
            numBytesRead = inputStream.read(lengthByte, headerBytesRead, headerLength - headerBytesRead);
            if (numBytesRead == -1) {
                // EOF
                return false;
            }
            headerBytesRead += numBytesRead;
        }
        // Account for the whole header, not just the size of the last partial read.
        pos += headerLength;
        recordLength = hex2decimal(lengthByte) - headerLength;
        numBytesRemainingInSplit -= headerLength;
    }

    if (key == null) {
        key = new LongWritable();
    }
    if (value == null) {
        value = new BytesWritable(new byte[recordLength]);
    }
    LOG.debug("VLR nextKey record length" + recordLength + ":pos:" + pos);
    boolean dataRead = false;
    value.setSize(recordLength);
    byte[] record = value.getBytes();
    if (numBytesRemainingInSplit > 0) {
        key.set(pos);

        int numBytesToRead = recordLength;

        // Fill the record buffer, tolerating short reads.
        while (numBytesToRead > 0) {
            numBytesRead = inputStream.read(record, offset, numBytesToRead);
            if (numBytesRead == -1) {
                // EOF
                break;
            }
            offset += numBytesRead;
            numBytesToRead -= numBytesRead;
        }
        // From here on numBytesRead is the total number of body bytes obtained.
        numBytesRead = recordLength - numBytesToRead;
        pos += numBytesRead;
        if (numBytesRead > 0) {
            dataRead = true;
            if (numBytesRead >= recordLength) {
                if (!isCompressedInput) {
                    numBytesRemainingInSplit -= numBytesRead;
                }

            } else {
                throw new IOException(
                        "Partial record(length = " + numBytesRead + ") found at the end of split.");
            }
        } else {
            numBytesRemainingInSplit = 0L; // End of input.
        }
    }
    return dataRead;
}

From source file:cascading.avro.AvroSchemeTest.java

License:Apache License

/**
 * Writes two tuples covering every field of the test1.avsc schema through an AvroScheme tap,
 * copies them with a Hadoop flow into a second Avro tap, then reads the copy back and checks
 * the values of the first tuple and the null union of the second.
 *
 * The read iterator is closed in a finally block so the underlying reader is released even
 * when an assertion fails.
 */
@Test
public void testRoundTrip() throws Exception {
    final Schema schema = new Schema.Parser().parse(getClass().getResourceAsStream("test1.avsc"));

    final Fields fields = new Fields("aBoolean", "anInt", "aLong", "aDouble", "aFloat", "aBytes", "aFixed",
            "aNull", "aString", "aList", "aMap", "aUnion");

    String in = tempDir.getRoot().toString() + "/testRoundTrip/in";
    String out = tempDir.getRoot().toString() + "/testRoundTrip/out";
    Tap lfsSource = new Lfs(new AvroScheme(schema), in, SinkMode.REPLACE);
    TupleEntryCollector write = lfsSource.openForWrite(new HadoopFlowProcess());

    List<Integer> aList = new ArrayList<Integer>();
    Map<String, Integer> aMap = new HashMap<String, Integer>();
    aMap.put("one", 1);
    aMap.put("two", 2);

    aList.add(0);
    aList.add(1);
    // 16 bytes for the "aFixed" field, 3 bytes for the "aBytes" field.
    BytesWritable bytesWritable = new BytesWritable(
            new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 });
    BytesWritable bytesWritable2 = new BytesWritable(new byte[] { 1, 2, 3 });
    Tuple tuple = new Tuple(false, 1, 2L, 3.0, 4.0F, bytesWritable2, bytesWritable, null, "test-string", aList,
            aMap, 5);
    write.add(new TupleEntry(fields, tuple));
    // Second tuple: empty bytes and a null union value.
    write.add(new TupleEntry(fields,
            new Tuple(false, 1, 2L, 3.0, 4.0F, new BytesWritable(new byte[0]),
                    new BytesWritable(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6 }), null,
                    "other string", aList, aMap, null)));
    write.close();

    Pipe writePipe = new Pipe("tuples to avro");

    Tap avroSink = new Lfs(new AvroScheme(schema), out);
    Flow flow = new HadoopFlowConnector().connect(lfsSource, avroSink, writePipe);
    flow.complete();

    // Now read it back in, and verify that the data/types match up.
    Tap avroSource = new Lfs(new AvroScheme(schema), out);

    TupleEntryIterator iterator = avroSource.openForRead(new HadoopFlowProcess());
    try {
        assertTrue(iterator.hasNext());
        final TupleEntry readEntry1 = iterator.next();

        assertEquals(false, readEntry1.getBoolean(0));
        assertEquals(1, readEntry1.getInteger(1));
        assertEquals(2L, readEntry1.getLong(2));
        assertEquals(3.0, readEntry1.getDouble(3), 0.01);
        assertEquals(4.0F, readEntry1.getFloat(4), 0.01);
        assertEquals(bytesWritable2, readEntry1.getObject(5));
        assertEquals(bytesWritable, readEntry1.getObject(6));
        assertEquals("test-string", readEntry1.getString(8));
        assertEquals("0", ((List) readEntry1.getObject(9)).get(0).toString());
        assertEquals(1, ((Map) readEntry1.getObject(10)).get("one"));
        assertTrue(iterator.hasNext());
        final TupleEntry readEntry2 = iterator.next();

        assertNull(readEntry2.get("aUnion"));
    } finally {
        iterator.close();
    }
}

From source file:cascading.avro.AvroToCascading.java

License:Apache License

/**
 * Converts an Avro fixed value into a BytesWritable built from the value's bytes.
 *
 * @param obj    the value to convert; it is cast to Fixed
 * @param schema not used by this method
 * @return a new BytesWritable created from {@code obj}'s byte array
 */
protected static Object fromAvroFixed(Object obj, Schema schema) {
    return new BytesWritable(((Fixed) obj).bytes());
}

From source file:cascading.avro.AvroToCascading.java

License:Apache License

/**
 * Converts an Avro bytes value into a BytesWritable holding exactly the buffer's remaining
 * bytes (from its position up to its limit).
 *
 * The bytes are copied through a duplicate of the buffer, so the caller's position is left
 * untouched, and read-only or direct buffers are handled too. Taking the raw backing array
 * instead would also pick up bytes outside the position/limit window.
 *
 * @param val the buffer to convert
 * @return a new BytesWritable containing a copy of the buffer's remaining bytes
 */
protected static BytesWritable fromAvroBytes(ByteBuffer val) {
    // Work on a duplicate: get() advances the position, which must not leak to the caller.
    ByteBuffer view = val.duplicate();
    byte[] bytes = new byte[view.remaining()];
    view.get(bytes);
    return new BytesWritable(bytes);
}