List of usage examples for org.apache.hadoop.io.BytesWritable.getLength()
@Override public int getLength()
From source file:alluxio.hadoop.mapreduce.KeyValueRecordWriter.java
License:Apache License
@Override public synchronized void write(BytesWritable key, BytesWritable value) throws IOException { try {//ww w . j a v a2s . co m // NOTE: BytesWritable.getBytes() returns the internal byte array, whose length might not be // the same as BytesWritable.getLength(). mWriter.put(Arrays.copyOf(key.getBytes(), key.getLength()), Arrays.copyOf(value.getBytes(), value.getLength())); // Sends a progress to the job manager to inform it that the task is still running. } catch (AlluxioException e) { throw new IOException(e); } }
From source file:boa.datagen.SeqProjectCombiner.java
License:Apache License
/**
 * Combines every {@code projects-*.seq} sequence file found under {@code tmprepcache/2015-07}
 * into the single file {@code repcache/2015-07/projects.seq}.
 *
 * <p>Pass 1 decides, for each project key, which input file supplies it: the first file whose
 * copy of the project has a code repository with at least one revision, or otherwise the last
 * file in which the key was seen. Pass 2 re-reads the inputs and copies each record only from
 * the file chosen for its key.
 *
 * @param args ignored
 * @throws IOException if the file system, a reader, or the writer cannot be opened or closed
 */
public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    conf.set("fs.default.name", "hdfs://boa-njt/");
    FileSystem fileSystem = FileSystem.get(conf);

    // Close the file system on every exit path, including failures while opening readers/writers.
    try {
        String base = conf.get("fs.default.name", "");

        // project key -> name of the input file that should supply it
        HashMap<String, String> sources = new HashMap<String, String>();
        // project keys whose source is final (a copy with revisions was found)
        HashSet<String> marks = new HashSet<String>();

        FileStatus[] files = fileSystem.listStatus(new Path(base + "tmprepcache/2015-07"));

        // Pass 1: choose the source file for every key.
        for (int i = 0; i < files.length; i++) {
            FileStatus file = files[i];
            String name = file.getPath().getName();
            if (name.startsWith("projects-") && name.endsWith(".seq")) {
                System.out.println("Reading file " + i + " in " + files.length + ": " + name);
                SequenceFile.Reader r = new SequenceFile.Reader(fileSystem, file.getPath(), conf);
                final Text key = new Text();
                final BytesWritable value = new BytesWritable();
                try {
                    while (r.next(key, value)) {
                        String s = key.toString();
                        if (marks.contains(s))
                            continue;
                        // Only the first getLength() bytes of the backing array are valid.
                        Project p = Project
                                .parseFrom(CodedInputStream.newInstance(value.getBytes(), 0, value.getLength()));
                        if (p.getCodeRepositoriesCount() > 0 && p.getCodeRepositories(0).getRevisionsCount() > 0)
                            marks.add(s);
                        sources.put(s, name);
                    }
                } catch (Exception e) {
                    // Best effort: report the offending file and move on to the next one.
                    System.err.println(name);
                    e.printStackTrace();
                } finally {
                    r.close();
                }
            }
        }

        SequenceFile.Writer w = SequenceFile.createWriter(fileSystem, conf,
                new Path(base + "repcache/2015-07/projects.seq"), Text.class, BytesWritable.class);

        // Pass 2: copy each record from the file chosen for its key.
        try {
            for (int i = 0; i < files.length; i++) {
                FileStatus file = files[i];
                String name = file.getPath().getName();
                if (name.startsWith("projects-") && name.endsWith(".seq")) {
                    System.out.println("Reading file " + i + " in " + files.length + ": " + name);
                    SequenceFile.Reader r = new SequenceFile.Reader(fileSystem, file.getPath(), conf);
                    final Text key = new Text();
                    final BytesWritable value = new BytesWritable();
                    try {
                        while (r.next(key, value)) {
                            String s = key.toString();
                            // Null-safe: a key is absent from sources when pass 1 aborted this
                            // file on a parse error; skip it instead of aborting the file again.
                            if (name.equals(sources.get(s)))
                                w.append(key, value);
                        }
                    } catch (Exception e) {
                        System.err.println(name);
                        e.printStackTrace();
                    } finally {
                        r.close();
                    }
                }
            }
        } finally {
            w.close();
        }
    } finally {
        fileSystem.close();
    }
}
From source file:boa.functions.BoaAstIntrinsics.java
License:Apache License
/** * Given a ChangedFile, return the AST for that file at that revision. * /*from ww w . ja v a 2 s . co m*/ * @param f the ChangedFile to get a snapshot of the AST for * @return the AST, or an empty AST on any sort of error */ @SuppressWarnings("unchecked") @FunctionSpec(name = "getast", returnType = "ASTRoot", formalParameters = { "ChangedFile" }) public static ASTRoot getast(final ChangedFile f) { // since we know only certain kinds have ASTs, filter before looking up final ChangedFile.FileKind kind = f.getKind(); if (kind != ChangedFile.FileKind.SOURCE_JAVA_ERROR && kind != ChangedFile.FileKind.SOURCE_JAVA_JLS2 && kind != ChangedFile.FileKind.SOURCE_JAVA_JLS3 && kind != ChangedFile.FileKind.SOURCE_JAVA_JLS4) return emptyAst; context.getCounter(AST_COUNTER.GETS_ATTEMPTED).increment(1); final String rowName = f.getKey() + "!!" + f.getName(); if (map == null) openMap(); try { final BytesWritable value = new BytesWritable(); if (map.get(new Text(rowName), value) == null) { context.getCounter(AST_COUNTER.GETS_FAIL_MISSING).increment(1); } else { final CodedInputStream _stream = CodedInputStream.newInstance(value.getBytes(), 0, value.getLength()); // defaults to 64, really big ASTs require more _stream.setRecursionLimit(Integer.MAX_VALUE); final ASTRoot root = ASTRoot.parseFrom(_stream); context.getCounter(AST_COUNTER.GETS_SUCCEED).increment(1); return root; } } catch (final InvalidProtocolBufferException e) { e.printStackTrace(); context.getCounter(AST_COUNTER.GETS_FAIL_BADPROTOBUF).increment(1); } catch (final IOException e) { e.printStackTrace(); context.getCounter(AST_COUNTER.GETS_FAIL_MISSING).increment(1); } catch (final RuntimeException e) { e.printStackTrace(); context.getCounter(AST_COUNTER.GETS_FAIL_MISSING).increment(1); } catch (final Error e) { e.printStackTrace(); context.getCounter(AST_COUNTER.GETS_FAIL_BADPROTOBUF).increment(1); } System.err.println("error with ast: " + rowName); context.getCounter(AST_COUNTER.GETS_FAILED).increment(1); return 
emptyAst; }
From source file:boa.functions.BoaAstIntrinsics.java
License:Apache License
/** * Given a ChangedFile, return the comments for that file at that revision. * // w ww. j a va 2 s.co m * @param f the ChangedFile to get a snapshot of the comments for * @return the comments list, or an empty list on any sort of error */ @FunctionSpec(name = "getcomments", returnType = "CommentsRoot", formalParameters = { "ChangedFile" }) public static CommentsRoot getcomments(final ChangedFile f) { // since we know only certain kinds have comments, filter before looking up final ChangedFile.FileKind kind = f.getKind(); if (kind != ChangedFile.FileKind.SOURCE_JAVA_ERROR && kind != ChangedFile.FileKind.SOURCE_JAVA_JLS2 && kind != ChangedFile.FileKind.SOURCE_JAVA_JLS3 && kind != ChangedFile.FileKind.SOURCE_JAVA_JLS4) return emptyComments; final String rowName = f.getKey() + "!!" + f.getName(); if (commentsMap == null) openCommentMap(); try { final BytesWritable value = new BytesWritable(); if (commentsMap.get(new Text(rowName), value) != null) { final CodedInputStream _stream = CodedInputStream.newInstance(value.getBytes(), 0, value.getLength()); final CommentsRoot root = CommentsRoot.parseFrom(_stream); return root; } } catch (final InvalidProtocolBufferException e) { e.printStackTrace(); } catch (final IOException e) { e.printStackTrace(); } catch (final RuntimeException e) { e.printStackTrace(); } catch (final Error e) { e.printStackTrace(); } System.err.println("error with comments: " + rowName); return emptyComments; }
From source file:boa.functions.BoaAstIntrinsics.java
License:Apache License
/** * Given an IssueRepository, return the issues. * /*w w w. j a v a 2 s. c om*/ * @param f the IssueRepository to get issues for * @return the issues list, or an empty list on any sort of error */ @FunctionSpec(name = "getissues", returnType = "IssuesRoot", formalParameters = { "IssueRepository" }) public static IssuesRoot getissues(final IssueRepository f) { if (issuesMap == null) openIssuesMap(); try { final BytesWritable value = new BytesWritable(); if (issuesMap.get(new Text(f.getKey()), value) != null) { final CodedInputStream _stream = CodedInputStream.newInstance(value.getBytes(), 0, value.getLength()); final IssuesRoot root = IssuesRoot.parseFrom(_stream); return root; } } catch (final InvalidProtocolBufferException e) { e.printStackTrace(); } catch (final IOException e) { e.printStackTrace(); } catch (final RuntimeException e) { e.printStackTrace(); } catch (final Error e) { e.printStackTrace(); } System.err.println("error with issues: " + f.getKey()); return emptyIssues; }
From source file:cascading.avro.CascadingToAvro.java
License:Apache License
/**
 * Converts a {@code BytesWritable} into an Avro {@code Fixed} of the given schema.
 *
 * @param obj the value to convert; cast to {@code BytesWritable}
 * @param schema the schema passed to the {@code Fixed} constructor
 * @return a {@code Fixed} holding a copy of the first {@code getLength()} bytes
 */
protected static Object toAvroFixed(Object obj, Schema schema) {
    BytesWritable writable = (BytesWritable) obj;
    // copy only the valid prefix; the backing array may be longer than getLength()
    byte[] payload = Arrays.copyOf(writable.getBytes(), writable.getLength());
    return new Fixed(schema, payload);
}
From source file:cascading.avro.CascadingToAvro.java
License:Apache License
/**
 * Converts a {@code BytesWritable} into a {@code ByteBuffer} for Avro.
 *
 * @param obj the value to convert; cast to {@code BytesWritable}
 * @return a buffer wrapping a copy of the first {@code getLength()} bytes
 */
protected static Object toAvroBytes(Object obj) {
    BytesWritable writable = (BytesWritable) obj;
    // copy only the valid prefix; the backing array may be longer than getLength()
    byte[] payload = Arrays.copyOf(writable.getBytes(), writable.getLength());
    return ByteBuffer.wrap(payload);
}
From source file:cascading.tuple.hadoop.SpillableTupleHadoopTest.java
License:Open Source License
private void performListTest(int size, int threshold, CompressionCodec codec, int spills) { Configuration jobConf = new Configuration(); jobConf.set("io.serializations", TestSerialization.class.getName() + "," + WritableSerialization.class.getName()); // disable/replace WritableSerialization class jobConf.set("cascading.serialization.tokens", "1000=" + BooleanWritable.class.getName() + ",10001=" + Text.class.getName()); // not using Text, just testing parsing HadoopSpillableTupleList list = new HadoopSpillableTupleList(threshold, codec, jobConf); for (int i = 0; i < size; i++) { String aString = "string number " + i; double random = Math.random(); list.add(new Tuple(i, aString, random, new Text(aString), new TestText(aString), new Tuple("inner tuple", new BytesWritable(aString.getBytes())))); }//from w ww .j a v a2 s . com assertEquals("not equal: list.size();", size, list.size()); assertEquals("not equal: list.getNumFiles()", spills, list.spillCount()); int i = -1; int count = 0; for (Tuple tuple : list) { int value = tuple.getInteger(0); assertTrue("wrong diff", value - i == 1); assertEquals("wrong value", "string number " + count, tuple.getObject(3).toString()); assertEquals("wrong value", "string number " + count, tuple.getObject(4).toString()); assertTrue("wrong type", tuple.getObject(5) instanceof Tuple); BytesWritable bytesWritable = (BytesWritable) ((Tuple) tuple.getObject(5)).getObject(1); byte[] bytes = bytesWritable.getBytes(); String actual = new String(bytes, 0, bytesWritable.getLength()); assertEquals("wrong value", "string number " + count, actual); i = value; count++; } assertEquals("not equal: list.size();", size, count); Iterator<Tuple> iterator = list.iterator(); assertEquals("not equal: iterator.next().get(1)", "string number 0", iterator.next().getObject(1)); assertEquals("not equal: iterator.next().get(1)", "string number 1", iterator.next().getObject(1)); }
From source file:com.axiomine.largecollections.kryo.serializers.BytesWritableSerializer.java
License:Apache License
/**
 * Writes a {@code BytesWritable} as its valid length followed by exactly that many bytes.
 *
 * @param kryo the Kryo instance (unused here)
 * @param output the output to write to
 * @param object the value to serialize
 */
public void write(Kryo kryo, Output output, BytesWritable object) {
    final int length = object.getLength();
    output.writeInt(length, true);
    // getBytes() returns the whole backing array, which can be longer than getLength().
    // Writing all of it would emit padding bytes after the payload that the length prefix
    // does not account for, so write only the valid prefix.
    // NOTE(review): assumes the matching read() consumes exactly `length` bytes — confirm.
    output.write(object.getBytes(), 0, length);
}
From source file:com.bixolabs.cascading.avro.AvroScheme.java
License:Apache License
private Object convertToAvroPrimitive(Object inObj) { if (inObj == null) { return null; } else if (inObj instanceof String) { return new Utf8((String) inObj); } else if (inObj instanceof BytesWritable) { BytesWritable bw = (BytesWritable) inObj; ByteBuffer convertedObj = ByteBuffer.wrap(bw.getBytes(), 0, bw.getLength()); return convertedObj; // } else if (curType.isEnum()) { // Object result = new CascadingEnumSymbol((String) inObj); // return result; } else if (inObj instanceof Short) { Short val = (Short) inObj; return val.intValue(); } else {//w ww . j a v a 2 s . c o m return inObj; } }