List of usage examples for the org.apache.hadoop.io.Text constructor Text
public Text(byte[] utf8)
From source file:be.uantwerpen.adrem.eclat.util.TreeStringReporter.java
License:Apache License
/**
 * Flushes the buffered output to the job context and resets this reporter.
 *
 * <p>Writes a single record whose key is the current {@code count} and whose value is the
 * accumulated {@code builder} content, then clears the builder and zeroes the counter.
 * Failures are reported on standard error and are not propagated; in that case the buffer
 * and counter are left untouched.
 */
@Override
public void close() {
    try {
        context.write(new Text("" + count), new Text(builder.toString()));
        System.out.println("wrote " + count + " compressed itemsets");
        builder.setLength(0);
        count = 0;
    } catch (IOException e) {
        e.printStackTrace();
    } catch (InterruptedException e) {
        // Restore the interrupt status so code further up the stack can still observe
        // that this thread was asked to stop; catching the exception clears the flag.
        Thread.currentThread().interrupt();
        e.printStackTrace();
    }
}
From source file:be.ugent.intec.halvade.hadoop.mapreduce.StarAlignPassXMapper.java
@Override protected void cleanup(Context context) throws IOException, InterruptedException { super.cleanup(context); if (allTasksHaveStarted) { try {//from w w w.j a v a2 s .c om star_shmem_lock.getLock(); ((STARInstance) instance).loadSharedMemoryReference(null, true); } finally { star_shmem_lock.removeAndReleaseLock(); } } if (!runPass2) { sj.setOverhang(((STARInstance) instance).getOverhang()); context.write(sj, new Text("")); } }
From source file:bixo.examples.crawl.CreateWritableSeqFileData.java
License:Apache License
/**
 * Converts the incoming arguments into a key/value tuple of {@link Text} objects.
 *
 * <p>The key is the datum's URL; the value is the datum's title, a newline, and the parsed
 * text. The resulting tuple is added to the call's output collector.
 *
 * @param flowProcess the current flow process (not used here)
 * @param funcCall supplies the input arguments and the output collector
 */
@Override
public void operate(FlowProcess flowProcess, FunctionCall<NullContext> funcCall) {
    final ParsedDatum parsed = new ParsedDatum(funcCall.getArguments());

    final Text url = new Text(parsed.getUrl());
    final String titleAndBody = parsed.getTitle() + '\n' + parsed.getParsedText();
    final Tuple pair = new Tuple(url, new Text(titleAndBody));

    funcCall.getOutputCollector().add(pair);
}
From source file:bme.iclef.hadoop.file2seq.TarToSeqFile.java
License:Apache License
/** Performs the conversion. */ public void execute() throws Exception { TarInputStream input = null;//from w w w . jav a 2 s . c o m SequenceFile.Writer output = null; try { input = openInputFile(); output = openOutputFile(); TarEntry entry; while ((entry = input.getNextEntry()) != null) { if (entry.isDirectory()) { continue; } String filename = entry.getName(); byte[] data = TarToSeqFile.getBytes(input, entry.getSize()); Text key = new Text(filename); BytesWritable value = new BytesWritable(data); output.append(key, value); } } finally { if (input != null) { input.close(); } if (output != null) { output.close(); } } }
From source file:boa.aggregators.Aggregator.java
License:Apache License
/**
 * Emits one aggregated value.
 *
 * <p>While combining, the value is forwarded as an {@code EmitValue} under this
 * aggregator's key. Otherwise a text line of the form {@code key = data}, followed by
 * {@code " weight " + metadata} when metadata is present, is written with a
 * {@code NullWritable} value.
 *
 * @param data the value to emit
 * @param metadata optional weight information; may be {@code null}
 * @throws IOException if the write fails
 * @throws InterruptedException if the write is interrupted
 */
@SuppressWarnings("unchecked")
protected void collect(final String data, final String metadata) throws IOException, InterruptedException {
    if (this.combining) {
        this.getContext().write(this.getKey(), new EmitValue(data, metadata));
        return;
    }

    // The weight suffix is only present when metadata was supplied.
    final String weightSuffix = (metadata == null) ? "" : " weight " + metadata;
    this.getContext().write(new Text(this.getKey() + " = " + data + weightSuffix), NullWritable.get());
}
From source file:boa.aggregators.GraphAggregator.java
License:Apache License
/**
 * {@inheritDoc}
 *
 * <p>While combining, forwards the value as an {@code EmitValue} under this aggregator's
 * key; otherwise writes the result of {@code format(index, data, metadata)} as the output
 * key with a {@code NullWritable} value.
 */
@Override
@SuppressWarnings("unchecked")
protected void collect(final String data, final String metadata) throws IOException, InterruptedException {
    if (!this.isCombining()) {
        this.getContext().write(
                new Text(format(this.getKey().getIndex(), data, metadata)),
                NullWritable.get());
    } else {
        this.getContext().write(this.getKey(), new EmitValue(data, metadata));
    }
}
From source file:boa.datagen.scm.AbstractCommit.java
License:Apache License
private Builder processChangeFile(String path, boolean parse, Writer astWriter, String revKey, String keyDelim) {//from w ww .j ava 2 s.co m final ChangedFile.Builder fb = ChangedFile.newBuilder(); fb.setName(path); fb.setKind(FileKind.OTHER); final String lowerPath = path.toLowerCase(); if (lowerPath.endsWith(".txt")) fb.setKind(FileKind.TEXT); else if (lowerPath.endsWith(".xml")) fb.setKind(FileKind.XML); else if (lowerPath.endsWith(".jar") || lowerPath.endsWith(".class")) fb.setKind(FileKind.BINARY); else if (lowerPath.endsWith(".java") && parse) { final String content = getFileContents(path); fb.setKind(FileKind.SOURCE_JAVA_JLS2); if (!parseJavaFile(path, fb, content, JavaCore.VERSION_1_4, AST.JLS2, false, astWriter, revKey + keyDelim + path)) { if (debug) System.err.println("Found JLS2 parse error in: revision " + id + ": file " + path); fb.setKind(FileKind.SOURCE_JAVA_JLS3); if (!parseJavaFile(path, fb, content, JavaCore.VERSION_1_5, AST.JLS3, false, astWriter, revKey + keyDelim + path)) { if (debug) System.err.println("Found JLS3 parse error in: revision " + id + ": file " + path); fb.setKind(FileKind.SOURCE_JAVA_JLS4); if (!parseJavaFile(path, fb, content, JavaCore.VERSION_1_7, AST.JLS4, false, astWriter, revKey + keyDelim + path)) { if (debug) System.err.println("Found JLS4 parse error in: revision " + id + ": file " + path); //fb.setContent(content); fb.setKind(FileKind.SOURCE_JAVA_ERROR); try { astWriter.append(new Text(revKey + keyDelim + fb.getName()), new BytesWritable(ASTRoot.newBuilder().build().toByteArray())); } catch (IOException e) { e.printStackTrace(); } } else if (debug) System.err.println("Accepted JLS4: revision " + id + ": file " + path); } else if (debug) System.err.println("Accepted JLS3: revision " + id + ": file " + path); } else if (debug) System.err.println("Accepted JLS2: revision " + id + ": file " + path); } fb.setKey(revKey); return fb; }
From source file:boa.datagen.scm.AbstractCommit.java
License:Apache License
private boolean parseJavaScriptFile(final String path, final ChangedFile.Builder fb, final String content, final String compliance, final int astLevel, final boolean storeOnError, Writer astWriter, String key) { try {/* w ww .j av a2 s. c om*/ //System.out.println("parsing=" + (++count) + "\t" + path); final org.eclipse.wst.jsdt.core.dom.ASTParser parser = org.eclipse.wst.jsdt.core.dom.ASTParser .newParser(astLevel); parser.setKind(ASTParser.K_COMPILATION_UNIT); parser.setResolveBindings(true); parser.setSource(content.toCharArray()); final Map options = JavaCore.getOptions(); JavaCore.setComplianceOptions(compliance, options); parser.setCompilerOptions(options); JavaScriptUnit cu; try { cu = (JavaScriptUnit) parser.createAST(null); } catch (java.lang.IllegalArgumentException ex) { return false; } final JavaScriptErrorCheckVisitor errorCheck = new JavaScriptErrorCheckVisitor(); cu.accept(errorCheck); if (!errorCheck.hasError || storeOnError) { final ASTRoot.Builder ast = ASTRoot.newBuilder(); // final CommentsRoot.Builder comments = // CommentsRoot.newBuilder(); final JavaScriptVisitor visitor = new JavaScriptVisitor(content); try { ast.addNamespaces(visitor.getNamespaces(cu)); // for (final String s : visitor.getImports()) // ast.addImports(s); /* * for (final Comment c : visitor.getComments()) * comments.addComments(c); */ } catch (final UnsupportedOperationException e) { return false; } catch (final Exception e) { if (debug) System.err.println("Error visiting: " + path); //e.printStackTrace(); return false; } if (astWriter != null) { try { // System.out.println("writing=" + count + "\t" + path); astWriter.append(new Text(key), new BytesWritable(ast.build().toByteArray())); } catch (IOException e) { e.printStackTrace(); } } else fb.setAst(ast); // fb.setComments(comments); } return !errorCheck.hasError; } catch (final Exception e) { e.printStackTrace(); return false; } }
From source file:boa.datagen.scm.AbstractCommit.java
License:Apache License
private boolean parseJavaFile(final String path, final ChangedFile.Builder fb, final String content, final String compliance, final int astLevel, final boolean storeOnError, Writer astWriter, String key) { try {/*from ww w .j a va 2s . co m*/ final ASTParser parser = ASTParser.newParser(astLevel); parser.setKind(ASTParser.K_COMPILATION_UNIT); parser.setResolveBindings(true); parser.setSource(content.toCharArray()); final Map options = JavaCore.getOptions(); JavaCore.setComplianceOptions(compliance, options); parser.setCompilerOptions(options); final CompilationUnit cu = (CompilationUnit) parser.createAST(null); final JavaErrorCheckVisitor errorCheck = new JavaErrorCheckVisitor(); cu.accept(errorCheck); if (!errorCheck.hasError || storeOnError) { final ASTRoot.Builder ast = ASTRoot.newBuilder(); //final CommentsRoot.Builder comments = CommentsRoot.newBuilder(); final JavaVisitor visitor = new JavaVisitor(content, connector.nameIndices); try { ast.addNamespaces(visitor.getNamespaces(cu)); for (final String s : visitor.getImports()) ast.addImports(s); /*for (final Comment c : visitor.getComments()) comments.addComments(c);*/ } catch (final UnsupportedOperationException e) { return false; } catch (final Exception e) { if (debug) System.err.println("Error visiting: " + path); e.printStackTrace(); return false; } if (astWriter != null) { try { astWriter.append(new Text(key), new BytesWritable(ast.build().toByteArray())); } catch (IOException e) { e.printStackTrace(); } } else fb.setAst(ast); //fb.setComments(comments); } return !errorCheck.hasError; } catch (final Exception e) { e.printStackTrace(); return false; } }
From source file:boa.functions.BoaAstIntrinsics.java
License:Apache License
/** * Given a ChangedFile, return the AST for that file at that revision. * //from w ww . j a va 2 s . c o m * @param f the ChangedFile to get a snapshot of the AST for * @return the AST, or an empty AST on any sort of error */ @SuppressWarnings("unchecked") @FunctionSpec(name = "getast", returnType = "ASTRoot", formalParameters = { "ChangedFile" }) public static ASTRoot getast(final ChangedFile f) { // since we know only certain kinds have ASTs, filter before looking up final ChangedFile.FileKind kind = f.getKind(); if (kind != ChangedFile.FileKind.SOURCE_JAVA_ERROR && kind != ChangedFile.FileKind.SOURCE_JAVA_JLS2 && kind != ChangedFile.FileKind.SOURCE_JAVA_JLS3 && kind != ChangedFile.FileKind.SOURCE_JAVA_JLS4) return emptyAst; context.getCounter(AST_COUNTER.GETS_ATTEMPTED).increment(1); final String rowName = f.getKey() + "!!" + f.getName(); if (map == null) openMap(); try { final BytesWritable value = new BytesWritable(); if (map.get(new Text(rowName), value) == null) { context.getCounter(AST_COUNTER.GETS_FAIL_MISSING).increment(1); } else { final CodedInputStream _stream = CodedInputStream.newInstance(value.getBytes(), 0, value.getLength()); // defaults to 64, really big ASTs require more _stream.setRecursionLimit(Integer.MAX_VALUE); final ASTRoot root = ASTRoot.parseFrom(_stream); context.getCounter(AST_COUNTER.GETS_SUCCEED).increment(1); return root; } } catch (final InvalidProtocolBufferException e) { e.printStackTrace(); context.getCounter(AST_COUNTER.GETS_FAIL_BADPROTOBUF).increment(1); } catch (final IOException e) { e.printStackTrace(); context.getCounter(AST_COUNTER.GETS_FAIL_MISSING).increment(1); } catch (final RuntimeException e) { e.printStackTrace(); context.getCounter(AST_COUNTER.GETS_FAIL_MISSING).increment(1); } catch (final Error e) { e.printStackTrace(); context.getCounter(AST_COUNTER.GETS_FAIL_BADPROTOBUF).increment(1); } System.err.println("error with ast: " + rowName); context.getCounter(AST_COUNTER.GETS_FAILED).increment(1); return 
emptyAst; }