Example usage for the org.apache.hadoop.io.Text constructor Text

List of usage examples for org.apache.hadoop.io Text Text

Introduction

On this page you can find example usages of the org.apache.hadoop.io.Text constructor.

Prototype

public Text(byte[] utf8) 

Source Link

Document

Construct from a byte array.

Usage

From source file:be.uantwerpen.adrem.eclat.util.TreeStringReporter.java

License:Apache License

/**
 * Flushes the buffered itemsets to the job context and resets the reporter.
 *
 * <p>Writes a single record whose key is the current {@code count} and whose value is the
 * accumulated {@code builder} contents, then clears the buffer and zeroes the counter. When
 * the write fails, the buffer and counter are left untouched and the failure is printed.
 */
@Override
public void close() {
    try {
        context.write(new Text("" + count), new Text(builder.toString()));
        System.out.println("wrote " + count + " compressed itemsets");
        builder.setLength(0);
        count = 0;
    } catch (IOException e) {
        e.printStackTrace();
    } catch (InterruptedException e) {
        // Restore the interrupt flag: this method cannot rethrow, but code further up the
        // stack must still be able to observe that the thread was interrupted.
        Thread.currentThread().interrupt();
        e.printStackTrace();
    }
}

From source file:be.ugent.intec.halvade.hadoop.mapreduce.StarAlignPassXMapper.java

/**
 * Task teardown.
 *
 * <p>Runs the superclass cleanup first. If all tasks have started, calls
 * {@code loadSharedMemoryReference(null, true)} on the STAR instance between
 * {@code star_shmem_lock.getLock()} and {@code star_shmem_lock.removeAndReleaseLock()}.
 * Finally, unless pass 2 is being run, copies the instance's overhang into {@code sj} and
 * writes it to the context with an empty {@code Text} value.
 *
 * @param context the task context that receives the final record
 * @throws IOException if the superclass cleanup or the context write fails
 * @throws InterruptedException if the superclass cleanup or the context write is interrupted
 */
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
    super.cleanup(context);

    if (allTasksHaveStarted) {
        try {
            star_shmem_lock.getLock();
            final STARInstance sharedMemoryOwner = (STARInstance) instance;
            sharedMemoryOwner.loadSharedMemoryReference(null, true);
        } finally {
            // Executed whether or not the calls above succeed.
            star_shmem_lock.removeAndReleaseLock();
        }
    }

    if (runPass2) {
        return;
    }

    sj.setOverhang(((STARInstance) instance).getOverhang());
    context.write(sj, new Text(""));
}

From source file:bixo.examples.crawl.CreateWritableSeqFileData.java

License:Apache License

/**
 * Turns the call arguments into a key/value tuple of {@code Text} objects and emits it.
 *
 * <p>The key is the datum's URL; the value is the datum's title, a newline character, and
 * the parsed text, concatenated in that order.
 *
 * @param flowProcess the current flow process (not used here)
 * @param funcCall supplies the input arguments and the output collector
 */
@Override
public void operate(FlowProcess flowProcess, FunctionCall<NullContext> funcCall) {
    final ParsedDatum parsed = new ParsedDatum(funcCall.getArguments());

    final Text urlKey = new Text(parsed.getUrl());
    final Text titleAndBody = new Text(parsed.getTitle() + '\n' + parsed.getParsedText());

    final Tuple pair = new Tuple(urlKey, titleAndBody);
    funcCall.getOutputCollector().add(pair);
}

From source file:bme.iclef.hadoop.file2seq.TarToSeqFile.java

License:Apache License

/**
 * Performs the conversion.
 *
 * <p>Opens the tar input and the sequence-file output, then appends one record per
 * non-directory tar entry: the key is the entry name and the value holds the entry's bytes.
 * Both resources are closed on exit, and the output is closed even when closing the input
 * throws.
 *
 * @throws Exception if opening, reading, appending or closing fails
 */
public void execute() throws Exception {
    TarInputStream input = null;
    SequenceFile.Writer output = null;
    try {
        input = openInputFile();
        output = openOutputFile();
        TarEntry entry;
        while ((entry = input.getNextEntry()) != null) {
            if (entry.isDirectory()) {
                continue;
            }
            String filename = entry.getName();
            byte[] data = TarToSeqFile.getBytes(input, entry.getSize());

            Text key = new Text(filename);
            BytesWritable value = new BytesWritable(data);
            output.append(key, value);
        }
    } finally {
        try {
            if (input != null) {
                input.close();
            }
        } finally {
            // Nested finally: a failure while closing the input must not leak the writer.
            if (output != null) {
                output.close();
            }
        }
    }
}

From source file:boa.aggregators.Aggregator.java

License:Apache License

/**
 * Emits one value to the job context.
 *
 * <p>While combining, the current key is written together with an {@code EmitValue} wrapping
 * {@code data} and {@code metadata}. Otherwise a single {@code Text} line of the form
 * {@code key = data} is written with a {@code NullWritable} value; when {@code metadata} is
 * non-null the line is extended with {@code " weight " + metadata}.
 *
 * @param data the value to emit
 * @param metadata optional metadata for the value; may be {@code null}
 * @throws IOException if the context write fails
 * @throws InterruptedException if the context write is interrupted
 */
@SuppressWarnings("unchecked")
protected void collect(final String data, final String metadata) throws IOException, InterruptedException {
    if (this.combining) {
        this.getContext().write(this.getKey(), new EmitValue(data, metadata));
        return;
    }

    // The weight suffix is only present when metadata was supplied.
    final String weightSuffix = (metadata != null) ? " weight " + metadata : "";
    this.getContext().write(new Text(this.getKey() + " = " + data + weightSuffix), NullWritable.get());
}

From source file:boa.aggregators.GraphAggregator.java

License:Apache License

/**
 * {@inheritDoc}
 *
 * <p>While combining, the key and an {@code EmitValue} are passed through unchanged.
 * Otherwise the output line is produced by {@code format} from the key's index, the data
 * and the metadata, and written with a {@code NullWritable} value.
 */
@Override
@SuppressWarnings("unchecked")
protected void collect(final String data, final String metadata) throws IOException, InterruptedException {
    if (this.isCombining()) {
        this.getContext().write(this.getKey(), new EmitValue(data, metadata));
    } else {
        this.getContext().write(new Text(format(this.getKey().getIndex(), data, metadata)),
                NullWritable.get());
    }
}

From source file:boa.datagen.scm.AbstractCommit.java

License:Apache License

/**
 * Builds a {@code ChangedFile.Builder} describing the file at {@code path}.
 *
 * <p>The file kind is chosen from the lower-cased path suffix: {@code .txt} is TEXT,
 * {@code .xml} is XML, {@code .jar}/{@code .class} are BINARY. A {@code .java} file is only
 * inspected when {@code parse} is true; otherwise it keeps the default kind OTHER. When
 * parsing, the content is tried at JLS2, then JLS3, then JLS4, and the kind is left at the
 * first level that {@code parseJavaFile} accepts. If all three fail the kind becomes
 * SOURCE_JAVA_ERROR and an empty {@code ASTRoot} is appended to {@code astWriter}.
 *
 * @param path path of the changed file
 * @param parse whether {@code .java} files should be parsed
 * @param astWriter writer that receives the serialized ASTs
 * @param revKey key stored on the builder and used as the prefix of the AST key
 * @param keyDelim delimiter placed between {@code revKey} and the file path in the AST key
 * @return the populated builder
 */
private Builder processChangeFile(String path, boolean parse, Writer astWriter, String revKey,
        String keyDelim) {
    final ChangedFile.Builder fb = ChangedFile.newBuilder();
    fb.setName(path);
    // Default kind; overwritten below when the suffix is recognised.
    fb.setKind(FileKind.OTHER);

    final String lowerPath = path.toLowerCase();
    if (lowerPath.endsWith(".txt"))
        fb.setKind(FileKind.TEXT);
    else if (lowerPath.endsWith(".xml"))
        fb.setKind(FileKind.XML);
    else if (lowerPath.endsWith(".jar") || lowerPath.endsWith(".class"))
        fb.setKind(FileKind.BINARY);
    else if (lowerPath.endsWith(".java") && parse) {
        final String content = getFileContents(path);

        // The kind is set optimistically before each attempt and replaced if the parse fails.
        fb.setKind(FileKind.SOURCE_JAVA_JLS2);
        if (!parseJavaFile(path, fb, content, JavaCore.VERSION_1_4, AST.JLS2, false, astWriter,
                revKey + keyDelim + path)) {
            if (debug)
                System.err.println("Found JLS2 parse error in: revision " + id + ": file " + path);

            fb.setKind(FileKind.SOURCE_JAVA_JLS3);
            if (!parseJavaFile(path, fb, content, JavaCore.VERSION_1_5, AST.JLS3, false, astWriter,
                    revKey + keyDelim + path)) {
                if (debug)
                    System.err.println("Found JLS3 parse error in: revision " + id + ": file " + path);

                fb.setKind(FileKind.SOURCE_JAVA_JLS4);
                if (!parseJavaFile(path, fb, content, JavaCore.VERSION_1_7, AST.JLS4, false, astWriter,
                        revKey + keyDelim + path)) {
                    if (debug)
                        System.err.println("Found JLS4 parse error in: revision " + id + ": file " + path);

                    //fb.setContent(content);
                    // Every level failed: mark the file as an error and store an empty AST for it.
                    fb.setKind(FileKind.SOURCE_JAVA_ERROR);
                    try {
                        // NOTE(review): astWriter is dereferenced here without the null check that
                        // parseJavaFile performs - confirm callers never pass null.
                        astWriter.append(new Text(revKey + keyDelim + fb.getName()),
                                new BytesWritable(ASTRoot.newBuilder().build().toByteArray()));
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                } else if (debug)
                    System.err.println("Accepted JLS4: revision " + id + ": file " + path);
            } else if (debug)
                System.err.println("Accepted JLS3: revision " + id + ": file " + path);
        } else if (debug)
            System.err.println("Accepted JLS2: revision " + id + ": file " + path);
    }
    fb.setKey(revKey);

    return fb;
}

From source file:boa.datagen.scm.AbstractCommit.java

License:Apache License

/**
 * Parses {@code content} as JavaScript at the given AST level and stores the resulting AST.
 *
 * <p>The AST is stored when the error-check visitor reports no error, or when
 * {@code storeOnError} is true. It is appended to {@code astWriter} under {@code key} when a
 * writer is supplied, and set on {@code fb} otherwise. An {@code IOException} from the writer
 * is printed and does not affect the result.
 *
 * @param path file path, used only in debug output
 * @param fb builder that receives the AST when {@code astWriter} is null
 * @param content source text to parse
 * @param compliance compliance level applied to the compiler options
 * @param astLevel AST level passed to the parser factory
 * @param storeOnError whether to store the AST even when errors were found
 * @param astWriter destination for the serialized AST; may be null
 * @param key key under which the AST is appended to {@code astWriter}
 * @return true only when parsing completed and the error check found no error; false when
 *         the parser rejects its input, the visitor fails, or any other exception occurs
 */
private boolean parseJavaScriptFile(final String path, final ChangedFile.Builder fb, final String content,
        final String compliance, final int astLevel, final boolean storeOnError, Writer astWriter, String key) {
    try {
        final org.eclipse.wst.jsdt.core.dom.ASTParser jsParser = org.eclipse.wst.jsdt.core.dom.ASTParser
                .newParser(astLevel);
        // NOTE(review): the constant comes from the unqualified ASTParser, not the jsdt class
        // used for the parser itself - confirm both define the same value.
        jsParser.setKind(ASTParser.K_COMPILATION_UNIT);
        jsParser.setResolveBindings(true);
        jsParser.setSource(content.toCharArray());

        final Map compilerOptions = JavaCore.getOptions();
        JavaCore.setComplianceOptions(compliance, compilerOptions);
        jsParser.setCompilerOptions(compilerOptions);

        JavaScriptUnit unit;
        try {
            unit = (JavaScriptUnit) jsParser.createAST(null);
        } catch (java.lang.IllegalArgumentException ex) {
            // The parser refused the input; report failure without a stack trace.
            return false;
        }

        final JavaScriptErrorCheckVisitor checker = new JavaScriptErrorCheckVisitor();
        unit.accept(checker);

        // Nothing is stored for an erroneous unit unless the caller asked for it.
        if (checker.hasError && !storeOnError) {
            return false;
        }

        final ASTRoot.Builder root = ASTRoot.newBuilder();
        final JavaScriptVisitor visitor = new JavaScriptVisitor(content);
        try {
            root.addNamespaces(visitor.getNamespaces(unit));
        } catch (final UnsupportedOperationException e) {
            return false;
        } catch (final Exception e) {
            if (debug) {
                System.err.println("Error visiting: " + path);
            }
            return false;
        }

        if (astWriter == null) {
            fb.setAst(root);
        } else {
            try {
                astWriter.append(new Text(key), new BytesWritable(root.build().toByteArray()));
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        return !checker.hasError;
    } catch (final Exception e) {
        e.printStackTrace();
        return false;
    }
}

From source file:boa.datagen.scm.AbstractCommit.java

License:Apache License

/**
 * Parses {@code content} as Java at the given AST level and stores the resulting AST.
 *
 * <p>The AST (namespaces plus imports) is stored when the error-check visitor reports no
 * error, or when {@code storeOnError} is true. It is appended to {@code astWriter} under
 * {@code key} when a writer is supplied, and set on {@code fb} otherwise. An
 * {@code IOException} from the writer is printed and does not affect the result.
 *
 * @param path file path, used only in debug output
 * @param fb builder that receives the AST when {@code astWriter} is null
 * @param content source text to parse
 * @param compliance compliance level applied to the compiler options
 * @param astLevel AST level passed to the parser factory
 * @param storeOnError whether to store the AST even when errors were found
 * @param astWriter destination for the serialized AST; may be null
 * @param key key under which the AST is appended to {@code astWriter}
 * @return true only when parsing completed and the error check found no error; false when
 *         the visitor fails or any other exception occurs
 */
private boolean parseJavaFile(final String path, final ChangedFile.Builder fb, final String content,
        final String compliance, final int astLevel, final boolean storeOnError, Writer astWriter, String key) {
    try {
        final ASTParser javaParser = ASTParser.newParser(astLevel);
        javaParser.setKind(ASTParser.K_COMPILATION_UNIT);
        javaParser.setResolveBindings(true);
        javaParser.setSource(content.toCharArray());

        final Map compilerOptions = JavaCore.getOptions();
        JavaCore.setComplianceOptions(compliance, compilerOptions);
        javaParser.setCompilerOptions(compilerOptions);

        final CompilationUnit unit = (CompilationUnit) javaParser.createAST(null);

        final JavaErrorCheckVisitor checker = new JavaErrorCheckVisitor();
        unit.accept(checker);

        // Nothing is stored for an erroneous unit unless the caller asked for it.
        if (checker.hasError && !storeOnError) {
            return false;
        }

        final ASTRoot.Builder root = ASTRoot.newBuilder();
        final JavaVisitor visitor = new JavaVisitor(content, connector.nameIndices);
        try {
            root.addNamespaces(visitor.getNamespaces(unit));
            for (final String importName : visitor.getImports()) {
                root.addImports(importName);
            }
        } catch (final UnsupportedOperationException e) {
            return false;
        } catch (final Exception e) {
            if (debug) {
                System.err.println("Error visiting: " + path);
            }
            e.printStackTrace();
            return false;
        }

        if (astWriter == null) {
            fb.setAst(root);
        } else {
            try {
                astWriter.append(new Text(key), new BytesWritable(root.build().toByteArray()));
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        return !checker.hasError;
    } catch (final Exception e) {
        e.printStackTrace();
        return false;
    }
}

From source file:boa.functions.BoaAstIntrinsics.java

License:Apache License

/**
 * Given a ChangedFile, return the AST for that file at that revision.
 *
 * <p>Only the SOURCE_JAVA_ERROR, SOURCE_JAVA_JLS2, SOURCE_JAVA_JLS3 and SOURCE_JAVA_JLS4
 * kinds are looked up; every other kind yields the empty AST immediately. The lookup key is
 * the file's key, the literal {@code "!!"}, and the file's name. Counters on the context
 * record each attempt, success and failure.
 *
 * @param f the ChangedFile to get a snapshot of the AST for
 * @return the AST, or an empty AST on any sort of error
 */
@SuppressWarnings("unchecked")
@FunctionSpec(name = "getast", returnType = "ASTRoot", formalParameters = { "ChangedFile" })
public static ASTRoot getast(final ChangedFile f) {
    // since we know only certain kinds have ASTs, filter before looking up
    final ChangedFile.FileKind kind = f.getKind();
    final boolean mayHaveAst = kind == ChangedFile.FileKind.SOURCE_JAVA_ERROR
            || kind == ChangedFile.FileKind.SOURCE_JAVA_JLS2
            || kind == ChangedFile.FileKind.SOURCE_JAVA_JLS3
            || kind == ChangedFile.FileKind.SOURCE_JAVA_JLS4;
    if (!mayHaveAst) {
        return emptyAst;
    }

    context.getCounter(AST_COUNTER.GETS_ATTEMPTED).increment(1);

    final String rowName = f.getKey() + "!!" + f.getName();

    if (map == null) {
        openMap();
    }

    try {
        final BytesWritable stored = new BytesWritable();
        if (map.get(new Text(rowName), stored) != null) {
            final CodedInputStream codedInput = CodedInputStream.newInstance(stored.getBytes(), 0,
                    stored.getLength());
            // defaults to 64, really big ASTs require more
            codedInput.setRecursionLimit(Integer.MAX_VALUE);
            final ASTRoot parsed = ASTRoot.parseFrom(codedInput);
            context.getCounter(AST_COUNTER.GETS_SUCCEED).increment(1);
            return parsed;
        }
        // No entry for this row.
        context.getCounter(AST_COUNTER.GETS_FAIL_MISSING).increment(1);
    } catch (final InvalidProtocolBufferException e) {
        e.printStackTrace();
        context.getCounter(AST_COUNTER.GETS_FAIL_BADPROTOBUF).increment(1);
    } catch (final IOException e) {
        e.printStackTrace();
        context.getCounter(AST_COUNTER.GETS_FAIL_MISSING).increment(1);
    } catch (final RuntimeException e) {
        e.printStackTrace();
        context.getCounter(AST_COUNTER.GETS_FAIL_MISSING).increment(1);
    } catch (final Error e) {
        e.printStackTrace();
        context.getCounter(AST_COUNTER.GETS_FAIL_BADPROTOBUF).increment(1);
    }

    // Every path that did not return a parsed AST ends up here.
    System.err.println("error with ast: " + rowName);
    context.getCounter(AST_COUNTER.GETS_FAILED).increment(1);
    return emptyAst;
}