Example usage for org.apache.hadoop.io LongWritable toString

List of usage examples for org.apache.hadoop.io LongWritable toString

Introduction

On this page you can find example usages of org.apache.hadoop.io.LongWritable.toString().

Prototype

@Override
public String toString()
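
A minimal, self-contained sketch of what the method returns: LongWritable.toString() renders the wrapped long as its decimal string, so it can be parsed back with Long.parseLong(). The class and variable names below are illustrative only.

import org.apache.hadoop.io.LongWritable;

public class LongWritableToStringDemo {
    public static void main(String[] args) {
        LongWritable w = new LongWritable(42L);
        // toString() returns the decimal text of the wrapped long
        String s = w.toString();             // "42"
        long roundTrip = Long.parseLong(s);  // 42L
        System.out.println(s + " -> " + roundTrip);
    }
}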

Usage

From source file:HistogramBucket.java

License:Apache License

@Override
public void readFields(DataInput di) throws IOException {
    attribute.readFields(di);
    LongWritable arraySize = new LongWritable();
    arraySize.readFields(di);
    splits = new ArrayList<DoubleWritable>();
    for (int i = 0; i < Integer.parseInt(arraySize.toString()); i++) {
        DoubleWritable d = new DoubleWritable();
        d.readFields(di);
        splits.add(d);
    }
}
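
The snippet above shows only the read side. Here is a sketch of what the matching write() could look like for the same two fields (attribute and splits), assuming they are serialized in the order they are read back. In passing, the loop bound in readFields() could also be obtained directly as (int) arraySize.get(), avoiding the Integer.parseInt(arraySize.toString()) round trip.

// Hypothetical counterpart to readFields() above; sketch only.
@Override
public void write(DataOutput d) throws IOException {
    attribute.write(d);
    new LongWritable(splits.size()).write(d);   // array size first
    for (DoubleWritable split : splits) {
        split.write(d);                         // then each split value
    }
}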

From source file:TestHashMap.java

License:Apache License

@Test
public void testHashSetString() throws Exception {
    final Set<String> hashSet = new HashSet<>();
    final Random random = new Random(0xDEADBEEF);
    int matched = 0;
    LongWritable num = new LongWritable();

    long startTime = System.nanoTime();

    for (int i = 0; i < SET_SIZE; i++) {
        // input data is String
        String input = Long.toString(random.nextLong());
        // disable optimizer
        if (input.length() > 5) {
            hashSet.add(input);
        }
    }

    random.setSeed(0xDEADBEEF);

    for (int i = 0; i < DATA_SIZE; i++) {
        // query data is LongWritable
        num.set(random.nextLong());
        if (hashSet.contains(num.toString())) {
            matched++;
        }
    }

    long endTime = System.nanoTime();
    System.out.println("  HashSet<String>");
    System.out.println("  Elapsed time: " + (endTime - startTime) / 1000000 + " ms");
    System.out.println("  Matched " + matched + " times");
}
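
The lookup with num.toString() works because LongWritable.toString() produces the same decimal text as the Long.toString() calls used to populate the set. A minimal check (names are illustrative):

import org.apache.hadoop.io.LongWritable;

public class ToStringEquivalenceCheck {
    public static void main(String[] args) {
        long raw = -7L;
        LongWritable num = new LongWritable(raw);
        // Both render the long as its decimal string, so text-based set lookups match.
        System.out.println(num.toString().equals(Long.toString(raw))); // true
    }
}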

From source file:co.nubetech.hiho.dedup.HashUtility.java

License:Apache License

public static MD5Hash getMD5Hash(LongWritable key) throws IOException {
    return MD5Hash.digest(key.toString());
}
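
A hypothetical call site for the helper above, assuming HashUtility is on the classpath; MD5Hash.digest() hashes the decimal text of the key, so equal long values always produce the same digest:

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.MD5Hash;

import co.nubetech.hiho.dedup.HashUtility;

public class HashUtilityDemo {
    public static void main(String[] args) throws Exception {
        // Hypothetical key value for illustration.
        MD5Hash hash = HashUtility.getMD5Hash(new LongWritable(1234L));
        System.out.println(hash); // MD5 digest of the string "1234"
    }
}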

From source file:com.ifeng.ipserver.IPServerLogParseMapper.java

License:Apache License

@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
        throws IOException, InterruptedException {

    String line = value.toString();

    if (line.contains("The node 59.53.89.94") || line.contains("http://59.53.89.94")) {
        log.info("Matched: {}", line);
        context.write(new Text(key.toString()), value);
    }
}

From source file:com.splunk.shuttl.integration.hadoop.hbase.HBaseKeyGenerator.java

License:Apache License

/**
 * @param offsetKey the byte offset of the record in the input file
 * @return the row key: the offset rendered as text, followed by the file name
 */
public String getKey(LongWritable offsetKey) {
    return offsetKey.toString().concat(this.filename);
}
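
Purely for illustration (the constructor that sets this.filename is not shown in the snippet): with a filename of "access.log" and an offset of 2048, getKey() would return "2048access.log".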

From source file:com.yahoo.glimmer.indexing.preprocessor.TuplesToResourcesMapper.java

License:Open Source License

@Override
protected void map(LongWritable key, Text valueText, Mapper<LongWritable, Text, Text, Object>.Context context)
        throws java.io.IOException, InterruptedException {

    if (extraResources != null && context.getTaskAttemptID().getTaskID().getId() == 0) {
        // Add extra resources.
        // These end up in the 'all' resources file so get given a Doc ID
        // even if they don't occur in the data.

        for (String extraResource : extraResources) {
            context.write(new Text(extraResource), new Text(""));
        }

        extraResources = null;
    }
    if (!context.getInputSplit().equals(lastInputSplit)) {
        lastInputSplit = context.getInputSplit();
        if (lastInputSplit instanceof FileSplit) {
            FileSplit fileSplit = (FileSplit) lastInputSplit;
            LOG.info("Current FileSplit " + fileSplit.getPath().toString() + " start(length) bytes "
                    + fileSplit.getStart() + "(" + fileSplit.getLength() + ")");
        } else {
            LOG.info("Current InputSplit " + lastInputSplit.toString());
        }
    }

    String value = valueText.toString().trim();
    if (value.isEmpty()) {
        return;
    }
    Node[] nodes;
    try {
        nodes = NxParser.parseNodes(value);
    } catch (ParseException e) {
        // NxParser 1.2.2 has problems with typed literals like:
        // "27"^^<int uri>. This is fixed in 1.2.3
        context.getCounter(Counters.NX_PARSER_EXCEPTION).increment(1l);
        String s = value.replaceAll("\\^\\^<[^>]+>", "");
        try {
            nodes = NxParser.parseNodes(s);
            LOG.info("Only parsed after remove of literal types:" + value);
        } catch (ParseException e1) {
            context.getCounter(Counters.NX_PARSER_RETRY_EXCEPTION).increment(1l);
            LOG.info("Failed parsing even after remove of literal types:" + value);
            return;
        }
    }

    if (nodes.length < 3) {
        context.getCounter(Counters.SHORT_TUPLE).increment(1l);
        LOG.info("Line parsed with less than 3 nodes at position" + key.toString());
        return;
    }
    if (nodes.length > MAX_NODES) {
        context.getCounter(Counters.LONG_TUPLE).increment(1l);
        LOG.info("Line parsed with more than " + MAX_NODES + " nodes at position" + key.toString());
        return;
    }

    for (TupleElementName name : TupleElementName.values()) {
        TupleElement element = tuple.getElement(name);

        if (nodes.length > name.ordinal()) {
            Node node = nodes[name.ordinal()];

            String text = node.toString();
            if (text.length() > 5000) {
                System.out.println("Long tuple element " + name.name() + ". Length:" + text.length()
                        + " starting with " + text.substring(0, 100));
                context.getCounter(Counters.LONG_TUPLE_ELEMENT).increment(1);
                return;
            }

            element.type = TupleElement.Type.valueOf(node.getClass().getSimpleName().toUpperCase());
            if (element.type == TupleElement.Type.RESOURCE) {
                try {
                    new URI(text);
                } catch (URISyntaxException e) {
                    context.getCounter(Counters.INVALID_RESOURCE).increment(1l);
                    LOG.info("Bad resource near position " + key.toString());
                    return;
                }
            }
            element.text = text;
            element.n3 = node.toN3();
        } else {
            element.type = null;
            element.text = null;
            element.n3 = null;
        }
    }

    if (filter != null) {
        if (!filter.filter(tuple)) {
            // Skip tuple.
            return;
        }
    }

    predicateObjectContextDot.setLength(0);

    if (!tuple.subject.isOfType(TupleElement.Type.RESOURCE, TupleElement.Type.BNODE)) {
        context.getCounter(Counters.UNEXPECTED_SUBJECT_TYPE).increment(1l);
        return;
    }
    Text subject = new Text(tuple.subject.text);

    if (!tuple.predicate.isOfType(TupleElement.Type.RESOURCE)) {
        context.getCounter(Counters.UNEXPECTED_PREDICATE_TYPE).increment(1l);
        return;
    }

    context.write(new Text(tuple.predicate.text), new Text(TupleElementName.PREDICATE.name()));
    predicateObjectContextDot.append(tuple.predicate.n3);

    if (tuple.object.isOfType(TupleElement.Type.RESOURCE, TupleElement.Type.BNODE)) {
        context.write(new Text(tuple.object.text), new Text(TupleElementName.OBJECT.name()));
    }
    predicateObjectContextDot.append(' ');
    predicateObjectContextDot.append(tuple.object.n3);

    if (includeContexts && tuple.context.text != null) {
        if (tuple.context.isOfType(TupleElement.Type.RESOURCE)) {
            context.write(new Text(tuple.context.text), new Text(TupleElementName.CONTEXT.name()));
            predicateObjectContextDot.append(' ');
            predicateObjectContextDot.append(tuple.context.n3);
        } else {
            context.getCounter(Counters.UNEXPECTED_CONTEXT_TYPE).increment(1l);
        }
    }
    predicateObjectContextDot.append(" .");

    if (predicateObjectContextDot.length() > 10000) {
        System.out.println("Long tuple. Length:" + predicateObjectContextDot.length() + " starting with "
                + predicateObjectContextDot.substring(0, 100));
        context.getCounter(Counters.LONG_TUPLES).increment(1);
    } else {
        // Write subject with predicate, object, context as value
        context.write(subject, new Text(predicateObjectContextDot.toString()));
    }
}

From source file:diamondmapreduce.DiamondMapper.java

License:Apache License

@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

    //get query and database name from mapreduce driver
    Configuration conf = context.getConfiguration();
    String query = conf.get(DiamondMapReduce.QUERY);
    String dataBase = conf.get(DiamondMapReduce.DATABASE);
    String[] args = conf.getStrings("DIAMOND-arguments");

    //write key-value pair to local tmp
    WriteKeyValueToTemp.write(key.toString(), value.toString());

    //use runtime to execute alignment, intermediate binary files are stored in local tmp
    DiamondAlignment.align(this.diamond, this.localDB, key.toString(), args, conf);

    //convert the binary files to a tabular output file; the output will be streamed into HDFS
    //        DiamondView.view(this.diamond, key.toString(), conf);

    //delete all intermediate files
    DeleteIntermediateFiles.deleteFiles(key.toString());

    context.write(new Text("key"), new Text(key.toString()));

}

From source file:edu.cmu.cs.in.hadoop.HoopInvertedListMapper.java

License:Open Source License

/**
 * Tokenizes one input document and emits a posting for each token.
 */
public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    debug("map ()");

    if (HoopLink.metrics != null) {
        mapperMarker = new HoopPerformanceMeasure();
        mapperMarker.setMarker("Mapper");
        HoopLink.metrics.getDataSet().add(mapperMarker);
    }

    if (value == null) {
        debug("Internal error: value is null");
        return;
    }

    String line = value.toString(); // We assume here we're getting one file at a time

    HoopDocumentParser parser = new HoopDocumentParser();
    //parser.setDocID(key.toString());
    parser.setKey(key.get());
    parser.setIncludePositions(true);
    parser.loadDocumentFromData(line); // Tokenization happens here

    List<String> tokens = parser.getTokens();

    for (int i = 0; i < tokens.size(); i++) {
        HoopToken token = new HoopToken(tokens.get(i));

        StringBuffer formatted = new StringBuffer();
        formatted.append(key.get());
        formatted.append(":");
        formatted.append(token.getPosition().toString());

        //word.set(token.getValue()+":"+key.toString()); // We need this for the partitioner and reducers
        word.set(token.getValue() + ":" + partitioner.getPartition(new Text("key:" + key.toString()),
                new Text("undef"), partitioner.getNrPartitions())); // We need this for the partitioner and reducers

        output.collect(word, new Text(formatted.toString()));
    }

    debug("map (" + tokens.size() + " tokens) done for key: " + key.toString());

    if (mapperMarker != null) {
        //mapperMarker.getMarkerRaw ();
        mapperMarker.closeMarker();
    }
}

From source file:edu.cuhk.hccl.hadoop.TripAdvisorMapper.java

License:Apache License

@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    hotelID = key.toString();

    String[] lines = value.toString().split("\n");
    StringBuilder buffer = new StringBuilder();
    Text fiveLines = new Text();
    for (int i = 1; i <= lines.length; i++) {
        buffer.append(lines[i - 1] + "\n");
        if (i % NUM_LINES == 0) {
            fiveLines.set(buffer.toString());
            DataRecord record = parseDataRecord(fiveLines);
            if (record != null)
                processRecord(record, context);
            buffer.setLength(0);
        }
    }

    // Report progress
    context.getCounter(Counters.INPUT_REVIEWS).increment(1);
}

From source file:fr.ens.biologie.genomique.eoulsan.bio.io.hadoop.ExpressionRecordWriter.java

License:LGPL

@Override
public synchronized void write(final Text key, final LongWritable value)
        throws IOException, InterruptedException {

    this.context.getCounter(COUNTERS_GROUP, INPUT_ENTRIES).increment(1);

    if (value == null) {
        return;
    }

    this.out.write(key.getBytes(), 0, key.getLength());
    this.out.write(separator);
    this.out.write(value.toString().getBytes(StandardCharsets.UTF_8));
    this.out.write(newline);

    this.context.getCounter(COUNTERS_GROUP, ENTRIES_WRITTEN).increment(1);
}
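
For a key of "gene42" (hypothetical) and a value of 17, the writer emits a single line made of the key bytes, the separator, the decimal text "17" encoded as UTF-8, and the newline bytes; the INPUT_ENTRIES counter is incremented on entry and ENTRIES_WRITTEN after the line is written.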