List of usage examples for org.apache.hadoop.io.LongWritable#toString

Method signature:

@Override
public String toString()
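LongWritable.toString() returns the decimal string form of the wrapped long; it delegates to Long.toString on the underlying value. A minimal standalone sketch of the round trip, outside of any MapReduce job:

import org.apache.hadoop.io.LongWritable;

public class LongWritableToStringDemo {
    public static void main(String[] args) {
        LongWritable w = new LongWritable(42L);
        String s = w.toString();       // "42"
        long back = Long.parseLong(s); // parses back to 42
        System.out.println(s + " -> " + back);
        w.set(-7L);
        System.out.println(w);         // println calls toString(): prints "-7"
    }
}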
From source file:HistogramBucket.java
License:Apache License
@Override
public void readFields(DataInput di) throws IOException {
    attribute.readFields(di);
    LongWritable arraySize = new LongWritable();
    arraySize.readFields(di);
    splits = new ArrayList<DoubleWritable>();
    for (int i = 0; i < Integer.parseInt(arraySize.toString()); i++) {
        DoubleWritable d = new DoubleWritable();
        d.readFields(di);
        splits.add(d);
    }
}
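Here the element count is recovered via toString() plus Integer.parseInt; casting arraySize.get() to int would be the more direct route. A hypothetical write() counterpart, inferred only from the read order above:

@Override
public void write(DataOutput out) throws IOException {
    // mirror of readFields(): attribute, then the split count, then each split
    attribute.write(out);
    new LongWritable(splits.size()).write(out);
    for (DoubleWritable d : splits) {
        d.write(out);
    }
}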
From source file:TestHashMap.java
License:Apache License
@Test
public void testHashSetString() throws Exception {
    final Set<String> hashSet = new HashSet<>();
    final Random random = new Random(0xDEADBEEF);
    int matched = 0;
    LongWritable num = new LongWritable();
    long startTime = System.nanoTime();
    for (int i = 0; i < SET_SIZE; i++) {
        // input data is String
        String input = Long.toString(random.nextLong());
        // disable optimizer
        if (input.length() > 5) {
            hashSet.add(input);
        }
    }
    random.setSeed(0xDEADBEEF);
    for (int i = 0; i < DATA_SIZE; i++) {
        // query data is LongWritable
        num.set(random.nextLong());
        if (hashSet.contains(num.toString())) {
            matched++;
        }
    }
    long endTime = System.nanoTime();
    System.out.println(" HashSet<String>");
    System.out.println(" Elapsed time: " + (endTime - startTime) / 1000000 + " ms");
    System.out.println(" Matched " + matched + " times");
}
From source file:co.nubetech.hiho.dedup.HashUtility.java
License:Apache License
public static MD5Hash getMD5Hash(LongWritable key) throws IOException {
    return MD5Hash.digest(key.toString());
}
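A hypothetical call site; note that MD5Hash.digest receives the decimal string form of the key, not the raw eight bytes of the long:

LongWritable key = new LongWritable(123456789L);
MD5Hash hash = HashUtility.getMD5Hash(key); // digests the string "123456789"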
From source file:com.ifeng.ipserver.IPServerLogParseMapper.java
License:Apache License
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
        throws IOException, InterruptedException {
    String line = value.toString();
    if (line.contains("The node 59.53.89.94") || line.contains("http://59.53.89.94")) {
        log.info("Matched: {}", line);
        // emit the byte offset of the matched line as the output key
        context.write(new Text(key.toString()), value);
    }
}
From source file:com.splunk.shuttl.integration.hadoop.hbase.HBaseKeyGenerator.java
License:Apache License
/**
 * @param offsetKey the byte offset of the record in the source file
 * @return the offset string concatenated with the file name
 */
public String getKey(LongWritable offsetKey) {
    return offsetKey.toString().concat(this.filename);
}
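A hypothetical call, assuming the generator's filename field holds "events.log" (how it is assigned is not shown in this snippet):

// keyGenerator.filename assumed to be "events.log" for illustration
String rowKey = keyGenerator.getKey(new LongWritable(2048)); // "2048events.log"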
From source file:com.yahoo.glimmer.indexing.preprocessor.TuplesToResourcesMapper.java
License:Open Source License
@Override
protected void map(LongWritable key, Text valueText, Mapper<LongWritable, Text, Text, Object>.Context context)
        throws java.io.IOException, InterruptedException {
    if (extraResources != null && context.getTaskAttemptID().getTaskID().getId() == 0) {
        // Add extra resources. These end up in the 'all' resources file so get
        // given a Doc ID even if they don't occur in the data.
        for (String extraResource : extraResources) {
            context.write(new Text(extraResource), new Text(""));
        }
        extraResources = null;
    }

    if (!context.getInputSplit().equals(lastInputSplit)) {
        lastInputSplit = context.getInputSplit();
        if (lastInputSplit instanceof FileSplit) {
            FileSplit fileSplit = (FileSplit) lastInputSplit;
            LOG.info("Current FileSplit " + fileSplit.getPath().toString() + " start(length) bytes "
                    + fileSplit.getStart() + "(" + fileSplit.getLength() + ")");
        } else {
            LOG.info("Current InputSplit " + lastInputSplit.toString());
        }
    }

    String value = valueText.toString().trim();
    if (value.isEmpty()) {
        return;
    }

    Node[] nodes;
    try {
        nodes = NxParser.parseNodes(value);
    } catch (ParseException e) {
        // NxParser 1.2.2 has problems with typed literals like:
        // "27"^^<int uri>. This is fixed in 1.2.3
        context.getCounter(Counters.NX_PARSER_EXCEPTION).increment(1L);
        String s = value.replaceAll("\\^\\^<[^>]+>", "");
        try {
            nodes = NxParser.parseNodes(s);
            LOG.info("Only parsed after removal of literal types:" + value);
        } catch (ParseException e1) {
            context.getCounter(Counters.NX_PARSER_RETRY_EXCEPTION).increment(1L);
            LOG.info("Failed parsing even after removal of literal types:" + value);
            return;
        }
    }

    if (nodes.length < 3) {
        context.getCounter(Counters.SHORT_TUPLE).increment(1L);
        LOG.info("Line parsed with less than 3 nodes at position " + key.toString());
        return;
    }
    if (nodes.length > MAX_NODES) {
        context.getCounter(Counters.LONG_TUPLE).increment(1L);
        LOG.info("Line parsed with more than " + MAX_NODES + " nodes at position " + key.toString());
        return;
    }

    for (TupleElementName name : TupleElementName.values()) {
        TupleElement element = tuple.getElement(name);
        if (nodes.length > name.ordinal()) {
            Node node = nodes[name.ordinal()];
            String text = node.toString();
            if (text.length() > 5000) {
                System.out.println("Long tuple element " + name.name() + ". Length:" + text.length()
                        + " starting with " + text.substring(0, 100));
                context.getCounter(Counters.LONG_TUPLE_ELEMENT).increment(1L);
                return;
            }
            element.type = TupleElement.Type.valueOf(node.getClass().getSimpleName().toUpperCase());
            if (element.type == TupleElement.Type.RESOURCE) {
                try {
                    new URI(text);
                } catch (URISyntaxException e) {
                    context.getCounter(Counters.INVALID_RESOURCE).increment(1L);
                    LOG.info("Bad resource near position " + key.toString());
                    return;
                }
            }
            element.text = text;
            element.n3 = node.toN3();
        } else {
            element.type = null;
            element.text = null;
            element.n3 = null;
        }
    }

    if (filter != null && !filter.filter(tuple)) {
        // Skip tuple.
        return;
    }

    predicateObjectContextDot.setLength(0);

    if (!tuple.subject.isOfType(TupleElement.Type.RESOURCE, TupleElement.Type.BNODE)) {
        context.getCounter(Counters.UNEXPECTED_SUBJECT_TYPE).increment(1L);
        return;
    }
    Text subject = new Text(tuple.subject.text);

    if (!tuple.predicate.isOfType(TupleElement.Type.RESOURCE)) {
        context.getCounter(Counters.UNEXPECTED_PREDICATE_TYPE).increment(1L);
        return;
    }
    context.write(new Text(tuple.predicate.text), new Text(TupleElementName.PREDICATE.name()));
    predicateObjectContextDot.append(tuple.predicate.n3);

    if (tuple.object.isOfType(TupleElement.Type.RESOURCE, TupleElement.Type.BNODE)) {
        context.write(new Text(tuple.object.text), new Text(TupleElementName.OBJECT.name()));
    }
    predicateObjectContextDot.append(' ');
    predicateObjectContextDot.append(tuple.object.n3);

    if (includeContexts && tuple.context.text != null) {
        if (tuple.context.isOfType(TupleElement.Type.RESOURCE)) {
            context.write(new Text(tuple.context.text), new Text(TupleElementName.CONTEXT.name()));
            predicateObjectContextDot.append(' ');
            predicateObjectContextDot.append(tuple.context.n3);
        } else {
            context.getCounter(Counters.UNEXPECTED_CONTEXT_TYPE).increment(1L);
        }
    }
    predicateObjectContextDot.append(" .");

    if (predicateObjectContextDot.length() > 10000) {
        System.out.println("Long tuple. Length:" + predicateObjectContextDot.length() + " starting with "
                + predicateObjectContextDot.substring(0, 100));
        context.getCounter(Counters.LONG_TUPLES).increment(1L);
    } else {
        // Write subject with predicate, object, context as value
        context.write(subject, new Text(predicateObjectContextDot.toString()));
    }
}
From source file:diamondmapreduce.DiamondMapper.java
License:Apache License
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    // get query and database name from the MapReduce driver
    Configuration conf = context.getConfiguration();
    String query = conf.get(DiamondMapReduce.QUERY);
    String dataBase = conf.get(DiamondMapReduce.DATABASE);
    String[] args = conf.getStrings("DIAMOND-arguments");

    // write the key-value pair to local tmp
    WriteKeyValueToTemp.write(key.toString(), value.toString());

    // use runtime to execute the alignment; intermediate binary files are stored in local tmp
    DiamondAlignment.align(this.diamond, this.localDB, key.toString(), args, conf);

    // view the binary files as a tabular output file; the view output will be streamed into HDFS
    // DiamondView.view(this.diamond, key.toString(), conf);

    // delete all intermediate files
    DeleteIntermediateFiles.deleteFiles(key.toString());

    context.write(new Text("key"), new Text(key.toString()));
}
From source file:edu.cmu.cs.in.hadoop.HoopInvertedListMapper.java
License:Open Source License
public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    debug("map ()");

    if (HoopLink.metrics != null) {
        mapperMarker = new HoopPerformanceMeasure();
        mapperMarker.setMarker("Mapper");
        HoopLink.metrics.getDataSet().add(mapperMarker);
    }

    if (value == null) {
        debug("Internal error: value is null");
        return;
    }

    String line = value.toString();

    // We assume here we're getting one file at a time
    HoopDocumentParser parser = new HoopDocumentParser();
    //parser.setDocID(key.toString());
    parser.setKey(key.get());
    parser.setIncludePositions(true);
    parser.loadDocumentFromData(line); // Tokenization happens here

    List<String> tokens = parser.getTokens();

    for (int i = 0; i < tokens.size(); i++) {
        HoopToken token = new HoopToken(tokens.get(i));
        StringBuffer formatted = new StringBuffer();
        formatted.append(key.get());
        formatted.append(":");
        formatted.append(token.getPosition().toString());

        //word.set(token.getValue()+":"+key.toString());

        // We need this for the partitioner and reducers
        word.set(token.getValue() + ":" + partitioner.getPartition(new Text("key:" + key.toString()),
                new Text("undef"), partitioner.getNrPartitions()));

        output.collect(word, new Text(formatted.toString()));
    }

    debug("map (" + tokens.size() + " tokens) done for key: " + key.toString());

    if (mapperMarker != null) {
        //mapperMarker.getMarkerRaw ();
        mapperMarker.closeMarker();
    }
}
From source file:edu.cuhk.hccl.hadoop.TripAdvisorMapper.java
License:Apache License
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    hotelID = key.toString();
    String[] lines = value.toString().split("\n");

    StringBuilder buffer = new StringBuilder();
    Text fiveLines = new Text();

    for (int i = 1; i <= lines.length; i++) {
        buffer.append(lines[i - 1] + "\n");
        if (i % NUM_LINES == 0) {
            fiveLines.set(buffer.toString());
            DataRecord record = parseDataRecord(fiveLines);
            if (record != null) {
                processRecord(record, context);
            }
            buffer.setLength(0);
        }
    }

    // Report progress
    context.getCounter(Counters.INPUT_REVIEWS).increment(1);
}
From source file:fr.ens.biologie.genomique.eoulsan.bio.io.hadoop.ExpressionRecordWriter.java
License:LGPL
@Override
public synchronized void write(final Text key, final LongWritable value)
        throws IOException, InterruptedException {
    this.context.getCounter(COUNTERS_GROUP, INPUT_ENTRIES).increment(1);

    if (value == null) {
        return;
    }

    this.out.write(key.getBytes(), 0, key.getLength());
    this.out.write(separator);
    this.out.write(value.toString().getBytes(StandardCharsets.UTF_8));
    this.out.write(newline);

    this.context.getCounter(COUNTERS_GROUP, ENTRIES_WRITTEN).increment(1);
}