List of usage examples for org.apache.hadoop.io LongWritable get
public long get()
From source file:mlbench.bayes.BayesUtils.java
License:Apache License
/**
 * Copies the feature-frequency records found under {@code featureCountPath} into one or more
 * sequence-file chunks below {@code dictionaryPathBase}, starting a new chunk whenever the running
 * size estimate of the current one exceeds the limit.
 *
 * <p>Records with a non-negative key are appended to the current chunk. The record with key
 * {@code -1} is not written; its value is reported as the vector count.
 *
 * @param featureCountPath directory matched with {@code OUTPUT_FILES_PATTERN} to find input records
 * @param dictionaryPathBase directory in which the chunk files are created
 * @param baseConf configuration that is copied before use
 * @param chunkSizeInMegabytes size limit of a single chunk, in megabytes
 * @return a pair of {@code {featureCount, vectorCount}} and the list of chunk paths, where
 *         {@code featureCount} is the largest key seen plus one and {@code vectorCount} stays at
 *         {@code Long.MAX_VALUE} when no record with key {@code -1} was read
 * @throws IOException if reading the input or writing a chunk fails
 */
static Pair<Long[], List<Path>> createDictionaryChunks(Path featureCountPath, Path dictionaryPathBase,
        Configuration baseConf, int chunkSizeInMegabytes) throws IOException {
    List<Path> chunkPaths = Lists.newArrayList();
    Configuration conf = new Configuration(baseConf);
    FileSystem fs = FileSystem.get(featureCountPath.toUri(), conf);

    final long maxChunkBytes = chunkSizeInMegabytes * 1024L * 1024L;
    // Estimated footprint of one (int key, long value) record; identical for every record.
    final int recordBytes = SEQUENCEFILE_BYTE_OVERHEAD + Integer.SIZE / 8 + Long.SIZE / 8;

    int chunkNumber = 0;
    Path currentChunk = new Path(dictionaryPathBase, FREQUENCY_FILE + chunkNumber);
    chunkPaths.add(currentChunk);
    SequenceFile.Writer freqWriter =
            new SequenceFile.Writer(fs, conf, currentChunk, IntWritable.class, LongWritable.class);

    try {
        long bytesInChunk = 0;
        long highestFeatureId = 0;
        long vectorCount = Long.MAX_VALUE;

        Path inputGlob = new Path(featureCountPath, OUTPUT_FILES_PATTERN);
        SequenceFileDirIterable<IntWritable, LongWritable> records =
                new SequenceFileDirIterable<IntWritable, LongWritable>(inputGlob, PathType.GLOB, null, null,
                        true, conf);

        for (Pair<IntWritable, LongWritable> record : records) {
            // Roll over to a fresh chunk file once the current one has outgrown the limit.
            if (bytesInChunk > maxChunkBytes) {
                Closeables.close(freqWriter, false);
                chunkNumber++;
                currentChunk = new Path(dictionaryPathBase, FREQUENCY_FILE + chunkNumber);
                chunkPaths.add(currentChunk);
                freqWriter = new SequenceFile.Writer(fs, conf, currentChunk, IntWritable.class,
                        LongWritable.class);
                bytesInChunk = 0;
            }
            bytesInChunk += recordBytes;

            IntWritable key = record.getFirst();
            LongWritable value = record.getSecond();
            int featureId = key.get();
            if (featureId == -1) {
                // Special record: carries the vector count rather than a feature frequency.
                vectorCount = value.get();
            } else if (featureId >= 0) {
                freqWriter.append(key, value);
            }
            highestFeatureId = Math.max(featureId, highestFeatureId);
        }

        // Feature ids are zero-based, so the count is the highest id plus one.
        Long[] counts = { highestFeatureId + 1, vectorCount };
        return new Pair<Long[], List<Path>>(counts, chunkPaths);
    } finally {
        Closeables.close(freqWriter, false);
    }
}
From source file:msc.fall2015.stock.kmeans.hbase.mapreduce.pwd.SWGReduce.java
License:Open Source License
public void reduce(LongWritable key, Iterable<SWGWritable> values, Context context) throws IOException { long startTime = System.nanoTime(); Configuration conf = context.getConfiguration(); long blockSize = conf.getLong(Constants.BLOCK_SIZE, 1000); long noOfSequences = conf.getLong(Constants.NO_OF_SEQUENCES, blockSize * 10); long noOfDivisions = conf.getLong(Constants.NO_OF_DIVISIONS, noOfSequences / blockSize); boolean weightEnabled = conf.getBoolean(Constants.WEIGHT_ENABLED, false); // to handle the edge blocks with lesser number of sequences int row = (int) (key.get() * blockSize); int currentRowBlockSize = (int) blockSize; if ((row + blockSize) > (noOfSequences)) { currentRowBlockSize = (int) (noOfSequences - row); }//from w ww . j av a 2s. c o m // TODO do this in the byte level short[][] alignments = new short[(int) currentRowBlockSize][(int) noOfSequences]; for (SWGWritable alignmentWritable : values) { System.out.println("key " + key.get() + " col " + alignmentWritable.getColumnBlock() + " row " + alignmentWritable.getRowBlock() + " blocksize " + blockSize); DataInput in = alignmentWritable.getDataInput(); int column = (int) (alignmentWritable.getColumnBlock() * blockSize); // to handle the edge blocks with lesser number of sequences int currentColumnBlockSize = (int) blockSize; if ((column + blockSize) > (noOfSequences)) { currentColumnBlockSize = (int) (noOfSequences - column); } for (int i = 0; i < currentRowBlockSize; i++) { // byte[] b = new byte[currentBlockSize /* * 2*/]; // System.out.println("row block "+i+" currentBlockSize"+currentRowBlockSize); for (int j = 0; j < currentColumnBlockSize; j++) { short readShort = in.readShort(); // System.out.print(readShort+" "); alignments[i][column + j] = readShort; } // System.out.println(); //TODO try to do the above using byte[] copy // in.readFully(b); // System.out.println(new String(b)); // System.arraycopy(b, 0, alignments[i], (column /* * 2*/), // currentBlockSize); } } // retrieve the output dir 
String outDir = context.getConfiguration().get("mapred.output.dir"); FileSystem fs = FileSystem.get(conf); // out dir is created in the main driver. String childName = "rowblock_cor_" + key.get() + "_blockSize_" + blockSize; if (weightEnabled) { childName = "rowblock_weight_" + key.get() + "_blockSize_" + blockSize; } Path outFilePart = new Path(outDir, childName); writeOutFile(alignments, fs, outFilePart); System.out.println("Reduce Processing Time: " + ((System.nanoTime() - startTime) / 1000000)); }
From source file:net.jarcec.sqoop.data.gen.mr.GeneratorMapper.java
License:Apache License
/**
 * Generates one comma-separated row for every id in the half-open range {@code [key, value)} and
 * emits it as the output key with a {@code NullWritable} value. The column types are read from the
 * {@code Constants.TYPES} configuration entry as a comma-separated list.
 *
 * @param key first row id to generate (inclusive)
 * @param value row id at which generation stops (exclusive)
 * @param context mapper context used for configuration, progress reporting and output
 * @throws RuntimeException if the configured type list contains an unknown type code
 */
@Override
protected void map(LongWritable key, LongWritable value, Context context)
        throws IOException, InterruptedException {
    final long from = key.get();
    final long to = value.get();

    random = new SecureRandom();
    decimal = new DecimalFormat("###.###");
    date = new SimpleDateFormat("yyyy-MM-dd");
    time = new SimpleDateFormat("HH:mm:ss");
    datetime = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    String[] types = context.getConfiguration().get(Constants.TYPES).split(",");
    String[] values = new String[types.length];

    for (long rowId = from; rowId < to; rowId++) {
        context.progress();
        for (int column = 0; column < types.length; column++) {
            values[column] = valueForType(types[column], rowId);
        }
        context.write(new Text(StringUtils.join(values, ",")), NullWritable.get());
    }
}

/** Produces the textual value of a single column of the given type code for row {@code rowId}. */
private String valueForType(String type, long rowId) {
    if ("id".equals(type)) {
        return String.valueOf(rowId);
    }
    if ("s50".equals(type)) {
        return generateString(50);
    }
    if ("i".equals(type)) {
        return generateInteger();
    }
    if ("f".equals(type)) {
        return generateFloat(250, 31);
    }
    if ("d".equals(type)) {
        return generateDate();
    }
    if ("t".equals(type)) {
        return generateTime();
    }
    if ("dt".equals(type)) {
        return generateDateTime();
    }
    if ("s255".equals(type)) {
        return generateString(255);
    }
    throw new RuntimeException("Unknown type: " + type);
}
From source file:net.petrabarus.hiveudfs.GeoIP.java
License:Open Source License
/** * Evaluate the UDF with the arguments./* w w w . j av a 2s . c om*/ * * @param arguments The arguments as DeferedObject, use * DeferedObject.get() to get the actual argument Object. The Objects * can be inspected by the ObjectInspectors passed in the initialize * call. * @return The return value. */ @Override public Object evaluate(GenericUDF.DeferredObject[] arguments) throws HiveException { assert (arguments.length == 3); LongWritable ipArg = (LongWritable) converters[0].convert(arguments[0].get()); long ip = ipArg.get(); String attributeName = ((Text) converters[1].convert(arguments[1].get())).toString(); String databaseName = ((Text) converters[2].convert(arguments[2].get())).toString(); LookupService lookupService; //Just in case there are more than one database filename attached. //We will just assume that two file with same filename are identical. if (!databases.containsKey(databaseName)) { File file = new File(databaseName); if (!file.exists()) { throw new HiveException(databaseName + " does not exist"); } try { lookupService = new LookupService(file, LookupService.GEOIP_MEMORY_CACHE | LookupService.GEOIP_CHECK_CACHE); databases.put(databaseName, lookupService); } catch (IOException ex) { throw new HiveException(ex); } } else { lookupService = databases.get(databaseName); } String retVal = ""; try { //Let's do it baby! 
Location location = lookupService.getLocation(ip); if (attributeName.equals(COUNTRY_NAME)) { retVal = location.countryName; } else if (attributeName.equals(COUNTRY_CODE)) { retVal = location.countryCode; } else if (attributeName.equals(AREA_CODE)) { retVal = location.area_code + ""; } else if (attributeName.equals(CITY)) { retVal = location.city + ""; } else if (attributeName.equals(DMA_CODE)) { retVal = location.dma_code + ""; } else if (attributeName.equals(LATITUDE)) { retVal = location.latitude + ""; } else if (attributeName.equals(LONGITUDE)) { retVal = location.longitude + ""; } else if (attributeName.equals(METRO_CODE)) { retVal = location.metro_code + ""; } else if (attributeName.equals(POSTAL_CODE)) { retVal = location.postalCode; } else if (attributeName.equals(REGION)) { retVal = location.region; } else if (attributeName.equals(REGION_NAME)) { retVal = RegionName.regionNameByCode(location.countryCode, location.region); } else if (attributeName.equals(ORG)) { retVal = lookupService.getOrg(ip); } else if (attributeName.equals(ID)) { retVal = lookupService.getID(ip) + ""; } else if (attributeName.equals(TIMEZONE)) { retVal = TimeZone.timeZoneByCountryAndRegion(location.countryCode, location.region); } } catch (Exception ex) { //This will be useful if you don't have a complete database file. return null; } if (retVal == null) { return null; } return new Text(retVal); }
From source file:net.petrabarus.hiveudfs.LongToIP.java
License:Open Source License
/**
 * Evaluate the UDF with the arguments.
 *
 * <p>Converts the single argument to a long and renders it as an IP address string via
 * {@code InetAddrHelper.longToIP}.
 *
 * @param arguments The arguments as DeferredObject, use
 * DeferredObject.get() to get the actual argument Object. The Objects
 * can be inspected by the ObjectInspectors passed in the initialize
 * call.
 * @return The address as {@code Text}, or {@code null} when the argument is NULL.
 */
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
    assert (arguments.length == 1);

    if (arguments[0].get() != null) {
        LongWritable converted = (LongWritable) converter.convert(arguments[0].get());
        return new Text(InetAddrHelper.longToIP(converted.get()));
    }
    return null;
}
From source file:nl.gridline.free.taalserver.TokenizeReduce.java
License:Apache License
@Override protected void reduce(WordVector inKey, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException { for (LongWritable value : values) { final long docId = value.get(); List<String> words = inKey.getAsList(); LOG.info("words: " + words.size()); LongWritable outKey = new LongWritable(docId); for (String w : words) { word.set(w);//from www . ja v a 2 s. co m context.write(outKey, word); } } context.progress(); }
From source file:nl.gridline.free.taalserver.WordCountMap.java
License:Apache License
@Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { // tuple: (word,docid) context.write(new WordDocId(value.toString(), key.get()), ONE); context.progress();/*from w ww . ja va 2s . com*/ }
From source file:nl.gridline.free.taalserver.WordCountPerDocReduce.java
License:Apache License
@Override protected void reduce(LongWritable keyIn, Iterable<WordCount> values, Context context) throws IOException, InterruptedException { // IN: docid => ((word1,wc1),(word2,wc2)...) // OUT: word,docid => wordCount, wordsPerDoc // create local list: List<WordCount> wc = new ArrayList<WordCount>(); for (WordCount v : values) { wc.add(new WordCount(v.getWord(), v.getCount())); }//from w w w .ja v a 2s . com final int wordsPerDoc = wc.size(); final long docId = keyIn.get(); for (WordCount v : wc) { context.write(new WordDocId(v.getWord(), docId), new WordCountWordsPerDoc(v.getCount(), wordsPerDoc)); } context.progress(); }
From source file:nl.gridline.free.taalserver.WordCountReduce.java
License:Apache License
/**
 * Sums the partial counts received for a {@code (word, docId)} key and emits the total as an
 * {@code IntWritable}, then reports progress.
 *
 * @param keyIn the word/document key, written unchanged
 * @param values partial counts to add up
 * @param context reducer context receiving the output pair
 */
@Override
protected void reduce(WordDocId keyIn, Iterable<LongWritable> values, Context context)
        throws IOException, InterruptedException {
    // Accumulating in a long and narrowing once gives the same 32-bit result as adding
    // individually narrowed ints, because both are arithmetic modulo 2^32.
    long total = 0L;
    for (LongWritable partial : values) {
        total += partial.get();
    }

    IntWritable outValue = new IntWritable((int) total);
    context.write(keyIn, outValue);
    context.progress();
}
From source file:nl.gridline.zieook.inx.czp.CZPMap.java
License:Apache License
@Override public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { // process the input: String object = value.toString(); LOG.info("line #" + key.get()); CollectionItem item = null;/*w ww . j a v a 2s . co m*/ try { item = czpParser.getCZPfromXML(object); } catch (JAXBException e) { LOG.error("failed to parse collection item: " + key.toString(), e); LOG.error("object:\n" + object + "\n"); } catch (ParseException e) { LOG.error("failed to parse key" + key.toString(), e); LOG.error("object:\n" + object + "\n"); } LOG.info("writing: " + item.getId()); if (item != null) { item.setCp(cp); Long itemid = item.getId(); if (itemid != null) { // create a HBase PUT object: Put put = HBasePUTFactory.getCollectionItemCompl(collection, itemid.longValue(), object, item); // reuse the key: // write the result: context.write(key, put); } else { LOG.error("collection item has no identifier: " + item.toString() + "' "); } } context.progress(); }