Example usage for org.apache.hadoop.io LongWritable get

List of usage examples for org.apache.hadoop.io LongWritable get

Introduction

On this page you can find example usage for org.apache.hadoop.io.LongWritable.get().

Prototype

public long get() 

Source Link

Document

Return the value of this LongWritable.
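Before the full source-file excerpts below, here is a minimal, self-contained sketch (class and variable names are illustrative only, not taken from the excerpts) showing that get() simply returns the primitive long wrapped by the writable:

import org.apache.hadoop.io.LongWritable;

public class LongWritableGetExample {
    public static void main(String[] args) {
        LongWritable writable = new LongWritable(42L);
        // get() returns the primitive long stored in the writable
        long value = writable.get();
        System.out.println(value); // prints 42
    }
}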

Usage

From source file:mlbench.bayes.BayesUtils.java

License:Apache License

static Pair<Long[], List<Path>> createDictionaryChunks(Path featureCountPath, Path dictionaryPathBase,
        Configuration baseConf, int chunkSizeInMegabytes) throws IOException {
    List<Path> chunkPaths = Lists.newArrayList();
    Configuration conf = new Configuration(baseConf);

    FileSystem fs = FileSystem.get(featureCountPath.toUri(), conf);

    long chunkSizeLimit = chunkSizeInMegabytes * 1024L * 1024L;
    int chunkIndex = 0;
    Path chunkPath = new Path(dictionaryPathBase, FREQUENCY_FILE + chunkIndex);
    chunkPaths.add(chunkPath);
    SequenceFile.Writer freqWriter = new SequenceFile.Writer(fs, conf, chunkPath, IntWritable.class,
            LongWritable.class);

    try {
        long currentChunkSize = 0;
        long featureCount = 0;
        long vectorCount = Long.MAX_VALUE;
        Path filesPattern = new Path(featureCountPath, OUTPUT_FILES_PATTERN);
        for (Pair<IntWritable, LongWritable> record : new SequenceFileDirIterable<IntWritable, LongWritable>(
                filesPattern, PathType.GLOB, null, null, true, conf)) {

            if (currentChunkSize > chunkSizeLimit) {
                Closeables.close(freqWriter, false);
                chunkIndex++;

                chunkPath = new Path(dictionaryPathBase, FREQUENCY_FILE + chunkIndex);
                chunkPaths.add(chunkPath);

                freqWriter = new SequenceFile.Writer(fs, conf, chunkPath, IntWritable.class,
                        LongWritable.class);
                currentChunkSize = 0;
            }

            int fieldSize = SEQUENCEFILE_BYTE_OVERHEAD + Integer.SIZE / 8 + Long.SIZE / 8;
            currentChunkSize += fieldSize;
            IntWritable key = record.getFirst();
            LongWritable value = record.getSecond();
            if (key.get() >= 0) {
                freqWriter.append(key, value);
            } else if (key.get() == -1) {
                vectorCount = value.get();
            }
            featureCount = Math.max(key.get(), featureCount);

        }
        featureCount++;
        Long[] counts = { featureCount, vectorCount };
        return new Pair<Long[], List<Path>>(counts, chunkPaths);
    } finally {
        Closeables.close(freqWriter, false);
    }
}

From source file:msc.fall2015.stock.kmeans.hbase.mapreduce.pwd.SWGReduce.java

License:Open Source License

public void reduce(LongWritable key, Iterable<SWGWritable> values, Context context) throws IOException {
    long startTime = System.nanoTime();
    Configuration conf = context.getConfiguration();

    long blockSize = conf.getLong(Constants.BLOCK_SIZE, 1000);
    long noOfSequences = conf.getLong(Constants.NO_OF_SEQUENCES, blockSize * 10);
    long noOfDivisions = conf.getLong(Constants.NO_OF_DIVISIONS, noOfSequences / blockSize);
    boolean weightEnabled = conf.getBoolean(Constants.WEIGHT_ENABLED, false);

    // to handle the edge blocks with lesser number of sequences
    int row = (int) (key.get() * blockSize);
    int currentRowBlockSize = (int) blockSize;
    if ((row + blockSize) > (noOfSequences)) {
        currentRowBlockSize = (int) (noOfSequences - row);
    }

    // TODO do this in the byte level
    short[][] alignments = new short[(int) currentRowBlockSize][(int) noOfSequences];

    for (SWGWritable alignmentWritable : values) {
        System.out.println("key " + key.get() + " col " + alignmentWritable.getColumnBlock() + " row "
                + alignmentWritable.getRowBlock() + " blocksize " + blockSize);
        DataInput in = alignmentWritable.getDataInput();
        int column = (int) (alignmentWritable.getColumnBlock() * blockSize);

        // to handle the edge blocks with lesser number of sequences
        int currentColumnBlockSize = (int) blockSize;
        if ((column + blockSize) > (noOfSequences)) {
            currentColumnBlockSize = (int) (noOfSequences - column);
        }

        for (int i = 0; i < currentRowBlockSize; i++) {
            // byte[] b = new byte[currentBlockSize /* * 2*/];
            //            System.out.println("row block "+i+"  currentBlockSize"+currentRowBlockSize);
            for (int j = 0; j < currentColumnBlockSize; j++) {
                short readShort = in.readShort();
                //               System.out.print(readShort+" ");
                alignments[i][column + j] = readShort;
            }
            //            System.out.println();
            //TODO try to do the above using byte[] copy 
            // in.readFully(b);
            // System.out.println(new String(b));
            // System.arraycopy(b, 0, alignments[i], (column /* * 2*/),
            // currentBlockSize);
        }
    }

    // retrieve the output dir
    String outDir = context.getConfiguration().get("mapred.output.dir");

    FileSystem fs = FileSystem.get(conf);
    // out dir is created in the main driver.
    String childName = "rowblock_cor_" + key.get() + "_blockSize_" + blockSize;
    if (weightEnabled) {
        childName = "rowblock_weight_" + key.get() + "_blockSize_" + blockSize;
    }
    Path outFilePart = new Path(outDir, childName);
    writeOutFile(alignments, fs, outFilePart);
    System.out.println("Reduce Processing Time: " + ((System.nanoTime() - startTime) / 1000000));
}

From source file:net.jarcec.sqoop.data.gen.mr.GeneratorMapper.java

License:Apache License

@Override
protected void map(LongWritable key, LongWritable value, Context context)
        throws IOException, InterruptedException {
    long from = key.get();
    long to = value.get();

    random = new SecureRandom();
    decimal = new DecimalFormat("###.###");
    date = new SimpleDateFormat("yyyy-MM-dd");
    time = new SimpleDateFormat("HH:mm:ss");
    datetime = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    String[] types = context.getConfiguration().get(Constants.TYPES).split(",");
    String[] values = new String[types.length];

    for (long i = from; i < to; i++) {
        context.progress();

        int y = 0;
        for (String type : types) {

            if ("id".equals(type)) {
                values[y] = String.valueOf(i);
            } else if ("s50".equals(type)) {
                values[y] = generateString(50);
            } else if ("i".equals(type)) {
                values[y] = generateInteger();
            } else if ("f".equals(type)) {
                values[y] = generateFloat(250, 31);
            } else if ("d".equals(type)) {
                values[y] = generateDate();
            } else if ("t".equals(type)) {
                values[y] = generateTime();
            } else if ("dt".equals(type)) {
                values[y] = generateDateTime();
            } else if ("s255".equals(type)) {
                values[y] = generateString(255);
            } else {
                throw new RuntimeException("Unknown type: " + type);
            }

            y++;
        }

        context.write(new Text(StringUtils.join(values, ",")), NullWritable.get());
    }
}

From source file:net.petrabarus.hiveudfs.GeoIP.java

License:Open Source License

/**
 * Evaluate the UDF with the arguments.
 *
 * @param arguments The arguments as DeferredObject; use
 * DeferredObject.get() to get the actual argument Object. The Objects
 * can be inspected by the ObjectInspectors passed in the initialize
 * call.
 * @return The return value.
 */
@Override
public Object evaluate(GenericUDF.DeferredObject[] arguments) throws HiveException {
    assert (arguments.length == 3);
    LongWritable ipArg = (LongWritable) converters[0].convert(arguments[0].get());
    long ip = ipArg.get();
    String attributeName = ((Text) converters[1].convert(arguments[1].get())).toString();
    String databaseName = ((Text) converters[2].convert(arguments[2].get())).toString();
    LookupService lookupService;
    //Just in case there are more than one database filename attached.
    //We will just assume that two file with same filename are identical.
    if (!databases.containsKey(databaseName)) {
        File file = new File(databaseName);
        if (!file.exists()) {
            throw new HiveException(databaseName + " does not exist");
        }
        try {
            lookupService = new LookupService(file,
                    LookupService.GEOIP_MEMORY_CACHE | LookupService.GEOIP_CHECK_CACHE);
            databases.put(databaseName, lookupService);
        } catch (IOException ex) {
            throw new HiveException(ex);
        }
    } else {
        lookupService = databases.get(databaseName);
    }
    String retVal = "";
    try {
        //Let's do it baby!
        Location location = lookupService.getLocation(ip);
        if (attributeName.equals(COUNTRY_NAME)) {
            retVal = location.countryName;
        } else if (attributeName.equals(COUNTRY_CODE)) {
            retVal = location.countryCode;
        } else if (attributeName.equals(AREA_CODE)) {
            retVal = location.area_code + "";
        } else if (attributeName.equals(CITY)) {
            retVal = location.city + "";
        } else if (attributeName.equals(DMA_CODE)) {
            retVal = location.dma_code + "";
        } else if (attributeName.equals(LATITUDE)) {
            retVal = location.latitude + "";
        } else if (attributeName.equals(LONGITUDE)) {
            retVal = location.longitude + "";
        } else if (attributeName.equals(METRO_CODE)) {
            retVal = location.metro_code + "";
        } else if (attributeName.equals(POSTAL_CODE)) {
            retVal = location.postalCode;
        } else if (attributeName.equals(REGION)) {
            retVal = location.region;
        } else if (attributeName.equals(REGION_NAME)) {
            retVal = RegionName.regionNameByCode(location.countryCode, location.region);
        } else if (attributeName.equals(ORG)) {
            retVal = lookupService.getOrg(ip);
        } else if (attributeName.equals(ID)) {
            retVal = lookupService.getID(ip) + "";
        } else if (attributeName.equals(TIMEZONE)) {
            retVal = TimeZone.timeZoneByCountryAndRegion(location.countryCode, location.region);
        }

    } catch (Exception ex) {
        //This will be useful if you don't have a complete database file.
        return null;
    }
    if (retVal == null) {
        return null;
    }
    return new Text(retVal);
}

From source file:net.petrabarus.hiveudfs.LongToIP.java

License:Open Source License

/**
 * Evaluate the UDF with the arguments.
 *
 * @param arguments The arguments as DeferredObject; use
 * DeferredObject.get() to get the actual argument Object. The Objects
 * can be inspected by the ObjectInspectors passed in the initialize
 * call.
 * @return The return value.
 */
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
    assert (arguments.length == 1);
    if (arguments[0].get() == null) {
        return null;
    }
    LongWritable iplong = (LongWritable) converter.convert(arguments[0].get());
    long ip = iplong.get();
    Text t = new Text(InetAddrHelper.longToIP(ip));
    return t;
}

From source file:nl.gridline.free.taalserver.TokenizeReduce.java

License:Apache License

@Override
protected void reduce(WordVector inKey, Iterable<LongWritable> values, Context context)
        throws IOException, InterruptedException {
    for (LongWritable value : values) {
        final long docId = value.get();
        List<String> words = inKey.getAsList();
        LOG.info("words: " + words.size());
        LongWritable outKey = new LongWritable(docId);
        for (String w : words) {
            word.set(w);
            context.write(outKey, word);
        }
    }
    context.progress();
}

From source file:nl.gridline.free.taalserver.WordCountMap.java

License:Apache License

@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    // tuple: (word,docid)
    context.write(new WordDocId(value.toString(), key.get()), ONE);
    context.progress();
}

From source file:nl.gridline.free.taalserver.WordCountPerDocReduce.java

License:Apache License

@Override
protected void reduce(LongWritable keyIn, Iterable<WordCount> values, Context context)
        throws IOException, InterruptedException {
    // IN: docid => ((word1,wc1),(word2,wc2)...)

    // OUT: word,docid => wordCount, wordsPerDoc

    // create local list:
    List<WordCount> wc = new ArrayList<WordCount>();
    for (WordCount v : values) {
        wc.add(new WordCount(v.getWord(), v.getCount()));
    }

    final int wordsPerDoc = wc.size();
    final long docId = keyIn.get();

    for (WordCount v : wc) {
        context.write(new WordDocId(v.getWord(), docId), new WordCountWordsPerDoc(v.getCount(), wordsPerDoc));
    }
    context.progress();
}

From source file:nl.gridline.free.taalserver.WordCountReduce.java

License:Apache License

@Override
protected void reduce(WordDocId keyIn, Iterable<LongWritable> values, Context context)
        throws IOException, InterruptedException {
    int count = 0;

    for (LongWritable v : values) {
        count += (int) v.get();
    }

    context.write(keyIn, new IntWritable(count));
    context.progress();
}

From source file:nl.gridline.zieook.inx.czp.CZPMap.java

License:Apache License

@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    // process the input:
    String object = value.toString();

    LOG.info("line #" + key.get());

    CollectionItem item = null;
    try {
        item = czpParser.getCZPfromXML(object);
    } catch (JAXBException e) {
        LOG.error("failed to parse collection item: " + key.toString(), e);
        LOG.error("object:\n" + object + "\n");
    } catch (ParseException e) {
        LOG.error("failed to parse key" + key.toString(), e);
        LOG.error("object:\n" + object + "\n");
    }

    LOG.info("writing: " + item.getId());
    if (item != null) {
        item.setCp(cp);

        Long itemid = item.getId();
        if (itemid != null) {
            // create a HBase PUT object:
            Put put = HBasePUTFactory.getCollectionItemCompl(collection, itemid.longValue(), object, item);
            // reuse the key:

            // write the result:
            context.write(key, put);
        } else {
            LOG.error("collection item has no identifier: " + item.toString() + "' ");
        }

    }
    context.progress();
}