Example usage for org.apache.hadoop.io Text Text

List of usage examples for the org.apache.hadoop.io.Text constructor

Introduction

On this page you can find example usages of the org.apache.hadoop.io.Text constructor.

Prototype

public Text(byte[] utf8) 

Source Link

Document

Construct from a byte array.

Usage

From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFTrainHybridBSP.java

License:Apache License

/**
 * CPU training superstep: collects the input, runs {@code m_maxIterations}
 * calculation rounds (normalizing every {@code m_skipCount}-th round) and
 * writes the resulting user and item vectors to the peer output.
 *
 * <p>User vectors are written under the key {@code "u" + userId}, item
 * vectors under {@code "i" + itemId}. The elapsed wall-clock time is stored
 * in {@code m_bspTimeCpu}. When debugging is enabled, the debug logger is
 * closed at the end of this method.
 *
 * @param peer the BSP peer used for input, synchronization and output
 * @throws IOException if writing to the debug log or the peer output fails
 * @throws SyncException if the barrier synchronization fails
 * @throws InterruptedException if the task is interrupted while syncing
 */
@Override
public void bsp(BSPPeer<LongWritable, PipesVectorWritable, Text, PipesVectorWritable, ItemMessage> peer)
        throws IOException, SyncException, InterruptedException {

    final long bspStart = System.currentTimeMillis();

    // Load this task's input, then wait at the barrier
    collectInput(peer);
    peer.sync();

    // Debug dump of everything that was collected
    if (m_isDebuggingEnabled) {
        m_logger.writeChars("collected: " + m_usersMatrix.size() + " users, " + m_itemsMatrix.size()
                + " items, " + m_preferences.size() + " preferences\n");

        m_logger.writeChars("preferences: length: " + m_preferences.size() + "\n");
        for (Preference<Long, Long> preference : m_preferences) {
            m_logger.writeChars("userId: '" + preference.getUserId() + "' itemId: '"
                    + preference.getItemId() + "' value: '" + preference.getValue().get() + "'\n");
        }
        m_logger.writeChars("indexes: length: " + m_indexes.size() + " indexes: "
                + Arrays.toString(m_indexes.toArray()) + "\n");

        m_logger.writeChars("usersMatrix: length: " + m_usersMatrix.size() + "\n");
        for (Map.Entry<Long, PipesVectorWritable> userRow : m_usersMatrix.entrySet()) {
            m_logger.writeChars(
                    "key: '" + userRow.getKey() + "' value: '" + userRow.getValue().toString() + "'\n");
        }
        m_logger.writeChars("itemsMatrix: length: " + m_itemsMatrix.size() + "\n");
        for (Map.Entry<Long, PipesVectorWritable> itemRow : m_itemsMatrix.entrySet()) {
            m_logger.writeChars(
                    "key: '" + itemRow.getKey() + "' value: '" + itemRow.getValue().toString() + "'\n");
        }
    }

    // Training rounds; normalization runs after every m_skipCount-th round
    for (int round = 0; round < m_maxIterations; round++) {
        computeAllValues();

        final int completedRounds = round + 1;
        if (completedRounds % m_skipCount == 0) {
            normalizeWithBroadcastingValues(peer);
        }
    }

    // Persist user vectors
    if (m_isDebuggingEnabled) {
        m_logger.writeChars("saving " + m_usersMatrix.size() + " users\n");
    }
    for (Map.Entry<Long, PipesVectorWritable> userRow : m_usersMatrix.entrySet()) {
        if (m_isDebuggingEnabled) {
            m_logger.writeChars(
                    "user: " + userRow.getKey() + " vector: " + userRow.getValue().getVector() + "\n");
        }
        peer.write(new Text("u" + userRow.getKey()), userRow.getValue());
    }

    // Persist item vectors
    // TODO duplicated item saves, but one item may belong to one task only
    if (m_isDebuggingEnabled) {
        m_logger.writeChars("saving " + m_itemsMatrix.size() + " items\n");
    }
    for (Map.Entry<Long, PipesVectorWritable> itemRow : m_itemsMatrix.entrySet()) {
        if (m_isDebuggingEnabled) {
            m_logger.writeChars(
                    "item: " + itemRow.getKey() + " vector: " + itemRow.getValue().getVector() + "\n");
        }
        peer.write(new Text("i" + itemRow.getKey()), itemRow.getValue());
    }

    m_bspTimeCpu = System.currentTimeMillis() - bspStart;

    // The same four timing lines go to the debug log and to the regular log
    final String[] timingLines = {
            "OnlineCFTrainHybridBSP,setupTimeCpu=" + m_setupTimeCpu + " ms",
            "OnlineCFTrainHybridBSP,setupTimeCpu=" + (m_setupTimeCpu / 1000.0) + " seconds",
            "OnlineCFTrainHybridBSP,bspTimeCpu=" + m_bspTimeCpu + " ms",
            "OnlineCFTrainHybridBSP,bspTimeCpu=" + (m_bspTimeCpu / 1000.0) + " seconds" };

    if (m_isDebuggingEnabled) {
        for (String timingLine : timingLines) {
            m_logger.writeChars(timingLine + "\n");
        }
        m_logger.close();
    }
    for (String timingLine : timingLines) {
        LOG.info(timingLine);
    }

}

From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFTrainHybridBSP.java

License:Apache License

/**
 * GPU training superstep: reads all input records, converts the collected
 * preferences and factor vectors into plain arrays, runs
 * {@code OnlineCFTrainHybridKernel} through Rootbeer and writes the resulting
 * user and item vectors to the peer output.
 *
 * <p>User vectors are written under the key {@code "u" + userId}, item
 * vectors under {@code "i" + itemId}. The elapsed wall-clock time is stored
 * in {@code m_bspTimeGpu}. When debugging is enabled, the debug logger is
 * closed at the end of this method.
 *
 * @param peer the BSP peer used for input, synchronization and output
 * @param rootbeer the Rootbeer instance used to create the context and run the kernel
 * @throws IOException if reading input, writing output or debug logging fails
 * @throws SyncException if the barrier synchronization fails
 * @throws InterruptedException if the task is interrupted while syncing
 */
@Override
public void bspGpu(BSPPeer<LongWritable, PipesVectorWritable, Text, PipesVectorWritable, ItemMessage> peer,
        Rootbeer rootbeer) throws IOException, SyncException, InterruptedException {

    long startTime = System.currentTimeMillis();

    // **********************************************************************
    // Collect inputs
    // **********************************************************************
    // userId -> (itemId -> score)
    Map<Long, HashMap<Long, Double>> preferencesMap = new HashMap<Long, HashMap<Long, Double>>();
    // Number of input records seen per user / per item
    Map<Long, Long> userRatingCount = new HashMap<Long, Long>();
    Map<Long, Long> itemRatingCount = new HashMap<Long, Long>();

    LongWritable key = new LongWritable();
    PipesVectorWritable value = new PipesVectorWritable();
    int counter = 0;

    while (peer.readNext(key, value)) {
        // parse as <k:userId, v:(itemId, score)>
        long userId = key.get();
        long itemId = (long) value.getVector().get(0);
        double score = value.getVector().get(1);

        // Add User vector: first sighting gets a random vector of length m_matrixRank
        if (m_usersMatrix.containsKey(userId) == false) {
            DenseDoubleVector vals = new DenseDoubleVector(m_matrixRank);
            for (int i = 0; i < m_matrixRank; i++) {
                vals.set(i, m_rand.nextDouble());
            }
            m_usersMatrix.put(userId, new PipesVectorWritable(vals));
            userRatingCount.put(userId, 1l);
        } else {
            userRatingCount.put(userId, userRatingCount.get(userId) + 1);
        }

        // Add Item vector: first sighting gets a random vector of length m_matrixRank
        if (m_itemsMatrix.containsKey(itemId) == false) {
            DenseDoubleVector vals = new DenseDoubleVector(m_matrixRank);
            for (int i = 0; i < m_matrixRank; i++) {
                vals.set(i, m_rand.nextDouble());
            }
            m_itemsMatrix.put(itemId, new PipesVectorWritable(vals));
            itemRatingCount.put(itemId, 1l);
        } else {
            itemRatingCount.put(itemId, itemRatingCount.get(itemId) + 1);
        }

        // Add preference
        m_preferences.add(new Preference<Long, Long>(userId, itemId, score));

        // A repeated (userId, itemId) pair overwrites the earlier score here,
        // while the rating counts above are still incremented for it
        if (preferencesMap.containsKey(userId) == false) {
            HashMap<Long, Double> map = new HashMap<Long, Double>();
            map.put(itemId, score);
            preferencesMap.put(userId, map);
        } else {
            preferencesMap.get(userId).put(itemId, score);
        }

        // Add counter
        m_indexes.add(counter);
        counter++;
    }

    // DEBUG
    if (m_isDebuggingEnabled) {
        m_logger.writeChars("collected: " + m_usersMatrix.size() + " users, " + m_itemsMatrix.size()
                + " items, " + m_preferences.size() + " preferences\n");
    }

    // **********************************************************************
    // Prepare input for GPU
    // **********************************************************************
    // NOTE(review): the helper arrays below take their row width from the FIRST
    // key of these maps, so this presumably relies on sortByValues ordering by
    // descending rating count - confirm against sortByValues
    Map<Long, Long> sortedUserRatingCount = sortByValues(userRatingCount);
    Map<Long, Long> sortedItemRatingCount = sortByValues(itemRatingCount);

    // Convert preferences to userItemMatrix double[][]
    // sortedUserRatingCount.size() x sortedItemRatingCount.size()
    double[][] userItemMatrix = new double[m_usersMatrix.size()][m_itemsMatrix.size()];

    // Mappers
    // userId -> row of userItemMatrix
    Map<Long, Integer> userItemMatrixUserRowMap = new HashMap<Long, Integer>();
    // itemId -> column of userItemMatrix, and the reverse direction
    GpuIntegerMap userItemMatrixItemColMap = new GpuIntegerMap(m_itemsMatrix.size() + 1); // +1 because we are starting with 0
    GpuIntegerMap userItemMatrixColItemMap = new GpuIntegerMap(m_itemsMatrix.size() + 1); // +1 because we are starting with 0

    // Create userHelper to int[][]
    // userHelper[rowId][0] = rating count of the user in that row
    // userHelper[rowId][1..] = colIds of userItemMatrix for the items the user rated
    int[][] userHelper = null;
    // Create itemHelper to int[][]
    // itemHelper[colId][0] = rating count of the item in that column
    // itemHelper[colId][1..] = rowIds of userItemMatrix for the users who rated the item
    int[][] itemHelper = null;
    // itemId -> next free slot in that item's itemHelper row
    Map<Long, Integer> itemHelperId = new HashMap<Long, Integer>();

    // Debug
    if (m_isDebuggingEnabled) {
        m_logger.writeChars(
                "userItemMatrix: (m x n): " + m_usersMatrix.size() + " x " + m_itemsMatrix.size() + "\n");
    }

    int rowId = 0;
    for (Long userId : sortedUserRatingCount.keySet()) {

        // Map userId to rowId in userItemMatrixUserRowMap
        userItemMatrixUserRowMap.put(userId, rowId);

        // Setup userHelper (allocated once; width = first user's rating count + 1)
        if (userHelper == null) {
            // TODO sortedUserRatingCount.size()
            userHelper = new int[m_usersMatrix.size()][sortedUserRatingCount.get(userId).intValue() + 1];
        }
        userHelper[rowId][0] = sortedUserRatingCount.get(userId).intValue();

        int colId = 0;
        int userHelperId = 1;
        for (Long itemId : sortedItemRatingCount.keySet()) {

            // Map itemId to colId in userItemMatrixItemColMap
            // (only during the first row; item ids are narrowed to int here)
            if (rowId == 0) {
                userItemMatrixItemColMap.put(itemId.intValue(), colId);
                userItemMatrixColItemMap.put(colId, itemId.intValue());
            }

            // Setup itemHelper (allocated once; width = first item's rating count + 1)
            if (itemHelper == null) {
                // TODO sortedItemRatingCount.size()
                itemHelper = new int[m_itemsMatrix.size()][sortedItemRatingCount.get(itemId).intValue() + 1];
            }
            itemHelper[colId][0] = sortedItemRatingCount.get(itemId).intValue();

            if (preferencesMap.get(userId).containsKey(itemId)) {
                // Add userItemMatrix
                userItemMatrix[rowId][colId] = preferencesMap.get(userId).get(itemId);

                // Add userHelper
                userHelper[rowId][userHelperId] = colId;
                userHelperId++;

                // Add itemHelper: append rowId at the item's next free slot
                if (itemHelperId.containsKey(itemId)) {
                    int idx = itemHelperId.get(itemId);
                    itemHelper[colId][idx] = rowId;
                    itemHelperId.put(itemId, idx + 1);
                } else {
                    itemHelper[colId][1] = rowId;
                    itemHelperId.put(itemId, 2);
                }

            }

            colId++;
        }

        // Debug userItemMatrix
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("userItemMatrix userId: " + userId + " row[" + rowId + "]: "
                    + Arrays.toString(userItemMatrix[rowId]) + " userRatings: "
                    + sortedUserRatingCount.get(userId) + "\n");
        }
        rowId++;
    }

    // Debug userHelper and itemHelper
    // NOTE(review): userHelper/itemHelper are still null here if no input was
    // read, which would fail inside these loops bodies only when the matrices
    // are non-empty - confirm the empty-input case is handled by the caller
    if (m_isDebuggingEnabled) {
        // TODO sortedUserRatingCount.size()
        for (int i = 0; i < m_usersMatrix.size(); i++) {
            m_logger.writeChars("userHelper row " + i + ": " + Arrays.toString(userHelper[i]) + "\n");
        }
        // TODO sortedItemRatingCount.size()
        for (int i = 0; i < m_itemsMatrix.size(); i++) {
            m_logger.writeChars("itemHelper row " + i + ": " + Arrays.toString(itemHelper[i]) + "\n");
        }
    }

    // Convert usersMatrix to double[][] (rows follow the sortedUserRatingCount order)
    double[][] userMatrix = new double[m_usersMatrix.size()][m_matrixRank];
    rowId = 0;
    if (m_isDebuggingEnabled) {
        m_logger.writeChars("userMatrix: length: " + m_usersMatrix.size() + "\n");
    }
    for (Long userId : sortedUserRatingCount.keySet()) {
        DoubleVector vector = m_usersMatrix.get(userId).getVector();
        for (int i = 0; i < m_matrixRank; i++) {
            userMatrix[rowId][i] = vector.get(i);
        }
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("userId: " + userId + " " + Arrays.toString(vector.toArray()) + "\n");
        }
        rowId++;
    }

    // Convert itemsMatrix to double[][] (rows follow the sortedItemRatingCount order)
    double[][] itemMatrix = new double[m_itemsMatrix.size()][m_matrixRank];
    rowId = 0;
    // Every item id starts with the value 0 in counterMap; it is handed to the kernel below
    GpuIntegerMap counterMap = new GpuIntegerMap(m_itemsMatrix.size());
    if (m_isDebuggingEnabled) {
        m_logger.writeChars("itemMatrix: length: " + m_itemsMatrix.size() + "\n");
    }
    for (Long itemId : sortedItemRatingCount.keySet()) {
        counterMap.put(itemId.intValue(), 0);

        DoubleVector vector = m_itemsMatrix.get(itemId).getVector();
        for (int i = 0; i < m_matrixRank; i++) {
            itemMatrix[rowId][i] = vector.get(i);
        }
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("itemId: " + itemId + " " + Arrays.toString(vector.toArray()) + "\n");
        }
        rowId++;
    }

    // Sync tasks after input has been collected
    peer.sync();

    // **********************************************************************
    // Run GPU Kernels
    // **********************************************************************
    OnlineCFTrainHybridKernel kernel = new OnlineCFTrainHybridKernel(userItemMatrix, userHelper, itemHelper,
            userItemMatrixItemColMap, userItemMatrixColItemMap, userMatrix, itemMatrix, m_usersMatrix.size(),
            m_itemsMatrix.size(), ALPHA, m_matrixRank, m_maxIterations, counterMap, m_skipCount,
            peer.getNumPeers(), peer.getPeerIndex(), peer.getAllPeerNames());

    Context context = rootbeer.createDefaultContext();
    // The stopwatch brackets only the rootbeer.run call
    Stopwatch watch = new Stopwatch();
    watch.start();
    rootbeer.run(kernel, new ThreadConfig(m_blockSize, m_gridSize, m_blockSize * m_gridSize), context);
    watch.stop();

    // **********************************************************************
    // Save Model
    // **********************************************************************
    // save users (vectors are read back from the kernel by row index)
    for (Entry<Long, Integer> userMap : userItemMatrixUserRowMap.entrySet()) {
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("user: " + userMap.getKey() + " vector: "
                    + Arrays.toString(kernel.m_usersMatrix[userMap.getValue()]) + "\n");
        }
        peer.write(new Text("u" + userMap.getKey()),
                new PipesVectorWritable(new DenseDoubleVector(kernel.m_usersMatrix[userMap.getValue()])));
    }
    // TODO duplicated item saves, but one item may belong to one task only
    // save items (null entries of the map's backing list are skipped)
    for (GpuIntIntPair itemMap : userItemMatrixItemColMap.getList()) {
        if (itemMap != null) {
            if (m_isDebuggingEnabled) {
                m_logger.writeChars("item: " + itemMap.getKey() + " vector: "
                        + Arrays.toString(kernel.m_itemsMatrix[itemMap.getValue()]) + "\n");
            }
            peer.write(new Text("i" + itemMap.getKey()),
                    new PipesVectorWritable(new DenseDoubleVector(kernel.m_itemsMatrix[itemMap.getValue()])));
        }
    }

    this.m_bspTimeGpu = System.currentTimeMillis() - startTime;

    // **********************************************************************
    // Logging
    // **********************************************************************
    if (m_isDebuggingEnabled) {
        m_logger.writeChars("OnlineCFTrainHybridBSP.bspGpu executed on GPU!\n");
        m_logger.writeChars(
                "OnlineCFTrainHybridBSP.bspGpu blockSize: " + m_blockSize + " gridSize: " + m_gridSize + "\n");
        m_logger.writeChars("OnlineCFTrainHybridBSP,setupTimeGpu=" + this.m_setupTimeGpu + " ms\n");
        m_logger.writeChars(
                "OnlineCFTrainHybridBSP,setupTimeGpu=" + (this.m_setupTimeGpu / 1000.0) + " seconds\n");
        m_logger.writeChars("OnlineCFTrainHybridBSP,bspTimeGpu=" + this.m_bspTimeGpu + " ms\n");
        m_logger.writeChars("OnlineCFTrainHybridBSP,bspTimeGpu=" + (this.m_bspTimeGpu / 1000.0) + " seconds\n");

        // Note: the GPUTime line is emitted once per stats row
        List<StatsRow> stats = context.getStats();
        for (StatsRow row : stats) {
            m_logger.writeChars("  StatsRow:\n");
            m_logger.writeChars("    serial time: " + row.getSerializationTime() + "\n");
            m_logger.writeChars("    exec time: " + row.getExecutionTime() + "\n");
            m_logger.writeChars("    deserial time: " + row.getDeserializationTime() + "\n");
            m_logger.writeChars("    num blocks: " + row.getNumBlocks() + "\n");
            m_logger.writeChars("    num threads: " + row.getNumThreads() + "\n");
            m_logger.writeChars("GPUTime: " + watch.elapsedTimeMillis() + " ms" + "\n");
        }

        m_logger.close();
    }

    // Logging (same information as above, sent to the regular log)
    List<StatsRow> stats = context.getStats();
    for (StatsRow row : stats) {
        LOG.info("  StatsRow:");
        LOG.info("    serial time: " + row.getSerializationTime());
        LOG.info("    exec time: " + row.getExecutionTime());
        LOG.info("    deserial time: " + row.getDeserializationTime());
        LOG.info("    num blocks: " + row.getNumBlocks());
        LOG.info("    num threads: " + row.getNumThreads());
        LOG.info("GPUTime: " + watch.elapsedTimeMillis() + " ms");
    }
    LOG.info("OnlineCFTrainHybridBSP.bspGpu executed on GPU!");
    LOG.info("OnlineCFTrainHybridBSP.bspGpu blockSize: " + m_blockSize + " gridSize: " + m_gridSize);
    LOG.info("OnlineCFTrainHybridBSP,setupTimeGpu=" + this.m_setupTimeGpu + " ms");
    LOG.info("OnlineCFTrainHybridBSP,setupTimeGpu=" + (this.m_setupTimeGpu / 1000.0) + " seconds");
    LOG.info("OnlineCFTrainHybridBSP,bspTimeGpu=" + this.m_bspTimeGpu + " ms");
    LOG.info("OnlineCFTrainHybridBSP,bspTimeGpu=" + (this.m_bspTimeGpu / 1000.0) + " seconds");

}

From source file:at.illecker.hama.hybrid.examples.piestimator.PiEstimatorHybridBSP.java

License:Apache License

/**
 * Aggregates the hit counts sent to the master task and writes the PI
 * estimate ({@code 4.0 * totalHits / m_iterations}) to the peer output.
 * Non-master tasks only perform the optional timing and debug logging.
 *
 * <p>When debugging is enabled, the debug logger is closed at the end of
 * this method.
 *
 * @param peer the BSP peer whose message queue is drained and whose output
 *        receives the result
 * @throws IOException if reading messages, writing output or debug logging fails
 */
@Override
public void cleanup(BSPPeer<NullWritable, NullWritable, Text, DoubleWritable, LongWritable> peer)
        throws IOException {

    long startTime = 0;
    if (m_timeMeasurement) {
        startTime = System.currentTimeMillis();
    }

    // MasterTask writes out results
    if (peer.getPeerName().equals(m_masterTask)) {
        // Snapshot the queue size BEFORE draining it; asking afterwards would
        // report the post-drain size instead of the number of received messages
        int numMessages = peer.getNumCurrentMessages();

        long totalHits = 0;
        LongWritable received;
        while ((received = peer.getCurrentMessage()) != null) {
            totalHits += received.get();
        }

        double pi = 4.0 * totalHits / m_iterations;

        // DEBUG
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("PiEstimatorHybrid,Iterations=" + m_iterations + "\n");
            m_logger.writeChars("PiEstimatorHybrid,numMessages: " + numMessages + "\n");
            m_logger.writeChars("PiEstimatorHybrid,totalHits: " + totalHits + "\n");
        }

        peer.write(new Text("Estimated value of PI(3,14159265) using " + m_iterations + " iterations is"),
                new DoubleWritable(pi));
    }

    long stopTime = 0;
    if (m_timeMeasurement) {
        stopTime = System.currentTimeMillis();
        LOG.info("# cleanupTime: " + ((stopTime - startTime) / 1000.0) + " sec");
    }

    // Without time measurement both timestamps stay 0, so this logs 0.0 sec
    if (m_isDebuggingEnabled) {
        m_logger.writeChars("PiEstimatorHybrid,cleanupTime: " + ((stopTime - startTime) / 1000.0) + " sec\n");
        m_logger.close();
    }
}

From source file:at.illecker.hama.rootbeer.examples.hellorootbeer.HelloRootbeerGpuBSP.java

License:Apache License

/**
 * On the master task, sums all received messages, asserts that the sum
 * equals {@code peer.getNumPeers() * m_kernelCount * m_iterations} and
 * writes the sum to the peer output. Other tasks do nothing.
 *
 * @param peer the BSP peer whose message queue is drained and whose output
 *        receives the result
 * @throws IOException if reading messages or writing output fails
 */
@Override
public void cleanup(BSPPeer<NullWritable, NullWritable, Text, DoubleWritable, DoubleWritable> peer)
        throws IOException {

    // Only the master task aggregates and reports
    if (!peer.getPeerName().equals(m_masterTask)) {
        return;
    }

    // Drain the message queue, accumulating the partial results
    double total = 0.0;
    for (DoubleWritable message = peer.getCurrentMessage(); message != null; message = peer
            .getCurrentMessage()) {
        total += message.get();
    }

    double expectedTotal = peer.getNumPeers() * m_kernelCount * m_iterations;
    Assert.assertEquals(expectedTotal, total);

    String resultLabel = "Result of " + (peer.getNumPeers() * m_kernelCount * m_iterations)
            + " calculations is";
    peer.write(new Text(resultLabel), new DoubleWritable(total));
}

From source file:at.illecker.hama.rootbeer.examples.piestimator.cpu.PiEstimatorCpuBSP.java

License:Apache License

/**
 * On the master task, sums the hit counts of all received messages and
 * writes the PI estimate
 * ({@code 4.0 * totalHits / (m_calculationsPerBspTask * numMessages)}) to
 * the peer output. Other tasks do nothing.
 *
 * <p>When debugging is enabled, a {@code <taskId>.log} file with the raw
 * numbers is written into the job's output path.
 *
 * @param peer the BSP peer whose message queue is drained and whose output
 *        receives the result
 * @throws IOException if reading messages, writing output or writing the
 *         debug log fails
 */
@Override
public void cleanup(BSPPeer<NullWritable, NullWritable, Text, DoubleWritable, LongWritable> peer)
        throws IOException {

    // MasterTask writes out results
    if (peer.getPeerName().equals(m_masterTask)) {

        // Must be read before the queue is drained below
        int numMessages = peer.getNumCurrentMessages();

        long totalHits = 0;
        LongWritable received;
        while ((received = peer.getCurrentMessage()) != null) {
            totalHits += received.get();
        }

        double pi = 4.0 * totalHits / (m_calculationsPerBspTask * numMessages);

        // DEBUG
        if (m_isDebuggingEnabled) {
            // Write log to dfs
            BSPJob job = new BSPJob((HamaConfiguration) peer.getConfiguration());
            FileSystem fs = FileSystem.get(peer.getConfiguration());
            FSDataOutputStream outStream = fs
                    .create(new Path(FileOutputFormat.getOutputPath(job), peer.getTaskId() + ".log"));
            try {
                outStream.writeChars("BSP=PiEstimatorCpuBSP,Iterations=" + m_iterations + "\n");

                outStream.writeChars("totalHits: " + totalHits + "\n");
                outStream.writeChars("numMessages: " + numMessages + "\n");
                outStream.writeChars("calculationsPerBspTask: " + m_calculationsPerBspTask + "\n");
                outStream.writeChars("calculationsTotal: " + (m_calculationsPerBspTask * numMessages) + "\n");
            } finally {
                // Close even when a write fails so the stream is not leaked
                outStream.close();
            }
        }

        peer.write(
                new Text("Estimated value of PI(3,14159265) using " + (m_calculationsPerBspTask * numMessages)
                        + " points is"),
                new DoubleWritable(pi));
    }
}

From source file:at.illecker.hama.rootbeer.examples.piestimator.gpu.PiEstimatorGpuBSP.java

License:Apache License

/**
 * On the master task, sums the hit counts of all received messages and
 * writes the PI estimate
 * ({@code 4.0 * totalHits / (m_calculationsPerThread * m_blockSize * m_gridSize)})
 * to the peer output. Other tasks do nothing.
 *
 * @param peer the BSP peer whose message queue is drained and whose output
 *        receives the result
 * @throws IOException if reading messages or writing output fails
 */
@Override
public void cleanup(BSPPeer<NullWritable, NullWritable, Text, DoubleWritable, LongWritable> peer)
        throws IOException {

    // Only the master task aggregates and reports
    if (!peer.getPeerName().equals(m_masterTask)) {
        return;
    }

    // Drain the message queue, accumulating the hit counts
    long hits = 0;
    for (LongWritable message = peer.getCurrentMessage(); message != null; message = peer
            .getCurrentMessage()) {
        hits += message.get();
    }

    double estimate = 4.0 * hits / (m_calculationsPerThread * m_blockSize * m_gridSize);

    String resultLabel = "Estimated value of PI(3,14159265) using "
            + (m_calculationsPerThread * m_blockSize * m_gridSize) + " points is";
    peer.write(new Text(resultLabel), new DoubleWritable(estimate));
}

From source file:authordetect.input.SingleBookReader.java

/**
 * Reads the book body line by line, counts how often each word occurs and
 * publishes the result as the current key/value pair.
 *
 * <p>Each line is lower-cased and split on single spaces; every token is
 * stripped of non-alphanumeric characters and empty tokens are dropped.
 * Reading stops after a line that contains both "end" and "gutenberg"
 * (which also increments {@code BookCounter.BOOK_COUNT}), or when the
 * reader reports end of input.
 *
 * <p>On return, {@code key} is {@code title + "/" + maxCount} and
 * {@code value} holds one {@code word + "/" + count} entry per distinct word.
 *
 * @throws IOException if reading from the underlying line reader fails
 */
private void processBookContent() throws IOException {

    long bytesRead = lineReader.readLine(currentLine);
    currentPos += bytesRead;
    String currentLineStr = currentLine.toString().toLowerCase();

    // Process the book content line by line and update the word map
    while (!isFinish) {
        for (String word : currentLineStr.split(" ")) {
            word = word.trim().replaceAll("[^a-zA-Z0-9]", "");
            if (!word.isEmpty()) {
                // Accumulate occurrences; a plain put(word, 1) would pin every count to 1
                Integer seen = wordCountMap.get(word);
                wordCountMap.put(word, seen == null ? 1 : seen + 1);
            }
        }

        // Detect the end-of-book marker
        if (currentLineStr.contains("end") && currentLineStr.contains("gutenberg")) {
            isFinish = true;

            // Update the counter which stores the book count
            Counter counter = context.getCounter(BookCounter.BOOK_COUNT);
            counter.increment(1);
        }

        bytesRead = lineReader.readLine(currentLine);
        currentPos += bytesRead;
        currentLineStr = currentLine.toString().toLowerCase();

        if (bytesRead == 0) {
            // Nothing was consumed, i.e. the input is exhausted: stop here
            // instead of spinning forever when the end marker is missing
            isFinish = true;
        }
    }

    // Convert the word map to a Text array and track the highest count
    Text[] wordArray = new Text[wordCountMap.size()];
    int maxCount = 0;
    int index = 0;
    for (Map.Entry<String, Integer> entry : wordCountMap.entrySet()) {
        int count = entry.getValue();
        wordArray[index++] = new Text(entry.getKey() + "/" + count);
        if (count > maxCount) {
            maxCount = count;
        }
    }

    key = new Text(title + "/" + maxCount);
    value = new TextArrayWritable(wordArray);
}

From source file:average.AverageMapper.java

/**
 * Splits a comma-separated input line and emits its first field as the key
 * and its second field as the value.
 *
 * <p>Lines with fewer than two fields (for example blank lines) are skipped
 * and counted under the {@code AverageMapper / MALFORMED_RECORDS} counter
 * instead of failing the task with an
 * {@code ArrayIndexOutOfBoundsException}.
 *
 * @param _key the input key (unused)
 * @param value one comma-separated input line
 * @param output collector receiving the (first field, second field) pair
 * @param reporter used to count skipped records
 * @throws IOException if the collector fails
 */
@Override
public void map(LongWritable _key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    String[] fields = value.toString().split(",");
    if (fields.length < 2) {
        // Nothing usable on this line; make the skip visible in the job counters
        reporter.incrCounter("AverageMapper", "MALFORMED_RECORDS", 1);
        return;
    }
    output.collect(new Text(fields[0]), new Text(fields[1]));
}

From source file:average.AverageReducer.java

/**
 * Emits the arithmetic mean of all integer values of a key as a decimal
 * string. The key {@code "0Student_Id"} (presumably the header row of the
 * input - confirm against the data) is emitted with the fixed value
 * {@code "Average"} instead.
 *
 * @param _key the grouping key
 * @param values the values of the key, each parsed with {@code Integer.parseInt}
 * @param output collector receiving the (key, average) pair
 * @param reporter progress reporter (unused)
 * @throws IOException if the collector fails
 */
@Override
public void reduce(Text _key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

    // Special key: pass through with a label instead of a number
    if (_key.toString().equals("0Student_Id")) {
        output.collect(_key, new Text("Average"));
        return;
    }

    int total = 0;
    long samples = 0;
    while (values.hasNext()) {
        total += Integer.parseInt(values.next().toString());
        samples++;
    }

    // Floating-point division, so an empty value list yields NaN rather than throwing
    double average = total / (double) samples;
    output.collect(_key, new Text(Double.toString(average)));

}

From source file:averagetemperature.AverageTemperatureMapper.java

/**
 * Emits (field 1, field 10 parsed as int) for every comma-separated input
 * line whose field 10 is numeric; all other lines are skipped.
 *
 * <p>Lines with fewer than eleven fields are skipped as well.
 * {@code String.split} drops trailing empty fields, so a row whose last
 * columns are empty is shorter than expected and would otherwise fail with
 * an {@code ArrayIndexOutOfBoundsException}.
 *
 * @param key the input key (unused)
 * @param value one comma-separated input line
 * @param output collector receiving the (date, temperature) pair
 * @param reporter progress reporter (unused)
 * @throws IOException if the collector fails
 */
@Override
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {

    String[] line = value.toString().split(",");
    if (line.length <= 10) {
        // Row has no temperature column; treat it like a non-numeric temperature
        return;
    }
    String datePart = line[1];
    String temp = line[10];

    if (StringUtils.isNumeric(temp)) {
        try {
            output.collect(new Text(datePart), new IntWritable(Integer.parseInt(temp)));
        } catch (NumberFormatException ignored) {
            // Deliberate best effort: a value that passes isNumeric can still be
            // rejected by parseInt (e.g. too many digits for an int); skip the row
        }
    }

}