Example usage for org.apache.hadoop.io IntWritable set

List of usage examples for org.apache.hadoop.io IntWritable set

Introduction

On this page you can find example usages of the set method of org.apache.hadoop.io.IntWritable.

Prototype

public void set(int value) 

Source Link

Document

Set the value of this IntWritable.

Usage

From source file:edu.ub.ahstfg.io.document.ParsedDocument.java

License:Open Source License

/**
 * Records one occurrence of a term. A term that is already present has its
 * stored frequency increased by one; a term seen for the first time is
 * stored with the value supplied by {@code ONE()}.
 *
 * @param term Term to add.
 */
public void addTerm(Text term) {
    if (!terms.containsKey(term)) {
        // first occurrence: start from the initial counter
        terms.put(term, ONE());
        return;
    }
    IntWritable frequency = (IntWritable) terms.get(term);
    frequency.set(frequency.get() + 1);
}

From source file:edu.ub.ahstfg.io.document.ParsedDocument.java

License:Open Source License

/**
 * Records one occurrence of a keyword. A keyword that is already present
 * has its stored frequency increased by one; a keyword seen for the first
 * time is stored with the value supplied by {@code ONE()}.
 *
 * @param keyword Keyword to add.
 */
public void addKeyword(Text keyword) {
    if (!keywords.containsKey(keyword)) {
        // first occurrence: start from the initial counter
        keywords.put(keyword, ONE());
        return;
    }
    IntWritable frequency = (IntWritable) keywords.get(keyword);
    frequency.set(frequency.get() + 1);
}

From source file:edu.uci.ics.asterix.external.indexing.dataflow.IndexingScheduler.java

License:Apache License

/**
 * Scan the splits once and build a popularity map
 * /*  w  ww .  j  a va  2 s .  c o m*/
 * @param splits
 *            the split array
 * @param locationToNumOfSplits
 *            the map to be built
 * @throws IOException
 */
private void buildPopularityMap(InputSplit[] splits, Map<String, IntWritable> locationToNumOfSplits)
        throws IOException {
    for (InputSplit split : splits) {
        String[] locations = split.getLocations();
        for (String loc : locations) {
            IntWritable locCount = locationToNumOfSplits.get(loc);
            if (locCount == null) {
                locCount = new IntWritable(0);
                locationToNumOfSplits.put(loc, locCount);
            }
            locCount.set(locCount.get() + 1);
        }
    }
}

From source file:edu.uci.ics.hyracks.hdfs.scheduler.IPProximityNcCollectionBuilder.java

License:Apache License

/**
 * Builds a node-controller collection that hands out slots by IP proximity:
 * a split is assigned to the node controller whose IP address is numerically
 * closest to one of the split's locations.
 * <p>
 * For every node controller whose workload is below {@code slotLimit}, its
 * free capacity ({@code slotLimit - workloads[i]}) is accumulated per IP
 * address in a sorted map that the returned collection consumes.
 *
 * @param ncNameToNcInfos
 *            node controller name to node controller info
 * @param ipToNcMapping
 *            host address to the names of the node controllers on that address
 * @param ncNameToIndex
 *            node controller name to its index in {@code workloads}
 * @param NCs
 *            node controller names, indexed consistently with {@code workloads}
 * @param workloads
 *            current workload of each node controller
 * @param slotLimit
 *            workload at which a node controller is considered full
 * @return the collection used to look up the nearest available slot
 */
@Override
public INcCollection build(Map<String, NodeControllerInfo> ncNameToNcInfos,
        final Map<String, List<String>> ipToNcMapping, final Map<String, Integer> ncNameToIndex, String[] NCs,
        final int[] workloads, final int slotLimit) {
    final TreeMap<BytesWritable, IntWritable> availableIpsToSlots = new TreeMap<BytesWritable, IntWritable>();
    for (int i = 0; i < workloads.length; i++) {
        if (workloads[i] < slotLimit) {
            byte[] rawip;
            try {
                rawip = ncNameToNcInfos.get(NCs[i]).getNetworkAddress().lookupIpAddress();
            } catch (UnknownHostException e) {
                // QQQ Should probably have a neater solution than this
                throw new RuntimeException(e);
            }
            BytesWritable ip = new BytesWritable(rawip);
            IntWritable availableSlot = availableIpsToSlots.get(ip);
            if (availableSlot == null) {
                availableSlot = new IntWritable(slotLimit - workloads[i]);
                availableIpsToSlots.put(ip, availableSlot);
            } else {
                // several node controllers share this IP: add up their free capacity
                availableSlot.set(slotLimit - workloads[i] + availableSlot.get());
            }
        }
    }
    return new INcCollection() {

        /**
         * Picks a node controller for the split, preferring the IP closest to
         * one of the split's locations.
         *
         * @return the chosen node controller name, or {@code null} when the
         *         split could not be scheduled
         */
        @Override
        public String findNearestAvailableSlot(InputSplit split) {
            try {
                String[] locs = split.getLocations();
                // distances are kept as long so that unsigned 32-bit differences cannot overflow
                long minDistance = Long.MAX_VALUE;
                BytesWritable currentCandidateIp = null;
                // a null or empty location array falls through to the "any free slot" branch
                if (locs != null && locs.length > 0) {
                    for (int j = 0; j < locs.length; j++) {
                        // get all the IP addresses from the name
                        InetAddress[] allIps = InetAddress.getAllByName(locs[j]);
                        for (InetAddress ip : allIps) {
                            BytesWritable splitIp = new BytesWritable(ip.getAddress());
                            // nearest key at or below the split IP, otherwise the nearest one above it
                            BytesWritable candidateNcIp = availableIpsToSlots.floorKey(splitIp);
                            if (candidateNcIp == null) {
                                candidateNcIp = availableIpsToSlots.ceilingKey(splitIp);
                            }
                            if (candidateNcIp != null) {
                                if (availableIpsToSlots.get(candidateNcIp).get() > 0) {
                                    long distance = Math.abs(toUnsignedIp(candidateNcIp.getBytes())
                                            - toUnsignedIp(splitIp.getBytes()));
                                    if (minDistance > distance) {
                                        minDistance = distance;
                                        currentCandidateIp = candidateNcIp;
                                    }
                                }
                            }
                        }
                    }
                } else {
                    // no location hint: take the first IP that still has capacity
                    for (Entry<BytesWritable, IntWritable> entry : availableIpsToSlots.entrySet()) {
                        if (entry.getValue().get() > 0) {
                            currentCandidateIp = entry.getKey();
                            break;
                        }
                    }
                }

                if (currentCandidateIp != null) {
                    // Update the entry of the selected IP
                    IntWritable availableSlot = availableIpsToSlots.get(currentCandidateIp);
                    availableSlot.set(availableSlot.get() - 1);
                    if (availableSlot.get() == 0) {
                        availableIpsToSlots.remove(currentCandidateIp);
                    }
                    // Return the first node controller on that IP which is still below the limit
                    List<String> dataLocations = ipToNcMapping
                            .get(InetAddress.getByAddress(currentCandidateIp.getBytes()).getHostAddress());
                    for (String nc : dataLocations) {
                        int ncIndex = ncNameToIndex.get(nc);
                        if (workloads[ncIndex] < slotLimit) {
                            return nc;
                        }
                    }
                }
                // not scheduled
                return null;
            } catch (Exception e) {
                throw new IllegalStateException(e);
            }
        }

        /**
         * @return the number of IP entries that still have capacity (the size
         *         of the internal map)
         */
        @Override
        public int numAvailableSlots() {
            return availableIpsToSlots.size();
        }

        /**
         * Packs the first four bytes of an address into a non-negative value
         * so that two addresses can be subtracted without sign problems.
         */
        private long toUnsignedIp(byte[] address) {
            return ((address[0] & 0xFFL) << 24) | ((address[1] & 0xFFL) << 16) | ((address[2] & 0xFFL) << 8)
                    | (address[3] & 0xFFL);
        }

    };
}

From source file:edu.uci.ics.hyracks.hdfs.scheduler.RackAwareNcCollectionBuilder.java

License:Apache License

/**
 * Builds a node-controller collection that hands out slots by rack
 * proximity: every node controller is mapped to the path that
 * {@code topology.lookupNetworkTerminal} reports for its IP address, and a
 * split is assigned to the node controller whose path is closest to the path
 * of one of the split's locations.
 * <p>
 * A node controller whose lookup leaves the path empty is filed under the
 * single-element path {@code [Integer.MIN_VALUE]}.
 *
 * @param ncNameToNcInfos
 *            node controller name to node controller info
 * @param ipToNcMapping
 *            not read by this implementation
 * @param ncNameToIndex
 *            node controller name to its index in {@code workloads}
 * @param NCs
 *            node controller names, indexed consistently with {@code workloads}
 * @param workloads
 *            current workload of each node controller
 * @param slotLimit
 *            workload at which a node controller is considered full
 * @return the collection used to look up the nearest available slot
 * @throws IllegalStateException
 *             wrapping any exception raised while building the collection
 */
@Override
public INcCollection build(Map<String, NodeControllerInfo> ncNameToNcInfos,
        final Map<String, List<String>> ipToNcMapping, final Map<String, Integer> ncNameToIndex, String[] NCs,
        final int[] workloads, final int slotLimit) {
    try {
        // group all node controllers by their topology path
        final Map<List<Integer>, List<String>> pathToNCs = new HashMap<List<Integer>, List<String>>();
        for (int i = 0; i < NCs.length; i++) {
            List<Integer> path = new ArrayList<Integer>();
            String ipAddress = InetAddress
                    .getByAddress(ncNameToNcInfos.get(NCs[i]).getNetworkAddress().lookupIpAddress())
                    .getHostAddress();
            topology.lookupNetworkTerminal(ipAddress, path);
            if (path.size() <= 0) {
                // if the hyracks nc is not in the defined cluster
                path.add(Integer.MIN_VALUE);
                LOGGER.info(NCs[i] + "'s IP address is not in the cluster toplogy file!");
            }
            List<String> ncs = pathToNCs.get(path);
            if (ncs == null) {
                ncs = new ArrayList<String>();
                pathToNCs.put(path, ncs);
            }
            ncs.add(NCs[i]);
        }

        // paths are ordered element by element; on a common prefix the shorter path sorts first
        final TreeMap<List<Integer>, IntWritable> availableIpsToSlots = new TreeMap<List<Integer>, IntWritable>(
                new Comparator<List<Integer>>() {

                    @Override
                    public int compare(List<Integer> l1, List<Integer> l2) {
                        int commonLength = Math.min(l1.size(), l2.size());
                        for (int i = 0; i < commonLength; i++) {
                            int cmp = l1.get(i).compareTo(l2.get(i));
                            if (cmp != 0) {
                                return cmp;
                            }
                        }
                        return l1.size() > l2.size() ? 1 : (l1.size() < l2.size() ? -1 : 0);
                    }

                });
        // accumulate the free capacity (slotLimit - workload) per topology path
        for (int i = 0; i < workloads.length; i++) {
            if (workloads[i] < slotLimit) {
                List<Integer> path = new ArrayList<Integer>();
                String ipAddress = InetAddress
                        .getByAddress(ncNameToNcInfos.get(NCs[i]).getNetworkAddress().lookupIpAddress())
                        .getHostAddress();
                topology.lookupNetworkTerminal(ipAddress, path);
                if (path.size() <= 0) {
                    // if the hyracks nc is not in the defined cluster
                    path.add(Integer.MIN_VALUE);
                }
                IntWritable availableSlot = availableIpsToSlots.get(path);
                if (availableSlot == null) {
                    availableSlot = new IntWritable(slotLimit - workloads[i]);
                    availableIpsToSlots.put(path, availableSlot);
                } else {
                    availableSlot.set(slotLimit - workloads[i] + availableSlot.get());
                }
            }
        }
        return new INcCollection() {

            /**
             * Picks a node controller for the split, preferring the topology
             * path closest to one of the split's locations.
             *
             * @return the chosen node controller name, or {@code null} when
             *         the split could not be scheduled
             */
            @Override
            public String findNearestAvailableSlot(InputSplit split) {
                try {
                    String[] locs = split.getLocations();
                    int minDistance = Integer.MAX_VALUE;
                    List<Integer> currentCandidatePath = null;
                    // a null or empty location array falls through to the "any free slot" branch
                    if (locs != null && locs.length > 0) {
                        for (int j = 0; j < locs.length; j++) {
                            // get all the IP addresses from the name
                            InetAddress[] allIps = InetAddress.getAllByName(locs[j]);
                            boolean inTopology = false;
                            for (InetAddress ip : allIps) {
                                List<Integer> splitPath = new ArrayList<Integer>();
                                boolean inCluster = topology.lookupNetworkTerminal(ip.getHostAddress(),
                                        splitPath);
                                if (!inCluster) {
                                    continue;
                                }
                                inTopology = true;
                                // nearest path at or below the split path, otherwise the nearest one above it
                                List<Integer> candidatePath = availableIpsToSlots.floorKey(splitPath);
                                if (candidatePath == null) {
                                    candidatePath = availableIpsToSlots.ceilingKey(splitPath);
                                }
                                if (candidatePath != null) {
                                    if (availableIpsToSlots.get(candidatePath).get() > 0) {
                                        int distance = distance(splitPath, candidatePath);
                                        if (minDistance > distance) {
                                            minDistance = distance;
                                            currentCandidatePath = candidatePath;
                                        }
                                    }

                                }
                            }

                            if (!inTopology) {
                                LOGGER.info(locs[j] + "'s IP address is not in the cluster toplogy file!");
                                // the machine is not in the topology file: fall back to the
                                // first path that still has capacity
                                List<Integer> candidatePath = null;
                                for (Entry<List<Integer>, IntWritable> entry : availableIpsToSlots.entrySet()) {
                                    if (entry.getValue().get() > 0) {
                                        candidatePath = entry.getKey();
                                        break;
                                    }
                                }
                                // the split path is empty
                                if (candidatePath != null) {
                                    if (availableIpsToSlots.get(candidatePath).get() > 0) {
                                        currentCandidatePath = candidatePath;
                                    }
                                }
                            }
                        }
                    } else {
                        // no location hint: take the first path that still has capacity
                        for (Entry<List<Integer>, IntWritable> entry : availableIpsToSlots.entrySet()) {
                            if (entry.getValue().get() > 0) {
                                currentCandidatePath = entry.getKey();
                                break;
                            }
                        }
                    }

                    if (currentCandidatePath != null && currentCandidatePath.size() > 0) {
                        // Update the entry of the selected path
                        IntWritable availableSlot = availableIpsToSlots.get(currentCandidatePath);
                        availableSlot.set(availableSlot.get() - 1);
                        if (availableSlot.get() == 0) {
                            availableIpsToSlots.remove(currentCandidatePath);
                        }
                        // Return the first node controller on that path which is still below the limit
                        List<String> candidateNcs = pathToNCs.get(currentCandidatePath);
                        for (String candidate : candidateNcs) {
                            int ncIndex = ncNameToIndex.get(candidate);
                            if (workloads[ncIndex] < slotLimit) {
                                return candidate;
                            }
                        }
                    }
                    // not scheduled
                    return null;
                } catch (Exception e) {
                    throw new IllegalStateException(e);
                }
            }

            /**
             * @return the number of topology paths that still have capacity
             *         (the size of the internal map)
             */
            @Override
            public int numAvailableSlots() {
                return availableIpsToSlots.size();
            }

            /**
             * Folds two paths into one number: each level contributes the
             * absolute difference of its elements, with earlier levels
             * weighted by a further factor of 100. Levels present in only
             * one path contribute their absolute value.
             */
            private int distance(List<Integer> splitPath, List<Integer> candidatePath) {
                // NOTE(review): the int accumulator can overflow for deep paths or for the
                // Integer.MIN_VALUE placeholder path - confirm whether that matters in practice
                int commonLength = Math.min(splitPath.size(), candidatePath.size());
                int distance = 0;
                for (int i = 0; i < commonLength; i++) {
                    distance = distance * 100 + Math.abs(splitPath.get(i) - candidatePath.get(i));
                }
                List<Integer> restElements = splitPath.size() > candidatePath.size() ? splitPath
                        : candidatePath;
                for (int i = commonLength; i < restElements.size(); i++) {
                    distance = distance * 100 + Math.abs(restElements.get(i));
                }
                return distance;
            }
        };
    } catch (Exception e) {
        throw new IllegalStateException(e);
    }
}

From source file:edu.uci.ics.pregelix.core.optimizer.DynamicOptimizer.java

License:Apache License

/**
 * Initializes the load-per-machine map: the map is cleared, every location
 * constraint is registered with a zero counter, and then each registered
 * machine is assigned the current value of the {@code NUM_PROCESSOR}
 * counter. The assigned values are summed into {@code dop}.
 *
 * @return the degree of parallelism
 * @throws HyracksException
 *             propagated from the underlying calls
 */
private int initializeLoadPerMachine() throws HyracksException {
    machineToDegreeOfParallelism.clear();
    for (String location : ClusterConfig.getLocationConstraint()) {
        machineToDegreeOfParallelism.put(location, new IntWritable(0));
    }
    dop = 0;
    for (Entry<String, IntWritable> machine : machineToDegreeOfParallelism.entrySet()) {
        // the counter is re-read for every machine; no cores are held back here
        int processors = (int) counterContext.getCounter(Counters.NUM_PROCESSOR, false).get();
        machine.getValue().set(processors);
        dop += processors;
    }
    return dop;
}

From source file:edu.umn.cs.spatialHadoop.core.RectangleNN.java

License:Open Source License

/**
 * Joins two lists of shapes using only their MBRs (filter step); no exact
 * intersection test is applied to the reported pairs.
 *
 * @param R first input list of shapes
 * @param S second input list of shapes
 * @param output receives every reported pair; may be {@code null}, in which
 *            case pairs are only counted
 * @param reporter passed through to the rectangle join
 * @return the number of pairs delivered to the internal collector
 * @throws IOException propagated from the rectangle join or from {@code output}
 */
public static <S1 extends Shape, S2 extends Shape> int SpatialJoin_planeSweepFilterOnly(final List<S1> R,
        final List<S2> S, final ResultCollector2<S1, S2> output, Reporter reporter) throws IOException {

    LOG.debug("Start spatial join plan sweep algorithm !!!");

    // wrap every shape's MBR together with its position in the input list
    final RectangleID[] leftMbrs = new RectangleID[R.size()];
    int leftPos = 0;
    for (S1 shape : R) {
        leftMbrs[leftPos] = new RectangleID(leftPos, shape.getMBR());
        leftPos++;
    }
    final RectangleID[] rightMbrs = new RectangleID[S.size()];
    int rightPos = 0;
    for (S2 shape : S) {
        rightMbrs[rightPos] = new RectangleID(rightPos, shape.getMBR());
        rightPos++;
    }

    final IntWritable pairCount = new IntWritable();
    OutputCollector<RectangleID, RectangleID> pairSink = new OutputCollector<RectangleID, RectangleID>() {
        @Override
        public void collect(RectangleID r1, RectangleID r2) throws IOException {
            // map the MBR ids back to the original shapes before reporting
            if (output != null) {
                output.collect(R.get(r1.id), S.get(r2.id));
            }
            pairCount.set(pairCount.get() + 1);
        }
    };
    int filterCount = SpatialJoin_rectangles(leftMbrs, rightMbrs, pairSink, reporter);

    LOG.debug("Filtered result size " + filterCount + ", refined result size " + pairCount.get());

    return pairCount.get();
}

From source file:edu.umn.cs.spatialHadoop.core.RectangleNN.java

License:Open Source License

/**
 * The general version of the self join algorithm, which works with arbitrary
 * shapes. A filter step first finds shapes with overlapping MBRs; an
 * optional refine step then keeps only the pairs for which
 * {@code isIntersected} holds.
 *
 * @param R - input set of shapes
 * @param refine - Whether or not to run a refine step
 * @param output - output collector where the results are reported; may be
 *            {@code null}, in which case nothing is reported
 * @param reporter - passed through to the rectangle self join
 * @return - with refinement, the number of pairs that passed the refine
 *         step; otherwise the value returned by the rectangle self join
 * @throws IOException
 */
public static <S extends Shape> int SelfJoin_planeSweep(final S[] R, boolean refine,
        final OutputCollector<S, S> output, Progressable reporter) throws IOException {
    // wrap every shape's MBR together with its index in R
    final RectangleID[] mbrs = new RectangleID[R.length];
    int position = 0;
    for (S shape : R) {
        mbrs[position] = new RectangleID(position, shape.getMBR());
        position++;
    }

    if (!refine) {
        // filter only: forward every MBR-overlapping pair as is
        return SelfJoin_rectangles(mbrs, new OutputCollector<RectangleID, RectangleID>() {
            @Override
            public void collect(RectangleID r1, RectangleID r2) throws IOException {
                if (output != null) {
                    output.collect(R[r1.id], R[r2.id]);
                }
            }
        }, reporter);
    }

    final IntWritable refinedCount = new IntWritable();
    int filterCount = SelfJoin_rectangles(mbrs, new OutputCollector<RectangleID, RectangleID>() {
        @Override
        public void collect(RectangleID r1, RectangleID r2) throws IOException {
            // refine: drop pairs whose MBRs overlap but whose shapes do not intersect
            if (!R[r1.id].isIntersected(R[r2.id])) {
                return;
            }
            if (output != null) {
                output.collect(R[r1.id], R[r2.id]);
            }
            refinedCount.set(refinedCount.get() + 1);
        }
    }, reporter);

    LOG.debug("Filtered result size " + filterCount + ", refined result size " + refinedCount.get());

    return refinedCount.get();
}

From source file:edu.umn.cs.spatialHadoop.indexing.Indexer.java

License:Open Source License

/**
 * Builds the index on the local machine. Every input split is read, each
 * shape is assigned to one partition (or, when replication is enabled for the
 * index type, to every overlapping partition) and written through an
 * {@code IndexRecordWriter}. Afterwards the master file
 * {@code _master.<sindex>} is converted line by line into the WKT file
 * {@code _<sindex>.wkt}.
 * <p>
 * Record readers and the streams of the WKT step are closed in
 * {@code finally} blocks so that a failure does not leak them.
 *
 * @param inPath input file or directory
 * @param outPath directory that receives the index and the WKT file
 * @param params job parameters; the index type is read from key {@code "sindex"}
 * @throws IOException propagated from reading the input or writing the index
 * @throws InterruptedException propagated from the Hadoop calls made here
 */
private static void indexLocal(Path inPath, final Path outPath, OperationsParams params)
        throws IOException, InterruptedException {
    Job job = Job.getInstance(params);
    final Configuration conf = job.getConfiguration();

    final String sindex = conf.get("sindex");

    // Start reading input file
    List<InputSplit> splits = new ArrayList<InputSplit>();
    final SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();
    FileSystem inFs = inPath.getFileSystem(conf);
    FileStatus inFStatus = inFs.getFileStatus(inPath);
    if (inFStatus != null && !inFStatus.isDir()) {
        // One file, retrieve it immediately.
        // This is useful if the input is a hidden file which is automatically
        // skipped by FileInputFormat. We need to plot a hidden file for the case
        // of plotting partition boundaries of a spatial index
        splits.add(new FileSplit(inPath, 0, inFStatus.getLen(), new String[0]));
    } else {
        SpatialInputFormat3.setInputPaths(job, inPath);
        for (InputSplit s : inputFormat.getSplits(job)) {
            splits.add(s);
        }
    }

    // Copy splits to a final array to be used in parallel
    final FileSplit[] fsplits = splits.toArray(new FileSplit[splits.size()]);
    boolean replicate = PartitionerReplicate.get(sindex);

    // Set input file MBR if not already set
    Rectangle inputMBR = (Rectangle) OperationsParams.getShape(conf, "mbr");
    if (inputMBR == null) {
        inputMBR = FileMBR.fileMBR(inPath, new OperationsParams(conf));
        OperationsParams.setShape(conf, "mbr", inputMBR);
    }

    setLocalIndexer(conf, sindex);
    final Partitioner partitioner = createPartitioner(inPath, outPath, conf, sindex);

    final IndexRecordWriter<Shape> recordWriter = new IndexRecordWriter<Shape>(partitioner, replicate, sindex,
            outPath, conf);
    for (FileSplit fsplit : fsplits) {
        RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
        try {
            // the reader is created without a task context, so it is initialized
            // through the concrete type's (split, conf) overload
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(fsplit, conf);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(fsplit, conf);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(fsplit, conf);
            } else {
                throw new RuntimeException("Unknown record reader");
            }

            // reused for every record written from this split
            final IntWritable partitionID = new IntWritable();

            while (reader.nextKeyValue()) {
                Iterable<Shape> shapes = reader.getCurrentValue();
                if (replicate) {
                    // write the shape once for every partition it overlaps
                    for (final Shape s : shapes) {
                        partitioner.overlapPartitions(s, new ResultCollector<Integer>() {
                            @Override
                            public void collect(Integer id) {
                                partitionID.set(id);
                                try {
                                    recordWriter.write(partitionID, s);
                                } catch (IOException e) {
                                    // collect() cannot throw checked exceptions
                                    throw new RuntimeException(e);
                                }
                            }
                        });
                    }
                } else {
                    for (final Shape s : shapes) {
                        int pid = partitioner.overlapPartition(s);
                        // shapes for which the partitioner reports -1 are skipped
                        if (pid != -1) {
                            partitionID.set(pid);
                            recordWriter.write(partitionID, s);
                        }
                    }
                }
            }
        } finally {
            reader.close();
        }
    }
    recordWriter.close(null);

    // Write the WKT formatted master file
    Path masterPath = new Path(outPath, "_master." + sindex);
    FileSystem outFs = outPath.getFileSystem(params);
    Path wktPath = new Path(outPath, "_" + sindex + ".wkt");
    PrintStream wktOut = new PrintStream(outFs.create(wktPath));
    try {
        wktOut.println("ID\tBoundaries\tRecord Count\tSize\tFile name");
        Text tempLine = new Text2();
        Partition tempPartition = new Partition();
        LineReader in = new LineReader(outFs.open(masterPath));
        try {
            while (in.readLine(tempLine) > 0) {
                tempPartition.fromText(tempLine);
                wktOut.println(tempPartition.toWKT());
            }
        } finally {
            in.close();
        }
    } finally {
        wktOut.close();
    }
}

From source file:edu.umn.cs.spatialHadoop.operations.Indexer.java

License:Open Source License

/**
 * Builds the index on the local machine. Every input split is read and each
 * shape is written through an {@code IndexRecordWriter}, either to the single
 * partition reported by the partitioner or, when the {@code "replicate"}
 * option is set, to every overlapping partition.
 * <p>
 * Each record reader is closed in a {@code finally} block so that a failure
 * while indexing a split does not leak it.
 *
 * @param inPath input file or directory
 * @param outPath directory that receives the index
 * @param params job parameters; the index type is read from key {@code "sindex"}
 * @throws IOException propagated from reading the input or writing the index
 */
private static void indexLocal(Path inPath, Path outPath, OperationsParams params) throws IOException {
    JobConf job = new JobConf(params);
    String sindex = params.get("sindex");
    Partitioner partitioner = createPartitioner(inPath, outPath, job, sindex);

    // Start reading input file
    Vector<InputSplit> splits = new Vector<InputSplit>();
    final ShapeIterInputFormat inputFormat = new ShapeIterInputFormat();
    FileSystem inFs = inPath.getFileSystem(params);
    FileStatus inFStatus = inFs.getFileStatus(inPath);
    if (inFStatus != null && !inFStatus.isDir()) {
        // One file, retrieve it immediately.
        // This is useful if the input is a hidden file which is automatically
        // skipped by FileInputFormat. We need to plot a hidden file for the case
        // of plotting partition boundaries of a spatial index
        splits.add(new FileSplit(inPath, 0, inFStatus.getLen(), new String[0]));
    } else {
        ShapeIterInputFormat.addInputPath(job, inPath);
        for (InputSplit s : inputFormat.getSplits(job, 1)) {
            splits.add(s);
        }
    }

    // Copy splits to a final array to be used in parallel
    final FileSplit[] fsplits = splits.toArray(new FileSplit[splits.size()]);
    boolean replicate = job.getBoolean("replicate", false);

    final IndexRecordWriter<Shape> recordWriter = new IndexRecordWriter<Shape>(partitioner, replicate, sindex,
            outPath, params);

    for (FileSplit fsplit : fsplits) {
        RecordReader<Rectangle, Iterable<? extends Shape>> reader = inputFormat.getRecordReader(fsplit, job,
                null);
        try {
            Rectangle partitionMBR = reader.createKey();
            Iterable<? extends Shape> shapes = reader.createValue();

            // reused for every record written from this split
            final IntWritable partitionID = new IntWritable();

            while (reader.next(partitionMBR, shapes)) {
                if (replicate) {
                    // Replicate each shape to all overlapping partitions
                    for (final Shape s : shapes) {
                        partitioner.overlapPartitions(s, new ResultCollector<Integer>() {
                            @Override
                            public void collect(Integer id) {
                                partitionID.set(id);
                                try {
                                    recordWriter.write(partitionID, s);
                                } catch (IOException e) {
                                    // collect() cannot throw checked exceptions
                                    throw new RuntimeException(e);
                                }
                            }
                        });
                    }
                } else {
                    for (Shape s : shapes) {
                        // NOTE(review): the partition id is written unchecked; confirm that
                        // overlapPartition cannot signal "no partition" here
                        partitionID.set(partitioner.overlapPartition(s));
                        recordWriter.write(partitionID, s);
                    }
                }
            }
        } finally {
            reader.close();
        }
    }

    recordWriter.close(null);
}