Example usage for org.apache.hadoop.io NullWritable get

Introduction

On this page you can find example usage of org.apache.hadoop.io.NullWritable#get().

Prototype

public static NullWritable get() 

Document

Returns the single instance of this class.
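
Because NullWritable carries no data, get() always returns the same shared instance, and reading or writing it is a no-op. A minimal sketch of the singleton guarantee (the class name is illustrative, not from the examples below):

import org.apache.hadoop.io.NullWritable;

public class NullWritableSingletonDemo {
    public static void main(String[] args) {
        // get() hands back the same shared instance every time
        NullWritable a = NullWritable.get();
        NullWritable b = NullWritable.get();
        System.out.println(a == b); // prints "true"
    }
}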

Usage

From source file:org.apache.giraph.hive.input.edge.examples.HiveIntNullEdge.java

License:Apache License

@Override
public NullWritable getEdgeValue(HiveReadableRecord hiveRecord) {
    return NullWritable.get();
}

From source file:org.apache.giraph.hive.input.vertex.examples.HiveIntNullNullVertex.java

License:Apache License

@Override
public NullWritable getVertexValue(HiveReadableRecord record) {
    return NullWritable.get();
}

From source file:org.apache.giraph.hive.output.HiveVertexWriter.java

License:Apache License

@Override
public void save(HiveWritableRecord record) throws IOException, InterruptedException {
    hiveRecordWriter.write(NullWritable.get(), record);
}
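
Here NullWritable.get() supplies a key that the underlying Hive record writer ignores. The same idiom appears in plain MapReduce jobs that want value-only output; a minimal sketch (the reducer name is illustrative):

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Emits value-only output: NullWritable serializes to nothing, so only
// the values reach the output files.
public class ValueOnlyReducer extends Reducer<Text, Text, NullWritable, Text> {
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        for (Text value : values) {
            context.write(NullWritable.get(), value);
        }
    }
}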

From source file:org.apache.giraph.partition.VertexTypeStore.java

License:Apache License

/**
 * For every vertex, send a message to each neighbour that is not
 * on the same partition. Required for directed graphs.
 *
 * This ensures that every vertex is categorized based on
 * both its in-edge and out-edge neighbours.
 */
public void sendDependencies() {
    // when to flush cache/send message to particular worker
    int maxMessagesSizePerWorker = GiraphConfiguration.MAX_MSG_REQUEST_SIZE.get(conf);

    // cache for messages that will be sent to neighbours
    // (much more efficient than using SendDataCache b/c
    //  we don't need to store/know dst partition ids)
    Int2ObjectOpenHashMap<VertexIdData<I, NullWritable>> msgCache = new Int2ObjectOpenHashMap<VertexIdData<I, NullWritable>>();

    int taskId = serviceWorker.getWorkerInfo().getTaskId();
    PartitionStore<I, V, E> pStore = serviceWorker.getPartitionStore();

    // don't need to synchronize, b/c this is all single threaded
    for (int partitionId : pStore.getPartitionIds()) {
        for (Vertex<I, V, E> vertex : pStore.getOrCreatePartition(partitionId)) {
            for (Edge<I, E> e : vertex.getEdges()) {
                PartitionOwner dstOwner = serviceWorker.getVertexPartitionOwner(e.getTargetVertexId());
                int dstPartitionId = dstOwner.getPartitionId();
                int dstTaskId = dstOwner.getWorkerInfo().getTaskId();

                if (dstPartitionId == partitionId) {
                    continue; // skip, no dependency
                } else if (taskId == dstTaskId) {
                    // local dependency
                    receiveDependency(e.getTargetVertexId(), true);
                } else {
                    // remote dependency
                    // We may send redundant dependency messages, but
                    // it's faster to send them than to prune them.
                    VertexIdData<I, NullWritable> messages = msgCache.get(dstTaskId);
                    if (messages == null) {
                        messages = new ByteArrayVertexIdNullData<I>();
                        messages.setConf(conf);
                        messages.initialize();
                        msgCache.put(dstTaskId, messages);
                    }

                    // no data---messages are vertex id only
                    messages.add(e.getTargetVertexId(), NullWritable.get());

                    if (messages.getSize() > maxMessagesSizePerWorker) {
                        msgCache.remove(dstTaskId);
                        serviceWorker.getWorkerClient().sendWritableRequest(dstTaskId,
                                new SendTokenDepRequest(messages));
                    }
                }
            }
        }
    }

    // send remaining messages
    for (int dstTaskId : msgCache.keySet()) {
        // no need to remove, map will be trashed entirely
        VertexIdData<I, NullWritable> messages = msgCache.get(dstTaskId);
        serviceWorker.getWorkerClient().sendWritableRequest(dstTaskId, new SendTokenDepRequest(messages));
    }

    // flush network
    serviceWorker.getWorkerClient().waitAllRequests();
}

From source file:org.apache.giraph.ranking.LinkRank.LinkRankComputation.java

License:Apache License

/**
 * Removes duplicate outgoing links.
 *
 * @param vertex vertex whose duplicate outgoing edges
 *               will be removed.
 */
public void removeDuplicateLinks(Vertex<Text, DoubleWritable, NullWritable> vertex) {
    String sourceUrl = vertex.getId().toString().trim();
    String targetUrl;
    Set<String> urls = new HashSet<String>();

    Iterable<Edge<Text, NullWritable>> outgoingEdges = vertex.getEdges();

    for (Edge<Text, NullWritable> edge : outgoingEdges) {
        targetUrl = edge.getTargetVertexId().toString().trim().split("#")[0];
        // if source != target (avoid self-links)
        if (!targetUrl.equalsIgnoreCase(sourceUrl)) {
            urls.add(targetUrl);
        }
    }

    ArrayList<Edge<Text, NullWritable>> newEdges = new ArrayList<Edge<Text, NullWritable>>();
    for (final String url : urls) {
        newEdges.add(new Edge<Text, NullWritable>() {
            @Override
            public Text getTargetVertexId() {
                return new Text(url);
            }

            @Override
            public NullWritable getValue() {
                return NullWritable.get();
            }
        });
    }

    if (!newEdges.isEmpty()) {
        vertex.setEdges(newEdges);
    }
}
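
The anonymous Edge subclass above works, but Giraph's EdgeFactory (used by the BspServiceWorkerForCohesiveSubgraph example further down this page) expresses the same construction more compactly; a hedged sketch of the equivalent loop:

// Equivalent edge construction using EdgeFactory
for (String url : urls) {
    newEdges.add(EdgeFactory.create(new Text(url), NullWritable.get()));
}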

From source file:org.apache.giraph.utils.ByteArrayVertexIdNullData.java

License:Apache License

@Override
public NullWritable createData() {
    return NullWritable.get();
}

From source file:org.apache.giraph.utils.TestTestGraph.java

License:Apache License

public static void addVertex(TestGraph<LongWritable, LongWritable, NullWritable> graph, long id, long value,
        long... neighbors) {
    Map.Entry<LongWritable, NullWritable>[] edges = new Map.Entry[neighbors.length];
    for (int i = 0; i < neighbors.length; i++) {
        edges[i] = new AbstractMap.SimpleEntry<>(new LongWritable(neighbors[i]), NullWritable.get());
    }
    graph.addVertex(new LongWritable(id), new LongWritable(value), edges);
}
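
A hypothetical call site, assuming a suitably typed TestGraph named graph:

// Vertex 1 with value 10 and unweighted edges to vertices 2 and 3
addVertex(graph, 1, 10, 2, 3);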

From source file:org.apache.giraph.utils.WritableUtils.java

License:Apache License

/**
 * Instantiate a new Writable, checking for NullWritable along the way.
 *
 * @param klass Class to instantiate
 * @param <W> type
 * @return new instance of class
 */
public static <W extends Writable> W createWritable(Class<W> klass) {
    if (NullWritable.class.equals(klass)) {
        return (W) NullWritable.get();
    } else {
        return ReflectionUtils.newInstance(klass);
    }
}
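
The special case is needed because NullWritable's constructor is private (it is a singleton), so reflective instantiation would fail for it. A brief usage sketch (variable names are illustrative):

// The NullWritable branch bypasses reflection and returns the singleton;
// any other Writable is instantiated reflectively.
NullWritable nothing = WritableUtils.createWritable(NullWritable.class);
LongWritable counter = WritableUtils.createWritable(LongWritable.class);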

From source file:org.apache.giraph.worker.BspServiceWorkerForCohesiveSubgraph.java

License:Apache License

/**
 * Save the vertices using the user-defined VertexOutputFormat from our
 * vertexArray based on the split.
 *
 * @param numLocalVertices Number of local vertices
 * @throws IOException
 * @throws InterruptedException
 */
private void saveVertices(long numLocalVertices) throws IOException, InterruptedException {
    if (getConfiguration().getVertexOutputFormatClass() == null) {
        LOG.warn("saveVertices: " + GiraphConstants.VERTEX_OUTPUT_FORMAT_CLASS
                + " not specified -- there will be no saved output");
        return;
    }
    if (getConfiguration().doOutputDuringComputation()) {
        if (LOG.isInfoEnabled()) {
            LOG.info("saveVertices: The option for doing output during "
                    + "computation is selected, so there will be no saving of the "
                    + "output in the end of application");
        }
        return;
    }

    int numThreads = Math.min(getConfiguration().getNumOutputThreads(), getPartitionStore().getNumPartitions());
    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO, "saveVertices: Starting to save "
            + numLocalVertices + " vertices " + "using " + numThreads + " threads");
    final VertexOutputFormat<I, V, E> vertexOutputFormat = getConfiguration().createVertexOutputFormat();
    CallableFactory<Void> callableFactory = new CallableFactory<Void>() {
        @Override
        public Callable<Void> newCallable(int callableId) {
            return new Callable<Void>() {
                @Override
                public Void call() throws Exception {
                    VertexWriter<I, V, E> vertexWriter = vertexOutputFormat.createVertexWriter(getContext());
                    vertexWriter.setConf(
                            (ImmutableClassesGiraphConfiguration<I, V, E, Writable>) getConfiguration());
                    vertexWriter.initialize(getContext());
                    long verticesWritten = 0;
                    long nextPrintVertices = 0;
                    long nextPrintMsecs = System.currentTimeMillis() + 15000;
                    int partitionIndex = 0;
                    int numPartitions = getPartitionStore().getNumPartitions();
                    for (Integer partitionId : getPartitionStore().getPartitionIds()) {
                        Partition<I, V, E, M> partition = getPartitionStore().getPartition(partitionId);

                        if (getConfiguration().getBoolean("giraph.ktruss.subgraph", false)) {
                            // Special for the ktruss output
                            BasicGraphStoreInterface gs = (BasicGraphStoreInterface) (partition);
                            for (BasicVertex rv : gs.getLocalVertex()) {
                                Vertex<I, V, E, M> vertex = getConfiguration().createVertex();
                                List<Edge<I, E>> edges = Lists.newLinkedList();
                                for (BasicEdge nb : rv.getNeighbors()) {
                                    edges.add(EdgeFactory.create(((I) new IntWritable(nb.getTargetId())),
                                            ((E) NullWritable.get())));
                                }
                                vertex.initialize((I) (new IntWritable(rv.getId())), ((V) new IntWritable(0)),
                                        edges);

                                vertexWriter.writeVertex(vertex);
                                ++verticesWritten;

                                // Update status at most every 250k vertices or 15 seconds
                                if (verticesWritten > nextPrintVertices
                                        && System.currentTimeMillis() > nextPrintMsecs) {
                                    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                                            "saveVertices: Saved " + verticesWritten + " out of "
                                                    + partition.getVertexCount() + " partition vertices, "
                                                    + "on partition " + partitionIndex + " out of "
                                                    + numPartitions);
                                    nextPrintMsecs = System.currentTimeMillis() + 15000;
                                    nextPrintVertices = verticesWritten + 250000;
                                }
                            }
                        } else {
                            for (Vertex<I, V, E, M> vertex : partition) {
                                vertexWriter.writeVertex(vertex);
                                ++verticesWritten;

                                // Update status at most every 250k vertices or 15 seconds
                                if (verticesWritten > nextPrintVertices
                                        && System.currentTimeMillis() > nextPrintMsecs) {
                                    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                                            "saveVertices: Saved " + verticesWritten + " out of "
                                                    + partition.getVertexCount() + " partition vertices, "
                                                    + "on partition " + partitionIndex + " out of "
                                                    + numPartitions);
                                    nextPrintMsecs = System.currentTimeMillis() + 15000;
                                    nextPrintVertices = verticesWritten + 250000;
                                }
                            }
                        }
                        ++partitionIndex;
                    }
                    vertexWriter.close(getContext()); // the temp results are saved now
                    return null;
                }
            };
        }
    };
    ProgressableUtils.getResultsWithNCallables(callableFactory, numThreads, "save-vertices-%d", getContext());

    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO, "saveVertices: Done saving vertices.");
    // YARN: we must commit the "task" output here, since Hadoop isn't there to do it.
    if (getConfiguration().isPureYarnJob() && getConfiguration().getVertexOutputFormatClass() != null) {
        try {
            OutputCommitter outputCommitter = vertexOutputFormat.getOutputCommitter(getContext());
            if (outputCommitter.needsTaskCommit(getContext())) {
                LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                        "OutputCommitter: committing task output.");
                // transfer from temp dirs to "task commit" dirs to prep for
                // the master's OutputCommitter#commitJob(context) call to finish.
                outputCommitter.commitTask(getContext());
            }
        } catch (InterruptedException ie) {
            LOG.error("Interrupted while attempting to obtain " + "OutputCommitter.", ie);
        } catch (IOException ioe) {
            LOG.error("Master task's attempt to commit output has " + "FAILED.", ioe);
        }
    }
}

From source file:org.apache.gobblin.compaction.mapreduce.avro.AvroKeyDedupReducer.java

License:Apache License

@Override
protected void initReusableObject() {
    outKey = new AvroKey<>();
    outValue = NullWritable.get();
}
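
Caching NullWritable.get() in outValue is safe because the instance is an immutable singleton, so it can be reused across every reduce call. A hedged sketch of how the reusable pair might be emitted (dedupedRecord and the surrounding reduce method are assumptions, not part of the original class):

// Reuse the cached wrapper and the NullWritable singleton on every write;
// NullWritable serializes to nothing, so only the Avro key is written.
outKey.datum(dedupedRecord);
context.write(outKey, outValue);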