List of usage examples for org.apache.hadoop.io.NullWritable.get()
public static NullWritable get()
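NullWritable carries no data and serializes to zero bytes, so get() hands out one shared immutable instance instead of constructing new objects. A minimal standalone sketch (the class name NullWritableDemo is ours, not from the examples below):

import org.apache.hadoop.io.NullWritable;

public class NullWritableDemo {
    public static void main(String[] args) {
        // get() always returns the same shared instance;
        // NullWritable is stateless, so nothing new is constructed.
        NullWritable a = NullWritable.get();
        NullWritable b = NullWritable.get();
        System.out.println(a == b); // true
        System.out.println(a);      // prints "(null)"
    }
}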
From source file: org.apache.giraph.hive.input.edge.examples.HiveIntNullEdge.java
License: Apache License

@Override
public NullWritable getEdgeValue(HiveReadableRecord hiveRecord) {
    return NullWritable.get();
}
From source file: org.apache.giraph.hive.input.vertex.examples.HiveIntNullNullVertex.java
License: Apache License

@Override
public NullWritable getVertexValue(HiveReadableRecord record) {
    return NullWritable.get();
}
From source file: org.apache.giraph.hive.output.HiveVertexWriter.java
License: Apache License

@Override
public void save(HiveWritableRecord record) throws IOException, InterruptedException {
    hiveRecordWriter.write(NullWritable.get(), record);
}
From source file: org.apache.giraph.partition.VertexTypeStore.java
License: Apache License

/**
 * For every vertex, send message to neighbours that are not
 * on the same partition. Required for directed graphs.
 *
 * This ensures that every vertex is categorized based on
 * both their in-edge and out-edge neighbours.
 */
public void sendDependencies() {
    // when to flush cache/send message to particular worker
    int maxMessagesSizePerWorker =
        GiraphConfiguration.MAX_MSG_REQUEST_SIZE.get(conf);

    // cache for messages that will be sent to neighbours
    // (much more efficient than using SendDataCache b/c
    // we don't need to store/know dst partition ids)
    Int2ObjectOpenHashMap<VertexIdData<I, NullWritable>> msgCache =
        new Int2ObjectOpenHashMap<VertexIdData<I, NullWritable>>();

    int taskId = serviceWorker.getWorkerInfo().getTaskId();
    PartitionStore<I, V, E> pStore = serviceWorker.getPartitionStore();

    // don't need to synchronize, b/c this is all single threaded
    for (int partitionId : pStore.getPartitionIds()) {
        for (Vertex<I, V, E> vertex : pStore.getOrCreatePartition(partitionId)) {
            for (Edge<I, E> e : vertex.getEdges()) {
                PartitionOwner dstOwner =
                    serviceWorker.getVertexPartitionOwner(e.getTargetVertexId());
                int dstPartitionId = dstOwner.getPartitionId();
                int dstTaskId = dstOwner.getWorkerInfo().getTaskId();

                if (dstPartitionId == partitionId) {
                    continue; // skip, no dependency
                } else if (taskId == dstTaskId) {
                    // local dependency
                    receiveDependency(e.getTargetVertexId(), true);
                } else {
                    // remote dependency
                    // We may send redundant dependency messages, but
                    // it's faster to send it than to prune it.
                    VertexIdData<I, NullWritable> messages = msgCache.get(dstTaskId);
                    if (messages == null) {
                        messages = new ByteArrayVertexIdNullData<I>();
                        messages.setConf(conf);
                        messages.initialize();
                        msgCache.put(dstTaskId, messages);
                    }
                    // no data---messages are vertex id only
                    messages.add(e.getTargetVertexId(), NullWritable.get());

                    if (messages.getSize() > maxMessagesSizePerWorker) {
                        msgCache.remove(dstTaskId);
                        serviceWorker.getWorkerClient().sendWritableRequest(
                            dstTaskId, new SendTokenDepRequest(messages));
                    }
                }
            }
        }
    }

    // send remaining messages
    for (int dstTaskId : msgCache.keySet()) {
        // no need to remove, map will be trashed entirely
        VertexIdData<I, NullWritable> messages = msgCache.get(dstTaskId);
        serviceWorker.getWorkerClient().sendWritableRequest(
            dstTaskId, new SendTokenDepRequest(messages));
    }

    // flush network
    serviceWorker.getWorkerClient().waitAllRequests();
}
From source file: org.apache.giraph.ranking.LinkRank.LinkRankComputation.java
License: Apache License

/**
 * Removes duplicate outgoing links.
 *
 * @param vertex vertex whose duplicate outgoing edges
 *               will be removed.
 */
public void removeDuplicateLinks(Vertex<Text, DoubleWritable, NullWritable> vertex) {
    String sourceUrl = vertex.getId().toString().trim();
    String targetUrl;
    Set<String> urls = new HashSet<String>();
    Iterable<Edge<Text, NullWritable>> outgoingEdges = vertex.getEdges();

    for (Edge<Text, NullWritable> edge : outgoingEdges) {
        targetUrl = edge.getTargetVertexId().toString().trim().split("#")[0];
        // if source != target (avoid self-links)
        if (!targetUrl.equalsIgnoreCase(sourceUrl)) {
            urls.add(targetUrl);
        }
    }

    ArrayList<Edge<Text, NullWritable>> newEdges =
        new ArrayList<Edge<Text, NullWritable>>();
    for (final String url : urls) {
        newEdges.add(new Edge<Text, NullWritable>() {
            @Override
            public Text getTargetVertexId() {
                return new Text(url);
            }

            @Override
            public NullWritable getValue() {
                return NullWritable.get();
            }
        });
    }

    if (newEdges.size() > 0) {
        vertex.setEdges(newEdges);
    }
}
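The anonymous Edge subclass above can be built more compactly with Giraph's EdgeFactory, which the BspServiceWorkerForCohesiveSubgraph example further down also uses. An equivalent sketch of the rebuild loop (our rewrite, not the original LinkRank code):

for (String url : urls) {
    // Same target id, and the value is still the NullWritable singleton.
    newEdges.add(EdgeFactory.create(new Text(url), NullWritable.get()));
}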
From source file: org.apache.giraph.utils.ByteArrayVertexIdNullData.java
License: Apache License

@Override
public NullWritable createData() {
    return NullWritable.get();
}
From source file: org.apache.giraph.utils.TestTestGraph.java
License: Apache License

public static void addVertex(TestGraph<LongWritable, LongWritable, NullWritable> graph,
        long id, long value, long... neighbors) {
    Map.Entry<LongWritable, NullWritable>[] edges = new Map.Entry[neighbors.length];
    for (int i = 0; i < neighbors.length; i++) {
        edges[i] = new AbstractMap.SimpleEntry<>(
            new LongWritable(neighbors[i]), NullWritable.get());
    }
    graph.addVertex(new LongWritable(id), new LongWritable(value), edges);
}
From source file: org.apache.giraph.utils.WritableUtils.java
License: Apache License

/**
 * Instantiate a new Writable, checking for NullWritable along the way.
 *
 * @param klass Class
 * @param <W> type
 * @return new instance of class
 */
public static <W extends Writable> W createWritable(Class<W> klass) {
    if (NullWritable.class.equals(klass)) {
        return (W) NullWritable.get();
    } else {
        return ReflectionUtils.newInstance(klass);
    }
}
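The explicit NullWritable branch matters because NullWritable has a private constructor, so reflective instantiation would fail. Hypothetical call sites illustrating both branches (variable names are ours):

// Returns the shared singleton without touching reflection.
NullWritable nw = WritableUtils.createWritable(NullWritable.class);
// Ordinary Writables fall through to ReflectionUtils.newInstance.
LongWritable lw = WritableUtils.createWritable(LongWritable.class);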
From source file: org.apache.giraph.worker.BspServiceWorkerForCohesiveSubgraph.java
License: Apache License

/**
 * Save the vertices using the user-defined VertexOutputFormat from our
 * vertexArray based on the split.
 *
 * @param numLocalVertices Number of local vertices
 * @throws InterruptedException
 */
private void saveVertices(long numLocalVertices) throws IOException, InterruptedException {
    if (getConfiguration().getVertexOutputFormatClass() == null) {
        LOG.warn("saveVertices: " + GiraphConstants.VERTEX_OUTPUT_FORMAT_CLASS +
            " not specified -- there will be no saved output");
        return;
    }
    if (getConfiguration().doOutputDuringComputation()) {
        if (LOG.isInfoEnabled()) {
            LOG.info("saveVertices: The option for doing output during " +
                "computation is selected, so there will be no saving of the " +
                "output in the end of application");
        }
        return;
    }

    int numThreads = Math.min(getConfiguration().getNumOutputThreads(),
        getPartitionStore().getNumPartitions());
    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
        "saveVertices: Starting to save " + numLocalVertices + " vertices " +
        "using " + numThreads + " threads");
    final VertexOutputFormat<I, V, E> vertexOutputFormat =
        getConfiguration().createVertexOutputFormat();

    CallableFactory<Void> callableFactory = new CallableFactory<Void>() {
        @Override
        public Callable<Void> newCallable(int callableId) {
            return new Callable<Void>() {
                @Override
                public Void call() throws Exception {
                    VertexWriter<I, V, E> vertexWriter =
                        vertexOutputFormat.createVertexWriter(getContext());
                    vertexWriter.setConf(
                        (ImmutableClassesGiraphConfiguration<I, V, E, Writable>) getConfiguration());
                    vertexWriter.initialize(getContext());

                    long verticesWritten = 0;
                    long nextPrintVertices = 0;
                    long nextPrintMsecs = System.currentTimeMillis() + 15000;
                    int partitionIndex = 0;
                    int numPartitions = getPartitionStore().getNumPartitions();

                    for (Integer partitionId : getPartitionStore().getPartitionIds()) {
                        Partition<I, V, E, M> partition =
                            getPartitionStore().getPartition(partitionId);
                        if (getConfiguration().getBoolean("giraph.ktruss.subgraph", false)) {
                            // Special for the ktruss output
                            BasicGraphStoreInterface gs = (BasicGraphStoreInterface) (partition);
                            for (BasicVertex rv : gs.getLocalVertex()) {
                                Vertex<I, V, E, M> vertex = getConfiguration().createVertex();

                                List<Edge<I, E>> edges = Lists.newLinkedList();
                                for (BasicEdge nb : rv.getNeighbors()) {
                                    edges.add(EdgeFactory.create(
                                        ((I) new IntWritable(nb.getTargetId())),
                                        ((E) NullWritable.get())));
                                }
                                vertex.initialize((I) (new IntWritable(rv.getId())),
                                    ((V) new IntWritable(0)), edges);

                                vertexWriter.writeVertex(vertex);
                                ++verticesWritten;

                                // Update status at most every 250k vertices or 15 seconds
                                if (verticesWritten > nextPrintVertices &&
                                    System.currentTimeMillis() > nextPrintMsecs) {
                                    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                                        "saveVertices: Saved " + verticesWritten + " out of " +
                                        partition.getVertexCount() + " partition vertices, " +
                                        "on partition " + partitionIndex +
                                        " out of " + numPartitions);
                                    nextPrintMsecs = System.currentTimeMillis() + 15000;
                                    nextPrintVertices = verticesWritten + 250000;
                                }
                            }
                        } else {
                            for (Vertex<I, V, E, M> vertex : partition) {
                                vertexWriter.writeVertex(vertex);
                                ++verticesWritten;

                                // Update status at most every 250k vertices or 15 seconds
                                if (verticesWritten > nextPrintVertices &&
                                    System.currentTimeMillis() > nextPrintMsecs) {
                                    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                                        "saveVertices: Saved " + verticesWritten + " out of " +
                                        partition.getVertexCount() + " partition vertices, " +
                                        "on partition " + partitionIndex +
                                        " out of " + numPartitions);
                                    nextPrintMsecs = System.currentTimeMillis() + 15000;
                                    nextPrintVertices = verticesWritten + 250000;
                                }
                            }
                        }
                        ++partitionIndex;
                    }
                    vertexWriter.close(getContext()); // the temp results are saved now
                    return null;
                }
            };
        }
    };
    ProgressableUtils.getResultsWithNCallables(callableFactory, numThreads,
        "save-vertices-%d", getContext());

    LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
        "saveVertices: Done saving vertices.");

    // YARN: must complete the commit of the "task" output; Hadoop isn't there to do it.
    if (getConfiguration().isPureYarnJob() &&
        getConfiguration().getVertexOutputFormatClass() != null) {
        try {
            OutputCommitter outputCommitter =
                vertexOutputFormat.getOutputCommitter(getContext());
            if (outputCommitter.needsTaskCommit(getContext())) {
                LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
                    "OutputCommitter: committing task output.");
                // transfer from temp dirs to "task commit" dirs to prep for
                // the master's OutputCommitter#commitJob(context) call to finish.
                outputCommitter.commitTask(getContext());
            }
        } catch (InterruptedException ie) {
            LOG.error("Interrupted while attempting to obtain OutputCommitter.", ie);
        } catch (IOException ioe) {
            LOG.error("Master task's attempt to commit output has FAILED.", ioe);
        }
    }
}
From source file: org.apache.gobblin.compaction.mapreduce.avro.AvroKeyDedupReducer.java
License: Apache License

@Override
protected void initReusableObject() {
    outKey = new AvroKey<>();
    outValue = NullWritable.get();
}
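Reusing the NullWritable singleton as the reducer's output value is the usual pattern when the whole payload lives in the key: the value contributes zero bytes to the output. A hedged sketch of how such a dedup reducer might emit one record per key (the reduce signature and generic types here are illustrative, not Gobblin's exact code):

@Override
protected void reduce(AvroKey<GenericRecord> key, Iterable<AvroValue<GenericRecord>> values,
        Context context) throws IOException, InterruptedException {
    // Keep one record per dedup key; outValue is the NullWritable singleton
    // set up in initReusableObject above.
    outKey.datum(values.iterator().next().datum());
    context.write(outKey, outValue);
}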