List of usage examples for the clone method of org.apache.hadoop.io.WritableUtils
public static <T extends Writable> T clone(T orig, Configuration conf)
From source file:org.apache.giraph.tools.graphanalytics.semiclustering.SemiClusteringVertex.java
License:Apache License
private void initClusters() throws IOException { List<Vertex<IntWritable, SemiClusterMessage, DoubleWritable, SemiClusterMessage>> lV = new ArrayList<Vertex<IntWritable, SemiClusterMessage, DoubleWritable, SemiClusterMessage>>(); lV.add(WritableUtils.clone(this, this.getConf())); String newClusterName = "C" + createNewSemiClusterName(lV); SemiClusterMessage initialClusters = new SemiClusterMessage(); initialClusters.setSemiClusterId(newClusterName); initialClusters.addVertexList(lV);//from w w w . j a v a2s . c om initialClusters.setScore(1); this.sendMessageToAllEdges(initialClusters); Set<SemiClusterDetails> scList = new TreeSet<SemiClusterDetails>(); scList.add(new SemiClusterDetails(newClusterName, 1.0)); SemiClusterMessage vertexValue = new SemiClusterMessage(); vertexValue.setSemiClusterContainThis(scList); this.setValue(vertexValue); }
From source file:org.apache.giraph.utils.EdgeIterables.java
License:Apache License
/**
 * Builds an {@link ArrayList} holding an independent copy of every edge in
 * the given iterable.
 *
 * <p>Note: this method is slow, because each target vertex id and edge value
 * has to be serialized and deserialized to be copied. It should only be used
 * in unit tests.
 *
 * @param edges Iterable of edges
 * @param <I> Vertex id
 * @param <E> Edge value
 * @return A new list with copies of all the edges
 */
public static <I extends WritableComparable, E extends WritableComparable> ArrayList<Edge<I, E>> copy(
    Iterable<Edge<I, E>> edges) {
  Configuration configuration = new Configuration();
  // Pre-size the result from the number of edges in the source iterable.
  int expectedSize = Iterables.size(edges);
  ArrayList<Edge<I, E>> copies = new ArrayList<Edge<I, E>>(expectedSize);
  for (Edge<I, E> original : edges) {
    I targetCopy = WritableUtils.clone(original.getTargetVertexId(), configuration);
    E valueCopy = WritableUtils.clone(original.getValue(), configuration);
    copies.add(EdgeFactory.create(targetCopy, valueCopy));
  }
  return copies;
}
From source file:org.apache.hama.ml.semiclustering.SemiClusteringVertex.java
License:Apache License
/** * The user overrides the Compute() method, which will be executed at each * active vertex in every superstep/*from w ww . j a va 2s . com*/ */ @Override public void compute(Iterable<SemiClusterMessage> messages) throws IOException { if (this.getSuperstepCount() == 0) { initClusters(); } if (this.getSuperstepCount() >= 1) { TreeSet<SemiClusterMessage> candidates = new TreeSet<SemiClusterMessage>(); for (SemiClusterMessage msg : messages) { candidates.add(msg); if (!msg.contains(this.getVertexID()) && msg.size() == semiClusterMaximumVertexCount) { SemiClusterMessage msgNew = WritableUtils.clone(msg, this.getConf()); msgNew.addVertex(this); msgNew.setSemiClusterId("C" + createNewSemiClusterName(msgNew.getVertexList())); msgNew.setScore(semiClusterScoreCalcuation(msgNew)); candidates.add(msgNew); } } Iterator<SemiClusterMessage> bestCandidates = candidates.descendingIterator(); int count = 0; while (bestCandidates.hasNext() && count < graphJobMessageSentCount) { SemiClusterMessage candidate = bestCandidates.next(); sendMessageToNeighbors(candidate); count++; } // Update candidates SemiClusterMessage value = this.getValue(); Set<SemiClusterDetails> clusters = value.getSemiClusterContainThis(); for (SemiClusterMessage msg : candidates) { if (clusters.size() > graphJobVertexMaxClusterCount) { break; } else { clusters.add(new SemiClusterDetails(msg.getSemiClusterId(), msg.getScore())); } } value.setClusters(clusters, graphJobVertexMaxClusterCount); this.setValue(value); } }
From source file:org.apache.hama.ml.semiclustering.SemiClusteringVertex.java
License:Apache License
private void initClusters() throws IOException { List<Vertex<Text, DoubleWritable, SemiClusterMessage>> lV = new ArrayList<Vertex<Text, DoubleWritable, SemiClusterMessage>>(); lV.add(WritableUtils.clone(this, this.getConf())); String newClusterName = "C" + createNewSemiClusterName(lV); SemiClusterMessage initialClusters = new SemiClusterMessage(); initialClusters.setSemiClusterId(newClusterName); initialClusters.addVertexList(lV);//from w w w . j a v a2 s.c om initialClusters.setScore(1); sendMessageToNeighbors(initialClusters); Set<SemiClusterDetails> scList = new TreeSet<SemiClusterDetails>(); scList.add(new SemiClusterDetails(newClusterName, 1.0)); SemiClusterMessage vertexValue = new SemiClusterMessage(); vertexValue.setSemiClusterContainThis(scList); this.setValue(vertexValue); }
From source file:org.apache.nutch.indexer.field.FieldIndexer.java
License:Apache License
/**
 * Gathers copies of all field values for a key, passes them through the
 * field filters and emits the resulting document, if there is one.
 *
 * @param key key whose string form is handed to the field filters
 * @param values field values grouped under the key
 * @param output collector that receives the wrapped document
 * @param reporter progress reporter (not used in this method)
 * @throws IOException if the field filters raise an IndexingException
 */
public void reduce(Text key, Iterator<FieldWritable> values,
    OutputCollector<Text, LuceneDocumentWrapper> output, Reporter reporter) throws IOException {
  Document emptyDoc = new Document();
  List<FieldWritable> copies = new ArrayList<FieldWritable>();
  Configuration jobConf = getConf();

  // Store a clone of each value rather than the instance handed out by the
  // iterator (presumably because the framework reuses it - confirm).
  while (values.hasNext()) {
    copies.add((FieldWritable) WritableUtils.clone(values.next(), jobConf));
  }

  Document filtered;
  try {
    filtered = fieldFilters.filter(key.toString(), emptyDoc, copies);
  } catch (IndexingException e) {
    throw new IOException(e);
  }

  // Nothing is emitted when the filters return no document.
  if (filtered == null) {
    return;
  }
  output.collect(key, new LuceneDocumentWrapper(filtered));
}
From source file:org.apache.nutch.tools.compat.ReprUrlFixer.java
License:Apache License
/** * Runs the new ReprUrl logic on all crawldatums. */// w ww .ja va 2s . c o m public void reduce(Text key, Iterator<CrawlDatum> values, OutputCollector<Text, CrawlDatum> output, Reporter reporter) throws IOException { String url = key.toString(); Node node = null; List<CrawlDatum> datums = new ArrayList<CrawlDatum>(); // get all crawl datums for a given url key, fetch for instance can have // more than one under a given key if there are multiple redirects to a // given url while (values.hasNext()) { CrawlDatum datum = values.next(); datums.add((CrawlDatum) WritableUtils.clone(datum, conf)); } // apply redirect repr url logic for each datum for (CrawlDatum datum : datums) { MapWritable metadata = datum.getMetaData(); Text reprUrl = (Text) metadata.get(Nutch.WRITABLE_REPR_URL_KEY); byte status = datum.getStatus(); boolean isCrawlDb = (CrawlDatum.hasDbStatus(datum)); boolean segFetched = (status == CrawlDatum.STATUS_FETCH_SUCCESS); // only if the crawl datum is from the crawldb or is a successfully // fetched page from the segments if ((isCrawlDb || segFetched) && reprUrl != null) { String src = reprUrl.toString(); String dest = url; URL srcUrl = null; URL dstUrl = null; // both need to be well formed urls try { srcUrl = new URL(src); dstUrl = new URL(url); } catch (MalformedURLException e) { } // if the src and repr urls are the same after the new logic then // remove the repr url from the metadata as it is no longer needed if (srcUrl != null && dstUrl != null) { String reprOut = URLUtil.chooseRepr(src, dest, true); if (reprOut.equals(dest)) { LOG.info("Removing " + reprOut + " from " + dest); metadata.remove(Nutch.WRITABLE_REPR_URL_KEY); } } } // collect each datum output.collect(key, datum); } }
From source file:rugal.hadoop.repartition.enhanced.impl.IntermediateData.java
License:Apache License
/**
 * Produces a copy of this object by delegating to
 * {@link WritableUtils#clone}.
 *
 * @param job job configuration passed to the copy routine
 * @return the copy returned by {@code WritableUtils.clone}
 */
public IntermediateData clone(JobConf job) {
  final IntermediateData duplicate = WritableUtils.clone(this, job);
  return duplicate;
}