Example usage for org.apache.hadoop.io WritableUtils clone

Introduction

On this page you can find example usages of org.apache.hadoop.io.WritableUtils.clone.

Prototype

public static <T extends Writable> T clone(T orig, Configuration conf)

Source Link

Document

Make a copy of a writable object using serialization to a buffer.

Usage

From source file:org.apache.giraph.tools.graphanalytics.semiclustering.SemiClusteringVertex.java

License:Apache License

/**
 * Creates the initial semi-cluster for this vertex: a cluster whose only
 * member is a copy of this vertex (made with {@code WritableUtils.clone}),
 * hands it to {@code sendMessageToAllEdges}, and stores a matching
 * single-entry cluster set as the vertex value.
 *
 * @throws IOException propagated from the calls made here
 */
private void initClusters() throws IOException {
    // The member list of the initial cluster holds just a copy of this vertex.
    List<Vertex<IntWritable, SemiClusterMessage, DoubleWritable, SemiClusterMessage>> members = new ArrayList<Vertex<IntWritable, SemiClusterMessage, DoubleWritable, SemiClusterMessage>>();
    members.add(WritableUtils.clone(this, this.getConf()));
    final String clusterId = "C" + createNewSemiClusterName(members);

    // Message announcing the new cluster, with an initial score of 1.
    SemiClusterMessage seed = new SemiClusterMessage();
    seed.setSemiClusterId(clusterId);
    seed.addVertexList(members);
    seed.setScore(1);
    this.sendMessageToAllEdges(seed);

    // Record the same cluster (id and score 1.0) as this vertex's own value.
    Set<SemiClusterDetails> ownClusters = new TreeSet<SemiClusterDetails>();
    ownClusters.add(new SemiClusterDetails(clusterId, 1.0));
    SemiClusterMessage selfValue = new SemiClusterMessage();
    selfValue.setSemiClusterContainThis(ownClusters);
    this.setValue(selfValue);
}

From source file:org.apache.giraph.utils.EdgeIterables.java

License:Apache License

/**
 * Builds an {@link ArrayList} holding an independent copy of every edge in
 * the given iterable.
 * Note: this method is slow, since every target vertex id and edge value is
 * copied with {@code WritableUtils.clone}, i.e. serialized to a buffer and
 * read back. It should only be used in unit tests.
 *
 * @param edges Iterable of edges
 * @param <I> Vertex id
 * @param <E> Edge value
 * @return A new list with copies of all the edges
 */
public static <I extends WritableComparable, E extends WritableComparable> ArrayList<Edge<I, E>> copy(
        Iterable<Edge<I, E>> edges) {
    final Configuration cloneConf = new Configuration();
    // Pre-size the result; note that this walks the iterable once up front.
    final int expectedSize = Iterables.size(edges);
    ArrayList<Edge<I, E>> copies = new ArrayList<Edge<I, E>>(expectedSize);
    for (Edge<I, E> original : edges) {
        I targetIdCopy = WritableUtils.clone(original.getTargetVertexId(), cloneConf);
        E valueCopy = WritableUtils.clone(original.getValue(), cloneConf);
        copies.add(EdgeFactory.create(targetIdCopy, valueCopy));
    }
    return copies;
}

From source file:org.apache.hama.ml.semiclustering.SemiClusteringVertex.java

License:Apache License

/**
 * The user overrides the Compute() method, which will be executed at each
 * active vertex in every superstep/*from   w ww .  j  a va 2s . com*/
 */
@Override
public void compute(Iterable<SemiClusterMessage> messages) throws IOException {
    if (this.getSuperstepCount() == 0) {
        initClusters();
    }

    if (this.getSuperstepCount() >= 1) {
        TreeSet<SemiClusterMessage> candidates = new TreeSet<SemiClusterMessage>();

        for (SemiClusterMessage msg : messages) {
            candidates.add(msg);

            if (!msg.contains(this.getVertexID()) && msg.size() == semiClusterMaximumVertexCount) {
                SemiClusterMessage msgNew = WritableUtils.clone(msg, this.getConf());
                msgNew.addVertex(this);
                msgNew.setSemiClusterId("C" + createNewSemiClusterName(msgNew.getVertexList()));
                msgNew.setScore(semiClusterScoreCalcuation(msgNew));

                candidates.add(msgNew);
            }
        }

        Iterator<SemiClusterMessage> bestCandidates = candidates.descendingIterator();
        int count = 0;

        while (bestCandidates.hasNext() && count < graphJobMessageSentCount) {
            SemiClusterMessage candidate = bestCandidates.next();
            sendMessageToNeighbors(candidate);
            count++;
        }

        // Update candidates
        SemiClusterMessage value = this.getValue();
        Set<SemiClusterDetails> clusters = value.getSemiClusterContainThis();
        for (SemiClusterMessage msg : candidates) {
            if (clusters.size() > graphJobVertexMaxClusterCount) {
                break;
            } else {
                clusters.add(new SemiClusterDetails(msg.getSemiClusterId(), msg.getScore()));
            }
        }

        value.setClusters(clusters, graphJobVertexMaxClusterCount);
        this.setValue(value);
    }
}

From source file:org.apache.hama.ml.semiclustering.SemiClusteringVertex.java

License:Apache License

/**
 * Creates the initial semi-cluster for this vertex: a cluster whose only
 * member is a copy of this vertex (made with {@code WritableUtils.clone}),
 * hands it to {@code sendMessageToNeighbors}, and stores a matching
 * single-entry cluster set as the vertex value.
 *
 * @throws IOException propagated from the calls made here
 */
private void initClusters() throws IOException {
    // The member list of the initial cluster holds just a copy of this vertex.
    List<Vertex<Text, DoubleWritable, SemiClusterMessage>> members = new ArrayList<Vertex<Text, DoubleWritable, SemiClusterMessage>>();
    members.add(WritableUtils.clone(this, this.getConf()));
    final String clusterId = "C" + createNewSemiClusterName(members);

    // Message announcing the new cluster, with an initial score of 1.
    SemiClusterMessage seed = new SemiClusterMessage();
    seed.setSemiClusterId(clusterId);
    seed.addVertexList(members);
    seed.setScore(1);
    sendMessageToNeighbors(seed);

    // Record the same cluster (id and score 1.0) as this vertex's own value.
    Set<SemiClusterDetails> ownClusters = new TreeSet<SemiClusterDetails>();
    ownClusters.add(new SemiClusterDetails(clusterId, 1.0));
    SemiClusterMessage selfValue = new SemiClusterMessage();
    selfValue.setSemiClusterContainThis(ownClusters);
    this.setValue(selfValue);
}

From source file:org.apache.nutch.indexer.field.FieldIndexer.java

License:Apache License

/**
 * Gathers a copy of every field value for the key, passes the copies through
 * {@code fieldFilters} together with a fresh document, and emits the filtered
 * document wrapped in a {@code LuceneDocumentWrapper} unless the filters
 * returned {@code null}.
 *
 * @param key key under which the fields were grouped; also the output key
 * @param values field values for the key
 * @param output collector that receives the wrapped document
 * @param reporter not used by this method
 * @throws IOException if filtering fails with an {@code IndexingException}
 *         (wrapped as the cause), or propagated from the collector
 */
public void reduce(Text key, Iterator<FieldWritable> values,
        OutputCollector<Text, LuceneDocumentWrapper> output, Reporter reporter) throws IOException {

    Document emptyDoc = new Document();
    List<FieldWritable> copiedFields = new ArrayList<FieldWritable>();
    Configuration cloneConf = getConf();

    // Store a copy of each value rather than the instance handed out by the iterator.
    for (; values.hasNext();) {
        FieldWritable current = values.next();
        copiedFields.add((FieldWritable) WritableUtils.clone(current, cloneConf));
    }

    Document filtered;
    try {
        filtered = fieldFilters.filter(key.toString(), emptyDoc, copiedFields);
    } catch (IndexingException e) {
        throw new IOException(e);
    }

    // Nothing is emitted when the filters return no document.
    if (filtered == null) {
        return;
    }
    output.collect(key, new LuceneDocumentWrapper(filtered));
}

From source file:org.apache.nutch.tools.compat.ReprUrlFixer.java

License:Apache License

/**
 * Runs the new ReprUrl logic on all crawldatums.
 * <p>
 * Every datum under the key is copied, checked, and then collected again
 * under the same key. For a datum that has a repr url in its metadata and
 * either has a db status or the fetch-success status, the repr url entry is
 * removed from the metadata when {@code URLUtil.chooseRepr} picks the key's
 * own url.
 *
 * @param key the url the datums are grouped under
 * @param values crawl datums for that url
 * @param output collector that receives every datum
 * @param reporter not used by this method
 * @throws IOException propagated from the collector
 */
public void reduce(Text key, Iterator<CrawlDatum> values, OutputCollector<Text, CrawlDatum> output,
        Reporter reporter) throws IOException {

    final String url = key.toString();
    List<CrawlDatum> copies = new ArrayList<CrawlDatum>();

    // get all crawl datums for a given url key, fetch for instance can have
    // more than one under a given key if there are multiple redirects to a
    // given url
    while (values.hasNext()) {
        copies.add((CrawlDatum) WritableUtils.clone(values.next(), conf));
    }

    // apply redirect repr url logic for each datum
    for (CrawlDatum datum : copies) {

        MapWritable metadata = datum.getMetaData();
        Text reprUrl = (Text) metadata.get(Nutch.WRITABLE_REPR_URL_KEY);
        byte status = datum.getStatus();
        boolean fromCrawlDb = CrawlDatum.hasDbStatus(datum);
        boolean fetchedOk = status == CrawlDatum.STATUS_FETCH_SUCCESS;

        // only if the crawl datum is from the crawldb or is a successfully
        // fetched page from the segments
        if (reprUrl != null && (fromCrawlDb || fetchedOk)) {

            String src = reprUrl.toString();

            // both need to be well formed urls
            boolean bothWellFormed;
            try {
                new URL(src);
                new URL(url);
                bothWellFormed = true;
            } catch (MalformedURLException ignored) {
                // deliberately not rethrown: a malformed url only means the
                // repr url check is skipped for this datum
                bothWellFormed = false;
            }

            // if the src and repr urls are the same after the new logic then
            // remove the repr url from the metadata as it is no longer needed
            if (bothWellFormed) {
                String reprOut = URLUtil.chooseRepr(src, url, true);
                if (reprOut.equals(url)) {
                    LOG.info("Removing " + reprOut + " from " + url);
                    metadata.remove(Nutch.WRITABLE_REPR_URL_KEY);
                }
            }
        }

        // collect each datum
        output.collect(key, datum);
    }

}

From source file:rugal.hadoop.repartition.enhanced.impl.IntermediateData.java

License:Apache License

/**
 * Returns a copy of this object produced by {@code WritableUtils.clone}.
 *
 * @param job configuration passed on to {@code WritableUtils.clone}
 * @return the copy of this instance
 */
public IntermediateData clone(JobConf job) {
    final IntermediateData duplicate = WritableUtils.clone(this, job);
    return duplicate;
}