Example usage for org.apache.hadoop.io WritableUtils clone

Introduction

On this page you can find example usages of org.apache.hadoop.io.WritableUtils.clone.

Prototype

public static <T extends Writable> T clone(T orig, Configuration conf) 

Document

Make a copy of a writable object using serialization to a buffer.
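
Before the real-world examples below, here is a minimal, self-contained sketch of the call; the class name and the Text payload are illustrative, not taken from any of the source files:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableUtils;

public class WritableCloneExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        Text original = new Text("hello");

        // clone() writes the original to an in-memory buffer and reads it
        // back into a fresh instance, so the copy shares no state with it.
        Text copy = WritableUtils.clone(original, conf);

        original.set("changed");
        System.out.println(copy); // still prints "hello"
    }
}

Because the copy is made through Writable serialization rather than Object.clone(), it works for any Writable type, at the cost of a full serialize/deserialize round trip.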

Usage

From source file: org.apache.giraph.tools.graphanalytics.semiclustering.SemiClusteringVertex.java

License: Apache License

private void initClusters() throws IOException {
    List<Vertex<IntWritable, SemiClusterMessage, DoubleWritable, SemiClusterMessage>> lV = new ArrayList<Vertex<IntWritable, SemiClusterMessage, DoubleWritable, SemiClusterMessage>>();
    lV.add(WritableUtils.clone(this, this.getConf()));
    String newClusterName = "C" + createNewSemiClusterName(lV);
    SemiClusterMessage initialClusters = new SemiClusterMessage();
    initialClusters.setSemiClusterId(newClusterName);
    initialClusters.addVertexList(lV);
    initialClusters.setScore(1);

    this.sendMessageToAllEdges(initialClusters);

    Set<SemiClusterDetails> scList = new TreeSet<SemiClusterDetails>();
    scList.add(new SemiClusterDetails(newClusterName, 1.0));
    SemiClusterMessage vertexValue = new SemiClusterMessage();
    vertexValue.setSemiClusterContainThis(scList);
    this.setValue(vertexValue);
}

From source file: org.apache.giraph.utils.EdgeIterables.java

License: Apache License

/**
 * Make a deep copy of an edge iterable and return it as an {@link
 * ArrayList}.
 * Note: this method is slow since it has to serialize and deserialize
 * all the ids and values. It should only be used in unit tests.
 *
 * @param edges Iterable of edges
 * @param <I> Vertex id
 * @param <E> Edge value
 * @return A new list with copies of all the edges
 */
public static <I extends WritableComparable, E extends WritableComparable> ArrayList<Edge<I, E>> copy(
        Iterable<Edge<I, E>> edges) {
    Configuration conf = new Configuration();
    ArrayList<Edge<I, E>> edgeList = new ArrayList<Edge<I, E>>(Iterables.size(edges));
    for (Edge<I, E> edge : edges) {
        edgeList.add(EdgeFactory.create(WritableUtils.clone(edge.getTargetVertexId(), conf),
                WritableUtils.clone(edge.getValue(), conf)));
    }
    return edgeList;
}

From source file: org.apache.hama.ml.semiclustering.SemiClusteringVertex.java

License: Apache License

/**
 * The user overrides the compute() method, which will be executed at
 * each active vertex in every superstep.
 */
@Override
public void compute(Iterable<SemiClusterMessage> messages) throws IOException {
    if (this.getSuperstepCount() == 0) {
        initClusters();
    }

    if (this.getSuperstepCount() >= 1) {
        TreeSet<SemiClusterMessage> candidates = new TreeSet<SemiClusterMessage>();

        for (SemiClusterMessage msg : messages) {
            candidates.add(msg);

            if (!msg.contains(this.getVertexID()) && msg.size() == semiClusterMaximumVertexCount) {
                SemiClusterMessage msgNew = WritableUtils.clone(msg, this.getConf());
                msgNew.addVertex(this);
                msgNew.setSemiClusterId("C" + createNewSemiClusterName(msgNew.getVertexList()));
                msgNew.setScore(semiClusterScoreCalcuation(msgNew));

                candidates.add(msgNew);
            }
        }

        Iterator<SemiClusterMessage> bestCandidates = candidates.descendingIterator();
        int count = 0;

        while (bestCandidates.hasNext() && count < graphJobMessageSentCount) {
            SemiClusterMessage candidate = bestCandidates.next();
            sendMessageToNeighbors(candidate);
            count++;
        }

        // Update the set of clusters containing this vertex from the candidates
        SemiClusterMessage value = this.getValue();
        Set<SemiClusterDetails> clusters = value.getSemiClusterContainThis();
        for (SemiClusterMessage msg : candidates) {
            if (clusters.size() > graphJobVertexMaxClusterCount) {
                break;
            } else {
                clusters.add(new SemiClusterDetails(msg.getSemiClusterId(), msg.getScore()));
            }
        }

        value.setClusters(clusters, graphJobVertexMaxClusterCount);
        this.setValue(value);
    }
}

From source file: org.apache.hama.ml.semiclustering.SemiClusteringVertex.java

License: Apache License

private void initClusters() throws IOException {
    List<Vertex<Text, DoubleWritable, SemiClusterMessage>> lV = new ArrayList<Vertex<Text, DoubleWritable, SemiClusterMessage>>();
    lV.add(WritableUtils.clone(this, this.getConf()));
    String newClusterName = "C" + createNewSemiClusterName(lV);
    SemiClusterMessage initialClusters = new SemiClusterMessage();
    initialClusters.setSemiClusterId(newClusterName);
    initialClusters.addVertexList(lV);
    initialClusters.setScore(1);

    sendMessageToNeighbors(initialClusters);

    Set<SemiClusterDetails> scList = new TreeSet<SemiClusterDetails>();
    scList.add(new SemiClusterDetails(newClusterName, 1.0));
    SemiClusterMessage vertexValue = new SemiClusterMessage();
    vertexValue.setSemiClusterContainThis(scList);
    this.setValue(vertexValue);
}

From source file: org.apache.nutch.indexer.field.FieldIndexer.java

License: Apache License

public void reduce(Text key, Iterator<FieldWritable> values,
        OutputCollector<Text, LuceneDocumentWrapper> output, Reporter reporter) throws IOException {

    Document doc = new Document();
    List<FieldWritable> fieldsList = new ArrayList<FieldWritable>();
    Configuration conf = getConf();

    while (values.hasNext()) {
        FieldWritable field = values.next();
        fieldsList.add((FieldWritable) WritableUtils.clone(field, conf));
    }

    try {
        doc = fieldFilters.filter(key.toString(), doc, fieldsList);
    } catch (IndexingException e) {
        throw new IOException(e);
    }

    if (doc != null) {
        output.collect(key, new LuceneDocumentWrapper(doc));
    }
}

From source file: org.apache.nutch.tools.compat.ReprUrlFixer.java

License: Apache License

/**
 * Runs the new ReprUrl logic on all crawl datums.
 */
public void reduce(Text key, Iterator<CrawlDatum> values, OutputCollector<Text, CrawlDatum> output,
        Reporter reporter) throws IOException {

    String url = key.toString();
    Node node = null;
    List<CrawlDatum> datums = new ArrayList<CrawlDatum>();

    // get all crawl datums for a given url key, fetch for instance can have
    // more than one under a given key if there are multiple redirects to a
    // given url
    while (values.hasNext()) {
        CrawlDatum datum = values.next();
        datums.add((CrawlDatum) WritableUtils.clone(datum, conf));
    }

    // apply redirect repr url logic for each datum
    for (CrawlDatum datum : datums) {

        MapWritable metadata = datum.getMetaData();
        Text reprUrl = (Text) metadata.get(Nutch.WRITABLE_REPR_URL_KEY);
        byte status = datum.getStatus();
        boolean isCrawlDb = (CrawlDatum.hasDbStatus(datum));
        boolean segFetched = (status == CrawlDatum.STATUS_FETCH_SUCCESS);

        // only if the crawl datum is from the crawldb or is a successfully
        // fetched page from the segments
        if ((isCrawlDb || segFetched) && reprUrl != null) {

            String src = reprUrl.toString();
            String dest = url;
            URL srcUrl = null;
            URL dstUrl = null;

            // both need to be well formed urls
            try {
                srcUrl = new URL(src);
                dstUrl = new URL(url);
            } catch (MalformedURLException e) {
                // ignore: leaving srcUrl/dstUrl null skips the repr url check below
            }

            // if the src and repr urls are the same after the new logic then
            // remove the repr url from the metadata as it is no longer needed
            if (srcUrl != null && dstUrl != null) {
                String reprOut = URLUtil.chooseRepr(src, dest, true);
                if (reprOut.equals(dest)) {
                    LOG.info("Removing " + reprOut + " from " + dest);
                    metadata.remove(Nutch.WRITABLE_REPR_URL_KEY);
                }
            }
        }

        // collect each datum
        output.collect(key, datum);
    }

}

From source file: rugal.hadoop.repartition.enhanced.impl.IntermediateData.java

License: Apache License

public IntermediateData clone(JobConf job) {
    return WritableUtils.clone(this, job);
}