trustframework.evidence.github.ConversationMimicry.java Source code

Java tutorial

Introduction

Here is the source code for trustframework.evidence.github.ConversationMimicry.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package trustframework.evidence.github;

import java.io.IOException;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import org.eclipse.egit.github.core.PullRequest;
import trustframework.data.github.GitComment;
import trustframework.data.github.ProjectData;
import trustframework.evidence.EvidenceAnalyser;
import trustframework.exceptions.github.InvalidProjectData;
import trustframework.utils.Similarity;
import trustframework.graph.TFEdge;
import trustframework.graph.TFGraph;

/**
 *
 * @author guilherme
 */
public class ConversationMimicry implements EvidenceAnalyser {

    private double weigth;
    private static final String EVIDENCE_LABEL = "Mimicry";

    public ConversationMimicry(double weigth) {
        this.weigth = weigth;
    }

    @Override
    public void analyseEvidence(Object projectData, TFGraph relationsGraph) throws InvalidProjectData {
        if (projectData instanceof ProjectData) {
            ProjectData pData = (ProjectData) projectData;
            generatePartialEdges(pData, relationsGraph);
            generateFinalEdges(pData, relationsGraph);
        } else {
            throw new InvalidProjectData("projectData is not an instance of " + ProjectData.class.getName());
        }
    }

    @Override
    public String getEvidenceLabel() {
        return EVIDENCE_LABEL;
    }

    @Override
    public double getWeight() {
        return weigth;
    }

    @Override
    public void setWeight(double weight) {
        this.weigth = weight;
    }

    public double cosineSimilarity(String s1, String s2) {
        try {
            if (s1.trim().equals("") || s2.trim().equals("")) {
                return 0.0;
            }
            Directory dir = indexTexts(s1, s2);
            IndexReader ir = DirectoryReader.open(dir);
            Map<String, Integer> f1 = getFrequencyMap(ir, 0);
            Map<String, Integer> f2 = getFrequencyMap(ir, 1);
            return Similarity.cosineSimilarity(f1, f2);
        } catch (IOException | NullPointerException ex) {
            if (!(ex instanceof NullPointerException)) {
                Logger.getLogger(ConversationMimicry.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
        return 0.0;
    }

    private IndexWriter createIndexWriter(Directory dir) throws IOException {
        Analyzer analyser = new StandardAnalyzer(StandardAnalyzer.STOP_WORDS_SET);
        IndexWriterConfig iwc = new IndexWriterConfig(analyser);
        return new IndexWriter(dir, iwc);
    }

    private Document createDoc(String text) {
        Document doc = new Document();
        FieldType ft = new FieldType();
        ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
        ft.setStored(true);
        ft.setStoreTermVectors(true);
        ft.setTokenized(true);
        Field cf1 = new Field("Content", text, ft);
        doc.add(cf1);
        return doc;
    }

    private Directory indexTexts(String text1, String text2) throws IOException {
        Directory dir = new RAMDirectory();
        IndexWriter iw = createIndexWriter(dir);
        iw.addDocument(createDoc(text1));
        iw.addDocument(createDoc(text2));
        iw.close();
        return dir;
    }

    private Map<String, Integer> getFrequencyMap(IndexReader ir, Integer docIndex) throws IOException {
        Terms frequencyVector = ir.getTermVector(docIndex, "Content");
        TermsEnum termsIterator = frequencyVector.iterator();
        Map<String, Integer> frequencyMap = new HashMap<>();
        BytesRef text;
        while ((text = termsIterator.next()) != null) {
            String term = text.utf8ToString();
            int freq = (int) termsIterator.totalTermFreq();
            frequencyMap.put(term, freq);
        }
        return frequencyMap;
    }

    private void generatePartialEdges(ProjectData pData, TFGraph relationsGraph) {
        generatePartialEdges(pData, relationsGraph, null);
    }

    private void generatePartialEdges(ProjectData pData, TFGraph relationsGraph, Date lastAnalysis) {
        pData.getAllComments().forEach((prId, prComments) -> {
            PullRequest pr = pData.getPullRequest(prId);
            //Collections.sort(prComments, new CommentsComparator());  pra tar ordenado
            calculateSimilarityPR2Comments(pr, relationsGraph, prComments, lastAnalysis);
            calculateSimilarityBetweenComments(pr, relationsGraph, prComments, lastAnalysis);
        });
    }

    private void calculateSimilarityPR2Comments(PullRequest pr, TFGraph relationsGraph, List<GitComment> prComments,
            Date lastAnalysis) {
        prComments.stream().filter(c -> lastAnalysis == null || c.getUpdatedAt().after(lastAnalysis))
                .forEach((c) -> {
                    double sim = cosineSimilarity(pr.getBody(), c.getBody());
                    if (!pr.getUser().getLogin().equals(c.getCreator().getLogin())) {
                        relationsGraph.addEdge(c.getCreator().getLogin(), pr.getUser().getLogin(),
                                "P" + EVIDENCE_LABEL + ":" + pr.getNumber() + ":" + c.getId(), sim,
                                pr.getCreatedAt());
                    }
                });
    }

    private void calculateSimilarityBetweenComments(PullRequest pr, TFGraph relationsGraph,
            List<GitComment> prComments, Date lastAnalysis) {
        for (int i = 0; i < prComments.size(); i++) {
            for (int j = i + 1; j < prComments.size(); j++) {
                GitComment c1 = prComments.get(i);
                GitComment c2 = prComments.get(j);
                if (lastAnalysis == null || c2.getUpdatedAt().after(lastAnalysis)) {
                    double sim = cosineSimilarity(c1.getBody(), c2.getBody());
                    if (!c1.getCreator().getLogin().equals(c2.getCreator().getLogin())) {
                        relationsGraph.addEdge(c2.getCreator().getLogin(), c1.getCreator().getLogin(),
                                "P" + EVIDENCE_LABEL + ":" + pr.getNumber() + ":" + c2.getId(), sim,
                                c1.getUpdatedAt());
                    }
                }
            }
        }
    }

    private void generateFinalEdges(ProjectData pData, TFGraph relationsGraph) {
        pData.getInvolvedUsers().stream().forEach((user) -> {
            Map<String, List<TFEdge>> collect = relationsGraph.outEdges(user.getLogin(), "P" + EVIDENCE_LABEL)
                    .stream().collect(Collectors.groupingBy(TFEdge::getTarget));
            collect.forEach((targed, edges) -> {
                double mean = edges.stream().mapToDouble(TFEdge::getWeight).average().orElse(0);
                relationsGraph.addEdge(user.getLogin(), targed, EVIDENCE_LABEL, mean, null);
            });
        });
    }

    @Override
    public void updateEvidence(Object projectData, TFGraph relations, Date limit, Date lastAnalysis)
            throws InvalidProjectData {
        if (projectData instanceof ProjectData) {
            ProjectData pData = (ProjectData) projectData;
            removeOldEdges(relations, limit);
            generatePartialEdges(pData, relations, lastAnalysis);
            generateFinalEdges(pData, relations);
        } else {
            throw new InvalidProjectData("projectData is not an instance of " + ProjectData.class.getName());
        }
    }

    private void removeOldEdges(TFGraph relations, Date until) {
        relations.allEdges(EVIDENCE_LABEL).stream().forEach(e -> {
            relations.removeEdge(e);
        });
        relations.allEdges("P" + EVIDENCE_LABEL).stream().filter(e -> e.getDate().before(until)).forEach(e -> {
            relations.removeEdge(e);
        });
    }
}