fr.ericlab.sondy.algo.eventdetection.PersistentConversations.java Source code

Java tutorial

Introduction

Here is the source code for fr.ericlab.sondy.algo.eventdetection.PersistentConversations.java

Source

/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package fr.ericlab.sondy.algo.eventdetection;

import fr.ericlab.sondy.core.structure.DetectionResult;
import fr.ericlab.sondy.algo.AlgorithmParameter;
import fr.ericlab.sondy.core.DataManipulation;
import fr.ericlab.sondy.core.access.IndexAccess;
import fr.ericlab.sondy.core.structure.Collection;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import javafx.collections.FXCollections;
import javafx.collections.ObservableList;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;

////////////////////////////////////////////////////////////////////////////////
//  This file is part of SONDY.                                               //
//                                                                            //
//  SONDY is free software: you can redistribute it and/or modify             //
//  it under the terms of the GNU General Public License as published by      //
//  the Free Software Foundation, either version 3 of the License, or         //
//  (at your option) any later version.                                       //
//                                                                            //
//  SONDY is distributed in the hope that it will be useful,                  //
//  but WITHOUT ANY WARRANTY; without even the implied warranty of            //
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             //
//  GNU General Public License for more details.                              //
//                                                                            //
//  You should have received a copy of the GNU General Public License         //
//  along with SONDY.  If not, see <http://www.gnu.org/licenses/>.            //
////////////////////////////////////////////////////////////////////////////////

/**
 * Event detection algorithm that ranks the terms of the index by how much
 * they keep being used after their frequency peak ("persistent conversations").
 *
 * <p>For every candidate term, the time slice with the highest frequency inside
 * the selected range is taken as the peak. The term's score is the ratio between
 * its mean normalized frequency after the peak and its mean normalized frequency
 * before the peak.
 *
 * <p>Two optional parameters are read, in this order: {@code minTermSupport} and
 * {@code maxTermSupport}. Both are multiplied by {@code appVariables.nbMessages}
 * to obtain exclusive bounds on a term's total frequency.
 *
 *   @author Adrien GUILLE, Laboratoire ERIC, Université Lumière Lyon 2
 */

public class PersistentConversations extends EventDetectionAlgorithm {

    /** Lower support bound; only overwritten when parameter 0 holds a non-empty value. */
    double minTermSupport = 0;
    /** Upper support bound; only overwritten when parameter 1 holds a non-empty value. */
    double maxTermSupport = 1.0;

    /**
     * @return the display name of this algorithm
     */
    public String getName() {
        return "Persistent Conversations";
    }

    /**
     * Runs the detection over the whole term dictionary of the index.
     *
     * <p>The index access is always closed, including when the scan fails.
     * A non-numeric parameter value makes {@link Double#parseDouble(String)}
     * throw a {@link NumberFormatException}, which is not caught here.
     *
     * @return the detected results (also stored in {@code results}), inserted one
     *         by one at the head of the list while iterating the map returned by
     *         {@code Collection.getSortedMapDesc}; {@code null} if an
     *         {@link IOException} occurred (the exception is logged)
     */
    public ObservableList<DetectionResult> apply() {
        try {
            String minSupportText = parameters.get(0).getValue();
            if (minSupportText != null && !minSupportText.equals("")) {
                minTermSupport = Double.parseDouble(minSupportText);
            }
            String maxSupportText = parameters.get(1).getValue();
            if (maxSupportText != null && !maxSupportText.equals("")) {
                maxTermSupport = Double.parseDouble(maxSupportText);
            }
            long startNanoTime = System.nanoTime();
            IndexAccess indexAccess = new IndexAccess(appVariables);
            try {
                results = FXCollections.observableArrayList();
                HashMap<DetectionResult, Float> score = Collection.getSortedMapDesc(scoreTerms(indexAccess));
                for (Map.Entry<DetectionResult, Float> entry : score.entrySet()) {
                    // Head insertion: the final list is in reverse iteration order of the sorted map.
                    results.add(0, entry.getKey());
                }
            } finally {
                // Release the index even when the scan above throws.
                indexAccess.close();
            }
            long elapsedNanoTime = System.nanoTime() - startNanoTime;
            double elapsedSeconds = (double) elapsedNanoTime / (double) 1000000000;
            appVariables.addLogEntry("[event detection] computed persistent conversations, minTermSupport="
                    + minTermSupport + ", maxTermSupport=" + maxTermSupport + ". " + results.size() + " results in "
                    + formatter.format(elapsedSeconds) + "s");
            return results;
        } catch (IOException ex) {
            Logger.getLogger(PersistentConversations.class.getName()).log(Level.SEVERE, null, ex);
            return null;
        }
    }

    /**
     * Scans every term of the index and scores the eligible ones.
     *
     * <p>A term is eligible when it is longer than one character, is not a stop
     * word, and its total frequency lies strictly between the two support bounds.
     * It is only kept when its pre-peak mean normalized frequency is positive.
     *
     * @param indexAccess open access to the index; not closed by this method
     * @return a map from detection result (term and formatted peak interval) to
     *         the post-peak / pre-peak ratio
     * @throws IOException if reading the index fails
     */
    private HashMap<DetectionResult, Float> scoreTerms(IndexAccess indexAccess) throws IOException {
        IndexReader reader = indexAccess.reader;
        TermEnum allTerms = reader.terms();
        try {
            HashMap<DetectionResult, Float> score = new HashMap<>();
            int intervalNumber = reader.numDocs();
            // presumably the dataset interval is in minutes, making this a duration in hours — TODO confirm
            float intervalDuration = ((float) appVariables.getCurrentDatasetInterval()) / 60;
            int minTermOccur = (int) (minTermSupport * appVariables.nbMessages);
            int maxTermOccur = (int) (maxTermSupport * appVariables.nbMessages);
            while (allTerms.next()) {
                String term = allTerms.term().text();
                if (term.length() <= 1 || appVariables.isStopWord(term)) {
                    continue;
                }
                float[] frequency = readTermFrequency(indexAccess, reader.termDocs(allTerms.term()));
                // The element at index intervalNumber is used as the term's total frequency.
                float cf = frequency[intervalNumber];
                if (cf <= minTermOccur || cf >= maxTermOccur) {
                    continue;
                }
                // Peak: first slice of the selected range holding the highest frequency.
                float peakFrequency = 0;
                int peakSlice = 0;
                for (int i = appVariables.startTimeSlice; i <= appVariables.endTimeSlice; i++) {
                    if (frequency[i] > peakFrequency) {
                        peakFrequency = frequency[i];
                        peakSlice = i;
                    }
                }
                // NOTE(review): the pre-peak window stops at peakSlice - 2, i.e. it leaves out the
                // slice right before the peak; kept as in the original — confirm this is intended.
                float prePeakNtf = meanNormalizedFrequency(frequency, 0, peakSlice - 1, cf);
                float postPeakNtf = meanNormalizedFrequency(frequency, peakSlice + 1, intervalNumber, cf);
                if (prePeakNtf > 0) {
                    float peakDay = (peakSlice * intervalDuration) / 24;
                    float peakDay1 = ((peakSlice + 1) * intervalDuration) / 24;
                    score.put(
                            new DetectionResult(term,
                                    formatter.format(peakDay) + ";" + formatter.format(peakDay1)),
                            postPeakNtf / prePeakNtf);
                }
            }
            return score;
        } finally {
            allTerms.close();
        }
    }

    /**
     * Reads the per-slice frequencies of one term and closes the postings cursor afterwards.
     *
     * @param indexAccess access used to compute the frequencies
     * @param termDocs postings cursor of the term; closed before returning
     * @return the array returned by {@code IndexAccess.getTermFrequency}
     * @throws IOException if closing the cursor fails
     */
    private float[] readTermFrequency(IndexAccess indexAccess, TermDocs termDocs) throws IOException {
        try {
            return indexAccess.getTermFrequency(appVariables, termDocs);
        } finally {
            termDocs.close();
        }
    }

    /**
     * Averages {@code frequency[i] / cf} over the half-open slice range {@code [from, to)}.
     *
     * @param frequency per-slice frequencies of a term
     * @param from first slice of the window (inclusive)
     * @param to end of the window (exclusive)
     * @param cf total frequency used for normalization
     * @return the mean normalized frequency, or 0 when the window is empty
     */
    private static float meanNormalizedFrequency(float[] frequency, int from, int to, float cf) {
        int sliceCount = to - from;
        if (sliceCount <= 0) {
            // Avoids a 0/0 (NaN) or negative divisor when the peak sits at an edge.
            return 0;
        }
        float sum = 0;
        for (int i = from; i < to; i++) {
            sum += frequency[i] / cf;
        }
        // Divide by exactly the number of slices that were summed.
        return sum / sliceCount;
    }

    /**
     * Declares the two optional parameters (both empty by default) and the description.
     */
    public PersistentConversations() {
        parameters = FXCollections.observableArrayList(new AlgorithmParameter("minTermSupport", ""),
                new AlgorithmParameter("maxTermSupport", ""));
        algoDescription = "Identifies 'persistent conversations' using a normalized term frequency based metric";
    }

    /**
     * @return the bibliographic reference of the method, as an HTML list item
     */
    @Override
    public String getReference() {
        return "<li><b>Persistent Conversations:</b> David A. Shamma, Lyndon Kennedy, Elizabeth F. Churchill. Peaks and persistence: modeling the shape of microblog conversations, <i>In Proceedings of the ACM 2011 conference on Computer supported cooperative work </i>, pp. 355-358, 2011</li>";
    }

}