com.appspot.socialinquirer.server.service.impl.AnalysisServiceImpl.java Source code

Java tutorial

Introduction

Here is the source code for com.appspot.socialinquirer.server.service.impl.AnalysisServiceImpl.java

Source

/*
 * Copyright 2012 Nabeel Mukhtar 
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); 
 * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at 
 * 
 *  http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 * See the License for the specific language governing permissions and
 * limitations under the License. 
 * 
 */
package com.appspot.socialinquirer.server.service.impl;

import iweb2.ch3.collaborative.data.BaseDataset;
import iweb2.ch3.collaborative.data.ContentItem;
import iweb2.ch3.collaborative.model.Content;
import iweb2.ch3.collaborative.model.Dataset;
import iweb2.ch3.collaborative.model.Item;
import iweb2.ch3.collaborative.model.SimilarItem;
import iweb2.ch3.collaborative.model.SimilarUser;
import iweb2.ch3.collaborative.recommender.Delphi;
import iweb2.ch3.collaborative.similarity.RecommendationType;

import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.io.Serializable;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.Writer;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.logging.Level;

import javax.script.Bindings;
import javax.script.ScriptContext;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;

import net.sf.jtmt.summarizers.LuceneSummarizer;
import net.sf.jtmt.summarizers.SummaryAnalyzer;
import net.sf.jtmt.tokenizers.SentenceTokenizer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.apache.solr.analysis.HTMLStripReader;
import org.apache.tools.ant.Project;
import org.w3._1999.xhtml.Body;
import org.w3._1999.xhtml.P;

import com.appspot.socialinquirer.server.constant.ApplicationConstants;
import com.appspot.socialinquirer.server.service.AnalysisService;
import com.appspot.socialinquirer.server.service.template.TemplateManager;
import com.appspot.socialinquirer.shared.ProgrammingLanguage;
import com.appspot.socialinquirer.shared.TextLanguage;
import com.appspot.socialinquirer.shared.dto.Classification;
import com.appspot.socialinquirer.shared.dto.Code;
import com.appspot.socialinquirer.shared.dto.ContentAnalysis;
import com.appspot.socialinquirer.shared.dto.Paper;
import com.appspot.socialinquirer.shared.dto.Tag;
import com.appspot.socialinquirer.shared.dto.User;
import com.google.code.bing.search.client.BingSearchClient;
import com.google.code.bing.search.client.BingSearchClient.SearchRequestBuilder;
import com.google.code.bing.search.client.BingSearchServiceClientFactory;
import com.google.code.bing.search.schema.SearchResponse;
import com.google.code.bing.search.schema.SourceType;
import com.google.code.bing.search.schema.translation.TranslationResult;
import com.google.code.stackexchange.schema.Answer;
import com.google.code.stackexchange.schema.Question;
import com.google.code.uclassify.client.UClassifyClient;
import com.google.code.uclassify.client.UClassifyClientFactory;
import com.googleapis.ajax.schema.BlogResult;
import com.googleapis.ajax.services.BlogSearchQuery;
import com.googleapis.ajax.services.GoogleSearchQueryFactory;
import com.memetix.mst.language.SpokenDialect;
import com.memetix.mst.speak.Speak;
import com.microsoft.research.Author;
import com.microsoft.research.Publication;
import com.microsoft.research.query.AcademicSearchQueryFactory;
import com.microsoft.research.query.PublicationSearchQuery;
import com.swabunga.spell.engine.SpellDictionary;
import com.swabunga.spell.engine.SpellDictionaryHashMap;
import com.swabunga.spell.engine.Word;
import com.swabunga.spell.event.SpellCheckEvent;
import com.swabunga.spell.event.SpellCheckListener;
import com.swabunga.spell.event.SpellChecker;
import com.swabunga.spell.event.StringWordTokenizer;

import de.danielnaber.languagetool.JLanguageTool;
import de.danielnaber.languagetool.Language;
import de.danielnaber.languagetool.rules.RuleMatch;

/**
 * The Class AnalysisServiceImpl.
 */
public class AnalysisServiceImpl extends BaseService implements AnalysisService {

    /**
     * Instantiates a new analysis service impl.
     *
     * Passes the fixed service name "analysis-service" to the BaseService
     * constructor (BaseService is defined elsewhere in the project; presumably
     * the name is used for logging/identification — confirm).
     */
    public AnalysisServiceImpl() {
        super("analysis-service");
    }

    /*
     * (non-Javadoc)
     * 
     * @see com.appspot.bitlyminous.service.RecommendationService#getUserSimilarities(com.appspot.bitlyminous.entity.User,
     *      java.util.List)
     */
    public List<Map.Entry<User, Double>> getUserSimilarities(User user, List<User> relatedUsers) {
        // Build a dataset from the reference user plus all candidates and let
        // Delphi compute user-content-based similarity scores.
        Delphi recommender = new Delphi(createUserDataSet(user, relatedUsers),
                RecommendationType.USER_CONTENT_BASED);

        Map<User, Double> scoreByUser = new HashMap<User, Double>();
        for (SimilarUser candidate : recommender.findSimilarUsers(createUserFromUser(user))) {
            scoreByUser.put(createUserFromUser(candidate.getUser()), candidate.getSimilarity());
        }

        // Return the (user, score) entries ordered by descending similarity.
        List<Map.Entry<User, Double>> ranked =
                new ArrayList<Map.Entry<User, Double>>(scoreByUser.entrySet());
        Collections.sort(ranked, new SimilarityComparatorDesc<User>());
        return ranked;
    }

    /*
     * (non-Javadoc)
     * 
     * @see com.appspot.bitlyminous.service.RecommendationService#getTextSimilarities(java.lang.String,
     *      java.util.List)
     */
    public List<Map.Entry<String, Double>> getTextSimilarities(String text, List<String> relatedTexts) {
        // Assemble the reference text and candidates into one dataset and ask
        // Delphi for item-content-based similarities.
        Delphi recommender = new Delphi(createTextDataSet(text, relatedTexts),
                RecommendationType.ITEM_CONTENT_BASED);

        Map<String, Double> scoreByText = new HashMap<String, Double>();
        for (SimilarItem candidate : recommender.findSimilarItems(createItemFromText(text))) {
            scoreByText.put(createTextFromItem(candidate.getItem()), candidate.getSimilarity());
        }

        // Rank by descending similarity before returning.
        List<Map.Entry<String, Double>> ranked =
                new ArrayList<Map.Entry<String, Double>>(scoreByText.entrySet());
        Collections.sort(ranked, new SimilarityComparatorDesc<String>());
        return ranked;
    }

    /**
     * Builds a dataset holding the reference text followed by each related
     * text, all wrapped as content items.
     *
     * @param refText the reference text
     * @param relatedTexts the related texts
     * @return the dataset
     */
    protected Dataset createTextDataSet(String refText, List<String> relatedTexts) {
        BaseDataset dataset = new BaseDataset();
        dataset.addItem(createItemFromText(refText));
        for (String related : relatedTexts) {
            dataset.addItem(createItemFromText(related));
        }
        return dataset;
    }

    /**
     * Builds a dataset holding the reference project followed by each
     * candidate project, converted to items.
     *
     * @param refProject the reference project
     * @param projects the candidate projects
     * @return the dataset
     */
    protected Dataset createProjectDataSet(Project refProject, List<Project> projects) {
        BaseDataset dataset = new BaseDataset();
        dataset.addItem(createItemFromProject(refProject));
        for (Project candidate : projects) {
            dataset.addItem(createItemFromProject(candidate));
        }
        return dataset;
    }

    /**
     * Builds a dataset containing the reference user followed by every
     * candidate user, converted to the collaborative-filtering model type.
     *
     * @param refUser the reference user
     * @param users the candidate users
     * @return the dataset
     */
    protected Dataset createUserDataSet(User refUser, List<User> users) {
        BaseDataset dataset = new BaseDataset();
        dataset.add(createUserFromUser(refUser));
        for (User candidate : users) {
            dataset.add(createUserFromUser(candidate));
        }
        return dataset;
    }

    /**
     * Creates the item from project.
     *
     * NOTE(review): not implemented — always returns null. Because
     * createProjectDataSet feeds this result straight into
     * BaseDataset.addItem, project datasets currently receive null items;
     * confirm BaseDataset tolerates null before using project recommendations.
     *
     * @param project the project
     * @return the item (currently always null)
     */
    protected Item createItemFromProject(Project project) {
        // return new ContentItem(url.getUrl().hashCode(), url.getUrl(), new
        // Content("url-" + url.getUrl().hashCode(), getUrlContent(url)));
        return null;
    }

    /**
     * Creates the project from item.
     *
     * NOTE(review): not implemented — always returns null. The commented-out
     * body below dates from an earlier Url-based implementation and is kept
     * for reference only.
     *
     * @param item the item
     * @return the project (currently always null)
     */
    protected Project createProjectFromItem(Item item) {
        // ContentItem contentItem = (ContentItem) item;
        // Url url = new Url();
        // url.setDescription(item.getItemContent().getText().replaceAll("\\[.*\\]",
        // ""));
        // url.setUrl(contentItem.getName());
        // url.setTags(Arrays.asList(contentItem.getItemContent().getTerms()));
        // return url;
        return null;
    }

    /**
     * Wraps a text as a content item, using the text's hash code both as the
     * item id and in the generated item/content names.
     *
     * @param text the text
     * @return the item
     */
    protected Item createItemFromText(String text) {
        int hash = text.hashCode();
        return new ContentItem(hash, "item-" + hash, new Content("text-" + hash, text));
    }

    /**
     * Extracts the raw text back out of a content item.
     *
     * @param item the item (must be a ContentItem)
     * @return the string
     */
    protected String createTextFromItem(Item item) {
        ContentItem contentItem = (ContentItem) item;
        return contentItem.getItemContent().getText();
    }

    /**
     * Converts a DTO user into a collaborative-filtering model user, keyed by
     * the hash of the entity key and named after the StackExchange id.
     *
     * @param user the user
     * @return the iweb2.ch3.collaborative.model. user
     */
    protected iweb2.ch3.collaborative.model.User createUserFromUser(User user) {
        int modelId = user.getKey().hashCode();
        iweb2.ch3.collaborative.model.User modelUser =
                new iweb2.ch3.collaborative.model.User(modelId, user.getStackExchangeId());
        // Tag-based ratings were disabled in the original implementation and
        // are intentionally not populated here.
        return modelUser;
    }

    /**
     * Converts a collaborative-filtering model user back into a DTO user.
     * Only the StackExchange id (stored in the model user's name field) is
     * carried over.
     *
     * @param user the model user
     * @return the user
     */
    protected User createUserFromUser(iweb2.ch3.collaborative.model.User user) {
        User dto = new User();
        dto.setStackExchangeId(user.getName());
        return dto;
    }

    /*
     * Commented-out helper retained for reference below: it built a
     * pseudo-content string from a bookmark's description and tags.
     */
    // protected String getUrlContent(Url url) {
    // StringBuilder builder = new StringBuilder();
    // builder.append(url.getDescription());
    // builder.append("[");
    // for (String tag : url.getTags()) {
    // builder.append(tag);
    // builder.append(" ");
    // }
    // builder.append("]");
    // return builder.toString();
    // }
    /**
     * The main method.
     * 
     * NOTE(review): ad-hoc manual test harness — every experiment below is
     * commented out, so this method currently does nothing when run.
     * 
     * @param args
     *            the arguments
     */
    public static void main(String[] args) {
        //      AnalysisServiceImpl service = new AnalysisServiceImpl();
        // String[] relatedTexts = {"An introduction to Hadoop - ThinkPHP
        // /dev/blog", "Welcome to Apache Hadoop!", "Apache Mahout:: Scalable
        // machine-learning and data-mining library", "Distributed data
        // processing with Hadoop, Part 3: Application development",
        // "Programming Hadoop with Clojure", "Scaling Big Time with Hadoop",
        // "Getting Started on Hadoop"};
        // List<Entry<String, Double>> textSimilarities =
        // service.getTextSimilarities("Getting Started on Hadoop",
        // Arrays.asList(relatedTexts));
        // System.out.println(textSimilarities);

        // System.out.println(service.summarizeText("The Apache Hadoop software
        // library is a framework that allows for the distributed processing of
        // large data sets across clusters of computers using a simple
        // programming model. It is designed to scale up from single servers to
        // thousands of machines, each offering local computation and storage.
        // Rather than rely on hardware to deliver high-avaiability, the library
        // itself is designed to detect and handle failures at the application
        // layer, so delivering a highly-availabile service on top of a cluster
        // of computers, each of which may be prone to failures.", 2));
        // System.out.println(service.stripHtmlTags("package
        // com.oreilly.htdg.ch12.java;<br/><br/>import
        // java.io.IOException;<br/>", false));
        // DeliciousGateway delicious =
        // GatewayFactory.newInstance().createDeliciousGateway(ApplicationConstants.DELICIOUS_CONSUMER_KEY,
        // ApplicationConstants.DELICIOUS_CONSUMER_SECRET,
        // ApplicationConstants.DELICIOUS_ACCESS_TOKEN,
        // ApplicationConstants.DELICIOUS_ACCESS_TOKEN_SECRET);
        // List<Url> popularUrls = delicious.getPopularUrls("hadoop");
        // System.out.println(popularUrls.get(0));
        // List<Entry<Url, Double>> urlSimilarities =
        // service.getUrlSimilarities(popularUrls.get(0), popularUrls);
        // System.out.println(urlSimilarities);
        //      com.appspot.socialinquirer.shared.dto.Question showAnswer = service
        //            .showAnswer(
        //                  "I am making a poster about getting rid of junk cars",
        //                  "I want it to say something along the lines of \"Make America Beautiful. Get rid of Junk cars Special\"",
        //                  Arrays.asList("cars"));
        //      System.out.println(showAnswer.getTitle() + ":"
        //            + showAnswer.getContent());
    }

    /*
     * (non-Javadoc)
     * 
     * @see com.appspot.researchcraft.server.service.AnalysisService#evaluate(java.lang.String,
     *      java.lang.String)
     */
    @Override
    public String evaluate(String code, ProgrammingLanguage language) {
        StringBuilder builder = new StringBuilder();
        try {
            // TODO-NM; Change this to use ideone.
            ScriptEngineManager mgr = new ScriptEngineManager();
            ScriptEngine engine = mgr.getEngineByName(language.value());
            if (engine == null) {
                // getEngineByName returns null (rather than throwing) when no
                // engine is registered for the requested language; without this
                // guard the eval below failed with an uninformative
                // NullPointerException logged as an evaluation error.
                logger.log(Level.SEVERE, "No script engine registered for language: " + language.value());
                return builder.toString();
            }
            // Evaluate the (HTML-stripped) code and echo its result, if any.
            Object obj = engine.eval(stripHtmlTags(code, false));
            if (obj != null) {
                builder.append(obj);
                builder.append("\n");
            }
            // Append every user-defined binding as "Type name = value;",
            // filtering out engine-internal keys via isValidKey/isValidValue.
            Bindings bindings = engine.getBindings(ScriptContext.ENGINE_SCOPE);
            for (Entry<String, Object> entry : bindings.entrySet()) {
                if (isValidKey(entry.getKey()) && isValidValue(entry.getValue())) {
                    builder.append(entry.getValue().getClass().getSimpleName());
                    builder.append(" ");
                    builder.append(entry.getKey());
                    builder.append(" = ");
                    builder.append(entry.getValue());
                    builder.append(";\n");
                }
            }
        } catch (Exception e) {
            logger.log(Level.SEVERE, "An error occured while evaluating code.", e);
        }

        return builder.toString();
    }

    /**
     * Checks whether a script binding key belongs to user code rather than to
     * the scripting engine's internal state.
     *
     * @param key the key
     * @return true, if is valid key
     */
    private boolean isValidKey(String key) {
        // Reject bindings that the javax.script / BeanShell machinery installs.
        return !(key.startsWith("javax.script")
                || key.equals("org_beanshell_engine_namespace")
                || key.equals("bsh"));
    }

    /**
     * Checks if is valid value.
     *
     * NOTE(review): currently a placeholder that accepts every value; kept as
     * an extension hook for future filtering of binding values.
     *
     * @param value the value
     * @return true, if is valid value (always true at present)
     */
    private boolean isValidValue(Object value) {
        return true;
    }

    /*
     * (non-Javadoc)
     * 
     * @see com.appspot.researchcraft.server.service.AnalysisService#getTermVector(java.lang.String)
     */
    @Override
    public List<Tag> getTermVector(String title, String text) {
        RAMDirectory directory = null;
        IndexReader reader = null;
        IndexWriter writer = null;
        Map<String, Tag> tagsMap = new HashMap<String, Tag>();

        try {
            directory = new RAMDirectory();

            // Index the title and the HTML-stripped body into an in-memory
            // index with term vectors enabled, then read the vectors back.
            writer = new IndexWriter(directory, new StandardAnalyzer(Version.LUCENE_CURRENT), true,
                    MaxFieldLength.UNLIMITED);
            Document doc = new Document();

            doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.add(new Field("body", stripHtmlTags(text, true), Field.Store.YES, Field.Index.ANALYZED,
                    Field.TermVector.YES));
            writer.addDocument(doc);

            writer.close();
            writer = null; // closed successfully; nothing left for finally to do

            reader = IndexReader.open(directory, true);
            int numDocs = reader.maxDoc();
            for (int i = 0; i < numDocs; i++) {
                pullTags(reader.getTermFreqVector(i, "title"), tagsMap);
                pullTags(reader.getTermFreqVector(i, "body"), tagsMap);
            }

        } catch (Exception e) {
            logger.log(Level.SEVERE, "An error occured while pulling tags from text.", e);
        } finally {
            // Previously the writer was only closed on the happy path, so any
            // exception during indexing leaked it (and its directory locks).
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException ignored) {
                    // best-effort cleanup; the primary error was already logged
                }
            }
            closeIndexReader(reader);
            closeRAMDirectory(directory);
        }
        // Sort tags by descending frequency.
        ArrayList<Tag> tagsList = new ArrayList<Tag>(tagsMap.values());
        Collections.sort(tagsList, new Comparator<Tag>() {
            @Override
            public int compare(Tag o1, Tag o2) {
                int f1 = o1.getFreqency();
                int f2 = o2.getFreqency();
                // Explicit comparison instead of "o2 - o1" subtraction, which
                // can overflow for large frequency differences.
                return (f2 < f1) ? -1 : ((f2 == f1) ? 0 : 1);
            }
        });

        return tagsList;
    }

    /**
     * Accumulates term frequencies from a term-frequency vector into the
     * shared tag map, creating a Tag per new term and summing counts for
     * terms seen before.
     *
     * @param termFreqVector the term freq vector (may be null; then a no-op)
     * @param tagsMap the tags map, keyed by term
     */
    private void pullTags(TermFreqVector termFreqVector, Map<String, Tag> tagsMap) {
        if (termFreqVector == null) {
            return;
        }
        String[] terms = termFreqVector.getTerms();
        int[] frequencies = termFreqVector.getTermFrequencies();
        for (int i = 0; i < frequencies.length; i++) {
            Tag existing = tagsMap.get(terms[i]);
            if (existing == null) {
                tagsMap.put(terms[i], new Tag(terms[i], frequencies[i]));
            } else {
                existing.setFreqency(existing.getFreqency() + frequencies[i]);
            }
        }
    }

    /*
     * (non-Javadoc)
     * 
     * @see com.appspot.researchcraft.server.service.AnalysisService#recommendText(java.lang.String,
     *      java.util.List)
     */
    @Override
    public String recommendText(String title, String text, List<String> tags) {
        // NOTE(review): unimplemented stub — always returns null; callers must
        // handle a null result until this is implemented.
        // TODO Auto-generated method stub
        return null;
    }

    /*
     * (non-Javadoc)
     * 
     * @see com.appspot.researchcraft.server.service.AnalysisService#showRelatedBlogs(java.lang.String,
     *      java.lang.String, java.util.List)
     */
    @Override
    public List<com.appspot.socialinquirer.shared.dto.Answer> showRelatedBlogs(String title, String text,
            List<String> tags) {
        List<com.appspot.socialinquirer.shared.dto.Answer> blogs = new ArrayList<com.appspot.socialinquirer.shared.dto.Answer>();
        try {
            // Query Google blog search with the question title and map each
            // result onto an Answer DTO.
            GoogleSearchQueryFactory factory = GoogleSearchQueryFactory
                    .newInstance(ApplicationConstants.GOOGLE_API_KEY);

            BlogSearchQuery blogQuery = factory.newBlogSearchQuery();
            blogQuery.setReferrer(ApplicationConstants.GOOGLE_API_REFERER);
            for (BlogResult blogResult : blogQuery.withQuery(title).list()) {
                com.appspot.socialinquirer.shared.dto.Answer blog = new com.appspot.socialinquirer.shared.dto.Answer();
                blog.setTitle(blogResult.getTitle());
                blog.setAuthor(blogResult.getAuthor());
                blog.setContent(blogResult.getContent());
                blog.setPublishedDate(blogResult.getPublishedDate());
                blog.setUrl(blogResult.getPostUrl());
                blogs.add(blog);
            }
        } catch (Exception e) {
            logger.log(Level.SEVERE, "An error occured while checking related blogs.", e);
        }
        return blogs;
    }

    /*
     * (non-Javadoc)
     * 
     * @see com.appspot.researchcraft.server.service.AnalysisService#showRelatedCode(java.lang.String,
     *      java.lang.String, java.util.List)
     */
    @Override
    public List<Code> showRelatedCode(String title, String text, List<String> tags) {
        // NOTE(review): the Google Code Search implementation below is fully
        // commented out, so this method currently always returns an empty
        // list. Retained for reference.
        //      CodeSearchService codesearchService = new CodeSearchService(
        //            "ever-scribe-codesearch");
        List<Code> codes = new ArrayList<Code>();
        //      try {
        //         CodeSearchFeed searchFeed = codesearchService.getFeed(new URL(
        //               MessageFormat.format(
        //                     ApplicationConstants.GOOGLE_CODE_SEARCH_API_URL,
        //                     encodeUrl(title + " lang:java"), 1, 10)),
        //               CodeSearchFeed.class);
        //         for (CodeSearchEntry entry : searchFeed.getEntries()) {
        //            Code code = new Code();
        //            code.setTitle(entry.getTitle().getPlainText());
        //            code.setUrl(entry.getHtmlLink().getHref());
        //            if (entry.getAuthors() != null) {
        //               for (Person author : entry.getAuthors()) {
        //                  code.setAuthor(author.getName());
        //               }
        //            }
        //            StringBuilder content = new StringBuilder();
        //            for (Match m : entry.getMatches()) {
        //               content.append(m.getLineText().getHtml());
        //            }
        //            code.setContent(content.toString());
        //            codes.add(code);
        //         }
        //      } catch (Exception e) {
        //         logger.log(Level.SEVERE,
        //               "An error occured while checking related code.", e);
        //      }
        return codes;
    }

    /*
     * (non-Javadoc)
     * 
     * @see com.appspot.researchcraft.server.service.AnalysisService#showRelatedPapers(java.lang.String,
     *      java.lang.String, java.util.List)
     */
    @Override
    public List<Paper> showRelatedPapers(String title, String text, List<String> tags) {
        List<Paper> papers = new ArrayList<Paper>();
        try {
            // NOTE(review): the Springer metadata implementation below was
            // replaced by Microsoft Academic Search; kept for reference.
            // SpringerApiClient client = new
            // SpringerApiXppClient(ApplicationConstants.SPRINGER_METADATA_KEY);
            // Response metadata = client.metadata(title);
            // for (Message message : metadata.getRecords().getMessages()) {
            // Paper paper = new Paper();
            // if (message.getHead() != null && message.getHead().getArticle()
            // != null) {
            // Article article = message.getHead().getArticle();
            // paper.setCopyright(article.getCopyright());
            // paper.setCreators(article.getCreators());
            // paper.setDoi(article.getDoi());
            // paper.setIdentifier(article.getIdentifier());
            // paper.setIsbn(article.getIsbn());;
            // paper.setIssn(article.getIssn());
            // paper.setNumber(article.getNumber());
            // paper.setPublicationDate(article.getPublicationDate());
            // paper.setPublicationName(article.getPublicationName());
            // paper.setPublisher(article.getPublisher());
            // paper.setTitle(article.getTitle());
            // paper.setUrl(article.getUrl());
            // paper.setVolume(article.getVolume());
            // }
            // if (message.getBody() != null) {
            // paper.setContent(getContentFromBody(message.getBody()));
            // }
            //            
            // papers.add(paper);
            // }
            // Query Microsoft Academic Search for publications 1..5 matching
            // the title, and map each onto a Paper DTO.
            PublicationSearchQuery query = AcademicSearchQueryFactory
                    .newInstance(ApplicationConstants.MICROSOFT_APP_ID).newPublicationSearchQuery();
            List<Publication> publications = query.withFullTextQuery(title).withStartIndex(1).withEndIndex(5)
                    .list();
            for (Publication publication : publications) {
                Paper paper = new Paper();
                paper.setContent(publication.getAbstract());
                paper.setDoi(publication.getDOI());
                paper.setIdentifier(String.valueOf(publication.getID()));
                paper.setTitle(publication.getTitle());
                for (Author author : publication.getAuthor()) {
                    paper.getCreators().add(author.getFirstName() + " " + author.getLastName());
                }
                // The copyright field is (re)used here to display the creator list.
                paper.setCopyright(paper.getCreators().toString());
                if (!publication.getFullVersionURL().isEmpty()) {
                    paper.setUrl(publication.getFullVersionURL().get(0));
                }
                papers.add(paper);
            }
        } catch (Exception e) {
            logger.log(Level.SEVERE, "An error occured while searching for papers.", e);
        }
        return papers;
    }

    /*
     * (non-Javadoc)
     * 
     * @see com.appspot.researchcraft.server.service.AnalysisService#spellCheck(java.lang.String,
     *      java.lang.String)
     */
    @Override
    public String spellCheck(String text, String language) {
        // NOTE(review): the language parameter is currently ignored — the
        // English dictionary (eng_com.dic) is hard-coded below. Confirm
        // whether per-language dictionaries should be selected here.
        final StringBuilder errors = new StringBuilder();
        try {
            // Load the Jazzy English dictionary from the classpath.
            SpellDictionary dictionary = new SpellDictionaryHashMap(
                    new File(getClass().getResource("/resources/jazzy/eng_com.dic").getFile()));

            SpellChecker spellCheck = new SpellChecker(dictionary);
            //         spellCheck.getConfiguration().setBoolean(Configuration.SPELL_IGNOREDIGITWORDS, Boolean.TRUE);
            //         spellCheck.getConfiguration().setBoolean(Configuration.SPELL_IGNOREINTERNETADDRESSES, Boolean.TRUE);
            //         spellCheck.getConfiguration().setBoolean(Configuration.SPELL_IGNOREMIXEDCASE, Boolean.TRUE);
            //         spellCheck.getConfiguration().setBoolean(Configuration.SPELL_IGNOREUPPERCASE, Boolean.TRUE);
            //         spellCheck.getConfiguration().setBoolean(Configuration.SPELL_IGNORESENTENCECAPITALIZATION, Boolean.FALSE);

            // Collect each misspelling (with suggestions when available) as an
            // HTML fragment into the shared errors buffer.
            spellCheck.addSpellCheckListener(new SpellCheckListener() {

                @SuppressWarnings("unchecked")
                @Override
                public void spellingError(SpellCheckEvent event) {
                    List<Word> suggestions = event.getSuggestions();
                    // Single-character "words" are skipped to cut noise.
                    if (event.getInvalidWord() != null && event.getInvalidWord().length() > 1) {
                        if (suggestions.size() > 0) {
                            errors.append("Misspelt Word: " + event.getInvalidWord());
                            for (Iterator<Word> suggestedWord = suggestions.iterator(); suggestedWord.hasNext();) {
                                errors.append("&nbsp;&nbsp;Suggested Word: " + suggestedWord.next());
                            }
                        } else {
                            errors.append("Misspelt Word: " + event.getInvalidWord());
                            errors.append("&nbsp;&nbsp;No suggestions");
                        }
                        errors.append("<br/>");
                    }
                }
            });
            // Strip markup first so HTML tags are not flagged as misspellings.
            spellCheck.checkSpelling(new StringWordTokenizer(stripHtmlTags(text, true)));
        } catch (Exception e) {
            logger.log(Level.SEVERE, "An error occured while checking spelling.", e);
        }
        return errors.toString();
    }

    /*
     * (non-Javadoc)
     * 
     * @see com.appspot.researchcraft.server.service.AnalysisService#grammarCheck(java.lang.String,
     *      java.lang.String)
     */
    @Override
    public String grammarCheck(String text, String language) {
        final StringBuilder errors = new StringBuilder();
        try {
            // Run LanguageTool's default pattern rules over the HTML-stripped
            // text and render each match as an HTML fragment.
            JLanguageTool languageTool = new JLanguageTool(Language.getLanguageForShortName(language));
            languageTool.activateDefaultPatternRules();
            String plainText = stripHtmlTags(text, true);
            for (RuleMatch match : languageTool.check(plainText)) {
                errors.append("Potential error at line " + match.getEndLine() + ", column " + match.getColumn()
                        + ": " + stripHtmlTags(match.getMessage(), true) + "<br/>");
                errors.append("&nbsp;&nbsp;Suggested correction: " + match.getSuggestedReplacements());
                errors.append("<br/>");
            }
        } catch (Exception e) {
            logger.log(Level.SEVERE, "An error occured while checking grammar", e);
        }
        return errors.toString();
    }

    /*
     * (non-Javadoc)
     * 
     * @see com.appspot.researchcraft.server.service.AnalysisService#summarizeText(java.lang.String)
     */
    @Override
    public String summarizeText(String text, int numSentences) {
        try {
            // Configure the Lucene-based summarizer with the same cutoffs the
            // service has always used, then summarize the HTML-stripped text.
            LuceneSummarizer summarizer = new LuceneSummarizer();
            summarizer.setAnalyzer(new SummaryAnalyzer());
            summarizer.setNumSentences(numSentences);
            summarizer.setTopTermCutoff(0.5F);
            summarizer.setSentenceDeboost(0.2F);
            summarizer.init();
            return summarizer.summarize(stripHtmlTags(text, true));
        } catch (Exception e) {
            // On any failure, fall back to returning the input unchanged.
            logger.log(Level.SEVERE, "An error occured while summarizing text.", e);
            return text;
        }
    }

    /*
     * (non-Javadoc)
     * 
     * @see com.appspot.researchcraft.server.service.AnalysisService#syntaxHighlight(java.lang.String,
     *      java.lang.String)
     */
    @Override
    public String syntaxHighlight(String title, String text, ProgrammingLanguage language) {
        // NOTE(review): the renderer implementation is commented out, so this
        // method currently returns the input text unchanged.
        try {
            //         Renderer renderer = new EvernoteRenderer(XhtmlRendererFactory
            //               .getRenderer(language.value()));
            //
            //         return renderer.highlight(title, stripHtmlTags(text, false),
            //               ApplicationConstants.CONTENT_ENCODING, false);
        } catch (Exception e) {
            logger.log(Level.SEVERE, "An error occured while highlighting code.", e);
        }
        return text;
    }

    /*
     * (non-Javadoc)
     * 
     * @see com.appspot.researchcraft.server.service.AnalysisService#translateText(java.lang.String,
     *      java.lang.String)
     */
    @Override
    public String translateText(String text, TextLanguage language) {
        try {
            // Issue a Bing translation request from English to the target language.
            BingSearchClient client =
                    BingSearchServiceClientFactory.newInstance().createBingSearchClient();
            SearchRequestBuilder requestBuilder = client.newSearchRequestBuilder();
            requestBuilder.withAppId(ApplicationConstants.BING_CONSUMER_KEY);
            requestBuilder.withQuery(text);
            requestBuilder.withSourceType(SourceType.TRANSLATION);
            requestBuilder.withTranslationRequestSourceLanguage("en");
            requestBuilder.withTranslationRequestTargetLanguage(language.value());
            requestBuilder.withVersion("2.2");

            // Concatenate every translated segment in the response.
            SearchResponse response = client.search(requestBuilder.getResult());
            StringBuilder translated = new StringBuilder();
            for (TranslationResult part : response.getTranslation().getResults()) {
                translated.append(part.getTranslatedTerm());
            }
            return translated.toString();
        } catch (Exception e) {
            // On any failure, fall back to the untranslated input.
            logger.log(Level.SEVERE, "An error occured while translating text.", e);
            return text;
        }
    }

    /*
     * (non-Javadoc)
     * 
     * @see com.appspot.researchcraft.server.service.AnalysisService#textToSpeech(java.lang.String,
     *      com.appspot.researchcraft.shared.TextLanguage)
     */
    public List<String> textToSpeech(String text, TextLanguage language) {
        List<String> streams = new ArrayList<String>();
        try {
            Speak.setKey(ApplicationConstants.BING_CONSUMER_KEY);
            int requestCount = 0;
            // Bing limits per-request text size, so the tag-stripped note is
            // spoken in segments; cap the number of requests issued per note.
            for (String segment : splitTextIntoSegments(stripHtmlTags(text, true),
                    ApplicationConstants.BING_MAX_TEXT_LENGTH)) {
                streams.add(Speak.execute(segment, SpokenDialect.fromString(language.dialect())));
                requestCount++;
                // Off-by-one fix: the old check (counter > MAX) broke only
                // AFTER issuing MAX + 1 requests; '>=' enforces the cap.
                if (requestCount >= ApplicationConstants.BING_MAX_REQUESTS_PER_NOTE) {
                    break;
                }
            }
        } catch (Exception e) {
            logger.log(Level.SEVERE, "An error occured while text to speech conversion.", e);
        }

        return streams;
    }

    /**
     * Splits text into segments no longer than maxLength characters, preferring
     * sentence boundaries.
     *
     * <p>Sentences are packed greedily into a segment until adding the next one
     * would exceed maxLength. A single sentence longer than maxLength is now
     * flushed in document order and emitted in maxLength-sized chunks; the
     * previous implementation emitted its truncated head BEFORE the pending
     * buffer (reordering the text) and silently dropped everything past the
     * first maxLength characters.
     *
     * @param inputText the input text
     * @param maxLength the maximum length of each returned segment
     * @return the ordered list of segments; empty if tokenization fails
     */
    private List<String> splitTextIntoSegments(String inputText, int maxLength) {
        List<String> tokens = new ArrayList<String>();
        try {
            SentenceTokenizer sentenceTokenizer = new SentenceTokenizer();
            sentenceTokenizer.setText(inputText);
            String sentence = null;
            StringBuilder builder = new StringBuilder();
            while ((sentence = sentenceTokenizer.nextSentence()) != null) {
                if (sentence.length() <= maxLength) {
                    if (builder.length() + sentence.length() > maxLength) {
                        tokens.add(builder.toString());
                        builder = new StringBuilder();
                    }
                    builder.append(sentence);
                } else {
                    // Flush pending content first so segments stay in document
                    // order, then chunk the over-long sentence without loss.
                    if (builder.length() > 0) {
                        tokens.add(builder.toString());
                        builder = new StringBuilder();
                    }
                    for (int start = 0; start < sentence.length(); start += maxLength) {
                        tokens.add(sentence.substring(start,
                                Math.min(start + maxLength, sentence.length())));
                    }
                }
            }
            if (builder.length() > 0) {
                tokens.add(builder.toString());
            }
        } catch (Exception e) {
            logger.log(Level.SEVERE, "An error occured tokenizing text.", e);
        }
        return tokens;
    }

    /*
     * (non-Javadoc)
     * 
     * @see com.appspot.researchcraft.server.service.AnalysisService#classifyText(java.lang.String,
     *      java.lang.String, java.util.List, java.lang.String,
     *      java.lang.String)
     */
    @Override
    public List<Classification> classifyText(String title, String text, List<String> tags, String classifierName,
            String classifyUserName) {
        List<Classification> classifications = new ArrayList<Classification>();
        try {
            final UClassifyClientFactory factory = UClassifyClientFactory
                    .newInstance(ApplicationConstants.UCLASSIFY_READ_KEY, ApplicationConstants.UCLASSIFY_WRITE_KEY);
            final UClassifyClient client = factory.createUClassifyClient();
            // The title and the tag-stripped body are classified as two texts.
            Map<String, com.uclassify.api._1.responseschema.Classification> classificationsMap = client
                    .classify(classifyUserName, classifierName, Arrays.asList(title, stripHtmlTags(text, true)));
            // TODO: somehow average the probability across the two texts.
            // Iterate entries directly instead of keySet() + get() lookups.
            for (Map.Entry<String, com.uclassify.api._1.responseschema.Classification> entry : classificationsMap
                    .entrySet()) {
                for (com.uclassify.api._1.responseschema.Class clazz : entry.getValue().getClazz()) {
                    Classification element = new Classification();
                    element.setCategory(clazz.getClassName());
                    element.setProbability(clazz.getP());
                    classifications.add(element);
                }
            }
        } catch (Exception e) {
            logger.log(Level.SEVERE, "An error occured classifying text.", e);
        }
        return classifications;
    }

    /*
     * (non-Javadoc)
     * 
     * @see com.appspot.researchcraft.server.service.AnalysisService#applyTemplate(java.lang.String,
     *      java.lang.String, java.io.Writer)
     */
    @Override
    public void applyTemplate(String templateContent, String inputText, Writer out) {
        try {
            // Collect the input's sentences so the template can reference them.
            final List<String> sentenceList = new ArrayList<String>();
            final SentenceTokenizer tokenizer = new SentenceTokenizer();
            tokenizer.setText(inputText);
            for (String s = tokenizer.nextSentence(); s != null; s = tokenizer.nextSentence()) {
                sentenceList.add(s);
            }
            // Expose the raw text and the sentence list as template parameters.
            final Map<String, Object> parameters = new HashMap<String, Object>();
            parameters.put("content", inputText);
            parameters.put("sentences", sentenceList);
            new TemplateManager().applyTemplate(parameters, new StringReader(templateContent), out);
        } catch (Exception e) {
            logger.log(Level.SEVERE, "An error occured applying template.", e);
        }
    }

    /* (non-Javadoc)
     * @see com.appspot.socialinquirer.server.service.AnalysisService#analyzeText(java.lang.String)
     */
    @Override
    public ContentAnalysis analyzeText(String text) {
        // Unimplemented stub: always returns null. Callers must tolerate a
        // null ContentAnalysis until this is implemented.
        // TODO Auto-generated method stub
        return null;
    }

    /**
     * Closes a RAM directory as best-effort cleanup.
     *
     * <p>Failures are logged at WARNING (the old code swallowed them silently)
     * but never propagated, so cleanup in finally blocks stays safe.
     *
     * @param directory the directory to close; may be null
     */
    private void closeRAMDirectory(RAMDirectory directory) {
        if (directory != null) {
            try {
                directory.close();
            } catch (Exception e) {
                logger.log(Level.WARNING, "Error occurred while closing RAM directory.", e);
            }
        }
    }

    /**
     * Closes an index searcher as best-effort cleanup.
     *
     * <p>Failures are logged at WARNING (the old code swallowed them silently)
     * but never propagated, so cleanup in finally blocks stays safe.
     *
     * @param searcher the searcher to close; may be null
     */
    private void closeIndexSearcher(IndexSearcher searcher) {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (Exception e) {
                logger.log(Level.WARNING, "Error occurred while closing index searcher.", e);
            }
        }
    }

    /**
     * Closes an index reader as best-effort cleanup.
     *
     * <p>Failures are logged at WARNING (the old code swallowed them silently)
     * but never propagated, so cleanup in finally blocks stays safe.
     *
     * @param reader the reader to close; may be null
     */
    private void closeIndexReader(IndexReader reader) {
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception e) {
                logger.log(Level.WARNING, "Error occurred while closing index reader.", e);
            }
        }
    }

    /**
     * Creates the analyzer used for English text indexing and search.
     *
     * <p>Currently a plain {@code StandardAnalyzer}; a Snowball-stemming
     * variant once lived here but was disabled, so the dead commented-out
     * code has been removed.
     *
     * @return a new StandardAnalyzer for the current Lucene version
     */
    private static Analyzer createEnglishAnalyzer() {
        return new StandardAnalyzer(Version.LUCENE_CURRENT);
    }

    // private static Analyzer createHtmlAnalyzer() {
    // return new CustomAnalyzer() {
    // public TokenStream tokenStream(String fieldName, Reader reader) {
    // TokenStream result = super.tokenStream(fieldName, new
    // HTMLStripReader(reader));
    // return result;
    // }
    // };
    // }

    /**
     * Strips HTML tags from the given text.
     *
     * <p>When {@code shrink} is false, {@code <br/>} tags are first turned into
     * newlines so line structure survives, and the result is trimmed. When
     * {@code shrink} is true, every run of whitespace in the stripped result is
     * collapsed to a single space.
     *
     * @param text the (possibly HTML) input text
     * @param shrink whether to collapse whitespace in the result
     * @return the tag-free text, or the input unchanged if stripping fails
     */
    protected String stripHtmlTags(String text, boolean shrink) {
        final StringWriter stripped = new StringWriter();
        try {
            if (!shrink) {
                text = text.replaceAll("<br/>", "\n");
            }
            copyWriters(new HTMLStripReader(new StringReader(text)), stripped);
        } catch (Exception e) {
            logger.log(Level.SEVERE, "Error occurred while stripping html.", e);
            return text;
        }
        if (shrink) {
            return stripped.toString().replaceAll("\\s+", " ");
        }
        return stripped.toString().trim();
    }

    /**
     * Copies every character from a reader to a writer through a fixed 1 KiB
     * buffer; the caller retains ownership of both streams.
     *
     * @param in the source reader
     * @param out the destination writer
     * @throws IOException Signals that an I/O exception has occurred.
     */
    private void copyWriters(Reader in, Writer out) throws IOException {
        final char[] chunk = new char[1024];
        for (int read = in.read(chunk); read > 0; read = in.read(chunk)) {
            out.write(chunk, 0, read);
        }
    }

    /**
     * Gets the preferred answer for a question.
     *
     * <p>Returns the accepted answer if it is not excluded; otherwise the
     * non-excluded answer with the highest score, or null if none qualify.
     *
     * @param content the content (currently unused in the selection)
     * @param question the question; may be null
     * @param excludedIds answer ids that must not be returned
     * @return the preferred answer, or null
     */
    protected Answer getPreferredAnswer(String content, Question question, List<Long> excludedIds) {
        // currently just return the answer with the highest votes
        Answer preferredAnswer = null;
        if (question != null) {
            for (Answer answer : question.getAnswers()) {
                if (excludedIds.contains(answer.getAnswerId())) {
                    continue;
                }
                // Value equality instead of '==': getAnswerId() boxes into
                // excludedIds.contains(..), so if the id getters return Long,
                // '==' compared references and could miss the accepted answer
                // for ids outside the Long cache (-128..127). This form also
                // compiles unchanged if the getters return primitive long.
                Long answerId = answer.getAnswerId();
                Long acceptedId = question.getAcceptedAnswerId();
                if (answerId != null && answerId.equals(acceptedId)) {
                    return answer;
                }
                if (preferredAnswer == null || answer.getScore() > preferredAnswer.getScore()) {
                    preferredAnswer = answer;
                }
            }
        }
        return preferredAnswer;
    }

    /**
     * URL-encodes a string using UTF-8.
     *
     * @param original the string to encode
     * @return the encoded string, or the original unchanged if encoding fails
     */
    private String encodeUrl(String original) {
        String encoded;
        try {
            encoded = URLEncoder.encode(original, "UTF-8");
        } catch (Exception e) {
            // Degrade gracefully: log and hand back the raw value.
            logger.log(Level.WARNING, "Error occurred while encoding " + original, e);
            encoded = original;
        }
        return encoded;
    }

    /**
     * Rebuilds a simple HTML string from a parsed body: the H1 heading (if
     * present) followed by each paragraph's content wrapped in {@code <p>} tags.
     *
     * @param body the parsed body
     * @return the reassembled HTML content (empty string if body has no parts)
     */
    @SuppressWarnings("unused")
    private String getContentFromBody(Body body) {
        final StringBuilder html = new StringBuilder();
        final String heading = body.getH1();
        if (heading != null) {
            html.append("<h1>").append(heading).append("</h1>");
        }
        if (body.getPS() != null) {
            for (P paragraph : body.getPS()) {
                html.append("<p>");
                for (Serializable fragment : paragraph.getContent()) {
                    html.append(fragment.toString());
                }
                html.append("</p>");
            }
        }

        return html.toString();
    }
}