// Java tutorial
/* * Copyright 2012 Nabeel Mukhtar * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ package com.appspot.socialinquirer.server.service.impl; import iweb2.ch3.collaborative.data.BaseDataset; import iweb2.ch3.collaborative.data.ContentItem; import iweb2.ch3.collaborative.model.Content; import iweb2.ch3.collaborative.model.Dataset; import iweb2.ch3.collaborative.model.Item; import iweb2.ch3.collaborative.model.SimilarItem; import iweb2.ch3.collaborative.model.SimilarUser; import iweb2.ch3.collaborative.recommender.Delphi; import iweb2.ch3.collaborative.similarity.RecommendationType; import java.io.File; import java.io.IOException; import java.io.Reader; import java.io.Serializable; import java.io.StringReader; import java.io.StringWriter; import java.io.Writer; import java.net.URLEncoder; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.logging.Level; import javax.script.Bindings; import javax.script.ScriptContext; import javax.script.ScriptEngine; import javax.script.ScriptEngineManager; import net.sf.jtmt.summarizers.LuceneSummarizer; import net.sf.jtmt.summarizers.SummaryAnalyzer; import net.sf.jtmt.tokenizers.SentenceTokenizer; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; 
import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter.MaxFieldLength; import org.apache.lucene.index.TermFreqVector; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Version; import org.apache.solr.analysis.HTMLStripReader; import org.apache.tools.ant.Project; import org.w3._1999.xhtml.Body; import org.w3._1999.xhtml.P; import com.appspot.socialinquirer.server.constant.ApplicationConstants; import com.appspot.socialinquirer.server.service.AnalysisService; import com.appspot.socialinquirer.server.service.template.TemplateManager; import com.appspot.socialinquirer.shared.ProgrammingLanguage; import com.appspot.socialinquirer.shared.TextLanguage; import com.appspot.socialinquirer.shared.dto.Classification; import com.appspot.socialinquirer.shared.dto.Code; import com.appspot.socialinquirer.shared.dto.ContentAnalysis; import com.appspot.socialinquirer.shared.dto.Paper; import com.appspot.socialinquirer.shared.dto.Tag; import com.appspot.socialinquirer.shared.dto.User; import com.google.code.bing.search.client.BingSearchClient; import com.google.code.bing.search.client.BingSearchClient.SearchRequestBuilder; import com.google.code.bing.search.client.BingSearchServiceClientFactory; import com.google.code.bing.search.schema.SearchResponse; import com.google.code.bing.search.schema.SourceType; import com.google.code.bing.search.schema.translation.TranslationResult; import com.google.code.stackexchange.schema.Answer; import com.google.code.stackexchange.schema.Question; import com.google.code.uclassify.client.UClassifyClient; import com.google.code.uclassify.client.UClassifyClientFactory; import com.googleapis.ajax.schema.BlogResult; import com.googleapis.ajax.services.BlogSearchQuery; import com.googleapis.ajax.services.GoogleSearchQueryFactory; import 
com.memetix.mst.language.SpokenDialect;
import com.memetix.mst.speak.Speak;
import com.microsoft.research.Author;
import com.microsoft.research.Publication;
import com.microsoft.research.query.AcademicSearchQueryFactory;
import com.microsoft.research.query.PublicationSearchQuery;
import com.swabunga.spell.engine.SpellDictionary;
import com.swabunga.spell.engine.SpellDictionaryHashMap;
import com.swabunga.spell.engine.Word;
import com.swabunga.spell.event.SpellCheckEvent;
import com.swabunga.spell.event.SpellCheckListener;
import com.swabunga.spell.event.SpellChecker;
import com.swabunga.spell.event.StringWordTokenizer;
import de.danielnaber.languagetool.JLanguageTool;
import de.danielnaber.languagetool.Language;
import de.danielnaber.languagetool.rules.RuleMatch;

/**
 * Default implementation of {@link AnalysisService}.
 *
 * <p>Aggregates a collection of third-party text-analysis facilities behind one
 * service facade: content-based similarity (iweb2/Delphi), Lucene term
 * extraction and summarization, Jazzy spell checking, LanguageTool grammar
 * checking, Bing translation / text-to-speech, uClassify classification,
 * Google blog search and Microsoft Academic publication search.
 *
 * <p>NOTE(review): most public methods catch {@code Exception} broadly, log it
 * and return a best-effort/empty result rather than propagate — callers appear
 * to rely on that, so the pattern is preserved here.
 */
public class AnalysisServiceImpl extends BaseService implements AnalysisService {

    /**
     * Instantiates a new analysis service impl.
     */
    public AnalysisServiceImpl() {
        super("analysis-service");
    }

    /*
     * (non-Javadoc)
     *
     * @see com.appspot.bitlyminous.service.RecommendationService#getUserSimilarities(com.appspot.bitlyminous.entity.User,
     * java.util.List)
     */
    public List<Map.Entry<User, Double>> getUserSimilarities(User user, List<User> relatedUsers) {
        Dataset ds = createUserDataSet(user, relatedUsers);
        Delphi delphi = new Delphi(ds, RecommendationType.USER_CONTENT_BASED);
        SimilarUser[] similarUsers = delphi.findSimilarUsers(createUserFromUser(user));
        Map<User, Double> similaritiesMap = new HashMap<User, Double>();
        for (SimilarUser similarUser : similarUsers) {
            similaritiesMap.put(createUserFromUser(similarUser.getUser()), similarUser.getSimilarity());
        }
        // Sort descending by similarity score before returning.
        List<Map.Entry<User, Double>> similarities =
                new ArrayList<Map.Entry<User, Double>>(similaritiesMap.entrySet());
        Collections.sort(similarities, new SimilarityComparatorDesc<User>());
        return similarities;
    }

    /*
     * (non-Javadoc)
     *
     * @see com.appspot.bitlyminous.service.RecommendationService#getTextSimilarities(java.lang.String,
     * java.util.List)
     */
    public List<Map.Entry<String, Double>> getTextSimilarities(String text, List<String> relatedTexts) {
        Dataset ds = createTextDataSet(text, relatedTexts);
        Delphi delphi = new Delphi(ds, RecommendationType.ITEM_CONTENT_BASED);
        SimilarItem[] similarItems = delphi.findSimilarItems(createItemFromText(text));
        Map<String, Double> similaritiesMap = new HashMap<String, Double>();
        for (SimilarItem similarItem : similarItems) {
            similaritiesMap.put(createTextFromItem(similarItem.getItem()), similarItem.getSimilarity());
        }
        // Sort descending by similarity score before returning.
        List<Map.Entry<String, Double>> similarities =
                new ArrayList<Map.Entry<String, Double>>(similaritiesMap.entrySet());
        Collections.sort(similarities, new SimilarityComparatorDesc<String>());
        return similarities;
    }

    /**
     * Creates a content dataset from a reference text plus related texts.
     *
     * @param refText the reference text
     * @param relatedTexts the related texts
     * @return the dataset
     */
    protected Dataset createTextDataSet(String refText, List<String> relatedTexts) {
        BaseDataset dataset = new BaseDataset();
        dataset.addItem(createItemFromText(refText));
        for (String text : relatedTexts) {
            dataset.addItem(createItemFromText(text));
        }
        return dataset;
    }

    /**
     * Creates a content dataset from a reference project plus related projects.
     *
     * <p>NOTE(review): {@link #createItemFromProject(Project)} currently returns
     * {@code null}, so the produced dataset holds null items — confirm before use.
     *
     * @param refProject the reference project
     * @param projects the related projects
     * @return the dataset
     */
    protected Dataset createProjectDataSet(Project refProject, List<Project> projects) {
        BaseDataset dataset = new BaseDataset();
        dataset.addItem(createItemFromProject(refProject));
        for (Project url : projects) {
            dataset.addItem(createItemFromProject(url));
        }
        return dataset;
    }

    /**
     * Creates a user dataset from a reference user plus related users.
     *
     * @param refUser the reference user
     * @param users the related users
     * @return the dataset
     */
    protected Dataset createUserDataSet(User refUser, List<User> users) {
        BaseDataset dataset = new BaseDataset();
        dataset.add(createUserFromUser(refUser));
        for (User user : users) {
            dataset.add(createUserFromUser(user));
        }
        return dataset;
    }

    /**
     * Creates the item from project.
     *
     * @param project the project
     * @return the item, currently always {@code null} (not yet implemented)
     */
    protected Item createItemFromProject(Project project) {
        // TODO: not implemented — previously built a ContentItem from the project URL/content.
        return null;
    }

    /**
     * Creates the project from item.
     *
     * @param item the item
     * @return the project, currently always {@code null} (not yet implemented)
     */
    protected Project createProjectFromItem(Item item) {
        // TODO: not implemented — previously rebuilt a Url entity from the ContentItem.
        return null;
    }

    /**
     * Creates a collaborative-filtering item from a plain text, using the text's
     * hash code as a stable identifier.
     *
     * @param text the text
     * @return the item
     */
    protected Item createItemFromText(String text) {
        return new ContentItem(text.hashCode(), "item-" + text.hashCode(),
                new Content("text-" + text.hashCode(), text));
    }

    /**
     * Extracts the raw text back out of a content item.
     *
     * @param item the item (must be a {@link ContentItem})
     * @return the text
     */
    protected String createTextFromItem(Item item) {
        return ((ContentItem) item).getItemContent().getText();
    }

    /**
     * Converts an application user into a collaborative-filtering model user.
     *
     * @param user the application user
     * @return the model user (id derived from the user key's hash code)
     */
    protected iweb2.ch3.collaborative.model.User createUserFromUser(User user) {
        iweb2.ch3.collaborative.model.User model = new iweb2.ch3.collaborative.model.User(
                user.getKey().hashCode(), user.getStackExchangeId());
        // Tag-based ratings were previously attached here; intentionally disabled.
        return model;
    }

    /**
     * Converts a collaborative-filtering model user back into an application user.
     *
     * <p>NOTE(review): only the StackExchange id survives the round trip; all
     * other entity fields are left unset.
     *
     * @param user the model user
     * @return the application user
     */
    protected User createUserFromUser(iweb2.ch3.collaborative.model.User user) {
        User entity = new User();
        entity.setStackExchangeId(user.getName());
        return entity;
    }

    /**
     * The main method. Ad-hoc manual test driver; intentionally does nothing in
     * production (the exploratory snippets it used to hold were commented out).
     *
     * @param args the arguments
     */
    public static void main(String[] args) {
        // Intentionally empty — see VCS history for the removed exploratory snippets
        // (text similarity, summarization, Delicious and StackExchange experiments).
    }

    /*
     * (non-Javadoc)
     *
     * @see com.appspot.researchcraft.server.service.AnalysisService#evaluate(java.lang.String,
     * java.lang.String)
     */
    @Override
    public String evaluate(String code, ProgrammingLanguage language) {
        StringBuilder builder = new StringBuilder();
        try {
            // TODO-NM; Change this to use ideone.
            ScriptEngineManager mgr = new ScriptEngineManager();
            ScriptEngine engine = mgr.getEngineByName(language.value());
            if (engine == null) {
                // Guard: getEngineByName returns null for unknown languages; without
                // this check the subsequent eval() threw an NPE that was only logged.
                logger.log(Level.SEVERE, "No script engine registered for language: " + language.value());
                return builder.toString();
            }
            Object obj = engine.eval(stripHtmlTags(code, false));
            if (obj != null) {
                builder.append(obj);
                builder.append("\n");
            }
            // Echo back the user-defined bindings as pseudo variable declarations.
            Bindings bindings = engine.getBindings(ScriptContext.ENGINE_SCOPE);
            for (Entry<String, Object> entry : bindings.entrySet()) {
                if (isValidKey(entry.getKey()) && isValidValue(entry.getValue())) {
                    builder.append(entry.getValue().getClass().getSimpleName());
                    builder.append(" ");
                    builder.append(entry.getKey());
                    builder.append(" = ");
                    builder.append(entry.getValue());
                    builder.append(";\n");
                }
            }
        } catch (Exception e) {
            logger.log(Level.SEVERE, "An error occured while evaluating code.", e);
        }
        return builder.toString();
    }

    /**
     * Checks whether a script binding key is a user-defined variable (filters
     * engine-internal bindings such as BeanShell bookkeeping entries).
     *
     * @param key the binding key
     * @return true, if is valid key
     */
    private boolean isValidKey(String key) {
        if (key.startsWith("javax.script")) {
            return false;
        } else if (key.equals("org_beanshell_engine_namespace")) {
            return false;
        } else if (key.equals("bsh")) {
            return false;
        }
        return true;
    }

    /**
     * Checks if is valid value. Currently accepts every value; kept as an
     * extension point symmetrical to {@link #isValidKey(String)}.
     *
     * @param value the value
     * @return true, if is valid value
     */
    private boolean isValidValue(Object value) {
        return true;
    }

    /*
     * (non-Javadoc)
     *
     * @see com.appspot.researchcraft.server.service.AnalysisService#getTermVector(java.lang.String)
     */
    @Override
    public List<Tag> getTermVector(String title, String text) {
        RAMDirectory directory = null;
        IndexReader reader = null;
        IndexWriter writer = null;
        Map<String, Tag> tagsMap = new HashMap<String, Tag>();
        try {
            // Index the title and the tag-stripped body into an in-memory index,
            // then read the term-frequency vectors back out.
            directory = new RAMDirectory();
            writer = new IndexWriter(directory, new StandardAnalyzer(Version.LUCENE_CURRENT), true,
                    MaxFieldLength.UNLIMITED);
            Document doc = new Document();
            doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.add(new Field("body", stripHtmlTags(text, true), Field.Store.YES, Field.Index.ANALYZED,
                    Field.TermVector.YES));
            writer.addDocument(doc);
            writer.close();
            writer = null; // closed successfully; don't close again in finally
            reader = IndexReader.open(directory, true);
            int numDocs = reader.maxDoc();
            for (int i = 0; i < numDocs; i++) {
                TermFreqVector termFreqVector = reader.getTermFreqVector(i, "title");
                pullTags(termFreqVector, tagsMap);
                termFreqVector = reader.getTermFreqVector(i, "body");
                pullTags(termFreqVector, tagsMap);
            }
        } catch (Exception e) {
            logger.log(Level.SEVERE, "An error occured while pulling tags from text.", e);
        } finally {
            // Fix: the writer previously leaked if addDocument() threw before close().
            if (writer != null) {
                try {
                    writer.close();
                } catch (Exception ignored) {
                    // best-effort cleanup; nothing useful to do on close failure
                }
            }
            closeIndexReader(reader);
            closeRAMDirectory(directory);
        }
        ArrayList<Tag> tagsList = new ArrayList<Tag>(tagsMap.values());
        Collections.sort(tagsList, new Comparator<Tag>() {
            @Override
            public int compare(Tag o1, Tag o2) {
                // Descending by frequency. Integer.compare avoids the overflow the
                // previous "o2 - o1" subtraction could hit for extreme counts.
                return Integer.compare(o2.getFreqency(), o1.getFreqency());
            }
        });
        return tagsList;
    }

    /**
     * Merges the terms of one term-frequency vector into the shared tag map,
     * accumulating frequencies for terms already present.
     *
     * @param termFreqVector the term freq vector (may be null; then a no-op)
     * @param tagsMap the tags map, keyed by term
     */
    private void pullTags(TermFreqVector termFreqVector, Map<String, Tag> tagsMap) {
        if (termFreqVector != null) {
            String[] terms = termFreqVector.getTerms();
            int[] termFrequencies = termFreqVector.getTermFrequencies();
            for (int j = 0; j < termFrequencies.length; j++) {
                Tag tag = tagsMap.get(terms[j]);
                if (tag == null) {
                    tagsMap.put(terms[j], new Tag(terms[j], termFrequencies[j]));
                } else {
                    tag.setFreqency(tag.getFreqency() + termFrequencies[j]);
                }
            }
        }
    }

    /*
     * (non-Javadoc)
     *
     * @see com.appspot.researchcraft.server.service.AnalysisService#recommendText(java.lang.String,
     * java.util.List)
     */
    @Override
    public String recommendText(String title, String text, List<String> tags) {
        // TODO Auto-generated method stub
        return null;
    }

    /*
     * (non-Javadoc)
     *
     * @see com.appspot.researchcraft.server.service.AnalysisService#showRelatedBlogs(java.lang.String,
     * java.lang.String, java.util.List)
     */
    @Override
    public List<com.appspot.socialinquirer.shared.dto.Answer> showRelatedBlogs(String title, String text,
            List<String> tags) {
        List<com.appspot.socialinquirer.shared.dto.Answer> blogs =
                new ArrayList<com.appspot.socialinquirer.shared.dto.Answer>();
        try {
            GoogleSearchQueryFactory GOOGLE_API_FACTORY =
                    GoogleSearchQueryFactory.newInstance(ApplicationConstants.GOOGLE_API_KEY);
            BlogSearchQuery blogQuery = GOOGLE_API_FACTORY.newBlogSearchQuery();
            blogQuery.setReferrer(ApplicationConstants.GOOGLE_API_REFERER);
            // Only the title is used as the search query; body text and tags are ignored.
            List<BlogResult> blogResponse = blogQuery.withQuery(title).list();
            for (BlogResult blogResult : blogResponse) {
                com.appspot.socialinquirer.shared.dto.Answer blog =
                        new com.appspot.socialinquirer.shared.dto.Answer();
                blog.setTitle(blogResult.getTitle());
                blog.setAuthor(blogResult.getAuthor());
                blog.setContent(blogResult.getContent());
                blog.setPublishedDate(blogResult.getPublishedDate());
                blog.setUrl(blogResult.getPostUrl());
                blogs.add(blog);
            }
        } catch (Exception e) {
            logger.log(Level.SEVERE, "An error occured while checking related blogs.", e);
        }
        return blogs;
    }

    /*
     * (non-Javadoc)
     *
     * @see com.appspot.researchcraft.server.service.AnalysisService#showRelatedCode(java.lang.String,
     * java.lang.String, java.util.List)
     */
    @Override
    public List<Code> showRelatedCode(String title, String text, List<String> tags) {
        // TODO: Google Code Search integration was disabled; always returns an empty list.
        List<Code> codes = new ArrayList<Code>();
        return codes;
    }

    /*
     * (non-Javadoc)
     *
     * @see com.appspot.researchcraft.server.service.AnalysisService#showRelatedPapers(java.lang.String,
     * java.lang.String, java.util.List)
     */
    @Override
    public List<Paper> showRelatedPapers(String title, String text, List<String> tags) {
        List<Paper> papers = new ArrayList<Paper>();
        try {
            // Springer metadata integration was removed; Microsoft Academic Search
            // is the active provider.
            PublicationSearchQuery query = AcademicSearchQueryFactory
                    .newInstance(ApplicationConstants.MICROSOFT_APP_ID).newPublicationSearchQuery();
            List<Publication> publications =
                    query.withFullTextQuery(title).withStartIndex(1).withEndIndex(5).list();
            for (Publication publication : publications) {
                Paper paper = new Paper();
                paper.setContent(publication.getAbstract());
                paper.setDoi(publication.getDOI());
                paper.setIdentifier(String.valueOf(publication.getID()));
                paper.setTitle(publication.getTitle());
                for (Author author : publication.getAuthor()) {
                    paper.getCreators().add(author.getFirstName() + " " + author.getLastName());
                }
                paper.setCopyright(paper.getCreators().toString());
                if (!publication.getFullVersionURL().isEmpty()) {
                    paper.setUrl(publication.getFullVersionURL().get(0));
                }
                papers.add(paper);
            }
        } catch (Exception e) {
            logger.log(Level.SEVERE, "An error occured while searching for papers.", e);
        }
        return papers;
    }

    /*
     * (non-Javadoc)
     *
     * @see com.appspot.researchcraft.server.service.AnalysisService#spellCheck(java.lang.String,
     * java.lang.String)
     */
    @Override
    public String spellCheck(String text, String language) {
        final StringBuilder errors = new StringBuilder();
        try {
            // NOTE(review): getResource(...).getFile() only works when the dictionary
            // is an exploded file on disk, not inside a jar — confirm deployment layout.
            SpellDictionary dictionary = new SpellDictionaryHashMap(
                    new File(getClass().getResource("/resources/jazzy/eng_com.dic").getFile()));
            SpellChecker spellCheck = new SpellChecker(dictionary);
            spellCheck.addSpellCheckListener(new SpellCheckListener() {
                @SuppressWarnings("unchecked")
                @Override
                public void spellingError(SpellCheckEvent event) {
                    List<Word> suggestions = event.getSuggestions();
                    // Single-character "words" are ignored as noise.
                    if (event.getInvalidWord() != null && event.getInvalidWord().length() > 1) {
                        if (suggestions.size() > 0) {
                            errors.append("Misspelt Word: " + event.getInvalidWord());
                            for (Iterator<Word> suggestedWord = suggestions.iterator(); suggestedWord.hasNext();) {
                                errors.append(" Suggested Word: " + suggestedWord.next());
                            }
                        } else {
                            errors.append("Misspelt Word: " + event.getInvalidWord());
                            errors.append(" No suggestions");
                        }
                        errors.append("<br/>");
                    }
                }
            });
            spellCheck.checkSpelling(new StringWordTokenizer(stripHtmlTags(text, true)));
        } catch (Exception e) {
            logger.log(Level.SEVERE, "An error occured while checking spelling.", e);
        }
        return errors.toString();
    }

    /*
     * (non-Javadoc)
     *
     * @see com.appspot.researchcraft.server.service.AnalysisService#grammarCheck(java.lang.String,
     * java.lang.String)
     */
    @Override
    public String grammarCheck(String text, String language) {
        final StringBuilder errors = new StringBuilder();
        try {
            JLanguageTool langTool = new JLanguageTool(Language.getLanguageForShortName(language));
            langTool.activateDefaultPatternRules();
            List<RuleMatch> matches = langTool.check(stripHtmlTags(text, true));
            for (RuleMatch match : matches) {
                errors.append("Potential error at line " + match.getEndLine() + ", column "
                        + match.getColumn() + ": " + stripHtmlTags(match.getMessage(), true) + "<br/>");
                errors.append(" Suggested correction: " + match.getSuggestedReplacements());
                errors.append("<br/>");
            }
        } catch (Exception e) {
            logger.log(Level.SEVERE, "An error occured while checking grammar", e);
        }
        return errors.toString();
    }

    /*
     * (non-Javadoc)
     *
     * @see com.appspot.researchcraft.server.service.AnalysisService#summarizeText(java.lang.String)
     */
    @Override
    public String summarizeText(String text, int numSentences) {
        try {
            LuceneSummarizer summarizer = new LuceneSummarizer();
            summarizer.setAnalyzer(new SummaryAnalyzer());
            summarizer.setNumSentences(numSentences);
            summarizer.setTopTermCutoff(0.5F);
            summarizer.setSentenceDeboost(0.2F);
            summarizer.init();
            return summarizer.summarize(stripHtmlTags(text, true));
        } catch (Exception e) {
            logger.log(Level.SEVERE, "An error occured while summarizing text.", e);
        }
        // On failure fall back to the original text rather than an empty summary.
        return text;
    }

    /*
     * (non-Javadoc)
     *
     * @see com.appspot.researchcraft.server.service.AnalysisService#syntaxHighlight(java.lang.String,
     * java.lang.String)
     */
    @Override
    public String syntaxHighlight(String title, String text, ProgrammingLanguage language) {
        // TODO: highlighting (EvernoteRenderer) is disabled; the text is returned unchanged.
        return text;
    }

    /*
     * (non-Javadoc)
     *
     * @see com.appspot.researchcraft.server.service.AnalysisService#translateText(java.lang.String,
     * java.lang.String)
     */
    @Override
    public String translateText(String text, TextLanguage language) {
        try {
            BingSearchServiceClientFactory factory = BingSearchServiceClientFactory.newInstance();
            BingSearchClient client = factory.createBingSearchClient();
            SearchRequestBuilder builder = client.newSearchRequestBuilder();
            builder.withAppId(ApplicationConstants.BING_CONSUMER_KEY);
            builder.withQuery(text);
            builder.withSourceType(SourceType.TRANSLATION);
            builder.withTranslationRequestSourceLanguage("en");
            builder.withTranslationRequestTargetLanguage(language.value());
            builder.withVersion("2.2");
            SearchResponse response = client.search(builder.getResult());
            StringBuilder result = new StringBuilder();
            for (TranslationResult translation : response.getTranslation().getResults()) {
                result.append(translation.getTranslatedTerm());
            }
            return result.toString();
        } catch (Exception e) {
            logger.log(Level.SEVERE, "An error occured while translating text.", e);
            // Fall back to the untranslated input on any failure.
            return text;
        }
    }

    /*
     * (non-Javadoc)
     *
     * @see com.appspot.researchcraft.server.service.AnalysisService#textToSpeech(java.lang.String,
     * com.appspot.researchcraft.shared.TextLanguage)
     */
    public List<String> textToSpeech(String text, TextLanguage language) {
        List<String> streams = new ArrayList<String>();
        try {
            Speak.setKey(ApplicationConstants.BING_CONSUMER_KEY);
            int counter = 0;
            for (String splitText : splitTextIntoSegments(stripHtmlTags(text, true),
                    ApplicationConstants.BING_MAX_TEXT_LENGTH)) {
                String execute = Speak.execute(splitText, SpokenDialect.fromString(language.dialect()));
                streams.add(execute);
                counter++;
                // NOTE(review): "> MAX" permits MAX + 1 requests — confirm whether the
                // constant means "max allowed" (then this should be ">=").
                if (counter > ApplicationConstants.BING_MAX_REQUESTS_PER_NOTE) {
                    break;
                }
            }
        } catch (Exception e) {
            logger.log(Level.SEVERE, "An error occured while text to speech conversion.", e);
        }
        return streams;
    }

    /**
     * Splits the input text into segments no longer than {@code maxLength},
     * packing whole sentences greedily into each segment.
     *
     * <p>NOTE(review): a single sentence longer than {@code maxLength} is
     * truncated to its first {@code maxLength} characters — the remainder is
     * dropped, matching the original behavior.
     *
     * @param inputText the input text
     * @param maxLength the max length of one segment
     * @return the list of segments
     */
    private List<String> splitTextIntoSegments(String inputText, int maxLength) {
        List<String> tokens = new ArrayList<String>();
        try {
            SentenceTokenizer sentenceTokenizer = new SentenceTokenizer();
            sentenceTokenizer.setText(inputText);
            String sentence = null;
            StringBuilder builder = new StringBuilder();
            while ((sentence = sentenceTokenizer.nextSentence()) != null) {
                if (sentence.length() <= maxLength) {
                    if (builder.length() + sentence.length() > maxLength) {
                        tokens.add(builder.toString());
                        builder = new StringBuilder();
                    }
                    builder.append(sentence);
                } else {
                    tokens.add(sentence.substring(0, maxLength));
                }
            }
            if (builder.length() > 0) {
                tokens.add(builder.toString());
            }
        } catch (Exception e) {
            logger.log(Level.SEVERE, "An error occured tokenizing text.", e);
        }
        return tokens;
    }

    /*
     * (non-Javadoc)
     *
     * @see com.appspot.researchcraft.server.service.AnalysisService#classifyText(java.lang.String,
     * java.lang.String, java.util.List, java.lang.String,
     * java.lang.String)
     */
    @Override
    public List<Classification> classifyText(String title, String text, List<String> tags,
            String classifierName, String classifyUserName) {
        List<Classification> classifications = new ArrayList<Classification>();
        try {
            final UClassifyClientFactory factory = UClassifyClientFactory.newInstance(
                    ApplicationConstants.UCLASSIFY_READ_KEY, ApplicationConstants.UCLASSIFY_WRITE_KEY);
            final UClassifyClient client = factory.createUClassifyClient();
            Map<String, com.uclassify.api._1.responseschema.Classification> classificationsMap =
                    client.classify(classifyUserName, classifierName,
                            Arrays.asList(title, stripHtmlTags(text, true)));
            // TODO: somehow average the probability across the title/body results.
            // Iterate entrySet directly instead of keySet + get (one lookup per entry).
            for (Entry<String, com.uclassify.api._1.responseschema.Classification> result
                    : classificationsMap.entrySet()) {
                com.uclassify.api._1.responseschema.Classification classification = result.getValue();
                for (com.uclassify.api._1.responseschema.Class clazz : classification.getClazz()) {
                    Classification element = new Classification();
                    element.setCategory(clazz.getClassName());
                    element.setProbability(clazz.getP());
                    classifications.add(element);
                }
            }
        } catch (Exception e) {
            logger.log(Level.SEVERE, "An error occured classifying text.", e);
        }
        return classifications;
    }

    /*
     * (non-Javadoc)
     *
     * @see com.appspot.researchcraft.server.service.AnalysisService#applyTemplate(java.lang.String,
     * java.lang.String, java.io.Writer)
     */
    @Override
    public void applyTemplate(String templateContent, String inputText, Writer out) {
        try {
            // Expose the raw content and its sentence list to the template engine.
            Map<String, Object> parameters = new HashMap<String, Object>();
            parameters.put("content", inputText);
            List<String> sentences = new ArrayList<String>();
            SentenceTokenizer sentenceTokenizer = new SentenceTokenizer();
            sentenceTokenizer.setText(inputText);
            String sentence = null;
            while ((sentence = sentenceTokenizer.nextSentence()) != null) {
                sentences.add(sentence);
            }
            parameters.put("sentences", sentences);
            TemplateManager templateManager = new TemplateManager();
            templateManager.applyTemplate(parameters, new StringReader(templateContent), out);
        } catch (Exception e) {
            logger.log(Level.SEVERE, "An error occured applying template.", e);
        }
    }

    /* (non-Javadoc)
     * @see com.appspot.socialinquirer.server.service.AnalysisService#analyzeText(java.lang.String)
     */
    @Override
    public ContentAnalysis analyzeText(String text) {
        // TODO Auto-generated method stub
        return null;
    }

    /**
     * Closes a RAM directory, swallowing any close failure (best-effort cleanup).
     *
     * @param directory the directory (may be null)
     */
    private void closeRAMDirectory(RAMDirectory directory) {
        if (directory != null) {
            try {
                directory.close();
            } catch (Exception ignored) {
                // best-effort cleanup; nothing useful to do on close failure
            }
        }
    }

    /**
     * Closes an index searcher, swallowing any close failure (best-effort cleanup).
     *
     * @param searcher the searcher (may be null)
     */
    private void closeIndexSearcher(IndexSearcher searcher) {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (Exception ignored) {
                // best-effort cleanup; nothing useful to do on close failure
            }
        }
    }

    /**
     * Closes an index reader, swallowing any close failure (best-effort cleanup).
     *
     * @param reader the reader (may be null)
     */
    private void closeIndexReader(IndexReader reader) {
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignored) {
                // best-effort cleanup; nothing useful to do on close failure
            }
        }
    }

    /**
     * Creates the english analyzer. A Snowball-stemming variant existed but was
     * disabled in favor of the plain StandardAnalyzer.
     *
     * @return the analyzer
     */
    private static Analyzer createEnglishAnalyzer() {
        return new StandardAnalyzer(Version.LUCENE_CURRENT);
    }

    /**
     * Strips HTML tags from text using Solr's HTMLStripReader.
     *
     * @param text the text
     * @param shrink when true, collapse all whitespace runs to single spaces;
     *        when false, convert {@code <br/>} to newlines first and trim the result
     * @return the stripped string, or the original text if stripping fails
     */
    protected String stripHtmlTags(String text, boolean shrink) {
        StringWriter writer = new StringWriter();
        try {
            if (!shrink) {
                text = text.replaceAll("<br/>", "\n");
            }
            Reader reader = new HTMLStripReader(new StringReader(text));
            copyWriters(reader, writer);
        } catch (Exception e) {
            logger.log(Level.SEVERE, "Error occurred while stripping html.", e);
            return text;
        }
        return (shrink) ? writer.toString().replaceAll("\\s+", " ") : writer.toString().trim();
    }

    /**
     * Copies all characters from a reader to a writer.
     *
     * @param in the in
     * @param out the out
     * @throws IOException Signals that an I/O exception has occurred.
     */
    private void copyWriters(Reader in, Writer out) throws IOException {
        char[] buf = new char[1024];
        int len;
        while ((len = in.read(buf)) > 0) {
            out.write(buf, 0, len);
        }
    }

    /**
     * Gets the preferred answer: the accepted answer if present, otherwise the
     * highest-scoring answer not in the exclusion list.
     *
     * @param content the content (currently unused)
     * @param question the question (may be null)
     * @param excludedIds answer ids to skip
     * @return the preferred answer, or null if none qualifies
     */
    protected Answer getPreferredAnswer(String content, Question question, List<Long> excludedIds) {
        // currently just return the answer with the highest votes
        Answer preferredAnswer = null;
        if (question != null && question.getAnswers() != null) {
            for (Answer answer : question.getAnswers()) {
                if (!excludedIds.contains(answer.getAnswerId())) {
                    if (answer.getAnswerId() == question.getAcceptedAnswerId()) {
                        // The accepted answer always wins outright.
                        return answer;
                    } else if (preferredAnswer == null
                            || answer.getScore() > preferredAnswer.getScore()) {
                        preferredAnswer = answer;
                    }
                }
            }
        }
        return preferredAnswer;
    }

    /**
     * URL-encodes a string as UTF-8, falling back to the original on failure.
     *
     * @param original the original
     * @return the encoded string
     */
    private String encodeUrl(String original) {
        try {
            return URLEncoder.encode(original, "UTF-8");
        } catch (Exception e) {
            logger.log(Level.WARNING, "Error occurred while encoding " + original, e);
            return original;
        }
    }

    /**
     * Flattens an XHTML body into an HTML string (h1 heading plus paragraphs).
     *
     * @param body the body
     * @return the content from body
     */
    @SuppressWarnings("unused")
    private String getContentFromBody(Body body) {
        StringBuilder builder = new StringBuilder();
        if (body.getH1() != null) {
            builder.append("<h1>");
            builder.append(body.getH1());
            builder.append("</h1>");
        }
        if (body.getPS() != null) {
            for (P p : body.getPS()) {
                builder.append("<p>");
                for (Serializable content : p.getContent()) {
                    builder.append(content.toString());
                }
                builder.append("</p>");
            }
        }
        return builder.toString();
    }
}