Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package uniko.west.topology.bolts; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.logging.Level; import java.util.logging.Logger; import org.joda.time.DateTime; import org.joda.time.format.DateTimeFormat; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.SerializationFeature; import com.fasterxml.jackson.databind.annotation.JsonSerialize; import com.fasterxml.jackson.databind.ser.std.ToStringSerializer; import backtype.storm.task.OutputCollector; import backtype.storm.task.TopologyContext; import backtype.storm.topology.OutputFieldsDeclarer; import backtype.storm.topology.base.BaseRichBolt; import backtype.storm.tuple.Fields; import backtype.storm.tuple.Tuple; import backtype.storm.tuple.Values; /** * * @author nico */ public class DiscussionTreeBolt extends BaseRichBolt { private OutputCollector collector; private String strExampleEmitFieldsId; private DateTime deadline; private DateTime bufferStartTime = null; private int intervalInMinutes = 10; private Map<String, Tweet> rootTweetsMap = new LinkedHashMap<>(); private Map<String, Tweet> childrenTweetsMap = new HashMap<>(); public DiscussionTreeBolt(String strExampleEmitFieldsId) { super(); this.strExampleEmitFieldsId = strExampleEmitFieldsId; } /** * Prepare method is similar the "Open" method for Spouts and is called when * a worker is about to be put to work. This method also initialise the main * example Storm Java bolt. * * @param stormConf * map of the storm configuration (passed within Storm topology * itself, not be a user) * @param context * context (e.g. similar to description) of the topology (passed * within Storm topology itself, not be a user) * @param collector * output collector of the Storm (which is responsible to emiting * new tuples, passed within Storm topology itself, not be a * user) */ @Override public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) { this.collector = collector; } /** * searches for locations in the message and computes related locations * * @param input * standard Storm tuple input object (passed within Storm * topology itself, not be a user) */ @Override public void execute(Tuple input) { // Retrieve hash map tuple object from Tuple input at index 0, index 1 // will be message delivery tag (not used here) Map<Object, Object> inputMap = (HashMap<Object, Object>) input.getValue(0); // Get JSON object from the HashMap from the Collections.singletonList Map<Object, Object> message = (Map<Object, Object>) inputMap.get("message"); // Acknowledge the collector that we actually received the input this.collector.ack(input); if (!message.containsKey("created_at")) { return; // skip delete messages } // Print received message // this.logger.info("Received message: " + message.toJSONString()); String timeStamp = (String) message.get("created_at"); DateTime timestamp = DateTime.parse(timeStamp, DateTimeFormat.forPattern("EEE MMM dd HH:mm:ss Z yyyy").withLocale(Locale.US)); if (this.bufferStartTime == null) { this.bufferStartTime = timestamp; this.deadline = this.bufferStartTime.plusMinutes(this.intervalInMinutes); } String authorId = (String) ((Map<Object, Object>) message.get("user")).get("id_str"); String authorScreenName = (String) ((Map<Object, Object>) message.get("user")).get("screen_name"); String text = (String) message.get("text"); String tweetId = (String) message.get("id_str"); boolean retweet = false; String ancestorTweetId = (String) message.get("in_reply_to_status_id_str"); String ancestorAuthorId = (String) message.get("in_reply_to_user_id_str"); String ancestorAutorScreenName = (String) message.get("in_reply_to_screen_name"); Map<Object, Object> retweeted_status = (Map<Object, Object>) message.get("retweeted_status"); if (retweeted_status != null) { retweet = true; ancestorTweetId = (String) ((Map<Object, Object>) message.get("retweeted_status")).get("id_str"); } Tweet tweet = new Tweet(authorId, authorScreenName, tweetId, timestamp, text, ancestorTweetId, true, retweet); if (ancestorTweetId != null) { if (this.rootTweetsMap.containsKey(tweet.getIn_reply_to())) { this.rootTweetsMap.get(tweet.getIn_reply_to()).getReplies().add(tweet); } else if (this.childrenTweetsMap.containsKey(tweet.getIn_reply_to())) { this.childrenTweetsMap.get(tweet.getIn_reply_to()).getReplies().add(tweet); } else { // tweet is a reply or retweet but its ancestor was'nt observed // by this bolt, therefore its ancestor is treated as a dummy // entry Tweet dummyTweet = new Tweet(ancestorAuthorId, ancestorAutorScreenName, ancestorTweetId, null, null, null, false, false); dummyTweet.getReplies().add(tweet); this.rootTweetsMap.put(ancestorTweetId, dummyTweet); } this.childrenTweetsMap.put(tweetId, tweet); } else { // tweet is no reply or retweet this.rootTweetsMap.put(tweetId, tweet); } if (timestamp.isAfter(this.deadline) || timestamp.isEqual(this.deadline)) { try { ObjectMapper mapper = new ObjectMapper(); String jsonResultString; HashMap<String, Object> jsonResult = new HashMap<>(); jsonResult.put("start", this.bufferStartTime.toString()); jsonResult.put("end", timestamp.toString()); jsonResult.put("result", this.rootTweetsMap.values()); jsonResultString = mapper.writeValueAsString(jsonResult); Logger.getLogger(DiscussionTreeBolt.class.getName()).log(Level.INFO, "Deadline expired, Buffer size : " + this.rootTweetsMap.size()); this.collector.emit(new Values(jsonResultString)); mapper.enable(SerializationFeature.INDENT_OUTPUT); mapper.writeValue( new File("/home/martin/test/discussionTreeBolt/discussionTree-" + this.bufferStartTime), jsonResult); this.bufferStartTime = null; this.rootTweetsMap = new LinkedHashMap<>(); this.childrenTweetsMap = new HashMap<>(); } catch (JsonProcessingException ex) { Logger.getLogger(DiscussionTreeBolt.class.getName()).log(java.util.logging.Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(DiscussionTreeBolt.class.getName()).log(Level.SEVERE, null, ex); } } } /** * Declare output field name (in this case simple a string value that is * defined in the constructor call) * * @param declarer * standard Storm output fields declarer */ @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields(this.strExampleEmitFieldsId)); } public class Tweet { private String author_id; private String author_screen_name; private String tweet_id; @JsonSerialize(using = ToStringSerializer.class) private DateTime timestamp; private String text; private String in_reply_to; private boolean observed; private boolean retweet; private List<Tweet> replies = new ArrayList<>(); public Tweet(String authorId, String authorScreenName, String tweetId, DateTime timestamp, String text, String inReplyTo, boolean observed, boolean retweet) { this.author_id = authorId; this.author_screen_name = authorScreenName; this.tweet_id = tweetId; this.timestamp = timestamp; this.text = text; this.in_reply_to = inReplyTo; this.observed = observed; this.retweet = retweet; } /** * @return the author_id */ public String getAuthor_id() { return this.author_id; } /** * @param author_id * the author_id to set */ public void setAuthor_id(String author_id) { this.author_id = author_id; } /** * @return the author_screen_name */ public String getAuthor_screen_name() { return this.author_screen_name; } /** * @param author_screen_name * the author_screen_name to set */ public void setAuthor_screen_name(String author_screen_name) { this.author_screen_name = author_screen_name; } /** * @return the tweet_id */ public String getTweet_id() { return this.tweet_id; } /** * @param tweet_id * the tweet_id to set */ public void setTweet_id(String tweet_id) { this.tweet_id = tweet_id; } /** * @return the timestamp */ public DateTime getTimestamp() { return this.timestamp; } /** * @param timestamp * the timestamp to set */ public void setTimestamp(DateTime timestamp) { this.timestamp = timestamp; } /** * @return the text */ public String getText() { return this.text; } /** * @param text * the text to set */ public void setText(String text) { this.text = text; } /** * @return the in_reply_to */ public String getIn_reply_to() { return this.in_reply_to; } /** * @param in_reply_to * the in_reply_to to set */ public void setIn_reply_to(String in_reply_to) { this.in_reply_to = in_reply_to; } /** * @return the observed */ public boolean isObserved() { return this.observed; } /** * @param observed * the observed to set */ public void setObserved(boolean observed) { this.observed = observed; } /** * @return the retweet */ public boolean isRetweet() { return this.retweet; } /** * @param retweet * the retweet to set */ public void setRetweet(boolean retweet) { this.retweet = retweet; } /** * @return the replies */ public List<Tweet> getReplies() { return this.replies; } /** * @param replies * the replies to set */ public void setReplies(List<Tweet> replies) { this.replies = replies; } @Override public String toString() { try { ObjectMapper mapper = new ObjectMapper(); return mapper.writeValueAsString(this); } catch (JsonProcessingException ex) { Logger.getLogger(DiscussionTreeBolt.class.getName()).log(Level.SEVERE, null, ex); } return null; } } }