List of usage examples for org.apache.commons.feedparser.network ResourceRequest setRequestHeaderField
/**
 * Sets a header field on this resource request.
 * In the usage example on this page it is called with the name {@code "User-Agent"}
 * and a browser identification string as the value.
 *
 * @param name  the header field name
 * @param value the value to assign to that field
 */
public void setRequestHeaderField(String name, String value);
From source file: org.scify.NewSumServer.Server.Sources.RssParser.java
/** * Processes the feeds from the given URL string and adds them to a List * containing an {@link Article} for each item found. * * @param urlString the URL string to parse * @param sCategory The category that the specified URL is about * @throws NetworkException/*from www. j av a 2s. c o m*/ * @throws IOException */ public void ProcessFeed(final String urlString, final String sCategory) throws NetworkException, IOException { //create a listener for handling our callbacks FeedParserListener listener; listener = new DefaultFeedParserListener() { @Override public void onItem(FeedParserState state, String title, String link, String description, String permalink) throws FeedParserException { // Use first 30 characters for title... if ((title == null) || (title.trim().length() == 0)) { title = description.substring(0, 30) + "..."; } // TODO for later version // check if category is "" || "Top News" and if such, create // new UnlabeledArticle so that it gets category from the // classification Module. 
if (sCategory.equals(UNCLASSIFIED)) { // Initiate an Unlabeled Article (null Category) with boolean // toWrap = false, so that // it is not accessed by the classification trainer UnlabeledArticle tmpUnArt = new UnlabeledArticle(permalink, title.trim(), description, null, urlString, false); //filter Article text tmpUnArt = (UnlabeledArticle) preProcessArticle(tmpUnArt, 9); // Add the Article found to the list, avoid possible duplicates if (tmpUnArt != null) { Utilities.addItemToList(lsItems, tmpUnArt); } // Otherwise procceed normally with provided category } else { // Initiate a new article with toWrap = true, // so that it feeds the classification trainer Article tmpArt = new Article(permalink, title.trim(), description, sCategory, urlString, true); //filter article text tmpArt = preProcessArticle(tmpArt, 10); // Add the Article found to the list, avoid possible duplicates if (tmpArt != null) { Utilities.addItemToList(lsItems, tmpArt); } } } @Override public void onCreated(FeedParserState state, Date date) throws FeedParserException { if (!lsItems.isEmpty()) { //Adding date to current Article -- Some feeds don't provide date Article tmpArt = lsItems.get(lsItems.size() - 1); tmpArt.setDate(date); } } }; // debug // System.out.println("Fetching resource: " + urlString); // debug //use the FeedParser network IO package to fetch our resource URL ResourceRequest request = ResourceRequestFactory.getResourceRequest(urlString); request.setRequestHeaderField("User-Agent", "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0"); FeedParser parser = null; try { // Grab input stream InputStream is = request.getInputStream(); parser = FeedParserFactory.newFeedParser(); parser.parse(listener, is, urlString); } catch (FeedParserException ex) { LOGGER.log(Level.WARNING, ex.getMessage(), ex); } catch (Exception ex) { LOGGER.log(Level.WARNING, ex.getMessage()); } }