Usage examples for org.jsoup.select.Elements.not
public Elements not(String query)
From source file: com.ewcms.plugin.crawler.generate.EwcmsContentCrawler.java
/** * ?page??/* w w w .ja va2 s . c o m*/ */ @Override public void visit(Page page) { try { String url = page.getWebURL().getURL(); page.setContentType("text/html; charset=" + gather.getEncoding()); Document doc = Jsoup.connect(url).timeout(gather.getTimeOutWait().intValue() * 1000).get(); String title = doc.title(); if (gather.getTitleExternal() && gather.getTitleRegex() != null && gather.getTitleRegex().length() > 0) { Elements titleEles = doc.select(gather.getTitleRegex()); if (!titleEles.isEmpty()) { String tempTitle = titleEles.text(); if (tempTitle != null && tempTitle.length() > 0) { title = tempTitle; } } } if (title != null && title.trim().length() > 0) { Elements elements = doc.select(matchRegex); if (filterRegex != null && filterRegex.trim().length() > 0) { elements = elements.not(filterRegex); } if (!elements.isEmpty()) { String subHtml = elements.html(); Document blockDoc = Jsoup.parse(subHtml); String contentText = blockDoc.html(); if (gather.getRemoveHref()) { Document moveDoc = Jsoup.parse(contentText); Elements moveEles = moveDoc.select("*").not("a"); contentText = moveEles.html(); } if (gather.getRemoveHtmlTag()) contentText = doc.text(); if (isLocal) { contentText = doc.text(); Boolean isMatcher = true; for (int i = 0; i < keys.length; i++) { Boolean result = Pattern.compile(keys[i].trim()).matcher(contentText).find(); if (!result) { isMatcher = false; break; } } if (isMatcher) { Storage storage = new Storage(); storage.setGatherId(gather.getId()); storage.setGatherName(gather.getName()); storage.setTitle(title); storage.setUrl(url); try { gatherService.addStorage(storage); } catch (Exception e) { logger.error("save storage error : {}", e.getLocalizedMessage()); } finally { storage = null; } } } else { Content content = new Content(); content.setDetail(contentText); content.setPage(1); List<Content> contents = new ArrayList<Content>(); contents.add(content); Article article = new Article(); article.setTitle(title); article.setContents(contents); 
articleMainService.addArticleMainByCrawler(article, gather.getChannelId(), CrawlerUtil.USER_NAME); } } } } catch (IOException e) { logger.warn(e.getLocalizedMessage()); } }