Java tutorial
package model;

import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Comment;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;

import dao.Bank;
import dao.BankStatus;

/**
 * Scrapes the site of the National Bank of Ukraine (NBU): downloads the bank
 * directory pages, extracts the label/value pairs of every bank page and
 * turns them into {@link Bank} objects.
 */
@Component("parseInfoFromSite")
public class ParseInfoFromSite {

    private static final Logger logger = LogManager.getLogger();

    /** Root of the NBU site; relative bank links are appended to it. */
    private static final String SITE_ROOT = "http://www.bank.gov.ua/";

    /** Directory listing URL up to (and including) the {@code cPage=} parameter. */
    private static final String BANK_LIST_URL =
            SITE_ROOT + "control/bankdict/banks?type=369&sort=name&cPage=";

    /** Prefix of the {@code href} values that point to a single bank page. */
    private static final String BANK_LINK_PREFIX = "control/uk/bankdict/bank?id=";

    /** Number of directory pages that are requested. */
    private static final int PAGE_COUNT = 6;

    /** Step of the {@code startIndx} parameter between two directory pages. */
    private static final int PAGE_SIZE = 20;

    /** Connect/read timeout passed to jsoup, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 10 * 1000;

    /** Pattern of the dates found on the bank pages. */
    private static final String DATE_PATTERN = "dd.M.yyyy";

    /**
     * Parses the bank directory of the NBU site.
     *
     * <p>Requests {@value #PAGE_COUNT} directory pages, follows every bank
     * link found on them and builds one {@link Bank} per link.
     *
     * @return list with one bank per link found on the directory pages
     */
    public List<Bank> parseSiteNBU() {
        logger.info("run");
        List<Bank> bankList = new ArrayList<Bank>();
        for (int page = 0; page < PAGE_COUNT; page++) {
            // startIndx is 1-based: 1, 21, 41, ...
            int startIndex = 1 + page * PAGE_SIZE;
            List<String> links = getListOfBanks(BANK_LIST_URL + page + "&startIndx=" + startIndex);
            for (String link : links) {
                Map<String, String> info = getBankInfoMap(SITE_ROOT + link);
                bankList.add(createBank(info));
            }
        }
        return bankList;
    }

    /**
     * Downloads one bank page and returns its label/value pairs.
     *
     * <p>HTML comments of the page are replaced by their own content first,
     * so markup that is commented out on the site becomes part of the
     * document. After that the elements with class {@code cell} are read
     * pairwise: the first one of each pair is the key, the second the value.
     *
     * @param URL
     *            address of the page with information about one bank
     * @return map with the information about the bank; empty when the page
     *         could not be loaded
     */
    public Map<String, String> getBankInfoMap(String URL) {
        logger.info("run");
        Map<String, String> map = new HashMap<String, String>();
        try {
            Document doc = fetch(URL);
            for (Comment comment : findAllComments(doc)) {
                // Re-insert the commented-out markup as real nodes, then drop the comment.
                comment.after(comment.getData());
                comment.remove();
            }
            Elements cells = doc.getElementsByClass("cell");
            // "i + 1 < size" guards against an odd number of cells: a trailing
            // label without a value is ignored instead of throwing
            // IndexOutOfBoundsException and losing the whole page.
            for (int i = 0; i + 1 < cells.size(); i += 2) {
                map.put(cells.get(i).text(), cells.get(i + 1).text());
            }
        } catch (IOException e) {
            logger.error("Could not load bank page " + URL, e);
        }
        return map;
    }

    /**
     * Collects the relative links to bank pages from one directory page.
     *
     * @param URL
     *            address of a directory page with links to bank pages
     * @return the {@code href} values that start with
     *         {@code control/uk/bankdict/bank?id=}; empty when the page could
     *         not be loaded
     */
    public List<String> getListOfBanks(String URL) {
        logger.info("run");
        List<String> list = new ArrayList<String>();
        try {
            Document doc = fetch(URL);
            for (Element link : doc.select("a[href]")) {
                String href = link.attr("href");
                if (href.startsWith(BANK_LINK_PREFIX)) {
                    list.add(href);
                }
            }
        } catch (IOException e) {
            logger.error("Could not load bank list page " + URL, e);
        }
        return list;
    }

    /**
     * Creates a {@link Bank} and fills it from the label/value pairs of a
     * bank page.
     *
     * <p>When a required value is missing (or a setter rejects a value with a
     * {@link NumberFormatException}), the bank gets an empty license, a
     * license date of epoch 0 and the status {@link BankStatus#LIQUIDATION}.
     * When a date cannot be parsed, the fields after the failing date are
     * left as {@link Bank#createBank()} initialised them.
     *
     * @param map
     *            label/value pairs with information about the bank
     * @return the populated bank
     */
    public Bank createBank(Map<String, String> map) {
        logger.info("run");
        // SimpleDateFormat is not thread-safe, so it stays local to the call.
        SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_PATTERN);
        Bank bank = Bank.createBank();
        try {
            // NOTE(review): the label keys below look like they lost their
            // non-ASCII text (several fields share the same key) - restore
            // them from the original source before relying on this mapping.
            bank.setName(map.get(""));
            bank.setShortName(map.get(" "));
            bank.setCode(map.get(" "));
            bank.setMfo(map.get(" ()"));
            bank.setDate(new java.sql.Date(dateFormat.parse(map.get(" ")).getTime()));
            bank.setAdress(map.get(""));
            bank.setLicense(map.get(" ()"));
            bank.setLicensedate(
                    new java.sql.Date(dateFormat.parse(map.get(" ()")).getTime()));
            bank.setStatus(BankStatus.NORMAL);
        } catch (ParseException e) {
            logger.error("Could not parse a date of the bank; later fields are left unset", e);
        } catch (NumberFormatException | NullPointerException e) {
            // A missing value makes SimpleDateFormat.parse(null) throw a
            // NullPointerException; such banks are recorded as being in liquidation.
            bank.setLicense("");
            bank.setLicensedate(new java.sql.Date(0));
            bank.setStatus(BankStatus.LIQUIDATION);
            logger.warn("Incomplete bank information, bank marked as LIQUIDATION", e);
        }
        return bank;
    }

    /**
     * Finds all HTML comments of a document.
     *
     * <p>The comments are collected before the caller modifies the document,
     * so removing them afterwards does not disturb the traversal.
     *
     * @param doc
     *            document to search
     * @return unmodifiable list of the comment nodes that are direct children
     *         of any element of the document
     */
    public List<Comment> findAllComments(Document doc) {
        logger.info("run");
        List<Comment> comments = new ArrayList<>();
        for (Element element : doc.getAllElements()) {
            for (Node node : element.childNodes()) {
                if (node instanceof Comment) {
                    comments.add((Comment) node);
                }
            }
        }
        return Collections.unmodifiableList(comments);
    }

    /** Downloads and parses a page with the user agent and timeout used for every request. */
    private Document fetch(String url) throws IOException {
        return Jsoup.connect(url).userAgent("Mozilla").timeout(TIMEOUT_MILLIS).get();
    }
}