Simple web crawler that saves linked pages using jsoup - Java HTML

Java examples for HTML:JSoup

Description

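A simple web crawler using jsoup: it downloads a start page, lists every link found on it, and saves the start page and each linked page to a local HTML file.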

Demo Code


import java.io.File;
import java.io.FileWriter;
import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Minimal jsoup-based crawler.
 *
 * <p>Downloads a start page, writes its body to a local file, prints every
 * {@code a[href]} link found on it, then downloads each linked page and writes
 * its body to its own file named after the link text.
 *
 * <p>Usage: {@code java WebCrawler [startUrl]}. When no argument is given the
 * placeholder {@link #DEFAULT_START_URL} is used, which must be edited to point
 * at a real server.
 */
public class WebCrawler 
{
    /** Start page used when no URL is passed on the command line (placeholder). */
    private static final String DEFAULT_START_URL = "http://your server/";

    /** File that receives the body of the start page. */
    private static final String START_PAGE_FILE = "D:/Documents/abc.html";

    /** Directory that receives one file per followed link. */
    private static final String LINK_OUTPUT_DIR = "D:/";

    /** Connect/read timeout for every request, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 30000;

    /** Maximum number of characters of link text shown and used in file names. */
    private static final int MAX_LABEL_WIDTH = 35;

    /** User-Agent header sent when following links. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.21 (KHTML, like Gecko) Chrome/19.0.1042.0 Safari/535.21";

    /**
     * Prints a formatted line to standard output.
     *
     * @param msg  {@link String#format} pattern
     * @param args values substituted into the pattern
     */
    private static void print(String msg, Object... args) 
    {
        System.out.println(String.format(msg, args));
    }

    /**
     * Shortens {@code s} to at most {@code width} characters, replacing the last
     * kept character position with {@code "."} when truncation happens.
     *
     * @param s     text to shorten
     * @param width maximum length of the result; values below 1 yield an empty string
     * @return {@code s} unchanged if it already fits, otherwise the truncated text
     */
    private static String trim(String s, int width) 
    {
        if (width <= 0)
        {
            // substring(0, width - 1) would throw for a non-positive width.
            return "";
        }
        if (s.length() > width)
        {
            return s.substring(0, width - 1) + ".";
        }
        return s;
    }

    /**
     * Turns link text into something usable as a file name.
     *
     * @param label link text (already shortened by the caller)
     * @param index 1-based position of the link, used as a fallback name
     * @return the label with path separators, reserved and control characters
     *         replaced by {@code '_'}, or {@code "link_<index>"} if nothing is left
     */
    private static String toFileName(String label, int index)
    {
        // Characters rejected by Windows file systems, plus '/' and control characters.
        String cleaned = label.replaceAll("[\\\\/:*?\"<>|\\p{Cntrl}]", "_").trim();
        if (cleaned.isEmpty())
        {
            return "link_" + index;
        }
        return cleaned;
    }

    /**
     * Returns whether {@code url} starts with an http or https scheme.
     *
     * @param url absolute URL as resolved by jsoup; may be empty
     * @return {@code true} for {@code http://} and {@code https://} URLs, ignoring case
     */
    private static boolean isHttpUrl(String url)
    {
        return url.regionMatches(true, 0, "http://", 0, 7)
                || url.regionMatches(true, 0, "https://", 0, 8);
    }

    /**
     * Writes the body of {@code page} to {@code target}, creating missing parent
     * directories first.
     *
     * @param page   parsed document to save
     * @param target destination file; overwritten if it exists
     * @throws IOException if the directory cannot be created or the file cannot be written
     */
    private static void savePage(Document page, File target) throws IOException
    {
        File parent = target.getParentFile();
        if (parent != null && !parent.isDirectory() && !parent.mkdirs())
        {
            throw new IOException("Cannot create directory " + parent);
        }

        // NOTE(review): body() can reportedly be null on some jsoup versions
        // (e.g. frameset documents); fall back to the whole document then.
        Element body = page.body();
        String html = (body != null) ? body.toString() : page.outerHtml();

        // try-with-resources closes the writer even if write() fails.
        try (FileWriter writer = new FileWriter(target))
        {
            writer.write(html);
        }
    }

    /**
     * Entry point: saves the start page, lists its links, then saves each linked page.
     *
     * @param args optional; {@code args[0]} overrides the start URL
     * @throws IOException if the start page cannot be fetched or saved
     */
    public static void main(String[] args) throws IOException 
    {
        String startUrl = (args.length > 0) ? args[0] : DEFAULT_START_URL;

        Document doc = Jsoup.connect(startUrl).timeout(TIMEOUT_MILLIS).get();
        Elements links = doc.select("a[href]");
        savePage(doc, new File(START_PAGE_FILE));

        print("\nLinks: (%d)", links.size());
        for (Element link : links) 
        {
            print(" * a: <%s>  (%s)", link.attr("abs:href"), trim(link.text(), MAX_LABEL_WIDTH));
        }

        int index = 0;
        for (Element link : links) 
        {
            index++;
            String url = link.attr("abs:href");
            if (!isHttpUrl(url))
            {
                // mailto:, javascript:, unresolved or empty hrefs cannot be fetched.
                System.err.println("Skipping non-HTTP link: <" + url + ">");
                continue;
            }

            try
            {
                Document linked = Jsoup.connect(url)
                        .timeout(TIMEOUT_MILLIS)
                        .userAgent(USER_AGENT)
                        .ignoreHttpErrors(true)
                        .followRedirects(true)
                        .ignoreContentType(true)
                        .get();
                String name = toFileName(trim(link.text(), MAX_LABEL_WIDTH), index);
                savePage(linked, new File(LINK_OUTPUT_DIR, name + ".html"));
            }
            catch (IOException | IllegalArgumentException e)
            {
                // One unreachable or malformed link should not abort the remaining downloads.
                System.err.println("Failed to save <" + url + ">: " + e);
            }
        }
    }
}

Related Tutorials