ExampleServer.java :  » Web-Crawler » crawler » com » torunski » crawler » examples » Java Open Source

Java Open Source » Web Crawler » crawler 
crawler » com » torunski » crawler » examples » ExampleServer.java
/*
 * Project: ExampleServer
 * 
 * $Id: ExampleServer.java,v 1.2 2006/08/26 10:33:11 ltorunski Exp $
 */
package com.torunski.crawler.examples;

import java.util.Collection;
import java.util.Iterator;

import com.torunski.crawler.Crawler;
import com.torunski.crawler.filter.ServerFilter;

/**
 * Example for a simple crawling process.
 * 
 * Description: Command line example to crawl a web site starting from root. It uses a "ServerFilter" and the default "Max Iterations" model with a maximum of 32 links.
 * Result: Using www.spiegel.de as the parameter, 32 pages are visited and more than 400 pages are left.
 * 
 * @author Lars Torunski
 * @version $Id: ExampleServer.java,v 1.2 2006/08/26 10:33:11 ltorunski Exp $
 */
public class ExampleServer {

    /**
     * Command line entry point.
     *
     * Expects exactly one argument. That argument is used both to build the
     * {@link ServerFilter} and as the first argument of the crawler's start
     * call, together with the path "/". After the crawl, the visited links
     * and the links still left to visit are printed to standard output.
     *
     * @param args command line arguments; must contain exactly one element,
     *             otherwise a usage message is printed and nothing is crawled
     */
    public static void main(String[] args) {

        // Any argument count other than one: print the usage text and stop.
        if (args.length != 1) {
            System.out.println("ExampleServer for Crawler");
            System.out.println("Usage: java com.torunski.crawler.examples.ExampleServer [http server]");
            return;
        }

        String server = args[0];

        Crawler crawler = new Crawler();
        crawler.setLinkFilter(new ServerFilter(server));
        crawler.start(server, "/");

        // show visited links
        printLinks("Links visited=", crawler.getModel().getVisitedURIs());

        // show links that were NOT visited
        printLinks("Links NOT visited=", crawler.getModel().getToVisitURIs());
    }

    /**
     * Prints the label followed by the number of links, then each link on
     * its own line.
     *
     * The raw Collection/Iterator types are kept deliberately so that this
     * helper uses the same declarations as the code it was extracted from.
     *
     * @param label text printed in front of the link count
     * @param links the links to print
     */
    private static void printLinks(String label, Collection links) {
        System.out.println(label + links.size());

        for (Iterator it = links.iterator(); it.hasNext();) {
            System.out.println(it.next());
        }
    }

}
java2s.com  | Contact Us | Privacy Policy
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.