ExampleVakanzenMaxDepthModel.java :  » Web-Crawler » crawler » com » torunski » crawler » examples » Java Open Source

Java Open Source » Web Crawler » crawler 
crawler » com » torunski » crawler » examples » ExampleVakanzenMaxDepthModel.java
/*
 * Project: ExampleVakanzenMaxDepthModel
 * 
 * $Id: ExampleVakanzenMaxDepthModel.java,v 1.2 2006/08/26 10:33:12 ltorunski Exp $
 */
package com.torunski.crawler.examples;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

import com.torunski.crawler.Crawler;
import com.torunski.crawler.filter.ServerFilter;
import com.torunski.crawler.model.MaxDepthModel;

/**
 * Example that crawls the vakanzen site using a max depth model.
 * 
 * Description: Configures the crawler with a "MaxDepthModel" of depth 2 and a
 * "ServerFilter" for the start server, so only links of the same server are followed.
 * Result: Visits around 50 pages and stops when the depth of 2 is reached.
 * Afterwards the visited and the not visited links are printed in sorted order.
 * 
 * @author Lars Torunski
 * @version $Id: ExampleVakanzenMaxDepthModel.java,v 1.2 2006/08/26 10:33:12 ltorunski Exp $
 */
public class ExampleVakanzenMaxDepthModel {
    
    /** Server the crawl starts on; also passed to the link filter. */
    private static final String SERVER = "http://www.oppenheim-karriere.de";

    /** Path on the server where the crawl begins. */
    private static final String START = "/Vakanzen_main";
    
    /**
     * Runs the crawl and prints a sorted report of visited and not visited links.
     * @param args command line arguments (not used)
     */
    public static void main(String[] args) {
        
        Crawler crawler = new Crawler();
        crawler.setModel(new MaxDepthModel(2));
        crawler.setLinkFilter(new ServerFilter(SERVER));
        
        crawler.start(SERVER, START);
        
        // report the visited links first, then the links left to visit
        printSortedLinks("Links visited=", crawler.getModel().getVisitedURIs());
        printSortedLinks("Links NOT visited=", crawler.getModel().getToVisitURIs());
    }
    
    /**
     * Prints the number of links behind the given label, followed by
     * each link on its own line in sorted order.
     * @param label the text printed directly in front of the link count
     * @param links the links collection
     */
    private static void printSortedLinks(String label, Collection links) {
        Collection sorted = sortMaxDepthLinks(links);
        System.out.println(label + sorted.size());
        
        for (Iterator it = sorted.iterator(); it.hasNext();) {
            System.out.println(it.next());
        }
    }
    
    /**
     * Converts every link to its string form and sorts the strings
     * in their natural order.
     * @param links the links collection
     * @return a new sorted collection holding the string form of each link
     */
    private static Collection sortMaxDepthLinks(Collection links) {
        List linkTexts = new ArrayList(links.size());
        
        for (Iterator it = links.iterator(); it.hasNext();) {
            Object link = it.next();
            linkTexts.add(link.toString());
        }
        
        Collections.sort(linkTexts);
        return linkTexts;
    }
    
}
java2s.com  | Contact Us | Privacy Policy
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.