LinkChecker.java :  » Content-Management-System » hippo-cms » nl » hippo » util » Java Open Source

Java Open Source » Content Management System » hippo cms 
hippo cms » nl » hippo » util » LinkChecker.java
/*
 * Copyright 2006 Hippo.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package nl.hippo.util;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringWriter;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

import org.apache.commons.httpclient.Credentials;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
import org.apache.commons.httpclient.UsernamePasswordCredentials;
import org.apache.commons.httpclient.methods.HeadMethod;
import org.apache.commons.httpclient.methods.PutMethod;
import org.jdom.Content;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.Text;
import org.jdom.output.XMLOutputter;

/**
 * Check for broken links.
 *
 * @author Ugo Cei
 * @version $Id: LinkChecker.java 861 2006-02-09 10:08:30Z ucei $
 */
public final class LinkChecker {
    
    public static final char COLUMN_SEPARATOR_CHAR = '\t';
    public static final String URL_SEPARATOR_CHARS = " ";
    public static final String I18N_NS_URI = "http://apache.org/cocoon/i18n/2.1";
    public static final String PLACEHOLDER_TEXT_BEFORE = "<?xml version='1.0'?>\n" +
            "<broken-links date='";
    public static final String PLACEHOLDER_TEXT_AFTER = "'><i18n:text xmlns:i18n='" +
            I18N_NS_URI + "'>message.link-checker-running</i18n:text></broken-links>\n";
    public static final String HTTP_ERROR_PREFIX = "message.http-error.";
        
    private static final HttpClient client = new HttpClient(new MultiThreadedHttpConnectionManager());
    
    /**
     * Check for broken links. Expects as input an array of bytes with the contents of a text file.
     * Each line in the file should contain a page URL from the repository, followed by a tab
     * character and a space-separated list of link URLs to be checked.
     * <p>Only external links (i.e. those not beginning with a forward slash) will be checked.
     * HTTP links are verified by doing a HEAD request and checking that the result code is less
     * than 300.</p>
     * <p>During processing a placeholder file will be placed in the repository.</p>
     * 
     * @param repository URL of the placeholder file.
     * @param username Username for connecting to the repository.
     * @param password Password for connecting to the repository.
     * @param data Input data.
     * @return A map whose keys are the page URLs and whose values are Lists. Each item of the list
     * is an array of two strings; first string is the link URL, second one is a message detailing
     * why the link is considered broken.
     * @throws IOException
     */
    public static Map checkLinks(String repository, String username, String password, byte[] data) throws IOException {
        put(repository, username, password, PLACEHOLDER_TEXT_BEFORE
                + new Date() + PLACEHOLDER_TEXT_AFTER);
        Map brokenLinks = new HashMap();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(data)));
            String line;
            while ((line = reader.readLine()) != null) {
                int tabPos = line.indexOf(COLUMN_SEPARATOR_CHAR);
                if (tabPos <= 0) {
                    // FIXME: log warning?
                    continue;
                }
                String pageUrl = line.substring(0, tabPos);
                StringTokenizer st = new StringTokenizer(line.substring(tabPos + 1), URL_SEPARATOR_CHARS);
                while (st.hasMoreTokens()) {
                    String link = st.nextToken();
                    // Only links starting with '/' are considered internal.
                    Content error = null;
                    if (link.charAt(0) != '/' && (error = checkLink(link)) != null) {
                        List links = (List) brokenLinks.get(pageUrl);
                        if (links == null) {
                            links = new LinkedList();
                            brokenLinks.put(pageUrl, links);
                        }
                        links.add(new Object[] { link, error });
                    }
                }
            }
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
        return brokenLinks;
    }

    /**
     * Save the results of link checking to a repository file.
     * 
     * @param repository URL of the output file.
     * @param username Username for connecting to the repository.
     * @param password Password for connecting to the repository.
     * @param pages Broken links data.
     * @throws IOException
     */
    public static void putResults(String repository, String username, String password, Map pages) throws IOException {
        Element root = new Element("broken-links");
        root.setAttribute("date", new Date().toString());
        for (Iterator it = pages.keySet().iterator() ; it.hasNext() ; ) {
            String page = (String) it.next();
            Element pageEl = new Element("page");
            pageEl.setAttribute("url", page);
            Collection links = (Collection) pages.get(page);
            for (Iterator it2 = links.iterator() ; it2.hasNext() ; ) {
                Object[] link = (Object[]) it2.next();
                Element linkEl = new Element("link");
                linkEl.setAttribute("url", (String) link[0]);
                linkEl.addContent((Content) link[1]);
                pageEl.addContent(linkEl);
            }
            root.addContent(pageEl);
        }
        Document doc = new Document(root);
        XMLOutputter outputter = new XMLOutputter();
        StringWriter sw = new StringWriter();
        outputter.output(doc, sw);
        String body = sw.toString();
        put(repository, username, password, body);
    }
    
    private static void put(String repository, String username, String password, String body) throws IOException {
        client.setConnectionTimeout(30000);
        client.getState().setAuthenticationPreemptive(true);
        Credentials defaultcreds = new UsernamePasswordCredentials(username, password);
        client.getState().setCredentials(null, null, defaultcreds);
        PutMethod put = null;
        try {
            put = new PutMethod(repository);
            put.setRequestBody(body);
            client.executeMethod(put);
            // FIXME: log this!
        } finally {
            if (put != null) {
                put.releaseConnection();
            }
        }
        
    }
    
    private static Content checkLink(String link) {
        client.setConnectionTimeout(30000);
        HeadMethod head = null;
        try {
            head = new HeadMethod(link);
            head.setFollowRedirects(true);
            try {
                int resultCode = client.executeMethod(head);
                if (resultCode >= 300) {
                    Element el = new Element("text", "i18n", I18N_NS_URI);
                    el.addContent(HTTP_ERROR_PREFIX + new Integer(head.getStatusCode()).toString());
                    return el;
                }
            } catch (Exception e) {
                return new Text(e.toString());
            }
            return null;
        } finally {
            if (head != null) {
                head.releaseConnection();
            }
        }
    }
}
java2s.com  | Contact Us | Privacy Policy
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.