/*
* Copyright 2006 Hippo.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nl.hippo.util;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringWriter;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import org.apache.commons.httpclient.Credentials;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
import org.apache.commons.httpclient.UsernamePasswordCredentials;
import org.apache.commons.httpclient.methods.HeadMethod;
import org.apache.commons.httpclient.methods.PutMethod;
import org.jdom.Content;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.Text;
import org.jdom.output.XMLOutputter;
/**
* Check for broken links.
*
* @author Ugo Cei
* @version $Id: LinkChecker.java 861 2006-02-09 10:08:30Z ucei $
*/
public final class LinkChecker {
public static final char COLUMN_SEPARATOR_CHAR = '\t';
public static final String URL_SEPARATOR_CHARS = " ";
public static final String I18N_NS_URI = "http://apache.org/cocoon/i18n/2.1";
public static final String PLACEHOLDER_TEXT_BEFORE = "<?xml version='1.0'?>\n" +
"<broken-links date='";
public static final String PLACEHOLDER_TEXT_AFTER = "'><i18n:text xmlns:i18n='" +
I18N_NS_URI + "'>message.link-checker-running</i18n:text></broken-links>\n";
public static final String HTTP_ERROR_PREFIX = "message.http-error.";
private static final HttpClient client = new HttpClient(new MultiThreadedHttpConnectionManager());
/**
* Check for broken links. Expects as input an array of bytes with the contents of a text file.
* Each line in the file should contain a page URL from the repository, followed by a tab
* character and a space-separated list of link URLs to be checked.
* <p>Only external links (i.e. those not beginning with a forward slash) will be checked.
* HTTP links are verified by doing a HEAD request and checking that the result code is less
* than 300.</p>
* <p>During processing a placeholder file will be placed in the repository.</p>
*
* @param repository URL of the placeholder file.
* @param username Username for connecting to the repository.
* @param password Password for connecting to the repository.
* @param data Input data.
* @return A map whose keys are the page URLs and whose values are Lists. Each item of the list
* is an array of two strings; first string is the link URL, second one is a message detailing
* why the link is considered broken.
* @throws IOException
*/
public static Map checkLinks(String repository, String username, String password, byte[] data) throws IOException {
put(repository, username, password, PLACEHOLDER_TEXT_BEFORE
+ new Date() + PLACEHOLDER_TEXT_AFTER);
Map brokenLinks = new HashMap();
BufferedReader reader = null;
try {
reader = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(data)));
String line;
while ((line = reader.readLine()) != null) {
int tabPos = line.indexOf(COLUMN_SEPARATOR_CHAR);
if (tabPos <= 0) {
// FIXME: log warning?
continue;
}
String pageUrl = line.substring(0, tabPos);
StringTokenizer st = new StringTokenizer(line.substring(tabPos + 1), URL_SEPARATOR_CHARS);
while (st.hasMoreTokens()) {
String link = st.nextToken();
// Only links starting with '/' are considered internal.
Content error = null;
if (link.charAt(0) != '/' && (error = checkLink(link)) != null) {
List links = (List) brokenLinks.get(pageUrl);
if (links == null) {
links = new LinkedList();
brokenLinks.put(pageUrl, links);
}
links.add(new Object[] { link, error });
}
}
}
} finally {
if (reader != null) {
reader.close();
}
}
return brokenLinks;
}
/**
* Save the results of link checking to a repository file.
*
* @param repository URL of the output file.
* @param username Username for connecting to the repository.
* @param password Password for connecting to the repository.
* @param pages Broken links data.
* @throws IOException
*/
public static void putResults(String repository, String username, String password, Map pages) throws IOException {
Element root = new Element("broken-links");
root.setAttribute("date", new Date().toString());
for (Iterator it = pages.keySet().iterator() ; it.hasNext() ; ) {
String page = (String) it.next();
Element pageEl = new Element("page");
pageEl.setAttribute("url", page);
Collection links = (Collection) pages.get(page);
for (Iterator it2 = links.iterator() ; it2.hasNext() ; ) {
Object[] link = (Object[]) it2.next();
Element linkEl = new Element("link");
linkEl.setAttribute("url", (String) link[0]);
linkEl.addContent((Content) link[1]);
pageEl.addContent(linkEl);
}
root.addContent(pageEl);
}
Document doc = new Document(root);
XMLOutputter outputter = new XMLOutputter();
StringWriter sw = new StringWriter();
outputter.output(doc, sw);
String body = sw.toString();
put(repository, username, password, body);
}
private static void put(String repository, String username, String password, String body) throws IOException {
client.setConnectionTimeout(30000);
client.getState().setAuthenticationPreemptive(true);
Credentials defaultcreds = new UsernamePasswordCredentials(username, password);
client.getState().setCredentials(null, null, defaultcreds);
PutMethod put = null;
try {
put = new PutMethod(repository);
put.setRequestBody(body);
client.executeMethod(put);
// FIXME: log this!
} finally {
if (put != null) {
put.releaseConnection();
}
}
}
private static Content checkLink(String link) {
client.setConnectionTimeout(30000);
HeadMethod head = null;
try {
head = new HeadMethod(link);
head.setFollowRedirects(true);
try {
int resultCode = client.executeMethod(head);
if (resultCode >= 300) {
Element el = new Element("text", "i18n", I18N_NS_URI);
el.addContent(HTTP_ERROR_PREFIX + new Integer(head.getStatusCode()).toString());
return el;
}
} catch (Exception e) {
return new Text(e.toString());
}
return null;
} finally {
if (head != null) {
head.releaseConnection();
}
}
}
}
|