package de.fzi.wikipipes.impl.googlewiki;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import javax.xml.transform.TransformerConfigurationException;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.Node;
import de.fzi.wikipipes.IWikiPage;
import de.fzi.wikipipes.impl.AbstractWebWikiRepository;
import de.fzi.wikipipes.impl.Util;
public class WikiRepositoryGW extends AbstractWebWikiRepository {
private HttpClient httpclient;
private String username = null, password = null;
private static final Log log = LogFactory.getLog(WikiRepositoryGW.class);
public WikiRepositoryGW(String serverURL) {
super(serverURL);
this.httpclient = new HttpClient();
login();
}
/** language unused by googlewiki */
public WikiRepositoryGW(String serverURL, String username, String password) {
super(serverURL);
this.username = username;
this.password = password;
this.httpclient = new HttpClient();
login();
}
@Override
public String getPageURL(String name) {
return getServerURL() + "/" + name;
}
public IWikiPage getPageByURI(String sourceURI) {
int lastslash = sourceURI.lastIndexOf('/');
String pagename = sourceURI.substring(lastslash + 1);
if (sourceURI.indexOf(getServerURL()) == -1)
throw new RuntimeException(
"given page uri doesnt match given repository");
return this.getPageByName(pagename);
}
public Collection<IWikiPage> changedAfter(long date) {
Collection<IWikiPage> result = new HashSet<IWikiPage>();
Collection<IWikiPage> allPages = this.getAllWifPages();
for (IWikiPage page : allPages)
if (page.getChangeDate() > date)
result.add(page);
return result;
}
@SuppressWarnings("unchecked")
public Collection<IWikiPage> findPages(String query) {
HashSet<IWikiPage> result = new HashSet<IWikiPage>();
// http://code.google.com/p/akur/w/list?can=1&q=wikipedia+sandbox&colspec=PageName+Summary+Changed+ChangedBy
String searchURL = this.getServerURL().replaceAll("/wiki",
"/w/list?can=1&q=");
try {
searchURL += URLEncoder.encode(query, "UTF-8");
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
searchURL += "&colspec=PageName+Summary+Changed+ChangedBy";
org.dom4j.Document indexDoc = Util.getInputStreamAsDocument(Util
.getInputStreamFromUrl(this.httpclient, searchURL));
List<?> list = indexDoc.selectNodes("//*[@class='results']"
+ "/descendant::*[@href and not(@style) and not(@onclick) ]");
for (Object o : list) {
Node n = (Node) o;
result.add(this.getPageByName(n.getText()));
}
return result;
}
public Collection<IWikiPage> getAllWifPages() {
Set<IWikiPage> result = new HashSet<IWikiPage>();
// serverUrl = http://code.google.com/p/PROJECTNAME/wiki
String indexUrl = getServerURL().replace("/wiki", "/w") + "/list";
Node next = null;
do {
InputStream in = Util.getInputStreamFromUrl(indexUrl);
org.dom4j.Document inDoc = Util.getInputStreamAsDocument(in);
next = inDoc
.selectSingleNode("//A[starts-with(text(), 'Next ')]/@href");
List<?> list = inDoc.selectNodes("//TD[@class='vt id col_0']/A");
for (Object o : list) {
// extract pagename
Node node = (Node) o;
String s = node.getText();
assert s != null;
result.add(getPageByName(s));
}
if (next != null)
indexUrl = getServerURL().replace("/wiki", "/w") + "/"
+ next.getText();
} while (next != null);
log.debug(result.size());
return result;
}
public IWikiPage getPageByName(String name) {
return new WikiPageGW(this, name);
}
/**
* there is no rootpage in googlewiki
*/
public IWikiPage getRootPage() {
return null;
}
public String toWikiSyntax(Reader wifReader) {
try {
String wif = Util.transformToStringAndBufferXslt(wifReader,
"wif2googlewiki.xslt");
return wif;
} catch (TransformerConfigurationException e1) {
throw new RuntimeException(e1);
} catch (UnsupportedEncodingException e1) {
throw new RuntimeException(e1);
}
}
private void login() {
String loginURL = "https://www.google.com/accounts/LoginAuth";
GetMethod get = new GetMethod(
"https://www.google.com/accounts/ManageAccount");
PostMethod postLogin = new PostMethod(loginURL);
postLogin.addParameter("continue",
"https://www.google.com/accounts/ManageAccount");
if (this.username != null)
postLogin.addParameter("Email", this.username);
else
throw new RuntimeException("no username");
if (this.password != null)
postLogin.addParameter("Passwd", this.password);
else
throw new RuntimeException("no password");
postLogin.addParameter("PersistentCookie", "yes");
postLogin.addParameter("rmShown", "1");
postLogin.addParameter("signIn", "Anmeldung");
try {
this.httpclient.executeMethod(get);
this.httpclient.executeMethod(postLogin);
get = new GetMethod(
"https://www.google.com/accounts/CheckCookie?continue=https%3A%2F%2Fwww.google.com%2Faccounts%2FManageAccount&chtml=LoginDoneHtml");
this.httpclient.executeMethod(get);
Document doc = Util.getInputStreamAsDocument(get
.getResponseBodyAsStream());
String url = doc.selectSingleNode("//META/@content").getText();
url = url.substring(url.indexOf('\'') + 1, url.lastIndexOf('\''));
log.debug("URL: " + url);
get = new GetMethod(url);
this.httpclient.executeMethod(get);
} catch (HttpException e) {
throw new RuntimeException(e);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
protected HttpClient getHttpClient() {
return this.httpclient;
}
}
|