Java tutorial
/* * Wikipedia.java * Copyright (C) 2007 David Milne, d.n.milne@gmail.com * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ package ruc.irm.wikit.db; import com.sleepycat.je.EnvironmentLockedException; import org.apache.commons.cli.*; import org.apache.commons.lang3.BooleanUtils; import org.apache.commons.lang3.math.NumberUtils; import org.apache.commons.lang3.tuple.ImmutableTriple; import org.apache.commons.lang3.tuple.Triple; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import ruc.irm.wikit.common.conf.Conf; import ruc.irm.wikit.common.conf.ConfFactory; import ruc.irm.wikit.db.je.WDatabase; import ruc.irm.wikit.db.je.WEntry; import ruc.irm.wikit.db.je.WEnvironment; import ruc.irm.wikit.db.je.WIterator; import ruc.irm.wikit.db.je.it.PageIterator; import ruc.irm.wikit.db.je.it.TitleIterator; import ruc.irm.wikit.model.Page; import ruc.irm.wikit.util.ConsoleLoop; import ruc.irm.wikit.util.ProgressTracker; import scala.Console; import java.io.File; import java.io.IOException; import java.util.Iterator; import java.util.Scanner; import static ruc.irm.wikit.db.je.WDatabase.DatabaseType.articlesByTitle; import static ruc.irm.wikit.db.je.WDatabase.DatabaseType.categoriesByTitle; /** * Represents a single dump or instance of Wikipedia */ public class Wikipedia { private static Logger LOG = LoggerFactory.getLogger(Wikipedia.class); private WEnvironment env; public Wikipedia(Conf conf) throws EnvironmentLockedException { this.env = new WEnvironment(conf); } /** * Returns the environment that this is connected to * * @return the environment that this is connected to */ public WEnvironment getEnvironment() { return env; } /** * Returns true if the preparation work has been completed, otherwise false * * @return true if the preparation work has been completed, otherwise false */ public boolean isReady() { return env.isReady(); } /** * Returns the Page referenced by the given id. The page can be cast into the appropriate type for * more specific functionality. * * @param id the id of the Page to retrieve. * @return the Page referenced by the given id, or null if one does not exist. */ public Page getPageById(int id) { return Page.createPage(env, id); } public PageIterator getPageIterator() { return new PageIterator(env); } public Integer getIdByArticleTitle(String title) { return env.getDbArticlesByTitle().retrieve(title); } public WIterator<String, Integer> getArticleTitleIterator() { return env.getDbArticlesByTitle().getIterator(); } public Integer getIdByCategoryTitle(String title) { return env.getDbCategoriesByTitle().retrieve(title); } public WIterator<String, Integer> getCategoryTitleIterator() { return env.getDbCategoriesByTitle().getIterator(); } /** * Tidily closes the database environment behind this wikipedia instance. This should be done whenever * one is finished using it. */ public void close() { env.close(); this.env = null; } @Override public void finalize() { if (this.env != null) LOG.warn("Unclosed wikipedia. You may be causing a memory leak."); } public static void main(String[] args) throws ParseException, IOException { String helpMsg = "usage: Wikipedia -c config.xml -pid 123"; HelpFormatter helpFormatter = new HelpFormatter(); CommandLineParser parser = new PosixParser(); Options options = new Options(); options.addOption(new Option("c", true, "config file")); options.addOption(new Option("type", true, "database type: " + "articleByTitle|categoryByTitle|page")); CommandLine commandLine = parser.parse(options, args); if (!commandLine.hasOption("c") || !commandLine.hasOption("type")) { helpFormatter.printHelp(helpMsg, options); return; } Conf conf = ConfFactory.createConf(commandLine.getOptionValue("c"), true); Wikipedia wikipedia = new Wikipedia(conf); String type = commandLine.getOptionValue("type"); switch (type) { case "page": ConsoleLoop.loop(new ImmutableTriple<String, String, ConsoleLoop.Handler>("id", "list page by id", new ConsoleLoop.Handler() { @Override public void handle(String input) throws IOException { Page page = wikipedia.getPageById(NumberUtils.toInt(input)); System.out.println(page); if (page != null) { System.out.println(page.getContent()); } } }), new ImmutableTriple<String, String, ConsoleLoop.Handler>("list", "list all", new ConsoleLoop.Handler() { @Override public void handle(String input) throws IOException { PageIterator it = wikipedia.getPageIterator(); Scanner scanner = new Scanner(System.in); while (it.hasNext()) { Page page = it.next(); System.out.println(page); if (page.getContent().length() > 200) { System.out.println(page.getContent().substring(0, 200)); } else { System.out.println(page.getContent()); } System.out.println("type exit to return, or enter to continue"); String command = scanner.nextLine(); if (command.equalsIgnoreCase("exit")) { break; } } it.close(); } })); break; case "articleByTitle": ConsoleLoop.loop(new ImmutableTriple<String, String, ConsoleLoop.Handler>("title", "input title " + "and return its id", new ConsoleLoop.Handler() { @Override public void handle(String input) throws IOException { Integer id = wikipedia.getIdByArticleTitle(input); System.out.println(id); } }), new ImmutableTriple<String, String, ConsoleLoop.Handler>("list", "list all " + "article title and its id", new ConsoleLoop.Handler() { @Override public void handle(String input) throws IOException { WIterator<String, Integer> it = wikipedia.getArticleTitleIterator(); int count = 0; while (it.hasNext()) { WEntry<String, Integer> entry = it.next(); System.out.println("\t" + entry.getKey() + "\t" + entry.getValue()); count++; if (count % 20 == 0) { System.out.println("type exit to return, or enter to continue"); String command = new Scanner(System.in).nextLine(); if (command.equalsIgnoreCase("exit")) { break; } } } it.close(); } })); break; case "categoryByTitle": ConsoleLoop.loop(new ImmutableTriple<String, String, ConsoleLoop.Handler>("title", "get id by " + "title", new ConsoleLoop.Handler() { @Override public void handle(String input) throws IOException { Integer id = wikipedia.getIdByCategoryTitle(input); System.out.println(id); } }), new ImmutableTriple<String, String, ConsoleLoop.Handler>("list", "list all", new ConsoleLoop.Handler() { @Override public void handle(String input) throws IOException { WIterator<String, Integer> it = wikipedia.getCategoryTitleIterator(); int count = 0; while (it.hasNext()) { WEntry<String, Integer> entry = it.next(); System.out.println("\t" + entry.getKey() + "\t" + entry.getValue()); count++; if (count % 20 == 0) { System.out.println("type exit to return, or enter to continue"); String command = new Scanner(System.in).nextLine(); if (command.equalsIgnoreCase("exit")) { break; } } } it.close(); } })); break; } ; wikipedia.close(); } }