org.paxle.core.doc.impl.BasicDocumentFactoryTest.java Source code

Java tutorial

Introduction

Here is the source code for org.paxle.core.doc.impl.BasicDocumentFactoryTest.java

Source

/**
 * This file is part of the Paxle project.
 * Visit http://www.paxle.net for more information.
 * Copyright 2007-2010 the original author or authors.
 *
 * Licensed under the terms of the Common Public License 1.0 ("CPL 1.0").
 * Any use, reproduction or distribution of this program constitutes the recipient's acceptance of this agreement.
 * The full license text is available under http://www.opensource.org/licenses/cpl1.0.txt
 * or in the file LICENSE.txt in the root directory of the Paxle distribution.
 *
 * Unless required by applicable law or agreed to in writing, this software is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

package org.paxle.core.doc.impl;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.net.URI;
import java.nio.charset.Charset;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.Enumeration;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.TimeZone;
import java.util.Map.Entry;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipOutputStream;

import javax.activation.DataHandler;
import javax.activation.DataSource;

import junit.framework.TestCase;
import junitx.framework.ArrayAssert;
import junitx.framework.FileAssert;
import junitx.framework.ListAssert;

import org.apache.commons.io.IOUtils;
import org.apache.commons.io.input.TeeInputStream;
import org.apache.commons.io.output.TeeOutputStream;
import org.paxle.core.doc.Field;
import org.paxle.core.doc.ICommand;
import org.paxle.core.doc.ICommandProfile;
import org.paxle.core.doc.ICrawlerDocument;
import org.paxle.core.doc.IDocumentFactory;
import org.paxle.core.doc.IIndexerDocument;
import org.paxle.core.doc.IParserDocument;
import org.paxle.core.doc.LinkInfo;
import org.paxle.core.io.temp.ITempDir;
import org.paxle.core.io.temp.ITempFileManager;
import org.paxle.core.io.temp.impl.TempFileManager;

public class BasicDocumentFactoryTest extends TestCase {
    /** HTML fixture used as the content of the test crawler-document. */
    private static final File CRAWLER_FILE = new File("src/test/resources/paxle.html");
    /** Plain-text fixture used as the text of the test parser- and indexer-documents. */
    private static final File PARSER_FILE = new File("src/test/resources/paxle.txt");

    /** Temp-file manager handed to the factory under test; its file map is cleaned up in tearDown(). */
    private ITempFileManager tmpFileManager;
    /** The document factory under test; re-created in setUp() for every test. */
    private IDocumentFactory docFactory;
    /** Fixed reference timestamp built in setUp(); used for the date properties of the crawler- and parser-documents. */
    GregorianCalendar cal;

    /**
     * Prepares the fixture: a fixed reference timestamp and a fresh
     * {@link BasicDocumentFactory} wired to a new {@link TempFileManager}.
     */
    @SuppressWarnings("unchecked")
    @Override
    protected void setUp() throws Exception {
        super.setUp();

        // reference timestamp: 2009-12-16 19:01:14.728 in the Europe/Vienna time zone
        final GregorianCalendar reference = new GregorianCalendar(TimeZone.getTimeZone("Europe/Vienna"));
        reference.set(2009, GregorianCalendar.DECEMBER, 16, 19, 1, 14);
        reference.set(GregorianCalendar.MILLISECOND, 728);
        this.cal = reference;

        final ITempFileManager fileManager = new TempFileManager();
        this.tmpFileManager = fileManager;

        // the factory gets its temp-file manager injected by hand and is then activated with an empty configuration
        this.docFactory = new BasicDocumentFactory() {
            {
                this.tempFileManager = fileManager;
                this.activate(Collections.EMPTY_MAP);
            }
        };
    }

    /**
     * Deletes every temp file still registered with the temp-file manager.
     * <p>
     * All deletions are attempted before the outcome is asserted, so that a
     * single undeletable file does not leave the remaining files behind. The
     * test still fails if any file could not be deleted.
     */
    @Override
    protected void tearDown() throws Exception {
        try {
            final Map<File, ITempDir> tempFiles = ((TempFileManager) this.tmpFileManager).getFileMap();
            if (tempFiles != null) {
                // collect the failures instead of aborting on the first one
                final StringBuilder undeleted = new StringBuilder();
                for (File file : tempFiles.keySet()) {
                    if (!file.delete()) {
                        undeleted.append(' ').append(file);
                    }
                }
                assertTrue("Unable to delete temp file(s):" + undeleted, undeleted.length() == 0);
            }
        } finally {
            super.tearDown();
        }
    }

    /**
     * Creates a crawler-document of the requested class and fills it with fixed test data.
     *
     * @param crawlerDocClass the document class (or interface) passed to the factory
     * @param location the location to assign to the document
     * @return the populated crawler-document, using {@link #CRAWLER_FILE} as its content
     * @throws IOException declared by the signature; not raised by any statement visible here
     * @throws ParseException declared by the signature; not raised by any statement visible here
     */
    protected ICrawlerDocument createTestCDoc(Class<?> crawlerDocClass, URI location)
            throws IOException, ParseException {
        final ICrawlerDocument crawlerDoc = (ICrawlerDocument) this.docFactory.createDocument(crawlerDocClass);

        // identity and status
        crawlerDoc.setOID(1441654849);
        crawlerDoc.setStatus(ICrawlerDocument.Status.OK, "CrawlerDocument is OK");
        crawlerDoc.setLocation(location);

        // content metadata
        crawlerDoc.setCharset("ISO-8859-1");
        crawlerDoc.setMimeType("text/html");

        // both dates are taken from the fixed reference timestamp
        crawlerDoc.setCrawlerDate(this.cal.getTime());
        crawlerDoc.setLastModDate(this.cal.getTime());

        crawlerDoc.setLanguages(new String[] { "en" });
        crawlerDoc.setContent(CRAWLER_FILE);
        return crawlerDoc;
    }

    /**
     * Creates a parser-document of the requested class and fills it with fixed test data.
     * <p>
     * The language set and the link map are plain {@link HashSet}/{@link HashMap}
     * instances; no anonymous collection subclasses (which would keep a reference
     * to this test instance) are handed to the document.
     *
     * @param parserDocClass the document class (or interface) passed to the factory
     * @return the populated parser-document, using {@link #PARSER_FILE} as its text file
     * @throws IOException declared by the signature; not raised by any statement visible here
     */
    protected IParserDocument createTestPDoc(Class<?> parserDocClass) throws IOException {
        final IParserDocument pDoc = (IParserDocument) this.docFactory.createDocument(parserDocClass);
        pDoc.setOID(266560296);
        pDoc.setStatus(IParserDocument.Status.OK, "ParserDocument is OK");
        pDoc.setTextFile(PARSER_FILE);
        pDoc.setLastChanged(cal.getTime());
        pDoc.setTitle("Paxle    - PAXLE Search Framework");
        pDoc.setAuthor("Paxle");
        pDoc.setCharset(Charset.forName("UTF-8"));
        pDoc.setMimeType("text/html");
        pDoc.setKeywords(Arrays.asList("en", "start"));

        final HashSet<String> languages = new HashSet<String>();
        languages.add("en");
        pDoc.setLanguages(languages);

        pDoc.setHeadlines(Arrays.asList("Paxle", "What is Paxle?", "What can you do with Paxle?",
                "Is it difficult to use Paxle?"));

        // one filtered link carrying full status information, one link with only a title
        final Map<URI, LinkInfo> links = new HashMap<URI, LinkInfo>();
        links.put(URI.create("http://www.osgi.org/"), new LinkInfo("OSGi", LinkInfo.Status.FILTERED,
                "Blocked by Robotx.txt", "http://www.paxle.net"));
        links.put(URI.create("http://lucene.apache.org/"), new LinkInfo("Lucene"));
        pDoc.setLinks(links);

        return pDoc;
    }

    /**
     * Creates an indexer-document of the requested class and fills its fields with test data.
     * <p>
     * The date and size fields are derived from the fixture files on disk
     * ({@link #CRAWLER_FILE} and {@link #PARSER_FILE}), not from the reference timestamp.
     *
     * @param indexerDocClass the document class (or interface) passed to the factory
     * @return the populated indexer-document
     * @throws IOException declared by the signature; not raised by any statement visible here
     */
    protected IIndexerDocument createTestIDoc(Class<?> indexerDocClass) throws IOException {
        final IIndexerDocument indexerDoc = (IIndexerDocument) this.docFactory.createDocument(indexerDocClass);

        // identity and status
        indexerDoc.setOID(0);
        indexerDoc.setStatus(IIndexerDocument.Status.OK, "IndexerDocument is OK");

        // indexed fields
        indexerDoc.set(IIndexerDocument.AUTHOR, "Paxle");
        indexerDoc.set(IIndexerDocument.KEYWORDS, new String[] { "en", "start" });
        indexerDoc.set(IIndexerDocument.LANGUAGES, new String[] { "en" });
        indexerDoc.set(IIndexerDocument.LAST_CRAWLED, new Date(CRAWLER_FILE.lastModified()));
        indexerDoc.set(IIndexerDocument.LAST_MODIFIED, new Date(PARSER_FILE.lastModified()));
        indexerDoc.set(IIndexerDocument.LOCATION, "http://www.paxle.net");
        indexerDoc.set(IIndexerDocument.MIME_TYPE, "text/html");
        indexerDoc.set(IIndexerDocument.PROTOCOL, "http");
        indexerDoc.set(IIndexerDocument.SIZE, Long.valueOf(CRAWLER_FILE.length()));
        indexerDoc.set(IIndexerDocument.TITLE, "Paxle");
        indexerDoc.set(IIndexerDocument.TEXT, PARSER_FILE);
        return indexerDoc;
    }

    /**
     * Creates a command for {@code http://www.paxle.net} that carries one test
     * crawler-document, one test parser-document and one test indexer-document.
     *
     * @return the populated command
     * @throws IOException propagated from the document factory methods of this class
     * @throws ParseException propagated from {@code createTestCDoc}
     */
    protected ICommand createTestCommand() throws IOException, ParseException {
        final URI target = URI.create("http://www.paxle.net");

        // the three dummy documents attached to the command
        final ICrawlerDocument crawlerDoc = this.createTestCDoc(BasicCrawlerDocument.class, target);
        final IParserDocument parserDoc = this.createTestPDoc(BasicParserDocument.class);
        final IIndexerDocument indexerDoc = this.createTestIDoc(BasicIndexerDocument.class);

        // the dummy command itself
        final BasicCommand command = this.docFactory.createDocument(BasicCommand.class);
        command.setOID(412550205);
        command.setProfileOID(372627797);
        command.setLocation(target);
        command.setCrawlerDocument(crawlerDoc);
        command.setParserDocument(parserDoc);
        command.setIndexerDocuments(new IIndexerDocument[] { indexerDoc });
        return command;
    }

    /**
     * Asserts that two commands carry the same OID, profile OID and location
     * and that their crawler- and parser-documents are equal.
     * <p>
     * Two {@code null} commands are considered equal; exactly one {@code null}
     * command is a failure. Indexer documents are not compared here.
     *
     * @param expected the reference command
     * @param actual the command to check
     * @throws IOException declared by the parser-document comparison
     */
    public static void assertEquals(ICommand expected, ICommand actual) throws IOException {
        if (expected == null || actual == null) {
            // both missing: equal; only one missing: mismatch
            if (expected != actual) {
                fail();
            }
            return;
        }

        assertEquals(expected.getOID(), actual.getOID());
        assertEquals(expected.getProfileOID(), actual.getProfileOID());
        assertEquals(expected.getLocation(), actual.getLocation());

        // nested documents are checked by the matching overloads
        assertEquals(expected.getCrawlerDocument(), actual.getCrawlerDocument());
        assertEquals(expected.getParserDocument(), actual.getParserDocument());
    }

    /**
     * Asserts that two crawler-documents agree in OID, location, charset,
     * mime-type, crawler date, last-modified date, languages and in the binary
     * content of their content files.
     * <p>
     * Two {@code null} documents are considered equal; exactly one {@code null}
     * document is a failure.
     *
     * @param expected the reference document
     * @param actual the document to check
     */
    public static void assertEquals(ICrawlerDocument expected, ICrawlerDocument actual) {
        if (expected == null || actual == null) {
            // both missing: equal; only one missing: mismatch
            if (expected != actual) {
                fail();
            }
            return;
        }

        assertEquals(expected.getOID(), actual.getOID());
        assertEquals(expected.getLocation(), actual.getLocation());
        assertEquals(expected.getCharset(), actual.getCharset());
        assertEquals(expected.getMimeType(), actual.getMimeType());
        assertEquals(expected.getCrawlerDate(), actual.getCrawlerDate());
        assertEquals(expected.getLastModDate(), actual.getLastModDate());
        ArrayAssert.assertEquals(expected.getLanguages(), actual.getLanguages());

        // the crawled content is compared byte by byte
        FileAssert.assertBinaryEquals(expected.getContent(), actual.getContent());
    }

    /**
     * Asserts that two parser-documents agree in their scalar properties,
     * keywords, languages, headlines, the binary content of their text files
     * and their links.
     * <p>
     * Two {@code null} documents are considered equal; exactly one {@code null}
     * document is a failure.
     *
     * @param expected the reference document
     * @param actual the document to check
     * @throws IOException declared by the signature; not raised by any statement visible here
     */
    public static void assertEquals(IParserDocument expected, IParserDocument actual) throws IOException {
        if (expected == null || actual == null) {
            // both missing: equal; only one missing: mismatch
            if (expected != actual) {
                fail();
            }
            return;
        }

        // scalar properties
        assertEquals(expected.getOID(), actual.getOID());
        assertEquals(expected.getStatus(), actual.getStatus());
        assertEquals(expected.getStatusText(), actual.getStatusText());
        assertEquals(expected.getLastChanged(), actual.getLastChanged());
        assertEquals(expected.getTitle(), actual.getTitle());
        assertEquals(expected.getAuthor(), actual.getAuthor());
        assertEquals(expected.getCharset(), actual.getCharset());
        assertEquals(expected.getMimeType(), actual.getMimeType());

        // collection properties are copied into lists before they are compared
        ListAssert.assertEquals(new ArrayList<String>(expected.getKeywords()),
                new ArrayList<String>(actual.getKeywords()));
        ListAssert.assertEquals(new ArrayList<String>(expected.getLanguages()),
                new ArrayList<String>(actual.getLanguages()));
        ListAssert.assertEquals(new ArrayList<String>(expected.getHeadlines()),
                new ArrayList<String>(actual.getHeadlines()));

        // the extracted text is compared byte by byte
        FileAssert.assertBinaryEquals(expected.getTextFile(), actual.getTextFile());

        // same number of links, and every expected link has an equal counterpart
        final Map<URI, LinkInfo> expectedLinks = expected.getLinks();
        final Map<URI, LinkInfo> actualLinks = actual.getLinks();
        assertEquals(expectedLinks.size(), actualLinks.size());
        for (Entry<URI, LinkInfo> expectedLink : expectedLinks.entrySet()) {
            final LinkInfo actualInfo = actualLinks.get(expectedLink.getKey());
            assertNotNull(actualInfo);
            assertEquals(expectedLink.getValue(), actualInfo);
        }
    }

    /**
     * Asserts that two link-infos agree in status, status text, status code,
     * title and link origin.
     * <p>
     * Two {@code null} link-infos are considered equal; exactly one {@code null}
     * link-info is reported as an assertion failure instead of surfacing as a
     * {@link NullPointerException}.
     *
     * @param expected the reference link-info
     * @param actual the link-info to check
     */
    public static void assertEquals(LinkInfo expected, LinkInfo actual) {
        if (expected == null || actual == null) {
            // both missing: equal; only one missing: mismatch
            if (expected != actual) {
                fail("Only one of the two link-infos is null");
            }
            return;
        }

        assertEquals(expected.getStatus(), actual.getStatus());
        assertEquals(expected.getStatusText(), actual.getStatusText());
        assertEquals(expected.getStatusCode(), actual.getStatusCode());
        assertEquals(expected.getTitle(), actual.getTitle());
        assertEquals(expected.getLinkOrigin(), actual.getLinkOrigin());
    }

    /**
     * Asserts that two indexer-documents agree in status and status text and
     * that every field of {@code expected} has an equal value in {@code actual}.
     * <p>
     * The comparison strategy is chosen from the runtime value of each field:
     * <ul>
     * <li>files must be distinct instances with identical binary content,</li>
     * <li>string arrays are compared element by element,</li>
     * <li>any other array is compared by length and element equality,</li>
     * <li>everything else is compared via {@code equals}.</li>
     * </ul>
     * Two {@code null} documents are considered equal; exactly one {@code null}
     * document is a failure. Fields that exist only in {@code actual} are not
     * detected.
     *
     * @param expected the reference document
     * @param actual the document to check
     */
    public static void assertEquals(IIndexerDocument expected, IIndexerDocument actual) {
        if (expected == null || actual == null) {
            // both missing: equal; only one missing: mismatch
            if (expected != actual) {
                fail();
            }
            return;
        }

        assertEquals(expected.getStatus(), actual.getStatus());
        assertEquals(expected.getStatusText(), actual.getStatusText());

        for (Entry<Field<?>, ?> entry : expected.getFields().entrySet()) {
            final Field<?> field = entry.getKey();

            final Serializable expectedValue = (Serializable) entry.getValue();
            final Serializable actualValue = actual.get(field);
            assertNotNull("Missing value for field " + field, actualValue);

            if (expectedValue instanceof File) {
                assertTrue("Value of field " + field + " is not a file", actualValue instanceof File);
                // the two documents must not share one and the same file object
                assertNotSame(expectedValue, actualValue);
                FileAssert.assertBinaryEquals((File) expectedValue, (File) actualValue);
            } else if (expectedValue instanceof String[]) {
                assertTrue("Value of field " + field + " is not a string array", actualValue instanceof String[]);
                ArrayAssert.assertEquals((String[]) expectedValue, (String[]) actualValue);
            } else if (expectedValue != null && expectedValue.getClass().isArray()) {
                // generic fallback so that no array-typed field is silently skipped
                assertTrue("Value of field " + field + " is not an array", actualValue.getClass().isArray());
                final int length = java.lang.reflect.Array.getLength(expectedValue);
                assertEquals("Array length of field " + field, length,
                        java.lang.reflect.Array.getLength(actualValue));
                for (int i = 0; i < length; i++) {
                    assertEquals("Element " + i + " of field " + field,
                            java.lang.reflect.Array.get(expectedValue, i),
                            java.lang.reflect.Array.get(actualValue, i));
                }
            } else {
                assertEquals((Object) expectedValue, (Object) actualValue);
            }
        }
    }

    /**
     * The factory must create commands both for the {@link ICommand} interface
     * and for the {@link BasicCommand} class; a command created via the
     * interface starts with OID 0 and profile OID -1.
     */
    public void testCreateCommand() throws IOException {
        final ICommand viaInterface = this.docFactory.createDocument(ICommand.class);
        assertNotNull(viaInterface);
        assertEquals(0, viaInterface.getOID());
        assertEquals(-1, viaInterface.getProfileOID());

        final ICommand viaClass = this.docFactory.createDocument(BasicCommand.class);
        assertNotNull(viaClass);
    }

    /**
     * The factory must create command profiles both for the
     * {@link ICommandProfile} interface and for the {@link BasicCommandProfile} class.
     */
    public void testCreateCommandProfile() throws IOException {
        final ICommandProfile viaInterface = this.docFactory.createDocument(ICommandProfile.class);
        assertNotNull(viaInterface);

        final ICommandProfile viaClass = this.docFactory.createDocument(BasicCommandProfile.class);
        assertNotNull(viaClass);
    }

    /**
     * The factory must create crawler-documents both for the
     * {@link ICrawlerDocument} interface and for the {@link BasicCrawlerDocument} class.
     */
    public void testCreateCrawlerDocument() throws IOException {
        final ICrawlerDocument viaInterface = this.docFactory.createDocument(ICrawlerDocument.class);
        assertNotNull(viaInterface);

        final ICrawlerDocument viaClass = this.docFactory.createDocument(BasicCrawlerDocument.class);
        assertNotNull(viaClass);
    }

    /**
     * The factory must create parser-documents for the {@link IParserDocument}
     * interface as well as for the {@link BasicParserDocument} and
     * {@link CachedParserDocument} classes.
     */
    public void testCreateParserDocument() throws IOException {
        final IParserDocument viaInterface = this.docFactory.createDocument(IParserDocument.class);
        assertNotNull(viaInterface);

        final IParserDocument basic = this.docFactory.createDocument(BasicParserDocument.class);
        assertNotNull(basic);

        final IParserDocument cached = this.docFactory.createDocument(CachedParserDocument.class);
        assertNotNull(cached);
    }

    /**
     * The factory must create indexer-documents both for the
     * {@link IIndexerDocument} interface and for the {@link BasicIndexerDocument} class.
     */
    public void testCreateIndexerDocument() throws IOException {
        final IIndexerDocument viaInterface = this.docFactory.createDocument(IIndexerDocument.class);
        assertNotNull(viaInterface);

        final IIndexerDocument viaClass = this.docFactory.createDocument(BasicIndexerDocument.class);
        assertNotNull(viaClass);
    }

    /**
     * Smoke test: marshalling a fully populated test command to standard out
     * must complete without an exception. The output itself is not verified.
     */
    public void testMarshalCommand() throws IOException, ParseException {
        final ICommand command = this.createTestCommand();
        this.docFactory.marshal(command, System.out);
    }

    /**
     * Unmarshalling the {@code command.xml} fixture without any attachments
     * must produce a command.
     */
    public void testUnmarshalBasicCommand() throws IOException {
        // opened outside the try: if opening fails there is nothing to close
        final InputStream xml = new FileInputStream("src/test/resources/command.xml");
        try {
            final ICommand command = this.docFactory.unmarshal(xml, null);
            assertNotNull(command);
        } finally {
            xml.close();
        }
    }

    /**
     * Round trip: a test command is marshalled, unmarshalled again and the
     * result must be a distinct but equal command.
     */
    public void testMarshalUnmarshalBasicCommand() throws IOException, ParseException {
        // creating a test command
        final ICommand cmd1 = this.createTestCommand();

        // marshal command (echoed to standard out for inspection)
        final ByteArrayOutputStream bout = new ByteArrayOutputStream();
        final TeeOutputStream out = new TeeOutputStream(System.out, bout);
        final Map<String, DataHandler> attachments = this.docFactory.marshal(cmd1, out);
        // flush only: closing the tee would also close System.out for the rest of the test run
        out.flush();

        // unmarshal command
        final ByteArrayInputStream bin = new ByteArrayInputStream(bout.toByteArray());
        final ICommand cmd2 = this.docFactory.unmarshal(bin, attachments);

        // check if the commands are equal
        assertNotSame(cmd1, cmd2);
        assertEquals(cmd1, cmd2);
    }

    /**
     * Round trip: a test crawler-document is marshalled, unmarshalled again
     * and the result must equal the original document.
     */
    public void testMarshalUnmarshalBasicCrawlerDocument() throws IOException, ParseException {
        final ICrawlerDocument expected = this.createTestCDoc(BasicCrawlerDocument.class,
                URI.create("http://www.paxle.net"));
        assertNotNull(expected);

        // marshal crawler-document (echoed to standard out for inspection)
        final ByteArrayOutputStream bout = new ByteArrayOutputStream();
        final TeeOutputStream out = new TeeOutputStream(System.out, bout);
        final Map<String, DataHandler> attachments = this.docFactory.marshal(expected, out);
        // flush only: closing the tee would also close System.out for the rest of the test run
        out.flush();

        // unmarshal crawler-document
        final ByteArrayInputStream bin = new ByteArrayInputStream(bout.toByteArray());
        final ICrawlerDocument actual = this.docFactory.unmarshal(bin, attachments);

        assertEquals(expected, actual);
    }

    /**
     * Round trip: a {@link BasicParserDocument} is marshalled, unmarshalled
     * again and the result must equal the original document.
     */
    public void testMarshalUnmarshalBasicParserDocument() throws IOException {
        final IParserDocument expected = this.createTestPDoc(BasicParserDocument.class);
        assertNotNull(expected);

        // marshal parser-document (echoed to standard out for inspection)
        final ByteArrayOutputStream bout = new ByteArrayOutputStream();
        final TeeOutputStream out = new TeeOutputStream(System.out, bout);
        final Map<String, DataHandler> attachments = this.docFactory.marshal(expected, out);
        // flush only: closing the tee would also close System.out for the rest of the test run
        out.flush();

        // unmarshal parser-document
        final ByteArrayInputStream bin = new ByteArrayInputStream(bout.toByteArray());
        final IParserDocument actual = this.docFactory.unmarshal(bin, attachments);

        assertEquals(expected, actual);
    }

    /**
     * Round trip: a {@link CachedParserDocument} is marshalled, unmarshalled
     * again and the result must equal the original document.
     */
    public void testMarshalUnmarshalCachedParserDocument() throws IOException {
        final IParserDocument expected = this.createTestPDoc(CachedParserDocument.class);
        assertNotNull(expected);

        // marshal parser-document (echoed to standard out for inspection)
        final ByteArrayOutputStream bout = new ByteArrayOutputStream();
        final TeeOutputStream out = new TeeOutputStream(System.out, bout);
        final Map<String, DataHandler> attachments = this.docFactory.marshal(expected, out);
        // flush only: closing the tee would also close System.out for the rest of the test run
        out.flush();

        // unmarshal parser-document
        final ByteArrayInputStream bin = new ByteArrayInputStream(bout.toByteArray());
        final IParserDocument actual = this.docFactory.unmarshal(bin, attachments);

        assertEquals(expected, actual);
    }

    /**
     * Round trip: a test indexer-document is marshalled, unmarshalled again
     * and the result must equal the original document.
     */
    public void testMarshalUnmarshalBasicIndexerDocument() throws IOException {
        final IIndexerDocument expected = this.createTestIDoc(BasicIndexerDocument.class);
        assertNotNull(expected);

        // marshal indexer-document (echoed to standard out for inspection)
        final ByteArrayOutputStream bout = new ByteArrayOutputStream();
        final TeeOutputStream out = new TeeOutputStream(System.out, bout);
        final Map<String, DataHandler> attachments = this.docFactory.marshal(expected, out);
        // flush only: closing the tee would also close System.out for the rest of the test run
        out.flush();

        // unmarshal indexer-document
        final ByteArrayInputStream bin = new ByteArrayInputStream(bout.toByteArray());
        final IIndexerDocument actual = this.docFactory.unmarshal(bin, attachments);

        assertEquals(expected, actual);
    }

    /**
     * Marshals a test command into a temporary ZIP archive: the XML goes into
     * the entry {@code command.xml}, every attachment into an entry named after
     * its content-id. Afterwards the archive is re-opened and its entries are
     * listed on standard out.
     * <p>
     * All streams and the archive are closed in {@code finally} blocks so that a
     * failure does not leak file handles.
     */
    public void testStoreMarshalledCommand() throws IOException, ParseException {
        // creating a test command
        final ICommand cmd = this.createTestCommand();

        // Create the ZIP file
        final File outFile = File.createTempFile("command", ".zip");
        outFile.deleteOnExit();

        final ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(outFile));
        try {
            // marshal command
            final ZipEntry commandEntry = new ZipEntry("command.xml");
            commandEntry.setComment("command.xml");
            zipOut.putNextEntry(commandEntry);

            // the tee is only flushed, never closed: closing it would close System.out as well
            final TeeOutputStream out = new TeeOutputStream(System.out, zipOut);
            final Map<String, DataHandler> attachments = this.docFactory.marshal(cmd, out);
            out.flush();
            zipOut.closeEntry();

            // write attachments
            if (attachments != null) {
                for (Entry<String, DataHandler> attachment : attachments.entrySet()) {
                    final String cid = attachment.getKey();
                    final DataHandler data = attachment.getValue();

                    final ZipEntry zipEntry = new ZipEntry(cid);
                    zipEntry.setComment(data.getName());
                    zipOut.putNextEntry(zipEntry);

                    // IOUtils.copy does not close its source, so do it here
                    final InputStream attachmentIn = data.getInputStream();
                    try {
                        IOUtils.copy(attachmentIn, zipOut);
                    } finally {
                        attachmentIn.close();
                    }
                    zipOut.closeEntry();
                }
            }
        } finally {
            zipOut.close();
        }
        System.out.println("Command written into file: " + outFile.toString());

        // print content
        final ZipFile zf = new ZipFile(outFile);
        try {
            for (Enumeration<? extends ZipEntry> entries = zf.entries(); entries.hasMoreElements();) {
                final ZipEntry entry = entries.nextElement();
                System.out.println(entry.getName() + ": " + entry.getComment());
            }
        } finally {
            zf.close();
        }
    }

    /**
     * Loads a command from the {@code command.zip} fixture: every entry except
     * {@code command.xml} is offered to the factory as an attachment keyed by
     * its entry name, then {@code command.xml} is unmarshalled and the result
     * is compared with a freshly created test command.
     * <p>
     * The archive is closed in a {@code finally} block so that a failing
     * unmarshal or assertion does not leak the file handle.
     */
    public void testLoadUnmarshalledCommand() throws IOException, ParseException {
        final ZipFile zf = new ZipFile(new File("src/test/resources/command.zip"));
        final ICommand cmd1;
        try {
            // process attachments
            final Map<String, DataHandler> attachments = new HashMap<String, DataHandler>();
            for (Enumeration<? extends ZipEntry> entries = zf.entries(); entries.hasMoreElements();) {
                final ZipEntry entry = entries.nextElement();
                final String name = entry.getName();
                if (name.equals("command.xml"))
                    continue;

                // create a data-source that reads the attachment straight from the archive
                final DataSource source = new DataSource() {
                    public String getContentType() {
                        return "application/x-java-serialized-object";
                    }

                    public InputStream getInputStream() throws IOException {
                        return zf.getInputStream(entry);
                    }

                    public String getName() {
                        return entry.getName();
                    }

                    public OutputStream getOutputStream() throws IOException {
                        // attachments are read-only
                        throw new UnsupportedOperationException();
                    }
                };
                attachments.put(name, new DataHandler(source));
            }

            // process command
            final ZipEntry commandEntry = zf.getEntry("command.xml");
            assertNotNull("command.zip contains no command.xml entry", commandEntry);
            final InputStream commandInput = zf.getInputStream(commandEntry);

            // unmarshal command (echoed to standard out for inspection)
            final TeeInputStream input = new TeeInputStream(commandInput, System.out);
            cmd1 = this.docFactory.unmarshal(input, attachments);
            assertNotNull(cmd1);
        } finally {
            // closing the archive also closes the entry streams obtained from it
            zf.close();
        }

        // the comparison runs after the archive has been closed, so nothing can be read from it anymore
        final ICommand cmd2 = this.createTestCommand();
        assertEquals(cmd2, cmd1);
    }
}