org.lobid.lodmill.hadoop.IntegrationTestLobidNTriplesToJsonLd.java Source code

Java tutorial

Introduction

Here is the source code for org.lobid.lodmill.hadoop.IntegrationTestLobidNTriplesToJsonLd.java

Source

/* Copyright 2013 Fabian Steeg. Licensed under the Eclipse Public License 1.0 */

package org.lobid.lodmill.hadoop;

import java.io.IOException;
import java.net.URI;
import java.util.Scanner;

import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.ClusterMapReduceTestCase;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Utils;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.lobid.lodmill.hadoop.NTriplesToJsonLd.NTriplesToJsonLdMapper;
import org.lobid.lodmill.hadoop.NTriplesToJsonLd.NTriplesToJsonLdReducer;
import org.slf4j.LoggerFactory;

/**
 * Test the {@link NTriplesToJsonLd} job with blank nodes.
 * <p>
 * The test copies two N-Triples samples and a subjects file into the file
 * system of the test cluster, runs the mapper and reducer of the job on them
 * and checks the single output file for the expected values.
 * 
 * @author Fabian Steeg (fsteeg)
 */
@SuppressWarnings("javadoc")
public class IntegrationTestLobidNTriplesToJsonLd extends ClusterMapReduceTestCase {
    /* Local test resources, copied to the cluster file system in setUp(): */
    private static final String TEST_FILE_TRIPLES_1 = "src/test/resources/lobid-org-with-blank-nodes-1.nt";
    private static final String TEST_FILE_TRIPLES_2 = "src/test/resources/lobid-org-with-blank-nodes-2.nt";
    private static final String TEST_FILE_SUBJECTS = "src/test/resources/lobid-org-required-subjects.out";
    /* Target locations of the resources above in the cluster file system: */
    private static final String HDFS_IN_TRIPLES_1 = "blank-nodes-test/sample-1.nt";
    private static final String HDFS_IN_TRIPLES_2 = "blank-nodes-test/sample-2.nt";
    private static final String HDFS_IN_SUBJECTS = "blank-nodes-test/subjects";
    /* Output location of the job and of the zipped map file: */
    private static final String HDFS_OUT = "out/sample";
    private static final String HDFS_OUT_ZIP = "out/zip";
    /* Charset used to decode the job output (Hadoop Text is UTF-8 encoded): */
    private static final String OUTPUT_CHARSET = "UTF-8";
    private FileSystem hdfs = null;

    /**
     * Start the test cluster and copy the local test resources into its file
     * system.
     * 
     * @throws Exception If starting the cluster or copying a file fails
     */
    @Before
    @Override
    public void setUp() throws Exception {
        System.setProperty("hadoop.log.dir", "/tmp/logs");
        super.setUp();
        hdfs = getFileSystem();
        hdfs.copyFromLocalFile(new Path(TEST_FILE_TRIPLES_1), new Path(HDFS_IN_TRIPLES_1));
        hdfs.copyFromLocalFile(new Path(TEST_FILE_TRIPLES_2), new Path(HDFS_IN_TRIPLES_2));
        hdfs.copyFromLocalFile(new Path(TEST_FILE_SUBJECTS), new Path(HDFS_IN_SUBJECTS));
    }

    /**
     * Run the job on the sample data and check that the output contains the
     * expected values and does not contain the unresolved blank node property.
     */
    @Test
    public void testBlankNodeResolution() throws IOException, ClassNotFoundException, InterruptedException {
        final Job job = createJob();
        assertTrue("Job should complete successfully", job.waitForCompletion(true));
        final String result = readResults().toString();
        System.err.println("JSON-LD output:\n" + result);
        assertEquals("Expect two lines", 2, result.trim().split("\n").length);
        assertTrue("Expect correct long", result.contains("pos#long") && result.contains("2.3377220"));
        assertTrue("Expect correct lat", result.contains("pos#lat") && result.contains("48.8681710"));
        assertTrue("Expect correct country name", result.contains("ns#country-name") && result.contains("France"));
        assertTrue("Expect correct locality", result.contains("ns#locality") && result.contains("Paris"));
        assertTrue("Expect correct postal code", result.contains("ns#postal-code") && result.contains("75002"));
        assertTrue("Expect correct street-address",
                result.contains("ns#street-address") && result.contains("Rue de Louvois 4"));
        assertTrue("Expect resolved funder type",
                result.contains("Corporate Body or Foundation under Private Law"));
        assertTrue("Expect resolved stock size", result.contains("10,001 - 30,000"));
        assertTrue("Expect resolved type triple for location", result.contains("wgs84_pos#SpatialThing"));
        assertTrue("Expect resolved contributor name", result.contains("Zayer, Eric"));
        assertFalse("Unresolved blank node should be filtered", result.contains("preferredNameEntityForThePerson"));
    }

    /**
     * Configure the job: both sample files as input, {@link #HDFS_OUT} as
     * output, and the URI returned by {@code CollectSubjects.asZippedMapFile}
     * added to the distributed cache.
     * 
     * @return The configured (not yet submitted) job
     * @throws IOException If creating the job or the zipped map file fails
     */
    private Job createJob() throws IOException {
        final JobConf conf = createJobConf();
        conf.setStrings("mapred.textoutputformat.separator", " ");
        conf.setStrings(CollectSubjects.PREFIX_KEY, "http://lobid.org/organisation");
        final URI zippedMapFile = CollectSubjects.asZippedMapFile(hdfs, new Path(HDFS_IN_SUBJECTS),
                new Path(HDFS_OUT_ZIP + "/" + CollectSubjects.MAP_FILE_ZIP));
        DistributedCache.addCacheFile(zippedMapFile, conf);
        final Job job = new Job(conf);
        job.setJobName("IntegrationTestLobidNTriplesToJsonLd");
        FileInputFormat.addInputPaths(job, HDFS_IN_TRIPLES_1 + "," + HDFS_IN_TRIPLES_2);
        FileOutputFormat.setOutputPath(job, new Path(HDFS_OUT));
        job.setMapperClass(NTriplesToJsonLdMapper.class);
        job.setReducerClass(NTriplesToJsonLdReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        return job;
    }

    /**
     * Read the job output, asserting that there is exactly one output file.
     * 
     * @return The content of the output file, each line terminated by "\n"
     * @throws IOException If listing or reading the output fails
     */
    private StringBuilder readResults() throws IOException {
        final FileSystem fs = getFileSystem();
        final Path[] outputFiles = FileUtil
                .stat2Paths(fs.listStatus(new Path(HDFS_OUT), new Utils.OutputFileUtils.OutputFilesFilter()));
        assertEquals("Expect a single output file", 1, outputFiles.length);
        final StringBuilder builder = new StringBuilder();
        /* Decode explicitly as UTF-8 instead of relying on the platform default charset: */
        try (final Scanner scanner = new Scanner(fs.open(outputFiles[0]), OUTPUT_CHARSET)) {
            while (scanner.hasNextLine()) {
                builder.append(scanner.nextLine()).append("\n");
            }
        }
        return builder;
    }

    /**
     * Close the file system and stop the cluster. Failures are logged, not
     * rethrown. The cluster is stopped even if the file system was never
     * opened (failed {@link #setUp()}) or closing it fails, so that no
     * running cluster is left behind.
     */
    @Override
    @After
    public void tearDown() {
        try {
            if (hdfs != null) {
                hdfs.close();
            }
        } catch (Exception e) {
            logFailure(e);
        } finally {
            try {
                super.stopCluster();
            } catch (Exception e) {
                logFailure(e);
            }
        }
    }

    /** Log an exception raised during tear down, including its stack trace. */
    private static void logFailure(final Exception e) {
        LoggerFactory.getLogger(IntegrationTestLobidNTriplesToJsonLd.class).error(e.getMessage(), e);
    }
}