annis.administration.AdministrationDao.java Source code

Introduction

Here is the source code for annis.administration.AdministrationDao.java

Source

/*
 * Copyright 2009-2011 Collaborative Research Centre SFB 632
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package annis.administration;

import annis.dao.autogenqueries.QueriesGenerator;
import annis.examplequeries.ExampleQuery;
import annis.exceptions.AnnisException;
import annis.model.QueryNode;
import annis.ql.parser.QueryData;
import annis.security.UserConfig;
import com.google.common.base.Charsets;
import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import com.google.common.collect.Multimap;
import com.google.common.collect.TreeMultimap;
import com.google.common.io.Files;
import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.sql.DataSource;
import org.apache.commons.dbcp2.BasicDataSource;
import org.apache.commons.dbcp2.DelegatingConnection;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.filefilter.DirectoryFileFilter;
import org.apache.commons.io.filefilter.FileFileFilter;
import org.apache.commons.lang3.StringUtils;
import org.codehaus.jackson.map.AnnotationIntrospector;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.map.SerializationConfig;
import org.codehaus.jackson.xc.JaxbAnnotationIntrospector;
import org.postgresql.PGConnection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.core.io.FileSystemResource;
import org.springframework.core.io.Resource;
import org.springframework.dao.DataAccessException;
import org.springframework.jdbc.core.ResultSetExtractor;
import org.springframework.jdbc.core.RowMapper;
import org.springframework.jdbc.core.namedparam.MapSqlParameterSource;
import org.springframework.jdbc.core.simple.ParameterizedSingleColumnRowMapper;
import org.springframework.jdbc.datasource.DataSourceUtils;
import org.springframework.transaction.annotation.Isolation;
import org.springframework.transaction.annotation.Propagation;
import org.springframework.transaction.annotation.Transactional;

/**
 * DAO for administrative tasks: initializing the ANNIS database and importing
 * corpora.
 */
public class AdministrationDao extends AbstractAdminstrationDao {

    private static final Logger log = LoggerFactory.getLogger(AdministrationDao.class);

    // if this is true, the staging area is not deleted
    private boolean temporaryStagingArea;

    private DeleteCorpusDao deleteCorpusDao;

    /**
     * Searches for texts which are empty or contain only whitespace. If such a
     * text is found and no document browser configuration is defined for the
     * corpus, a new configuration which disables document browsing is created
     * and stored in the corpus properties file.
     *
     * @param toplevelCorpusName The name of the corpus whose texts are analyzed.
     */
    private void analyzeTextTable(String toplevelCorpusName) {
        List<String> rawTexts = getAnnisDao().getRawText(toplevelCorpusName);

        // pattern for checking the token layer
        final Pattern WHITESPACE_MATCHER = Pattern.compile("^\\s+$");

        for (String s : rawTexts) {

            if (s != null && WHITESPACE_MATCHER.matcher(s).matches()) {
                // deactivate document browsing if no document browser configuration exists
                if (getAnnisDao().getDocBrowserConfiguration(toplevelCorpusName) == null) {
                    // the corpus configuration should exist anyway
                    Properties corpusConf;
                    try {
                        corpusConf = getAnnisDao().getCorpusConfiguration(toplevelCorpusName);
                    } catch (FileNotFoundException ex) {
                        log.error("not found a corpus configuration, so skip analyzing the text table", ex);
                        return;
                    }

                    // disable document browsing unless it was explicitly switched on
                    // by the user in corpus.properties
                    boolean hasKey = corpusConf.containsKey("browse-documents");
                    boolean isActive = Boolean.parseBoolean(corpusConf.getProperty("browse-documents"));

                    if (!(hasKey && isActive)) {
                        log.info("disable document browser");
                        corpusConf.put("browse-documents", "false");
                        getAnnisDao().setCorpusConfiguration(toplevelCorpusName, corpusConf);
                    }

                    // once disabled don't search in further texts
                    return;
                }
            }
        }
    }

    public ImportStatus initImportStatus() {
        return new CorpusAdministration.ImportStatsImpl();
    }

    public enum EXAMPLE_QUERIES_CONFIG {

        IF_MISSING, TRUE, FALSE

    }

    /**
     * Controls whether example queries are generated automatically if no
     * example_queries.tab file is found during import.
     */
    private EXAMPLE_QUERIES_CONFIG generateExampleQueries;

    private String schemaVersion;

    /**
     * A mapping for file-endings to mime types.
     */
    private Map<String, String> mimeTypeMapping;

    private Map<String, String> tableInsertSelect;

    private Map<String, String> tableInsertFrom;

    // all files have to carry this suffix.
    private String annisFileSuffix = ".annis";
    /**
     * Optional tab file for example queries. If this file does not exist, a
     * dummy file from the resource folder is used.
     */
    private static final String EXAMPLE_QUERIES_TAB = "example_queries";

    /**
     * The name of the file and the relation containing the resolver information.
     */
    private static final String FILE_RESOLVER_VIS_MAP = "resolver_vis_map";
    // tables imported from bulk files
    // DO NOT CHANGE THE ORDER OF THIS LIST!  Doing so may cause foreign key failures during import.

    private final String[] importedTables = { "corpus", "corpus_annotation", "text", "node", "node_annotation",
            "component", "rank", "edge_annotation", FILE_RESOLVER_VIS_MAP, EXAMPLE_QUERIES_TAB };

    private final String[] tablesToCopyManually = { "corpus", "corpus_annotation", "text", FILE_RESOLVER_VIS_MAP,
            EXAMPLE_QUERIES_TAB, "corpus_stats", "media_files" };
    // tables created during import

    private final String[] createdTables = { "corpus_stats", "media_files", "nodeidmapping" };

    private final ObjectMapper jsonMapper = new ObjectMapper();

    private QueriesGenerator queriesGenerator;

    /**
     * Called when Spring configuration finished
     */
    public void init() {
        AnnotationIntrospector introspector = new JaxbAnnotationIntrospector();
        jsonMapper.setAnnotationIntrospector(introspector);
        // the json should be as compact as possible in the database
        jsonMapper.configure(SerializationConfig.Feature.INDENT_OUTPUT, false);
    }
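
    /*
     * A minimal sketch of the effect of this configuration (assuming a
     * JAXB-annotated bean such as UserConfig): with INDENT_OUTPUT disabled the
     * mapper produces single-line JSON, which is the form stored in the
     * user_config table by storeUserConfig().
     *
     *   ObjectMapper mapper = new ObjectMapper();
     *   mapper.setAnnotationIntrospector(new JaxbAnnotationIntrospector());
     *   mapper.configure(SerializationConfig.Feature.INDENT_OUTPUT, false);
     *   // writeValueAsString declares IOException
     *   String compactJson = mapper.writeValueAsString(new UserConfig());
     */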

    ///// Subtasks of creating the database
    protected void dropDatabase(String database) {

        log.debug("dropping possible existing database");
        closeAllConnections(database);
        getJdbcTemplate().execute("DROP DATABASE IF EXISTS " + database);

    }

    protected void dropUser(String username) {
        log.debug("dropping possible existing user");
        getJdbcTemplate().execute("DROP USER IF EXISTS " + username);

    }

    protected void createUser(String username, String password) {
        log.info("creating user: " + username);
        getJdbcTemplate().execute("CREATE USER " + username + " PASSWORD '" + password + "'");
    }

    protected void createDatabase(String database, String owner) {
        log.info("creating database: " + database + " OWNER " + owner + " ENCODING = 'UTF8'");
        getJdbcTemplate().execute("CREATE DATABASE " + database + " OWNER " + owner + " ENCODING = 'UTF8'");
    }

    protected void installPlPgSql() {
        log.info("installing stored procedure language plpgsql");
        try {
            getJdbcTemplate().execute("CREATE LANGUAGE plpgsql");
        } catch (Exception ex) {
            log.warn("plpqsql was already installed: " + ex.getMessage());
        }
    }

    protected void createFunctionUniqueToplevelCorpusName() {
        log.info("creating trigger function: unique_toplevel_corpus_name");
        executeSqlFromScript("unique_toplevel_corpus_name.sql");
    }

    protected void createSchema() {
        log.info("creating ANNIS database schema (" + getSchemaVersion() + ")");
        executeSqlFromScript("schema.sql");

        // update schema version
        getJdbcTemplate().execute("DELETE FROM repository_metadata WHERE \"name\"='schema-version'");

        getJdbcTemplate().execute("INSERT INTO repository_metadata " + "VALUES ('schema-version', '"
                + StringUtils.replace(getSchemaVersion(), "'", "''") + "');");

    }

    protected void createSchemaIndexes() {
        log.info("creating ANNIS database schema indexes (" + getDatabaseSchemaVersion() + ")");
        executeSqlFromScript("schemaindex.sql");
    }

    protected void populateSchema() {
        log.info("populating the schemas with default values");
        bulkloadTableFromResource("resolver_vis_map",
                new FileSystemResource(new File(getScriptPath(), FILE_RESOLVER_VIS_MAP + annisFileSuffix)));
        // update the sequence
        executeSqlFromScript("update_resolver_sequence.sql");

        log.info("creating immutable functions for extracting annotations");
        executeSqlFromScript("functions_get.sql");
    }

    /**
     * Get the real schema name and version as used by the database.
     *
     * @return the schema version stored in the database, or an empty string if
     * it could not be determined.
     */
    @Transactional(readOnly = true, propagation = Propagation.REQUIRED)
    public String getDatabaseSchemaVersion() {
        try {

            List<Map<String, Object>> result = getJdbcTemplate()
                    .queryForList("SELECT \"value\" FROM repository_metadata WHERE \"name\"='schema-version'");

            String schema = result.size() > 0 ? (String) result.get(0).get("value") : "";
            return schema;
        } catch (DataAccessException ex) {
            String error = "Wrong database schema (too old to get the exact number), "
                    + "please initialize the database.";
            log.error(error);
        }
        return "";
    }

    public boolean checkDatabaseSchemaVersion() throws AnnisException {
        String dbSchemaVersion = getDatabaseSchemaVersion();
        if (getSchemaVersion() != null && !getSchemaVersion().equalsIgnoreCase(dbSchemaVersion)) {
            String error = "Wrong database schema \"" + dbSchemaVersion + "\", please initialize the database.";
            log.error(error);
            throw new AnnisException(error);
        }
        return true;
    }

    public void initializeDatabase(String host, String port, String database, String user, String password,
            String defaultDatabase, String superUser, String superPassword, boolean useSSL, String pgSchema) {
        // connect as super user to the default database to create new user and database
        if (superPassword != null) {
            log.info("Creating Annis database and user.");
            getDataSource().setInnerDataSource(
                    createDataSource(host, port, defaultDatabase, superUser, superPassword, useSSL, pgSchema));

            createDatabaseAndUser(database, user, password);
        }
        // switch to new database as new user for the rest of the initialization procedure
        getDataSource()
                .setInnerDataSource(createDataSource(host, port, database, user, password, useSSL, pgSchema));

        if (pgSchema != null && !"public".equalsIgnoreCase(pgSchema)) {
            pgSchema = pgSchema.toLowerCase().replaceAll("[^a-z0-9]", "_");
            log.info("creating PostgreSQL schema {}", pgSchema);
            // we have to create a schema before we can use it
            try {
                getJdbcTemplate().execute("CREATE SCHEMA " + pgSchema + ";");
            } catch (DataAccessException ex) {
                // ignore if the schema already exists
                log.info("schema " + pgSchema + " already exists");
            }
        }

        setupDatabase();

    }
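
    /*
     * Typical invocation (a sketch; all argument values are placeholders).
     * Passing a non-null superPassword makes the method create the user and
     * database first; a null pgSchema keeps the default "public" schema:
     *
     *   adminDao.initializeDatabase("localhost", "5432", "anniskickstart",
     *       "annis", "annisPassword", "postgres", "postgres", "superPassword",
     *       false, null);
     */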

    private void createDatabaseAndUser(String database, String user, String password) {
        dropDatabase(database);
        dropUser(user);
        createUser(user, password);
        createDatabase(database, user);

        installPlPgSql();
    }

    private void setupDatabase() {
        createFunctionUniqueToplevelCorpusName();
        createSchema();
        createSchemaIndexes();
        populateSchema();
    }

    private BasicDataSource createDataSource(File dbProperties) throws IOException, URISyntaxException {
        BasicDataSource result;

        Properties props = new Properties();
        try (InputStream is = new FileInputStream(dbProperties)) {
            props.load(is);

            String rawJdbcURL = props.getProperty("datasource.url").trim();

            rawJdbcURL = StringUtils.removeStart(rawJdbcURL, "jdbc:");
            URI jdbcURL = new URI(rawJdbcURL);

            result = createDataSource(jdbcURL.getHost(), "" + jdbcURL.getPort(), jdbcURL.getPath().substring(1), // remove the "/" at the beginning
                    props.getProperty("datasource.username"), props.getProperty("datasource.password"),
                    "true".equalsIgnoreCase(props.getProperty("datasource.ssl")),
                    props.getProperty("datasource.schema"));
        }

        return result;
    }
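
    /*
     * A sketch of the expected database.properties content (the keys are
     * exactly the ones read above; the values are placeholders):
     *
     *   datasource.url=jdbc:postgresql://localhost:5432/anniskickstart
     *   datasource.username=annis
     *   datasource.password=annisPassword
     *   datasource.ssl=false
     *   datasource.schema=public
     */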

    private BasicDataSource createDataSource(String host, String port, String database, String user,
            String password, boolean useSSL, String schema) {

        String url = "jdbc:postgresql://" + host + ":" + port + "/" + database;

        // DriverManagerDataSource is deprecated
        // return new DriverManagerDataSource("org.postgresql.Driver", url, user, password);
        BasicDataSource result = new BasicDataSource();
        result.setUrl(url);
        if (useSSL) {
            result.setConnectionProperties("ssl=true");
        }
        result.setUsername(user);
        result.setPassword(password);
        result.setValidationQuery("SELECT 1;");
        result.setAccessToUnderlyingConnectionAllowed(true);
        if (schema == null) {
            schema = "public";
        }
        result.setConnectionInitSqls(Arrays.asList("SET search_path TO \"$user\"," + schema));

        result.setDriverClassName("org.postgresql.Driver");

        return result;
    }

    /**
     * Imports a corpus from ANNIS files in the given directory.
     *
     * @param path Specifies the path to the corpus which should be imported.
     * @param aliasName An alias name for this corpus. May be null.
     * @param overwrite If set to true, conflicting top level corpora are deleted.
     * @param waitForOtherTasks If true, wait for other tasks to finish; if
     * false, abort when another task is running.
     *
     * @return true if successful
     */
    @Transactional(readOnly = false, propagation = Propagation.REQUIRES_NEW, isolation = Isolation.READ_COMMITTED)
    public boolean importCorpus(String path, String aliasName, boolean overwrite, boolean waitForOtherTasks) {

        // check schema version first
        checkDatabaseSchemaVersion();

        if (!lockRepositoryMetadataTable(waitForOtherTasks)) {
            log.error("Another import is currently running");
            return false;
        }

        // explicitly unset any timeout
        getJdbcTemplate().update("SET statement_timeout TO 0");

        ANNISFormatVersion annisFormatVersion = getANNISFormatVersion(path);

        if (annisFormatVersion == ANNISFormatVersion.V3_3) {
            return importVersion4(path, aliasName, overwrite, annisFormatVersion);
        } else if (annisFormatVersion == ANNISFormatVersion.V3_1 || annisFormatVersion == ANNISFormatVersion.V3_2) {
            return importVersion3(path, aliasName, overwrite, annisFormatVersion);
        }

        log.error("Unknown ANNIS import format version");
        return false;
    }
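
    /*
     * Example call (a sketch; the path is a placeholder for a directory
     * containing the ANNIS files of a single corpus):
     *
     *   boolean ok = adminDao.importCorpus("/tmp/corpora/pcc2", null, true, false);
     */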

    private boolean importVersion4(String path, String aliasName, boolean overwrite, ANNISFormatVersion version) {
        this.annisFileSuffix = ".annis";
        createStagingAreaV33(temporaryStagingArea);
        bulkImport(path, version);

        String toplevelCorpusName = getTopLevelCorpusFromTmpArea();

        // remove conflicting top level corpora when overwrite is set to true.
        if (overwrite) {
            deleteCorpusDao.checkAndRemoveTopLevelCorpus(toplevelCorpusName);
        } else {
            checkTopLevelCorpus();
        }

        applyConstraints();
        createStagingAreaIndexes(version);

        fixResolverVisMapTable(toplevelCorpusName, tableInStagingArea(FILE_RESOLVER_VIS_MAP));
        analyzeStagingTables();

        addDocumentNameMetaData();

        Offsets offsets = calculateOffsets();
        long corpusID = getNewToplevelCorpusID(offsets);
        createNodeIdMapping();

        importBinaryData(path, toplevelCorpusName);

        extendStagingText(corpusID);
        extendStagingExampleQueries(corpusID);

        analyzeAutoGeneratedQueries(corpusID);

        computeCorpusStatistics(path);

        analyzeStagingTables();

        insertCorpus(corpusID, offsets);

        computeCorpusPath(corpusID);

        createAnnotations(corpusID);

        createAnnoCategory(corpusID);

        // create the new facts table partition
        createFacts(corpusID, version, offsets);

        if (temporaryStagingArea) {
            dropStagingArea();
        }

        // create empty corpus properties file
        if (getAnnisDao().getCorpusConfigurationSave(toplevelCorpusName) == null) {
            log.info("creating new corpus.properties file");
            getAnnisDao().setCorpusConfiguration(toplevelCorpusName, new Properties());
        }

        analyzeFacts(corpusID);
        analyzeTextTable(toplevelCorpusName);
        generateExampleQueries(corpusID);

        if (aliasName != null && !aliasName.isEmpty()) {
            addCorpusAlias(corpusID, aliasName);
        }
        return true;
    }

    private boolean importVersion3(String path, String aliasName, boolean overwrite, ANNISFormatVersion version) {
        this.annisFileSuffix = ".tab";

        createStagingAreaV32(temporaryStagingArea);
        bulkImport(path, version);

        String toplevelCorpusName = getTopLevelCorpusFromTmpArea();

        // remove conflicting top level corpora when overwrite is set to true.
        if (overwrite) {
            deleteCorpusDao.checkAndRemoveTopLevelCorpus(toplevelCorpusName);
        } else {
            checkTopLevelCorpus();
        }

        createStagingAreaIndexes(version);

        fixResolverVisMapTable(toplevelCorpusName, tableInStagingArea(FILE_RESOLVER_VIS_MAP));
        computeTopLevelCorpus();
        analyzeStagingTables();

        computeLeftTokenRightToken();

        removeUnecessarySpanningRelations();

        addUniqueNodeNameAppendix();
        adjustRankPrePost();
        adjustTextId();
        addDocumentNameMetaData();

        Offsets offsets = calculateOffsets();
        long corpusID = getNewToplevelCorpusID(offsets);
        createNodeIdMapping();

        importBinaryData(path, toplevelCorpusName);

        extendStagingText(corpusID);
        extendStagingExampleQueries(corpusID);

        analyzeAutoGeneratedQueries(corpusID);

        computeRealRoot();
        computeLevel();
        computeCorpusStatistics(path);
        computeSpanFromSegmentation();

        applyConstraints();
        analyzeStagingTables();

        insertCorpus(corpusID, offsets);

        computeCorpusPath(corpusID);

        createAnnotations(corpusID);

        createAnnoCategory(corpusID);

        // create the new facts table partition
        createFacts(corpusID, version, offsets);

        if (temporaryStagingArea) {
            dropStagingArea();
        }

        // create empty corpus properties file
        if (getAnnisDao().getCorpusConfigurationSave(toplevelCorpusName) == null) {
            log.info("creating new corpus.properties file");
            getAnnisDao().setCorpusConfiguration(toplevelCorpusName, new Properties());
        }

        analyzeFacts(corpusID);
        analyzeTextTable(toplevelCorpusName);
        generateExampleQueries(corpusID);

        if (aliasName != null && !aliasName.isEmpty()) {
            addCorpusAlias(corpusID, aliasName);
        }
        return true;
    }

    ///// Subtasks of importing a corpus
    protected void dropIndexes() {
        log.info("dropping indexes");
        for (String index : listIndexesOnTables(allTables())) {
            log.debug("dropping index: " + index);
            getJdbcTemplate().execute("DROP INDEX " + index);
        }
    }

    void createStagingAreaV33(boolean useTemporary) {
        log.info("creating staging area for import format version 3.3");
        MapSqlParameterSource args = makeArgs().addValue(":tmp", useTemporary ? "TEMPORARY" : "UNLOGGED");
        executeSqlFromScript("staging_area.sql", args);
    }

    void createStagingAreaV32(boolean useTemporary) {
        log.info("creating staging area for import format version 3.1/3.2");
        MapSqlParameterSource args = makeArgs().addValue(":tmp", useTemporary ? "TEMPORARY" : "UNLOGGED");
        executeSqlFromScript("staging_area_v32.sql", args);
    }

    /**
     * Reads tab-separated files from the file system, taking only files with
     * the configured ANNIS file suffix into account. The import is
     * straightforward except for the
     * {@link DefaultAdministrationDao#FILE_RESOLVER_VIS_MAP} and the
     * {@link DefaultAdministrationDao#EXAMPLE_QUERIES_TAB} tables, which this
     * method handles specially:
     *
     * <ul>
     *
     * <li>{@link DefaultAdministrationDao#FILE_RESOLVER_VIS_MAP}: For backwards
     * compatibility, the columns must be counted, since newer versions contain
     * one additional column for the visibility behaviour of visualizers.</li>
     *
     * <li>{@link DefaultAdministrationDao#EXAMPLE_QUERIES_TAB}: Takes the state
     * of {@link #generateExampleQueries} into account.</li>
     *
     * </ul>
     *
     * @param path The path to the ANNIS files.
     * @param version The version of the ANNIS import format.
     */
    void bulkImport(String path, ANNISFormatVersion version) {
        log.info("bulk-loading data");

        for (String table : importedTables) {
            if (table.equalsIgnoreCase(FILE_RESOLVER_VIS_MAP)) {
                importResolverVisMapTable(path, table);
            }
            // check if an example queries file exists; if not, query generation
            // may be enabled below
            else if (table.equalsIgnoreCase(EXAMPLE_QUERIES_TAB)) {
                File f = new File(path, table + annisFileSuffix);
                if (f.exists()) {
                    log.info(table + annisFileSuffix + " file exists");
                    bulkloadTableFromResource(tableInStagingArea(table), new FileSystemResource(f));

                    if (generateExampleQueries == EXAMPLE_QUERIES_CONFIG.IF_MISSING) {
                        generateExampleQueries = EXAMPLE_QUERIES_CONFIG.FALSE;
                    }
                } else {
                    if (generateExampleQueries == EXAMPLE_QUERIES_CONFIG.IF_MISSING) {
                        generateExampleQueries = EXAMPLE_QUERIES_CONFIG.TRUE;
                    }

                    log.info(table + annisFileSuffix + " file not found");
                }
            } else if (table.equalsIgnoreCase("node")) {
                bulkImportNode(path, version);
            } else {
                bulkloadTableFromResource(tableInStagingArea(table),
                        new FileSystemResource(new File(path, table + annisFileSuffix)));
            }
        }
    }

    private void bulkImportNode(String path, ANNISFormatVersion version) {
        // check column number by reading first line
        File nodeTabFile = new File(path, "node" + annisFileSuffix);
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(nodeTabFile), "UTF-8"));) {

            String firstLine = reader.readLine();

            int columnNumber = firstLine == null ? 13 : StringUtils.splitPreserveAllTokens(firstLine, '\t').length;
            if (version == ANNISFormatVersion.V3_3 || version == ANNISFormatVersion.V3_2) {
                // new node table with segmentations
                // no special handling needed
                bulkloadTableFromResource(tableInStagingArea("node"), new FileSystemResource(nodeTabFile));
            } else if (version == ANNISFormatVersion.V3_1) {
                getJdbcTemplate().execute("DROP TABLE IF EXISTS _tmpnode;");
                // old node table without segmentations
                // create temporary table for bulk import
                getJdbcTemplate().execute("CREATE TEMPORARY TABLE _tmpnode" + "\n(\n" + "id bigint,\n"
                        + "text_ref integer,\n" + "corpus_ref integer,\n" + "namespace varchar,\n"
                        + "name varchar,\n" + "\"left\" integer,\n" + "\"right\" integer,\n"
                        + "token_index integer,\n" + "continuous boolean,\n" + "span varchar\n" + ");");

                bulkloadTableFromResource("_tmpnode", new FileSystemResource(nodeTabFile));

                log.info("copying nodes from temporary helper table into staging area");
                getJdbcTemplate().execute("INSERT INTO " + tableInStagingArea("node") + "\n"
                        + "  SELECT id, text_ref, corpus_ref, namespace AS layer, name, \"left\", "
                        + "\"right\", token_index, "
                        + "NULL AS seg_name, NULL AS seg_left, NULL AS seg_left, continuous, " + "span\n"
                        + "FROM _tmpnode");
            } else {
                throw new RuntimeException("Illegal number of columns in node" + annisFileSuffix + ", "
                        + "should be 13 or 10 but was " + columnNumber);
            }
        } catch (IOException ex) {
            log.error(null, ex);
        }
    }
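
    /*
     * The two column layouts handled above, derived from the temporary table
     * and the INSERT ... SELECT mapping: the old v3.1 node.tab has 10 columns
     * (id, text_ref, corpus_ref, namespace, name, left, right, token_index,
     * continuous, span), while v3.2/v3.3 use 13 columns, renaming namespace to
     * layer and adding seg_name, seg_left and seg_right.
     */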

    void createStagingAreaIndexes(ANNISFormatVersion version) {
        log.info("creating indexes for staging area");
        if (version == ANNISFormatVersion.V3_3) {
            executeSqlFromScript("indexes_staging_v33.sql");
        } else {
            executeSqlFromScript("indexes_staging_v32.sql");
        }
    }

    void computeTopLevelCorpus() {
        log.info("computing top-level corpus");
        executeSqlFromScript("toplevel_corpus.sql");
    }

    void importBinaryData(String path, String toplevelCorpusName) {
        log.info("importing all binary data from ExtData");
        File extData = new File(path + "/ExtData");
        if (extData.canRead() && extData.isDirectory()) {
            // import toplevel corpus media files
            File[] topFiles = extData.listFiles((FileFilter) FileFileFilter.FILE);
            for (File data : topFiles) {
                String extension = FilenameUtils.getExtension(data.getName());
                try {
                    if (mimeTypeMapping.containsKey(extension)) {
                        log.info("import " + data.getCanonicalPath() + " to staging area");

                        // search for corpus_ref
                        String sqlScript = "SELECT id FROM _corpus WHERE top_level IS TRUE LIMIT 1";
                        long corpusID = getJdbcTemplate().queryForLong(sqlScript);

                        importSingleFile(data.getCanonicalPath(), toplevelCorpusName, corpusID);
                    } else {
                        log.warn("not importing " + data.getCanonicalPath() + " since file type is unknown");
                    }
                } catch (IOException ex) {
                    log.error("no canonical path given", ex);
                }
            }

            // get each subdirectory (each corresponds to a document name)
            File[] documents = extData.listFiles((FileFilter) DirectoryFileFilter.DIRECTORY);
            for (File doc : documents) {
                if (doc.isDirectory() && doc.canRead()) {
                    File[] dataFiles = doc.listFiles((FileFilter) FileFileFilter.FILE);
                    for (File data : dataFiles) {
                        String extension = FilenameUtils.getExtension(data.getName());
                        try {
                            if (mimeTypeMapping.containsKey(extension)) {
                                log.info("import " + data.getCanonicalPath() + " to staging area");

                                // search for corpus_ref
                                String sqlScript = "SELECT id FROM _corpus WHERE \"name\" = ? LIMIT 1";
                                long corpusID = getJdbcTemplate().queryForLong(sqlScript, doc.getName());

                                importSingleFile(data.getCanonicalPath(), toplevelCorpusName, corpusID);
                            } else {
                                log.warn(
                                        "not importing " + data.getCanonicalPath() + " since file type is unknown");
                            }
                        } catch (IOException ex) {
                            log.error("no canonical path given", ex);
                        }
                    }
                }
            }
        }
    }

    /**
     * Imports a single binary file.
     *
     * @param file Specifies the file to be imported.
     * @param toplevelCorpusName The name of the toplevel corpus.
     * @param corpusRef The id of the corpus the file is assigned to.
     */
    private void importSingleFile(String file, String toplevelCorpusName, long corpusRef) {

        BinaryImportHelper preStat = new BinaryImportHelper(file, getRealDataDir(), toplevelCorpusName, corpusRef,
                mimeTypeMapping);
        getJdbcTemplate().execute(BinaryImportHelper.SQL, preStat);

    }

    /**
     * Updates the example queries table in the staging area. The final toplevel
     * corpus must already be computed.
     *
     * @param toplevelID The final top level corpus id.
     *
     */
    void extendStagingExampleQueries(long toplevelID) {
        log.info("extending _example_queries");
        executeSqlFromScript("extend_staging_example_queries.sql", makeArgs().addValue(":id", toplevelID));
    }

    void extendStagingText(long toplevelID) {
        log.info("extending _text");
        executeSqlFromScript("extend_staging_text.sql", makeArgs().addValue(":id", toplevelID));
    }

    void computeLeftTokenRightToken() {
        log.info("computing values for struct.left_token and struct.right_token");
        executeSqlFromScript("left_token_right_token.sql");

        // re-analyze node since we added new columns
        log.info("analyzing node");
        getJdbcTemplate().execute("ANALYZE " + tableInStagingArea("node"));

    }

    void computeRealRoot() {
        log.info("computing real root for rank");
        executeSqlFromScript("root.sql");
    }

    void computeLevel() {
        log.info("computing values for rank.level (dominance and pointing relations)");
        executeSqlFromScript("level.sql");

        log.info("computing values for rank.level (coverage)");
        executeSqlFromScript("level_coverage.sql");
    }

    void computeCorpusStatistics(String path) {

        File f = new File(path);
        String absolutePath = path;
        try {
            absolutePath = f.getCanonicalPath();
        } catch (IOException ex) {
            log.error("Something went really wrong when calculating the canonical path", ex);
        }

        log.info("computing statistics for top-level corpus");
        MapSqlParameterSource args = makeArgs().addValue(":path", absolutePath);
        executeSqlFromScript("corpus_stats.sql", args);
    }

    void computeCorpusPath(long corpusID) {
        MapSqlParameterSource args = makeArgs().addValue(":id", corpusID);
        log.info("computing path information of the corpus tree for corpus with ID " + corpusID);
        executeSqlFromScript("compute_corpus_path.sql", args);
    }

    void addDocumentNameMetaData() {
        log.info("add the document name as metadata");
        executeSqlFromScript("adddocmetadata.sql");
    }

    protected void adjustRankPrePost() {
        log.info("updating pre and post order in _rank");
        executeSqlFromScript("adjustrankprepost.sql");
        log.info("analyzing _rank");
        getJdbcTemplate().execute("ANALYZE " + tableInStagingArea("rank"));
    }

    protected void adjustTextId() {
        log.info("updating id in _text and text_ref in _node");
        executeSqlFromScript("adjusttextid.sql");
        log.info("analyzing _node and _text");
        getJdbcTemplate().execute("ANALYZE " + tableInStagingArea("text"));
        getJdbcTemplate().execute("ANALYZE " + tableInStagingArea("node"));
    }

    protected void addUniqueNodeNameAppendix() {
        // first check if this is actually necessary
        log.info("check if node names are unique");

        getJdbcTemplate().execute("ALTER TABLE _node ADD COLUMN unique_name_appendix varchar;");

        List<Integer> checkDuplicate = getJdbcTemplate().queryForList(
                "SELECT COUNT(*) from _node GROUP BY \"name\", corpus_ref HAVING COUNT(*) > 1 LIMIT 1",
                Integer.class);
        if (checkDuplicate.isEmpty()) {
            log.info("node names are unique, no update necessary");
        } else {
            log.info("add an unique node name appendix");
            executeSqlFromScript("unique_node_name_appendix.sql");
        }
    }

    /**
     * Creates a mapping between the node IDs of the staging area and new,
     * properly sorted global IDs.
     */
    void createNodeIdMapping() {

        log.info("creating node ID mapping (for properly sorted IDs)");
        executeSqlFromScript("node_id_mapping.sql");

    }
    /*
    private long getRecentCorpusID()
    {
      int numOfEntries = getJdbcTemplate().queryForObject(
        "SELECT COUNT(*) from corpus_stats", Integer.class);
        
      long recentCorpusId = 0;
        
      if (numOfEntries > 0)
      {
        recentCorpusId = getJdbcTemplate().queryForObject(
    "SELECT max(id) FROM corpus_stats", Long.class);
        log.debug("the id from recently imported corpus: {}", recentCorpusId);
      }
      return recentCorpusId;
    }
    */

    long getNewToplevelCorpusID(Offsets offsets) {
        log.info("query for the new corpus ID");

        long maxLocalID = getJdbcTemplate().queryForObject("SELECT MAX(id) FROM _corpus WHERE top_level IS TRUE",
                Long.class);

        long result = maxLocalID + offsets.getCorpusID();

        log.info("new corpus ID is " + result);
        return result;
    }

    void computeSpanFromSegmentation() {
        log.info("computing span value for segmentation nodes");
        executeSqlFromScript("span_from_segmentation.sql");
    }

    void applyConstraints() {
        log.info("activating relational constraints");
        executeSqlFromScript("constraints.sql");
    }

    private Offsets calculateOffsets() {
        log.info("querying ID offsets");

        long offsetCorpusID = getJdbcTemplate()
                .queryForObject("SELECT COALESCE((SELECT max(max_corpus_id)+1 FROM corpus_stats),0)", Long.class);
        long offsetCorpusPost = getJdbcTemplate()
                .queryForObject("SELECT COALESCE((SELECT max(max_corpus_post)+1 FROM corpus_stats),0)", Long.class);

        long offsetNodeID = getJdbcTemplate()
                .queryForObject("SELECT COALESCE((SELECT max(max_node_id)+1 FROM corpus_stats),0)", Long.class);

        return new Offsets(offsetCorpusID, offsetCorpusPost, offsetNodeID);
    }
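
    /*
     * Worked example with illustrative numbers: if corpus_stats currently
     * contains max_corpus_id = 41, max_corpus_post = 11 and max_node_id = 99,
     * the offsets become 42, 12 and 100. On an empty corpus_stats table the
     * COALESCE makes all three offsets 0.
     */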

    void insertCorpus(long corpusID, Offsets offsets) {

        MapSqlParameterSource args = offsets.makeArgs().addValue(":id", corpusID);

        log.info("moving corpus from staging area to main db");
        for (String table : tablesToCopyManually) {
            log.debug("moving table \"{}\" from staging area to main db", table);
            StringBuilder sql = new StringBuilder();

            String predefinedFrom = tableInsertFrom == null ? null : tableInsertFrom.get(table);
            String predefinedSelect = tableInsertSelect == null ? null : tableInsertSelect.get(table);

            if (predefinedFrom != null || predefinedSelect != null) {
                if (predefinedFrom == null) {
                    predefinedFrom = predefinedSelect;
                }

                sql.append("INSERT INTO ");
                sql.append(table);
                sql.append(" ( ");
                sql.append(predefinedSelect);

                sql.append(" ) (SELECT ");
                sql.append(predefinedFrom);
                sql.append(" FROM ");
                sql.append(tableInStagingArea(table)).append(")");
            } else {
                sql.append("INSERT INTO ");
                sql.append(table);
                sql.append(" (SELECT * FROM ");
                sql.append(tableInStagingArea(table)).append(")");
            }
            executeSql(sql.toString(), args);

        }
    }
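
    /*
     * A sketch of the generated SQL. With a hypothetical entry
     * tableInsertSelect.put("corpus", "id, name") and no matching
     * tableInsertFrom entry, the loop above produces:
     *
     *   INSERT INTO corpus ( id, name ) (SELECT id, name FROM _corpus)
     *
     * Without any predefined columns it falls back to:
     *
     *   INSERT INTO corpus (SELECT * FROM _corpus)
     */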

    void dropStagingArea() {
        log.info("dropping staging area");

        // tables must be dropped in reverse order
        List<String> tables = importedAndCreatedTables();
        Collections.reverse(tables);

        for (String table : tables) {
            getJdbcTemplate().execute("DROP TABLE " + tableInStagingArea(table));
        }

    }

    void dropMaterializedTables() {
        log.info("dropping materialized tables");

        getJdbcTemplate().execute("DROP TABLE facts");

    }

    void analyzeStagingTables() {
        log.info("analyzing staging area");
        for (String t : importedTables) {
            log.debug("analyzing " + t);
            getJdbcTemplate().execute("ANALYZE " + tableInStagingArea(t));
        }
    }

    void createAnnotations(long corpusID) {
        MapSqlParameterSource args = makeArgs().addValue(":id", corpusID);
        log.info("creating annotations table for corpus with ID " + corpusID);
        executeSqlFromScript("annotations.sql", args);

        log.info("indexing annotations table for corpus with ID " + corpusID);
        executeSqlFromScript("indexes_annotations.sql", args);
    }

    void createAnnoCategory(long corpusID) {
        MapSqlParameterSource args = makeArgs().addValue(":id", corpusID);
        log.info("creating annotation category table for corpus with ID " + corpusID);
        executeSqlFromScript("annotation_category.sql", args);
    }

    void analyzeFacts(long corpusID) {
        log.info("analyzing facts table for corpus with ID " + corpusID);
        getJdbcTemplate().execute("ANALYZE facts_" + corpusID);
    }

    /**
     * Update the statistics for the "facts" table as a whole.
     */
    @Transactional(propagation = Propagation.REQUIRED)
    public void analyzeParentFacts() {
        log.info("analyzing parent facts table");
        // explicitly unset any timeout. Since this function might be called
        // independently of the import process, we have to set it manually.
        getJdbcTemplate().update("SET statement_timeout TO 0");
        getJdbcTemplate().execute("ANALYZE facts");
    }

    void createFacts(long corpusID, ANNISFormatVersion version, Offsets offsets) {
        MapSqlParameterSource args = offsets.makeArgs().addValue(":id", corpusID);

        log.info("creating materialized facts table for corpus with ID " + corpusID);
        if (version == ANNISFormatVersion.V3_3) {
            executeSqlFromScript("facts.sql", args);
        } else {
            executeSqlFromScript("facts_v32.sql", args);
        }

        log.info("indexing the new facts table (general indexes)");
        executeSqlFromScript("indexes.sql", args);

        log.info("indexing the new facts table (edge related indexes)");
        executeSqlFromScript("indexes_edge.sql", args);

    }

    void removeUnecessarySpanningRelations() {
        log.info("setting \"continuous\" to a correct value");
        executeSqlFromScript("set_continuous.sql");

        // re-analyze node since we added new columns
        log.info("analyzing node");
        getJdbcTemplate().execute("ANALYZE " + tableInStagingArea("node"));

        log.info("removing unnecessary span relations");
        executeSqlFromScript("remove_span_relations.sql");

    }

    ///// Other sub tasks
    public List<Long> listToplevelCorpora() {
        String sql = "SELECT id FROM corpus WHERE top_level = 'y'";

        return getJdbcTemplate().query(sql, ParameterizedSingleColumnRowMapper.newInstance(Long.class));
    }

    /**
     * Delete files not used by this instance in the data directory.
     */
    @Transactional(readOnly = true)
    public void cleanupData() {

        List<String> allFilesInDatabaseList = getJdbcTemplate()
                .queryForList("SELECT filename FROM media_files AS m", String.class);

        File dataDir = getRealDataDir();

        Set<File> allFilesInDatabase = new HashSet<>();
        for (String singleFileName : allFilesInDatabaseList) {
            allFilesInDatabase.add(new File(dataDir, singleFileName));
        }

        log.info("Cleaning up the data directory");
        // go through each file of the folder and check if it is not included
        File[] childFiles = dataDir.listFiles();
        if (childFiles != null) {
            for (File f : childFiles) {
                if (f.isFile() && !allFilesInDatabase.contains(f)) {
                    if (!f.delete()) {
                        log.warn("Could not delete {}", f.getAbsolutePath());
                    }
                }
            }
        }
    }

    public List<Map<String, Object>> listCorpusStats() {
        return getJdbcTemplate().queryForList("SELECT * FROM corpus_info ORDER BY name");
    }

    /**
     * Lists the corpora using the connection information of a given
     * "database.properties" file.
     *
     * @param databaseProperties The file to read the connection information
     * from. May be null to use the current connection.
     * @return The rows of the corpus_info view for that database.
     */
    public List<Map<String, Object>> listCorpusStats(File databaseProperties) {
        List<Map<String, Object>> result = new LinkedList<>();

        DataSource origDataSource = getDataSource().getInnerDataSource();
        try {
            if (databaseProperties != null) {
                getDataSource().setInnerDataSource(createDataSource(databaseProperties));
            }
            result = getJdbcTemplate().queryForList("SELECT * FROM corpus_info ORDER BY name");
        } catch (IOException | URISyntaxException | DataAccessException ex) {
            if (databaseProperties == null) {
                log.error("Could not query corpus list", ex);
            } else {
                log.error("Could not query corpus list for the file " + databaseProperties.getAbsolutePath(), ex);
            }
        } finally {
            getDataSource().setInnerDataSource(origDataSource);
        }
        return result;
    }

    public List<String> listUsedIndexes() {
        log.info("retrieving list of used indexes");
        return listIndexDefinitions(true);
    }

    public List<String> listUnusedIndexes() {
        log.info("retrieving list of unused indexes");
        return listIndexDefinitions(false);
    }

    /**
     * Provides a mapping where the keys are the aliases and the values are the
     * corpus names.
     *
     * @param databaseProperties The file to read the connection information
     * from. May be null to use the current connection.
     * @return A multimap from alias to corpus name.
     */
    public Multimap<String, String> listCorpusAlias(File databaseProperties) {
        Multimap<String, String> result = TreeMultimap.create();

        DataSource origDataSource = getDataSource().getInnerDataSource();
        try {
            if (databaseProperties != null) {
                getDataSource().setInnerDataSource(createDataSource(databaseProperties));
            }
            result = getJdbcTemplate().query(
                    "SELECT a.alias AS alias, c.name AS corpus\n" + "FROM corpus_alias AS a, corpus AS c\n"
                            + "WHERE\n" + " a.corpus_ref = c.id",
                    new ResultSetExtractor<Multimap<String, String>>() {

                        @Override
                        public Multimap<String, String> extractData(ResultSet rs)
                                throws SQLException, DataAccessException {
                            Multimap<String, String> data = TreeMultimap.create();
                            while (rs.next()) {
                                // alias -> corpus name
                                data.put(rs.getString(1), rs.getString(2));
                            }
                            return data;
                        }
                    });

        } catch (IOException | URISyntaxException | DataAccessException ex) {
            if (databaseProperties == null) {
                log.error("Could not query corpus list", ex);
            } else {
                log.error("Could not query corpus list for the file " + databaseProperties.getAbsolutePath(), ex);
            }
        } finally {
            getDataSource().setInnerDataSource(origDataSource);
        }

        return result;
    }

    @Transactional(readOnly = true)
    public UserConfig retrieveUserConfig(final String userName) {
        String sql = "SELECT * FROM user_config WHERE id=?";
        UserConfig config = getJdbcTemplate().query(sql, new Object[] { userName },
                new ResultSetExtractor<UserConfig>() {
                    @Override
                    public UserConfig extractData(ResultSet rs) throws SQLException, DataAccessException {

                        // default to empty config
                        UserConfig c = new UserConfig();

                        if (rs.next()) {
                            try {
                                c = jsonMapper.readValue(rs.getString("config"), UserConfig.class);
                            } catch (IOException ex) {
                                log.error("Could not parse JSON that is stored in database (user configuration)",
                                        ex);
                            }
                        }
                        return c;
                    }
                });

        return config;
    }

    @Transactional(readOnly = false)
    public void storeUserConfig(String userName, UserConfig config) {
        String sqlUpdate = "UPDATE user_config SET config=?::json WHERE id=?";
        String sqlInsert = "INSERT INTO user_config(id, config) VALUES(?,?)";
        try {
            String jsonVal = jsonMapper.writeValueAsString(config);

            // if no row was affected there is no entry yet and we should create one
            if (getJdbcTemplate().update(sqlUpdate, jsonVal, userName) == 0) {
                getJdbcTemplate().update(sqlInsert, userName, jsonVal);
            }
        } catch (IOException ex) {
            log.error("Cannot serialize user config JSON for database.", ex);
        }
    }
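
    /*
     * Round-trip sketch: storing a configuration and reading it back
     * ("alice" is a placeholder user name):
     *
     *   adminDao.storeUserConfig("alice", new UserConfig());
     *   UserConfig cfg = adminDao.retrieveUserConfig("alice");
     */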

    public void addCorpusAlias(long corpusID, String alias) {
        getJdbcTemplate().update(
                "INSERT INTO corpus_alias (alias, corpus_ref)\n" + "VALUES(\n" + "  ?, \n" + "  ?\n" + ");", alias,
                corpusID);
    }

    public void addCorpusAlias(String corpusName, String alias) {
        getJdbcTemplate().update("INSERT INTO corpus_alias (alias, corpus_ref)\n" + "SELECT ? AS alias, c.id\n"
                + "FROM corpus AS c WHERE c.top_level AND c.name=? LIMIT 1;", alias, corpusName);
    }

    ///// Helpers
    private List<String> importedAndCreatedTables() {
        List<String> tables = new ArrayList<>();
        tables.addAll(Arrays.asList(importedTables));
        tables.addAll(Arrays.asList(createdTables));
        return tables;
    }

    private List<String> allTables() {
        List<String> tables = new ArrayList<>();
        tables.addAll(Arrays.asList(importedTables));
        tables.addAll(Arrays.asList(createdTables));
        //tables.addAll(Arrays.asList(materializedTables));
        return tables;
    }

    private ParameterizedSingleColumnRowMapper<String> stringRowMapper() {
        return ParameterizedSingleColumnRowMapper.newInstance(String.class);
    }

    // executes an SQL script from $ANNIS_HOME/scripts
    @Transactional(propagation = Propagation.MANDATORY)
    public PreparedStatement executeSqlFromScript(String script) {
        return executeSqlFromScript(script, null);
    }

    // bulk-loads a table from a resource
    private void bulkloadTableFromResource(String table, Resource resource) {
        log.debug("bulk-loading data from '" + resource.getFilename() + "' into table '" + table + "'");
        String sql = "COPY " + table + " FROM STDIN WITH DELIMITER E'\t' NULL AS 'NULL'";

        try {
            // retrieve the currently open connection if running inside a transaction
            Connection originalCon = DataSourceUtils.getConnection(getDataSource());
            Connection con = originalCon;
            if (con instanceof DelegatingConnection) {
                DelegatingConnection<?> delCon = (DelegatingConnection<?>) con;
                con = delCon.getInnermostDelegate();
            }

            Preconditions.checkState(con instanceof PGConnection,
                    "bulk-loading only works with a PostgreSQL JDBC connection");

            // Postgres JDBC4 8.4 driver now supports the copy API
            PGConnection pgCon = (PGConnection) con;
            pgCon.getCopyAPI().copyIn(sql, resource.getInputStream());

            DataSourceUtils.releaseConnection(originalCon, getDataSource());

        } catch (SQLException e) {
            throw new DatabaseAccessException(e);
        } catch (IOException e) {
            throw new FileAccessException(e);
        }
    }
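
    /*
     * For the staging table "_node", for example, the generated command is:
     *
     *   COPY _node FROM STDIN WITH DELIMITER E'\t' NULL AS 'NULL'
     *
     * i.e. tab-separated input in which the literal string NULL denotes an
     * SQL NULL value.
     */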

    // get a list of indexes on the imported and created tables which are not
    // auto-created by postgres (namely, primary key and unique constraints);
    // exploits the fact that the index has the same name as the constraint
    private List<String> listIndexesOnTables(List<String> tables) {
        String sql = "" + "SELECT indexname " + "FROM pg_indexes " + "WHERE tablename IN ("
                + StringUtils.repeat("?", ",", tables.size()) + ") " + "AND lower(indexname) NOT IN "
                + "   (SELECT lower(conname) FROM pg_constraint WHERE contype in ('p', 'u'))";

        return getJdbcTemplate().query(sql, tables.toArray(), stringRowMapper());
    }

    private List<String> listIndexDefinitions(boolean used) {
        return listIndexDefinitions(used, allTables());
    }

    /**
     * Returns the CREATE INDEX statements for all indexes on the ANNIS tables
     * that are not auto-created by PostgreSQL (primary keys and unique
     * constraints).
     *
     * @param used If true, return used indexes; if false, return unused
     * indexes (scan count is 0).
     * @param tables The tables whose indexes are listed.
     */
    public List<String> listIndexDefinitions(boolean used, List<String> tables) {
        String scansOp = used ? "!=" : "=";
        String sql = "SELECT pg_get_indexdef(x.indexrelid) AS indexdef " + "FROM pg_index x, pg_class c "
                + "WHERE x.indexrelid = c.oid " + "AND c.relname IN ( "
                + StringUtils.repeat("?", ",", tables.size()) + ") " + "AND pg_stat_get_numscans(x.indexrelid) "
                + scansOp + " 0";
        return getJdbcTemplate().query(sql, tables.toArray(), stringRowMapper());
    }

    public List<String> listIndexDefinitions(String... tables) {
        String sql = "" + "SELECT pg_get_indexdef(x.indexrelid) AS indexdef "
                + "FROM pg_index x, pg_class c, pg_indexes i " + "WHERE x.indexrelid = c.oid "
                + "AND c.relname = i.indexname " + "AND i.tablename IN ( "
                + StringUtils.repeat("?", ",", tables.length) + " )";
        return getJdbcTemplate().query(sql, tables, new ParameterizedSingleColumnRowMapper<String>());
    }

    public List<String> listUsedIndexes(String... tables) {
        String sql = "" + "SELECT pg_get_indexdef(x.indexrelid) AS indexdef "
                + "FROM pg_index x, pg_class c, pg_indexes i " + "WHERE x.indexrelid = c.oid "
                + "AND c.relname = i.indexname " + "AND i.tablename IN ( "
                + StringUtils.repeat("?", ",", tables.length) + " ) "
                + "AND pg_stat_get_numscans(x.indexrelid) != 0";
        return getJdbcTemplate().query(sql, tables, new ParameterizedSingleColumnRowMapper<String>());
    }

    public boolean resetStatistics() {
        try {
            getJdbcTemplate().queryForList("SELECT pg_stat_reset()");
            return true;
        } catch (DataAccessException e) {
            return false;
        }
    }

    /**
     * Retrieves the name of the top level corpus in the corpus.tab file.
     *
     * <p>
     * At this point, the tab files must be in the staging area.</p>
     *
     * @return The name of the toplevel corpus or an empty String if no top level
     * corpus is found.
     */
    private String getTopLevelCorpusFromTmpArea() {
        String sql = "SELECT name FROM " + tableInStagingArea("corpus") + " WHERE type='CORPUS'\n"
                + "AND pre = (SELECT min(pre) FROM " + tableInStagingArea("corpus") + ")\n"
                + "AND post = (SELECT max(post) FROM " + tableInStagingArea("corpus") + ")";

        return getJdbcTemplate().query(sql, new ResultSetExtractor<String>() {
            @Override
            public String extractData(ResultSet rs) throws SQLException, DataAccessException {
                if (rs.next()) {
                    return rs.getString("name");
                } else {
                    return null;
                }
            }
        });
    }

    ///// Getter / Setter
    public boolean isTemporaryStagingArea() {
        return temporaryStagingArea;
    }

    public void setTemporaryStagingArea(boolean temporaryStagingArea) {
        this.temporaryStagingArea = temporaryStagingArea;
    }

    /**
     * Get the name and version of the schema this {@link AdministrationDao} is
     * configured to work with.
     *
     * @return the configured schema version
     */
    public String getSchemaVersion() {
        return schemaVersion;
    }

    public void setSchemaVersion(String schemaVersion) {
        this.schemaVersion = schemaVersion;
    }

    public Map<String, String> getMimeTypeMapping() {
        return mimeTypeMapping;
    }

    public void setMimeTypeMapping(Map<String, String> mimeTypeMapping) {
        this.mimeTypeMapping = mimeTypeMapping;
    }

    public Map<String, String> getTableInsertSelect() {
        return tableInsertSelect;
    }

    public void setTableInsertSelect(Map<String, String> tableInsertSelect) {
        this.tableInsertSelect = tableInsertSelect;
    }

    public Map<String, String> getTableInsertFrom() {
        return tableInsertFrom;
    }

    public void setTableInsertFrom(Map<String, String> tableInsertFrom) {
        this.tableInsertFrom = tableInsertFrom;
    }

    private void readOldResolverVisMapFormat(File resolver_vis_tab) {
        StringBuilder sb = new StringBuilder();
        sb.append("CREATE TABLE tmp_resolver_vis_map ");
        sb.append("( ");
        sb.append("\"corpus\"   varchar, ");
        sb.append("\"version\"    varchar, ");
        sb.append("\"namespace\"   varchar, ");
        sb.append("\"element\"    varchar, ");
        sb.append("\"vis_type\"   varchar NOT NULL, ");
        sb.append("\"display_name\"   varchar NOT NULL, ");
        sb.append("\"order\" integer default '0', ");
        sb.append("\"mappings\" varchar");
        sb.append(");");

        getJdbcTemplate().execute(sb.toString());

        bulkloadTableFromResource("tmp_resolver_vis_map", new FileSystemResource(resolver_vis_tab));

        sb = new StringBuilder();

        sb.append("INSERT INTO ");
        sb.append(tableInStagingArea(FILE_RESOLVER_VIS_MAP));
        sb.append("\n\t");
        sb.append(" (");
        sb.append("corpus, ");
        sb.append("version, ");
        sb.append("namespace, ");
        sb.append("element, ");
        sb.append("vis_type, ");
        sb.append("display_name, ");
        sb.append("\"order\", ");
        sb.append("mappings");
        sb.append(")");
        sb.append("\n");
        sb.append("SELECT tmp.corpus, ");
        sb.append("tmp.version, ");
        sb.append("tmp.namespace, ");
        sb.append("tmp.element, ");
        sb.append("tmp.vis_type, ");
        sb.append("tmp.display_name, ");
        sb.append("tmp.\"order\", ");
        sb.append("tmp.mappings");
        sb.append("\n\t");
        sb.append("FROM tmp_resolver_vis_map AS tmp; ");

        getJdbcTemplate().execute(sb.toString());
        getJdbcTemplate().execute("DROP TABLE tmp_resolver_vis_map;");
    }
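
    // Illustrative sketch (hypothetical row): an old-format line has only the
    // eight tab-separated columns listed above, e.g.
    //
    //   pcc2 \t NULL \t NULL \t NULL \t partitur \t partiture \t 0 \t NULL
    //
    // The additional ninth column of the new format (the visibility status of
    // the visualization) is not part of the INSERT and is therefore left to
    // its default value in the staging table.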

    /**
     * Imports either the old or the new version of the resolver_vis_map.tab
     * file. The new version has an additional column for the visibility status
     * of the visualization.
     *
     * @param path The path to the ANNIS file.
     * @param table The final table in the database of the resolver_vis_map
     * table.
     */
    private void importResolverVisMapTable(String path, String table) {
        try {

            // count cols for detecting old resolver_vis_map table format
            File resolver_vis_tab = new File(path, table + annisFileSuffix);

            if (!resolver_vis_tab.isFile()) {
                return;
            }

            String firstLine;
            try (BufferedReader bReader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(resolver_vis_tab), "UTF-8"))) {
                firstLine = bReader.readLine();
            }

            int cols = 9; // default number
            if (firstLine != null) {
                String[] entries = firstLine.split("\t");
                cols = entries.length;
                log.debug("first row: {}, number of columns: {}", Arrays.toString(entries), cols);
            }

            switch (cols) {
            // old format
            case 8:
                readOldResolverVisMapFormat(resolver_vis_tab);
                break;
            // new format
            case 9:
                bulkloadTableFromResource(tableInStagingArea(table),
                        new FileSystemResource(new File(path, table + annisFileSuffix)));
                break;
            default:
                log.error("invalid number of columns: {}", cols);
                throw new RuntimeException("resolver_vis_map file has an invalid number of columns: " + cols);
            }

        } catch (IOException | FileAccessException e) {
            log.error("could not read {}", table, e);
        }
    }

    /**
     * Removes any unwanted entries from the resolver_vis_map table.
     *
     * @param toplevelCorpus The name of the corpus that was just imported.
     * @param table The resolver_vis_map table to clean up.
     */
    private void fixResolverVisMapTable(String toplevelCorpus, String table) {
        log.info("checking resolver_vis_map for errors");

        // delete all entries that reference a different corpus than the imported one
        int invalidRows = getJdbcTemplate().update("DELETE FROM " + table + " WHERE corpus <> ?", toplevelCorpus);
        if (invalidRows > 0) {
            log.warn("there were {} rows in the resolver_vis_map that referenced the wrong corpus", invalidRows);
        }

    }

    /**
     * Generates example queries if no example queries tab file is defined by the
     * user.
     */
    private void generateExampleQueries(long corpusID) {
        // whether to generate queries is configured in the annis.properties file
        if (generateExampleQueries == EXAMPLE_QUERIES_CONFIG.TRUE) {
            queriesGenerator.generateQueries(corpusID);
        }
    }

    /**
     * @return the generateExampleQueries
     */
    public EXAMPLE_QUERIES_CONFIG isGenerateExampleQueries() {
        return generateExampleQueries;
    }

    /**
     * @param generateExampleQueries the generateExampleQueries to set
     */
    public void setGenerateExampleQueries(EXAMPLE_QUERIES_CONFIG generateExampleQueries) {
        this.generateExampleQueries = generateExampleQueries;
    }

    /**
     * Counts nodes and operators of the AQL example queries and writes the
     * results back to the staging area.
     *
     * @param corpusID specifies the corpus whose example queries are analyzed.
     *
     */
    private void analyzeAutoGeneratedQueries(long corpusID) {
        // read the example queries from the staging area
        List<ExampleQuery> exampleQueries = getJdbcTemplate().query("SELECT * FROM _" + EXAMPLE_QUERIES_TAB,
                new RowMapper<ExampleQuery>() {
                    @Override
                    public ExampleQuery mapRow(ResultSet rs, int i) throws SQLException {
                        ExampleQuery eQ = new ExampleQuery();
                        eQ.setExampleQuery(rs.getString("example_query"));
                        return eQ;
                    }
                });

        // count the nodes of each AQL query
        countExampleQueryNodes(exampleQueries);

        // fetch the operators
        getOperators(exampleQueries, "\\.(\\*)?|\\>|\\>\\*|_i_");

        writeAmountOfNodesBack(exampleQueries);
    }
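
    // Illustrative end-to-end sketch (hypothetical query): for an example
    // query such as 'tok .* tok' the steps above yield nodes=2 (two query
    // nodes) and used_ops='{.*}' (the operator matched by the regex), which
    // writeAmountOfNodesBack() then stores in the staging table.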

    /**
     * Counts the query nodes of each AQL example query and stores the result
     * on the corresponding {@link ExampleQuery}.
     *
     */
    private void countExampleQueryNodes(List<ExampleQuery> exampleQueries) {

        for (ExampleQuery eQ : exampleQueries) {

            QueryData query = getAnnisDao().parseAQL(eQ.getExampleQuery(), null);

            int count = 0;
            for (List<QueryNode> qNodes : query.getAlternatives()) {
                count += qNodes.size();
            }

            eQ.setNodes(count);
        }
    }
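
    // Note: the node counts of all alternatives of a query are summed up, so
    // a query that the parser splits into several alternatives contributes
    // the nodes of each alternative.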

    /**
     * Writes the counted nodes and the used operators back to the staging area.
     *
     */
    private void writeAmountOfNodesBack(List<ExampleQuery> exampleQueries) {
        for (ExampleQuery eQ : exampleQueries) {
            // use a parameterized update so that single quotes in the query
            // text cannot break the generated SQL
            getJdbcTemplate().update(
                    "UPDATE _" + EXAMPLE_QUERIES_TAB + " SET nodes=?, used_ops=? WHERE example_query=?",
                    eQ.getNodes(), eQ.getUsedOperators(), eQ.getExampleQuery());
        }
    }

    /**
     * Fetches the operators used in each {@link ExampleQuery#getExampleQuery()}
     * with a given regex.
     *
     * @param exQueries The queries whose used-operators property is set.
     * @param regex The regex used to search for operators.
     */
    private void getOperators(List<ExampleQuery> exQueries, String regex) {

        Pattern opsRegex = Pattern.compile(regex);
        for (ExampleQuery eQ : exQueries) {
            List<String> ops = new ArrayList<>();
            Matcher m = opsRegex.matcher(eQ.getExampleQuery().replaceAll("\\s", ""));

            while (m.find()) {
                ops.add(m.group());
            }

            eQ.setUsedOperators("{" + StringUtils.join(ops, ",") + "}");
        }
    }
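
    // Note on the regex passed in by analyzeAutoGeneratedQueries(): alternation
    // in java.util.regex is ordered, so '>' is tried before '>*' and a
    // dominance operator '>*' is recorded as '>'. For a hypothetical query
    // 'tok . tok > "NP"' (whitespace is stripped first) the matcher finds the
    // operators '.' and '>', stored as the array literal '{.,>}'.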

    /**
     * @return the queriesGenerator
     */
    public QueriesGenerator getQueriesGenerator() {
        return queriesGenerator;
    }

    /**
     * @param queriesGenerator the queriesGenerator to set
     */
    public void setQueriesGenerator(QueriesGenerator queriesGenerator) {
        this.queriesGenerator = queriesGenerator;
    }

    public DeleteCorpusDao getDeleteCorpusDao() {
        return deleteCorpusDao;
    }

    public void setDeleteCorpusDao(DeleteCorpusDao deleteCorpusDao) {
        this.deleteCorpusDao = deleteCorpusDao;
    }

    /**
     * Checks whether a corpus with the same name as the top level corpus in
     * the corpus.tab file already exists. If this is the case an exception is
     * thrown and the import is aborted.
     *
     * @throws
     * annis.administration.DefaultAdministrationDao.ConflictingCorpusException
     */
    private void checkTopLevelCorpus() throws ConflictingCorpusException {
        String corpusName = getTopLevelCorpusFromTmpArea();
        if (existConflictingTopLevelCorpus(corpusName)) {
            String msg = "There already exists a top level corpus with the name: " + corpusName;
            throw new ConflictingCorpusException(msg);
        }
    }

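    /**
     * Determines the ANNIS import format version of the corpus at the given
     * path: an "annis.version" file containing "3.3" marks version 3.3;
     * otherwise the column count of node.tab distinguishes version 3.2
     * (13 columns) from version 3.1 (10 columns).
     *
     * @param path the directory containing the corpus tab files
     * @return the detected version, or {@link ANNISFormatVersion#UNKNOWN} if
     * none of the checks succeed
     */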
    private ANNISFormatVersion getANNISFormatVersion(String path) {
        File pathDir = new File(path);
        if (pathDir.isDirectory()) {
            // check for existence of the "annis.version" file
            File versionFile = new File(pathDir, "annis.version");
            if (versionFile.isFile()) {
                try {
                    // read the first line
                    String firstLine = Files.readFirstLine(versionFile, Charsets.UTF_8);
                    if ("3.3".equals(firstLine.trim())) {
                        return ANNISFormatVersion.V3_3;
                    }
                } catch (IOException ex) {
                    log.warn("Could not read annis.version file", ex);
                }
            } else {
                // we have to distinguish between 3.1 and 3.2
                File nodeTab = new File(pathDir, "node.tab");
                if (nodeTab.isFile()) {
                    try {
                        String firstLine = Files.readFirstLine(nodeTab, Charsets.UTF_8);
                        List<String> cols = Splitter.on('\t').splitToList(firstLine);
                        if (cols.size() == 13) {
                            return ANNISFormatVersion.V3_2;
                        } else if (cols.size() == 10) {
                            return ANNISFormatVersion.V3_1;
                        }
                    } catch (IOException ex) {
                        log.warn("Could not read node.tab file", ex);
                    }
                }
            }
        }
        return ANNISFormatVersion.UNKNOWN;
    }

    public static class ConflictingCorpusException extends AnnisException {

        public ConflictingCorpusException(String msg) {
            super(msg);
        }
    }

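    /**
     * Value object holding the ID offsets (top level corpus ID, corpus post
     * order and node ID) that are applied when the staging tables are appended
     * to the main tables, presumably so that the IDs of a newly imported
     * corpus do not collide with already existing ones.
     */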
    public static class Offsets {
        private final long corpusID;
        private final long corpusPost;
        private final long nodeID;

        public Offsets(long corpusID, long corpusPost, long nodeID) {
            this.corpusID = corpusID;
            this.corpusPost = corpusPost;
            this.nodeID = nodeID;
        }

        public long getCorpusID() {
            return corpusID;
        }

        public long getCorpusPost() {
            return corpusPost;
        }

        public MapSqlParameterSource makeArgs() {
            return new MapSqlParameterSource().addValue(":offset_corpus_id", corpusID)
                    .addValue(":offset_corpus_post", corpusPost).addValue(":offset_node_id", nodeID);
        }
    }

}