org.ala.hbase.LimneticDataLoader.java Source code

Java tutorial

Introduction

Here is the source code for org.ala.hbase.LimneticDataLoader.java

Source

/***************************************************************************
 * Copyright (C) 2010 Atlas of Living Australia
 * All Rights Reserved.
 *
 * The contents of this file are subject to the Mozilla Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 ***************************************************************************/
package org.ala.hbase;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

import javax.inject.Inject;

import org.ala.dao.InfoSourceDAO;
import org.ala.dao.TaxonConceptDao;
import org.ala.model.ExtantStatus;
import org.ala.model.Habitat;
import org.ala.model.InfoSource;
import org.ala.util.SpringUtils;
import org.ala.util.TabReader;
import org.apache.log4j.Logger;
import org.springframework.context.ApplicationContext;
import org.springframework.stereotype.Component;

import au.org.ala.data.model.LinnaeanRankClassification;

/**
 * Loads habitat data extracted from Limnetic report files into the BIE.
 *
 * <p>Every file ending in <code>.csv</code> inside the staging directory is read with a
 * {@link TabReader}. Each row is expected to hold eight fields: identifier, kingdom,
 * phylum, class, order, family, genus and scientific name. The scientific name (together
 * with its classification) is resolved to a GUID through
 * {@link TaxonConceptDao#findLsidByName}; when a GUID is found, a {@link Habitat} with the
 * code {@link #HABITAT_CODE} is added to that taxon concept.
 *
 * @author Tommy Wang (tommy.wang@csiro.au)
 */
@Component("limneticDataLoader")
public class LimneticDataLoader {

    protected static Logger logger = Logger.getLogger(LimneticDataLoader.class);

    /** Directory scanned for <code>.csv</code> input files. */
    private static final String DATA_DIR = "/data/bie-staging/limnetic";

    /** Number of fields a row must have to be processed. */
    private static final int EXPECTED_FIELD_COUNT = 8;

    protected static final String HABITAT_CODE = "Limnetic";
    protected static final int INFOSOURCE_ID = 1063;
    protected static final String DEFAULT_INFOSOURCE_URL = "http://www.ala.org.au";

    @Inject
    protected InfoSourceDAO infoSourceDao;

    @Inject
    protected TaxonConceptDao taxonConceptDao;

    /**
     * Command line entry point: obtains the loader bean from the Spring context, runs the
     * load and terminates the JVM.
     *
     * @param args ignored
     * @throws Exception if the load fails
     */
    public static void main(String[] args) throws Exception {
        ApplicationContext context = SpringUtils.getContext();
        LimneticDataLoader l = context.getBean(LimneticDataLoader.class);
        l.load();
        // Reaching this point means the load completed without an exception, so report
        // success (status 0) rather than a non-zero failure status.
        System.exit(0);
    }

    /**
     * Loads every CSV file found in the default data directory.
     *
     * @throws Exception if reading a file or updating the taxon concepts fails
     */
    private void load() throws Exception {
        loadCsvData(DATA_DIR);
    }

    /**
     * Processes every file ending in <code>.csv</code> directly inside <code>dir</code>.
     *
     * @param dir path of the directory holding the input files
     * @throws IllegalStateException if no info source exists for {@link #INFOSOURCE_ID}
     * @throws Exception if reading a file or updating the taxon concepts fails
     */
    private void loadCsvData(String dir) throws Exception {
        System.out.println("Starting to load data from " + dir);

        InfoSource infosource = infoSourceDao.getById(INFOSOURCE_ID);
        if (infosource == null) {
            // Fail up front with a clear message instead of a NullPointerException on the
            // first matched row.
            throw new IllegalStateException("No info source found for id " + INFOSOURCE_ID);
        }

        // Taken once for the whole run: the time reported after each file is the time
        // elapsed since loading started, not the time spent on that file alone.
        long start = System.currentTimeMillis();

        File inputDir = new File(dir);
        if (!inputDir.isDirectory()) {
            logger.warn("Input path is not a directory, nothing loaded: " + dir);
            return;
        }

        // File.list() returns null when the directory cannot be read.
        String[] inputFiles = inputDir.list();
        if (inputFiles == null) {
            logger.warn("Unable to list the contents of directory: " + dir);
            return;
        }

        for (String fileName : inputFiles) {
            if (fileName.endsWith(".csv")) {
                loadCsvFile(dir + File.separator + fileName, infosource, start);
            }
        }
    }

    /**
     * Reads one input file and adds the Limnetic habitat to every taxon concept whose
     * scientific name can be resolved to a GUID. Rows that do not have exactly
     * {@value #EXPECTED_FIELD_COUNT} fields are reported and skipped.
     *
     * @param csvFile    full path of the file to read
     * @param infosource info source recorded against each habitat
     * @param start      timestamp (milliseconds) used as the origin of the reported
     *                   elapsed time
     * @throws Exception if reading the file or updating a taxon concept fails
     */
    private void loadCsvFile(String csvFile, InfoSource infosource, long start) throws Exception {
        int matchCounter = 0;
        int nonMatchCounter = 0;
        String guid = null;
        String previousScientificName = null;

        TabReader tr = new TabReader(csvFile, false);
        try {
            String[] values;
            while ((values = tr.readNext()) != null) {
                if (values.length != EXPECTED_FIELD_COUNT) {
                    System.out.println("Incorrect number of fields in tab file - " + csvFile);
                    continue;
                }

                // values[0] holds the record identifier, which is not needed here.
                String kingdom = values[1];
                String phylum = values[2];
                String klass = values[3];
                String order = values[4];
                String family = values[5];
                String genus = values[6];
                String currentScientificName = values[7];

                // Consecutive rows with the same name reuse the previous lookup result
                // (including a failed one), so the name is only resolved when it changes.
                if (!currentScientificName.equalsIgnoreCase(previousScientificName)) {
                    LinnaeanRankClassification classification = new LinnaeanRankClassification(
                            kingdom, phylum, klass, order, family, genus, currentScientificName);
                    guid = taxonConceptDao.findLsidByName(currentScientificName, classification, null);
                    if (guid == null) {
                        nonMatchCounter++;
                    }
                    previousScientificName = currentScientificName;
                }

                if (guid != null) {
                    logger.info("Adding guid=" + guid + " SciName=" + currentScientificName
                            + " Habitat=" + HABITAT_CODE);
                    // append it to the existing habitats
                    taxonConceptDao.addHabitat(guid, buildHabitats(infosource), true);
                    matchCounter++;
                }
            }
        } finally {
            // Release the reader even when a lookup or update throws part-way through.
            tr.close();
        }

        long elapsedSeconds = (System.currentTimeMillis() - start) / 1000;
        System.out.println("File: " + csvFile + ". " + matchCounter + " Limnetic records loaded. "
                + nonMatchCounter + " Limnetic records cannot be matched. Time taken "
                + (elapsedSeconds / 60) + " minutes, " + (elapsedSeconds % 60)
                + " seconds.");
    }

    /**
     * Builds the single-element habitat list stored against a matched taxon concept.
     *
     * @param infosource info source whose id and name are copied onto the habitat
     * @return a new mutable list containing one Limnetic habitat
     */
    private List<Habitat> buildHabitats(InfoSource infosource) {
        Habitat habitat = new Habitat(HABITAT_CODE);
        habitat.setInfoSourceId(Integer.toString(infosource.getId()));
        habitat.setInfoSourceName(infosource.getName());
        habitat.setInfoSourceURL(DEFAULT_INFOSOURCE_URL);

        List<Habitat> habitatList = new ArrayList<Habitat>();
        habitatList.add(habitat);
        return habitatList;
    }

    /**
     * @param taxonConceptDao the taxonConceptDao to set
     */
    public void setTaxonConceptDao(TaxonConceptDao taxonConceptDao) {
        this.taxonConceptDao = taxonConceptDao;
    }

    /**
     * @param infoSourceDao the infoSourceDao to set
     */
    public void setInfoSourceDao(InfoSourceDAO infoSourceDao) {
        this.infoSourceDao = infoSourceDao;
    }
}