Java tutorial
/** * This file is part of GeneMANIA. * Copyright (C) 2010 University of Toronto. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ package org.genemania.mediator.lucene.exporter; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.FileReader; import java.io.IOException; import java.io.InputStreamReader; import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Properties; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.DateTools; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.DateTools.Resolution; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter.MaxFieldLength; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.RAMDirectory; import org.genemania.configobj.ConfigObj; import org.genemania.configobj.Section; import org.genemania.domain.Organism; import org.genemania.mediator.lucene.LuceneMediator; public class Generic2LuceneExporter { public static final int NETWORK_ID = 0; public static final int NETWORK_NAME = 1; public static final int NETWORK_METADATA_ID = 2; public static final int NETWORK_DESCRIPTION = 3; public static final int NETWORK_DEFAULT_SELECTED = 4; public static final int NETWORK_GROUP_ID = 5; private Map<String, String> networkGroupColours; private String genericDbPath; private String indexPath; private String basePath; private String profileName; private ConfigObj config; public Generic2LuceneExporter() { networkGroupColours = Collections.emptyMap(); } public void close() { } static String join(String connector, String[] items) { StringBuilder builder = new StringBuilder(); for (String item : items) { if (builder.length() > 0) { builder.append(connector); } builder.append(item); } return builder.toString(); } public static void main(String[] args) throws Exception { if (args.length < 3) { System.out.println("Usage:"); System.out.printf("%s <db-config.cfg> <raw-data-path> <colours.txt> [<profile>]\n", Generic2LuceneExporter.class.getName()); System.out.println(); return; } String configurationPath = args[0]; String basePath = args[1]; String colourConfigPath = args[2]; String profileName; if (args.length == 4) { profileName = args[3]; } else { profileName = null; } final Map<String, String> colours = loadColours(colourConfigPath); ConfigObj config = new ConfigObj(new FileReader(configurationPath)); String genericDbPath = join(File.separator, new String[] { basePath, config.getSection("FileLocations").getEntry("generic_db_dir") }); final Generic2LuceneExporter exporter = new Generic2LuceneExporter(); exporter.setNetworkGroupColours(colours); exporter.setBasePath(basePath); exporter.setGenericDbPath(genericDbPath); exporter.setProfileName(profileName); exporter.setConfig(config); exporter.setIndexPath("lucene_index"); exporter.export(); } public void export() throws Exception { final ExportProfile profile = createExportProfile(basePath, profileName); Analyzer analyzer = createAnalyzer(); try { final Map<String, Long> namingSourceIds = new HashMap<String, Long>(); File indexFile = new File(makeIndexPath("base")); FSDirectory directory = FSDirectory.open(indexFile); final IndexWriter indexWriter = new IndexWriter(directory, analyzer, true, MaxFieldLength.UNLIMITED); processFile(genericDbPath, "GENE_NAMING_SOURCES.txt", new FileHandler() { @Override public boolean process(String line) throws IOException { String[] parts = line.split("\t", -1); exportNamingSource(indexWriter, parts); namingSourceIds.put(parts[1], Long.parseLong(parts[0])); return true; } }); processFile(genericDbPath, "TAGS.txt", new FileHandler() { @Override public boolean process(String line) throws IOException { String[] parts = line.split("\t", -1); exportTag(indexWriter, parts); return true; } }); processFile(genericDbPath, "ONTOLOGIES.txt", new FileHandler() { @Override public boolean process(String line) throws IOException { String[] parts = line.split("\t", -1); exportOntologies(indexWriter, parts); return true; } }); processFile(genericDbPath, "ONTOLOGY_CATEGORIES.txt", new FileHandler() { @Override public boolean process(String line) throws IOException { String[] parts = line.split("\t", -1); exportOntologyCategories(indexWriter, parts); return true; } }); exportStatistics(indexWriter); indexWriter.close(); String[] organisms = config.getSection("Organisms").getEntry("organisms").split("\\s*,\\s*"); for (final String organismId : organisms) { Section organismSection = config.getSection(organismId); final String shortName = organismSection.getEntry("short_name"); System.out.println(shortName); RAMDirectory ramDirectory = new RAMDirectory(); final IndexWriter writer = new IndexWriter(ramDirectory, analyzer, true, MaxFieldLength.UNLIMITED); final Organism organism = new Organism(); processFile(genericDbPath, "ORGANISMS.txt", new FileHandler() { @Override public boolean process(String line) throws IOException { String[] parts = line.split("\t", -1); if (parts[1].equals(shortName)) { exportOrganism(writer, parts); populateOrganism(organism, parts); return false; } return true; } }); final Long entrezNamingSourceId = namingSourceIds.get("Entrez Gene ID"); final Map<Long, String> externalIds = new HashMap<Long, String>(); final Map<Long, Long> externalNamingSourceIds = new HashMap<Long, Long>(); final Set<Long> nodes = new HashSet<Long>(); processFile(genericDbPath, "GENES.txt", new FileHandler() { @Override public boolean process(String line) throws IOException { String[] parts = line.split("\t", -1); long organismId = Long.parseLong(parts[5]); if (organismId == organism.getId()) { exportGene(writer, parts); long nodeId = Long.parseLong(parts[4]); nodes.add(nodeId); long namingSourceId = Long.parseLong(parts[3]); if (namingSourceId == entrezNamingSourceId) { externalIds.put(nodeId, parts[1]); externalNamingSourceIds.put(nodeId, namingSourceId); } } return true; } }); final Map<Long, Long> geneDataToNodeIds = new HashMap<Long, Long>(); processFile(genericDbPath, "NODES.txt", new FileHandler() { @Override public boolean process(String line) throws IOException { String[] parts = line.split("\t", -1); long nodeId = Long.parseLong(parts[0]); if (nodes.contains(nodeId)) { exportNode(writer, parts, String.valueOf(organism.getId())); geneDataToNodeIds.put(Long.parseLong(parts[2]), nodeId); } return true; } }); processFile(genericDbPath, "GENE_DATA.txt", new FileHandler() { @Override public boolean process(String line) throws IOException { String[] parts = line.split("\t", -1); long geneDataId = Long.parseLong(parts[0]); Long nodeId = geneDataToNodeIds.get(geneDataId); if (nodeId != null) { String externalId = externalIds.get(nodeId); long namingSourceId = -1; if (externalId != null) { namingSourceId = externalNamingSourceIds.get(nodeId); } exportGeneData(writer, parts, externalId, namingSourceId); } return true; } }); final Set<Long> groups = new HashSet<Long>(); processFile(genericDbPath, "NETWORK_GROUPS.txt", new FileHandler() { @Override public boolean process(String line) throws IOException { String[] parts = line.split("\t", -1); long organismId = Long.parseLong(parts[4]); if (organismId == organism.getId()) { exportGroup(writer, parts); groups.add(Long.parseLong(parts[0])); } return true; } }); final Set<Long> metadata = new HashSet<Long>(); final Set<Long> networks = new HashSet<Long>(); processFile(genericDbPath, "NETWORKS.txt", new FileHandler() { @Override public boolean process(String line) throws IOException { String[] parts = line.split("\t", -1); long groupId = Long.parseLong(parts[5]); long networkId = Long.parseLong(parts[0]); if (groups.contains(groupId) && profile.includesNetwork(parts)) { exportNetwork(writer, parts); long metadataId = Long.parseLong(parts[2]); metadata.add(metadataId); networks.add(networkId); } return true; } }); processFile(genericDbPath, "NETWORK_METADATA.txt", new FileHandler() { @Override public boolean process(String line) throws IOException { String[] parts = line.split("\t", -1); long metadataId = Long.parseLong(parts[0]); if (metadata.contains(metadataId)) { exportNetworkMetadata(writer, parts); } return true; } }); processFile(genericDbPath, "NETWORK_TAG_ASSOC.txt", new FileHandler() { @Override public boolean process(String line) throws IOException { String[] parts = line.split("\t", -1); long networkId = Long.parseLong(parts[1]); if (networks.contains(networkId)) { exportNetworkTagAssoc(writer, parts); } return true; } }); final Set<Long> attribute_groups = new HashSet<Long>(); processFile(genericDbPath, "ATTRIBUTE_GROUPS.txt", new FileHandler() { @Override public boolean process(String line) throws IOException { String[] parts = line.split("\t", -1); long organismId = Long.parseLong(parts[1]); if (organismId == organism.getId()) { exportAttributeGroup(writer, parts); long group_id = Long.parseLong(parts[0]); attribute_groups.add(group_id); } return true; } }); final Set<Long> attributes = new HashSet<Long>(); processFile(genericDbPath, "ATTRIBUTES.txt", new FileHandler() { @Override public boolean process(String line) throws IOException { String[] parts = line.split("\t", -1); long group_id = Long.parseLong(parts[1]); if (attribute_groups.contains(group_id)) { exportAttribute(writer, parts); long attribute_id = Long.parseLong(parts[0]); attributes.add(attribute_id); } return true; } }); writer.close(); String gmOrganismId = organismSection.getEntry("gm_organism_id"); File organismFile = new File(makeIndexPath(String.format("%s", gmOrganismId))); FSDirectory fileDirectory = FSDirectory.open(organismFile); IndexWriter organismWriter = new IndexWriter(fileDirectory, analyzer, true, MaxFieldLength.UNLIMITED); IndexReader reader = IndexReader.open(ramDirectory); organismWriter.addIndexes(new IndexReader[] { reader }); organismWriter.close(); fileDirectory.close(); ramDirectory.close(); Properties properties = new Properties(); properties.put("short_name", shortName); properties.put("common_name", organismSection.getEntry("common_name")); properties.put("organism_id", gmOrganismId); String propertyPath = String.format("%s%smetadata.xml", gmOrganismId, File.separator); FileOutputStream out = new FileOutputStream(makeIndexPath(propertyPath)); try { properties.storeToXML(out, null, "UTF-8"); } finally { out.close(); } } } finally { close(); } } public String makeIndexPath(String path) { if (indexPath != null && !indexPath.equals("")) { return indexPath + File.separator + path; } else { return path; } } private static ExportProfile createExportProfile(String basePath, String name) throws IOException { if (name == null) { return DefaultExportProfile.instance(); } File file = new File(name); String profile = file.getName(); if ("all".equals(profile)) { return DefaultExportProfile.instance(); } if ("core".equals(profile)) { return new CoreExportProfile(basePath); } return new CustomExportProfile(name); } private void exportStatistics(IndexWriter writer) throws IOException { Document doc = new Document(); Date date = new Date(); doc.add(new Field(LuceneMediator.TYPE, LuceneMediator.STATISTICS, Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.STATISTICS_BUILD_DATE, DateTools.dateToString(date, Resolution.DAY), Store.YES, Index.ANALYZED)); writer.addDocument(doc); } protected void exportOntologyCategories(IndexWriter writer, String[] parts) throws IOException { Document doc = new Document(); doc.add(new Field(LuceneMediator.TYPE, LuceneMediator.ONTOLOGY, Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ONTOLOGYCATEGORY_ID, parts[0], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ONTOLOGYCATEGORY_ONTOLOGY_ID, parts[1], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ONTOLOGYCATEGORY_NAME, parts[2], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ONTOLOGYCATEGORY_DESCRIPTION, parts[3], Store.YES, Index.ANALYZED)); writer.addDocument(doc); } protected void exportOntologies(IndexWriter writer, String[] parts) throws IOException { Document doc = new Document(); doc.add(new Field(LuceneMediator.TYPE, LuceneMediator.ONTOLOGY, Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ONTOLOGY_ID, parts[0], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ONTOLOGY_NAME, parts[1], Store.YES, Index.ANALYZED)); writer.addDocument(doc); } public static Map<String, String> loadColours(String path) throws IOException { Map<String, String> colours = new HashMap<String, String>(); Pattern pattern = Pattern.compile("(.*?)\\s+([A-Fa-f0-9]+)\\s*(//.*)?"); BufferedReader reader = new BufferedReader(new FileReader(path)); String line = reader.readLine(); while (line != null) { Matcher matcher = pattern.matcher(line); if (!matcher.matches()) { continue; } String colour = matcher.group(2); String groupType = matcher.group(1); colours.put(groupType, colour); line = reader.readLine(); } return colours; } private static void populateOrganism(Organism organism, String[] parts) { organism.setId(Long.parseLong(parts[0])); organism.setName(parts[1]); organism.setDescription(parts[2]); } private static void processFile(String basePath, String fileName, FileHandler handler) throws IOException { String path = join(File.separator, new String[] { basePath, fileName }); BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(path), "utf-8")); String line = reader.readLine(); while (line != null) { if (!handler.process(line)) { break; } line = reader.readLine(); } reader.close(); } public void setNetworkGroupColours(Map<String, String> colours) { networkGroupColours = colours; } private static Analyzer createAnalyzer() { return LuceneMediator.createDefaultAnalyzer(); } public void exportOrganism(IndexWriter writer, String[] parts) throws IOException { Document doc = new Document(); doc.add(new Field(LuceneMediator.TYPE, LuceneMediator.ORGANISM, Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ORGANISM_ID, parts[0], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ORGANISM_NAME, parts[1], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ORGANISM_DESCRIPTION, parts[2], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ORGANISM_ALIAS, parts[3], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ORGANISM_ONTOLOGY_ID, parts[4], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ORGANISM_TAXONOMY_ID, parts[5], Store.YES, Index.ANALYZED)); writer.addDocument(doc); } public void exportGroup(IndexWriter writer, String[] parts) throws IOException { Document doc = new Document(); String colour = networkGroupColours.get(parts[2]); if (colour == null) { colour = "000000"; } doc.add(new Field(LuceneMediator.TYPE, LuceneMediator.GROUP, Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.GROUP_ID, parts[0], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.GROUP_NAME, parts[1], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.GROUP_CODE, parts[2], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.GROUP_DESCRIPTION, parts[3], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.GROUP_ORGANISM_ID, parts[4], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.GROUP_COLOUR, colour, Store.YES, Index.ANALYZED)); writer.addDocument(doc); } public void exportNetwork(IndexWriter writer, String[] parts) throws IOException { Document doc = new Document(); doc.add(new Field(LuceneMediator.TYPE, LuceneMediator.NETWORK, Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORK_ID, parts[NETWORK_ID], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORK_NAME, parts[NETWORK_NAME], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORK_METADATA_ID, parts[NETWORK_METADATA_ID], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORK_DESCRIPTION, parts[NETWORK_DESCRIPTION], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORK_DEFAULT_SELECTED, parts[NETWORK_DEFAULT_SELECTED].equals("0") ? "false" : "true", Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORK_GROUP_ID, parts[NETWORK_GROUP_ID], Store.YES, Index.ANALYZED)); writer.addDocument(doc); } public void exportNetworkMetadata(IndexWriter writer, String[] parts) throws IOException { Document doc = new Document(); doc.add(new Field(LuceneMediator.TYPE, LuceneMediator.NETWORKMETADATA, Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORKMETADATA_ID, parts[0], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORKMETADATA_SOURCE, parts[1], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORKMETADATA_REFERENCE, parts[2], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORKMETADATA_PUBMED_ID, parts[3], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORKMETADATA_AUTHORS, parts[4], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORKMETADATA_PUBLICATION_NAME, parts[5], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORKMETADATA_YEAR_PUBLISHED, parts[6], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORKMETADATA_PROCESSING_DESC, parts[7], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORKMETADATA_NETWORK_TYPE, parts[8], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORKMETADATA_ALIAS, parts[9], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORKMETADATA_INTERACTION_COUNT, parts[10], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORKMETADATA_DYNAMIC_RANGE, parts[11], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORKMETADATA_EDGE_WEIGHT_DIST, parts[12], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORKMETADATA_ACCESS_STATS, parts[13], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORKMETADATA_COMMENT, parts[14], Store.YES, Index.ANALYZED)); if (parts.length > 15) { doc.add(new Field(LuceneMediator.NETWORKMETADATA_OTHER, parts[15], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORKMETADATA_TITLE, parts[16], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORKMETADATA_URL, parts[17], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORKMETADATA_SOURCE_URL, parts[18], Store.YES, Index.ANALYZED)); } writer.addDocument(doc); } public void exportNode(IndexWriter writer, String[] parts, String organismId) throws IOException { Document doc = new Document(); doc.add(new Field(LuceneMediator.TYPE, LuceneMediator.NODE, Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NODE_ID, parts[0], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NODE_NAME, parts[1], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NODE_GENEDATA_ID, parts[2], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NODE_ORGANISM_ID, organismId, Store.NO, Index.ANALYZED)); writer.addDocument(doc); } public void exportGene(IndexWriter writer, String[] parts) throws IOException { Document doc = new Document(); doc.add(new Field(LuceneMediator.TYPE, LuceneMediator.GENE, Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.GENE_ID, parts[0], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.GENE_SYMBOL, parts[1], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.GENE_NAMINGSOURCE_ID, parts[3], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.GENE_NODE_ID, parts[4], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.GENE_ORGANISM_ID, parts[5], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.GENE_DEFAULT_SELECTED, parts[6].equals("0") ? "false" : "true", Store.YES, Index.ANALYZED)); writer.addDocument(doc); } public void exportNamingSource(IndexWriter writer, String[] parts) throws IOException { Document doc = new Document(); doc.add(new Field(LuceneMediator.TYPE, LuceneMediator.NAMINGSOURCE, Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NAMINGSOURCE_ID, parts[0], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NAMINGSOURCE_NAME, parts[1], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NAMINGSOURCE_RANK, parts[2], Store.YES, Index.ANALYZED)); String namingSource = (parts[3].length() > 0) ? parts[3] : ""; doc.add(new Field(LuceneMediator.NAMINGSOURCE_SHORT_NAME, namingSource, Store.YES, Index.ANALYZED)); writer.addDocument(doc); } public void exportGeneData(IndexWriter writer, String[] parts, String externalId, long namingSourceId) throws IOException { Document doc = new Document(); doc.add(new Field(LuceneMediator.TYPE, LuceneMediator.GENEDATA, Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.GENEDATA_ID, parts[0], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.GENEDATA_DESCRIPTION, parts[1], Store.YES, Index.ANALYZED)); if (externalId != null) { doc.add(new Field(LuceneMediator.GENEDATA_EXTERNAL_ID, externalId, Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.GENEDATA_NAMINGSOURCE_ID, String.valueOf(namingSourceId), Store.YES, Index.ANALYZED)); } writer.addDocument(doc); } protected void exportTag(IndexWriter writer, String[] parts) throws IOException { Document doc = new Document(); doc.add(new Field(LuceneMediator.TYPE, LuceneMediator.TAG, Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.TAG_ID, parts[0], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.TAG_NAME, parts[1], Store.YES, Index.ANALYZED)); writer.addDocument(doc); } protected void exportNetworkTagAssoc(IndexWriter writer, String[] parts) throws IOException { Document doc = new Document(); doc.add(new Field(LuceneMediator.TYPE, LuceneMediator.NETWORKTAGASSOC, Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORKTAGASSOC_ID, parts[0], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORKTAGASSOC_NETWORK_ID, parts[1], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.NETWORKTAGASSOC_TAG_ID, parts[2], Store.YES, Index.ANALYZED)); writer.addDocument(doc); } protected void exportAttributeGroup(IndexWriter writer, String[] parts) throws IOException { Document doc = new Document(); doc.add(new Field(LuceneMediator.TYPE, LuceneMediator.ATTRIBUTEGROUP, Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ATTRIBUTEGROUP_ID, parts[0], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ATTRIBUTEGROUP_ORGANISM_ID, parts[1], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ATTRIBUTEGROUP_NAME, parts[2], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ATTRIBUTEGROUP_CODE, parts[3], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ATTRIBUTEGROUP_DESCRIPTION, parts[4], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ATTRIBUTEGROUP_LINKOUT_LABEL, parts[5], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ATTRIBUTEGROUP_LINKOUT_URL, parts[6], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ATTRIBUTEGROUP_DEFAULT_SELECTED, parts[7], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ATTRIBUTEGROUP_PUBLICATION_NAME, parts[8], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ATTRIBUTEGROUP_PUBLICATION_URL, parts[9], Store.YES, Index.ANALYZED)); writer.addDocument(doc); } protected void exportAttribute(IndexWriter writer, String[] parts) throws IOException { Document doc = new Document(); doc.add(new Field(LuceneMediator.TYPE, LuceneMediator.ATTRIBUTE, Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ATTRIBUTE_ID, parts[0], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ATTRIBUTE_ORGANISM_ID, parts[1], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ATTRIBUTE_GROUP_ID, parts[2], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ATTRIBUTE_EXTERNAL_ID, parts[3], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ATTRIBUTE_NAME, parts[4], Store.YES, Index.ANALYZED)); doc.add(new Field(LuceneMediator.ATTRIBUTE_DESCRIPTION, parts[5], Store.YES, Index.ANALYZED)); writer.addDocument(doc); } interface FileHandler { boolean process(String line) throws IOException; } public String getGenericDbPath() { return genericDbPath; } public void setGenericDbPath(String genericDbPath) { this.genericDbPath = genericDbPath; } public String getIndexPath() { return indexPath; } public void setIndexPath(String indexPath) { this.indexPath = indexPath; } public String getBasePath() { return basePath; } public void setBasePath(String basePath) { this.basePath = basePath; } public String getProfileName() { return profileName; } public void setProfileName(String profileName) { this.profileName = profileName; } public ConfigObj getConfig() { return config; } public void setConfig(ConfigObj config) { this.config = config; } }