Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.vxquery.runtime.functions.index; import java.io.File; import java.io.IOException; import java.nio.file.Paths; import java.text.SimpleDateFormat; import java.util.concurrent.ConcurrentHashMap; import org.apache.hyracks.data.std.api.IPointable; import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.vxquery.datamodel.accessors.TaggedValuePointable; import org.apache.vxquery.datamodel.builders.sequence.SequenceBuilder; import org.apache.vxquery.exceptions.ErrorCode; import org.apache.vxquery.exceptions.SystemException; import org.apache.vxquery.index.IndexDocumentBuilder; import org.apache.vxquery.runtime.functions.index.update.MetaFileUtil; import org.apache.vxquery.runtime.functions.index.update.XmlMetadata; import org.apache.vxquery.runtime.functions.util.FunctionHelper; import org.apache.vxquery.xmlparser.IParser; import org.apache.vxquery.xmlparser.ITreeNodeIdProvider; import org.apache.vxquery.xmlparser.XMLParser; public class IndexConstructorUtil { private final TaggedValuePointable nodep = (TaggedValuePointable) TaggedValuePointable.FACTORY .createPointable(); private final SequenceBuilder sb = new SequenceBuilder(); private boolean isMetaFilePresent = false; private MetaFileUtil metaFileUtil; private ConcurrentHashMap<String, XmlMetadata> metadataMap = new ConcurrentHashMap<>(); public void evaluate(String collectioFolder, String indexFolder, IPointable result, ArrayBackedValueStorage abvs, ITreeNodeIdProvider nodeIdProvider, ArrayBackedValueStorage abvsFileNode, boolean isElementPath, String nodeId) throws IOException { metaFileUtil = new MetaFileUtil(indexFolder); isMetaFilePresent = metaFileUtil.isMetaFilePresent(); metaFileUtil.setCollection(collectioFolder); File collectionDirectory = new File(collectioFolder); if (!collectionDirectory.exists()) { throw new IOException("The collection directory (" + collectioFolder + ") does not exist."); } try { abvs.reset(); sb.reset(abvs); Directory dir = FSDirectory.open(Paths.get(indexFolder)); Analyzer analyzer = new CaseSensitiveAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); // Create will overwrite the index everytime iwc.setOpenMode(OpenMode.CREATE); //Create an index writer IndexWriter writer = new IndexWriter(dir, iwc); //Add files to index indexXmlFiles(collectionDirectory, writer, isElementPath, abvsFileNode, nodeIdProvider, sb, nodeId); if (!isMetaFilePresent) { // Write metadata map to a file. metaFileUtil.updateMetadataMap(metadataMap, indexFolder); metaFileUtil.writeMetadataToFile(); } //This makes write slower but search faster. writer.forceMerge(1); writer.close(); sb.finish(); result.set(abvs); } catch (IOException e) { throw new SystemException(ErrorCode.SYSE0001, e); } } /* * This function goes recursively one file at a time. First it turns the file into an ABVS document node, then * it indexes that document node. */ public void indexXmlFiles(File collectionDirectory, IndexWriter writer, boolean isElementPath, ArrayBackedValueStorage abvsFileNode, ITreeNodeIdProvider nodeIdProvider, SequenceBuilder sb, String nodeId) throws IOException { SimpleDateFormat sdf = new SimpleDateFormat("dd/MM/yyyy, HH:mm:ss"); for (File file : collectionDirectory.listFiles()) { if (readableXmlFile(file.getPath())) { abvsFileNode.reset(); IndexDocumentBuilder ibuilder = getIndexBuilder(file, writer, abvsFileNode, nodeIdProvider, nodeId); ibuilder.printStart(); if (!isMetaFilePresent) { XmlMetadata xmlMetadata = new XmlMetadata(); xmlMetadata.setPath(file.getCanonicalPath()); xmlMetadata.setFileName(file.getName()); xmlMetadata.setLastModified(sdf.format(file.lastModified())); xmlMetadata.setMd5(metaFileUtil.generateMD5(file)); metadataMap.put(file.getCanonicalPath(), xmlMetadata); } } else if (file.isDirectory()) { // Consider all XML file in sub directories. indexXmlFiles(file, writer, isElementPath, abvsFileNode, nodeIdProvider, sb, nodeId); } } } public boolean readableXmlFile(String path) { return path.toLowerCase().endsWith(".xml") || path.toLowerCase().endsWith(".xml.gz"); } public IndexDocumentBuilder getIndexBuilder(File file, IndexWriter writer, ArrayBackedValueStorage abvsFileNode, ITreeNodeIdProvider nodeIdProvider, String nodeId) throws IOException { //Get the document node IParser parser = new XMLParser(false, nodeIdProvider, nodeId); FunctionHelper.readInDocFromString(file.getPath(), abvsFileNode, parser); nodep.set(abvsFileNode.getByteArray(), abvsFileNode.getStartOffset(), abvsFileNode.getLength()); //Add the document to the index //Creates one lucene doc per file return new IndexDocumentBuilder(nodep, writer, file.getCanonicalPath()); } }