Example usage for org.apache.lucene.index IndexWriter close

List of usage examples for org.apache.lucene.index IndexWriter close

Introduction

On this page you can find example usage for org.apache.lucene.index IndexWriter close.

Prototype

@Override
public void close() throws IOException 

Document

Closes all open resources and releases the write lock.
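
Since IndexWriter implements Closeable, the close call is usually placed in a finally block (as in several examples below) or handled with try-with-resources. The following is a minimal sketch of the try-with-resources form, assuming the Lucene 5.x-or-later API; the index path "example-index" and the field name "body" are illustrative, not taken from the examples below:

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public void indexOneDocument() throws IOException {
    // try-with-resources invokes writer.close() automatically, which by default
    // commits any buffered changes and releases the index write lock.
    try (Directory dir = FSDirectory.open(Paths.get("example-index"));
            IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
        Document doc = new Document();
        doc.add(new TextField("body", "hello lucene", Field.Store.YES));
        writer.addDocument(doc);
    } // close() runs here even if addDocument throws
}

In older Lucene versions (as in several examples below) close() is instead called explicitly, ideally inside a finally block so the write lock is released even when indexing fails.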

Usage

From source file:arena.lucene.LuceneIndexUpdater.java

License:Open Source License

public int updateIndex(boolean deleteAllFromIndexFirst, Iterable<T> valueobjects) {
    IndexWriter writer = null;
    try {
        writer = new IndexWriter(directoryBean.getDirectory(), analyzer, deleteAllFromIndexFirst,
                MaxFieldLength.LIMITED);
        int docCount = 0;
        for (T vo : valueobjects) {
            Term pkTerm = this.contentMarshall.getPKTerm(vo);
            writer.deleteDocuments(pkTerm);

            Document doc = this.contentMarshall.serialize(vo);
            if (doc != null) {
                writer.addDocument(doc);
                docCount++;
            }
        }
        if (this.searchersToReset != null) {
            for (LuceneIndexSearcher<?> searcher : this.searchersToReset) {
                searcher.reset();
            }
        }
        return docCount;
    } catch (IOException err) {
        throw new RuntimeException("Error deleting documents from lucene index", err);
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException err) {
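                // ignored: nothing useful can be done if close() fails during cleanup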
            }
        }
    }
}

From source file:at.ac.univie.mminf.luceneSKOS.analysis.engine.jena.SKOSEngineImpl.java

License:Apache License

/**
 * Creates the synonym index
 *
 * @throws IOException
 */
private void indexSKOSModel() throws IOException {
    IndexWriterConfig cfg = new IndexWriterConfig(analyzer);
    IndexWriter writer = new IndexWriter(indexDir, cfg);
    writer.getConfig().setRAMBufferSizeMB(48);
    /* iterate SKOS concepts, create Lucene docs and add them to the index */
    ResIterator concept_iter = skosModel.listResourcesWithProperty(RDF.type, SKOS.Concept);
    while (concept_iter.hasNext()) {
        Resource skos_concept = concept_iter.next();
        Document concept_doc = createDocumentsFromConcept(skos_concept);
        writer.addDocument(concept_doc);
    }
    writer.close();
}

From source file:at.ac.univie.mminf.luceneSKOS.skos.impl.SKOSEngineImpl.java

License:Apache License

/**
 * Creates the synonym index
 * 
 * @throws IOException
 */
private void indexSKOSModel() throws IOException {
    IndexWriterConfig cfg = new IndexWriterConfig(matchVersion, analyzer);
    IndexWriter writer = new IndexWriter(indexDir, cfg);
    writer.getConfig().setRAMBufferSizeMB(48);

    /* iterate SKOS concepts, create Lucene docs and add them to the index */
    ResIterator concept_iter = skosModel.listResourcesWithProperty(RDF.type, SKOS.Concept);
    while (concept_iter.hasNext()) {
        Resource skos_concept = concept_iter.next();

        Document concept_doc = createDocumentsFromConcept(skos_concept);

        // System.out.println("Adding document to index " + concept_doc);

        writer.addDocument(concept_doc);
    }

    writer.close();
}

From source file:at.lux.fotoretrieval.retrievalengines.LucenePathIndexRetrievalEngine.java

License:Open Source License

public void indexFilesSemantically(String pathToIndex, StatusBar statusBar) {
    if (statusBar != null)
        statusBar.setStatus("Creating index from semantic annotations");

    SAXBuilder builder = new SAXBuilder();
    XMLOutputter outputter = new XMLOutputter(
            Format.getRawFormat().setIndent("").setLineSeparator("").setExpandEmptyElements(false));

    try {
        String[] descriptions = FileOperations.getAllDescriptions(new File(pathToIndex), true);
        if (descriptions == null)
            return;
        float numAllDocsPercent = (float) descriptions.length / 100f;
        DecimalFormat df = (DecimalFormat) NumberFormat.getInstance();
        df.setMaximumFractionDigits(1);

        // Preparing objects for the index:
        HashMap<String, ElementEntry> elementMap = new HashMap<String, ElementEntry>(descriptions.length);
        HashMap<Element, LinkedList<String>> element2document = new HashMap<Element, LinkedList<String>>(
                descriptions.length);

        // in the first run we identify the semantic objects that we want to index and build
        // a table where we can relate them to the documents (identified by their path)
        for (int i = 0; i < descriptions.length; i++) {
            try {
                Element e = builder.build(new FileInputStream(descriptions[i])).getRootElement();
                List l = RetrievalToolkit.xpathQuery(e, "//Semantic/SemanticBase", null);
                for (Object aL : l) {
                    Element semanticElement = (Element) aL;
                    String xmlString = outputter.outputString(semanticElement).trim()
                            .replaceAll("id=\"id_[0-9]*\"", "");
                    // check if the element is already there; the indicator is its string representation.
                    if (!elementMap.keySet().contains(xmlString)) {
                        // it's not here, put it in.
                        elementMap.put(xmlString, new ElementEntry(semanticElement, elementMap.size()));
                        //                            System.out.println(xmlString);
                    }
                    // now get the unified element
                    semanticElement = elementMap.get(xmlString).semanticElement;
                    // and check if there is an entry in the table for where to find the element
                    if (!element2document.keySet().contains(semanticElement)) {
                        element2document.put(semanticElement, new LinkedList<String>());
                    }
                    // and add found document if not already there:
                    List documentList = element2document.get(semanticElement);
                    if (!documentList.contains(descriptions[i]))
                        documentList.add(descriptions[i]);
                }
                if (statusBar != null)
                    statusBar.setStatus(
                            "Parsing documents for nodes: " + df.format((float) i / numAllDocsPercent));
            } catch (JDOMException e1) {
                System.err.println("Exception in document #" + i + ": " + e1.getMessage());
            } catch (IOException e1) {
                e1.printStackTrace();
            }
        }
        // read stats:
        // System.out.println("Got " + countOverallElements + " Elements in " + descriptions.length + " descriptions, " + elementMap.size() + " elements are pairwise different.");

        // Now we can add the nodes to a lucene index:
        // fields: label, id, type, files (separated by '|'), xml, all
        // -------------------------------------------

        // opening the index for writing:
        boolean createFlag = true;
        String indexDir = parseSemanticIndexDirectory(pathToIndex);
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriter writer = new IndexWriter(indexDir, analyzer, createFlag);

        if (statusBar != null)
            statusBar.setStatus("Creating index for " + element2document.size() + " different available nodes");

        // iterating through nodes and storing them:
        for (Element semElement : element2document.keySet()) {
            // needed for later XPath :( otherwise everything in the whole document is retrieved.

            String fileList = getFileListFromNode(element2document.get(semElement));
            Document idxDocument = new Document();
            // adding the file itself ...
            idxDocument.add(new Field("files", fileList, Field.Store.YES, Field.Index.NO));

            //                System.out.println(((Element) o).getTextTrim());

            //                StringBuilder all = new StringBuilder(255);
            //                 adding the label
            //                addToDocument(idxDocument, semElement, "//Label/Name", "label", all);
            String elementLabel = semElement.getChild("Label", semElement.getNamespace())
                    .getChildTextTrim("Name", semElement.getNamespace());
            Field labelField = new Field("label", elementLabel, Field.Store.YES, Field.Index.TOKENIZED);
            labelField.setBoost(1.2f);
            idxDocument.add(labelField);

            // adding the type:
            String elementType = semElement.getAttribute("type", xsi).getValue().trim();
            idxDocument.add(new Field("type", elementType, Field.Store.YES, Field.Index.NO));
            // adding the XML contents:
            String xmlString = outputter.outputString(semElement);
            idxDocument.add(new Field("xml", xmlString, Field.Store.YES, Field.Index.NO));
            // adding the id:
            idxDocument.add(
                    new Field("id", elementMap.get(xmlString.trim().replaceAll("id=\"id_[0-9]*\"", "")).id + "",
                            Field.Store.YES, Field.Index.UN_TOKENIZED));
            // TODO: split the indexing for objects based on type:
            // adding all, unstored for retrieval only
            if (elementType.equals("AgentObjectType")) {
                createIndexDocumentFromSemanticAgent(semElement, idxDocument);
            } else if (elementType.equals("EventType")) {
                createIndexDocumentFromSemanticElement(semElement, idxDocument);
            } else if (elementType.equals("SemanticPlaceType")) {
                createIndexDocumentFromSemanticElement(semElement, idxDocument);
            } else if (elementType.equals("SemanticTimeType")) {
                createIndexDocumentFromSemanticElement(semElement, idxDocument);
            } else {
                createIndexDocumentFromSemanticElement(semElement, idxDocument);
            }

            writer.addDocument(idxDocument);

        }
        // now optimize and close the index:
        // todo: open index for appending and/or updating
        writer.optimize();
        writer.close();

        // Now we can create the powerset for each existing graph
        // (based on sorted node ids) and store
        // all resulting graphs within an index.
        // ----------------------------------------------------------
        if (statusBar != null)
            statusBar.setStatus("Creating and merging of available graphs");
        HashMap<Graph, HashSet<String>> graph2document = new HashMap<Graph, HashSet<String>>(
                descriptions.length);
        for (int i = 0; i < descriptions.length; i++)
            try {
                Element e = builder.build(new FileInputStream(descriptions[i])).getRootElement();
                List l = RetrievalToolkit.xpathQuery(e, "//Semantic/SemanticBase", null);
                HashMap<String, Integer> docID2overallID = new HashMap<String, Integer>(l.size());
                LinkedList<Relation> relations = new LinkedList<Relation>();
                LinkedList<Integer> nodes = new LinkedList<Integer>();
                for (Object aL : l) {
                    Element semanticElement = (Element) aL;
                    String xmlString = outputter.outputString(semanticElement);
                    int id = elementMap.get(xmlString.trim().replaceAll("id=\"id_[0-9]*\"", "")).id;
                    String docID = semanticElement.getAttribute("id").getValue();
                    docID2overallID.put(docID, id);
                    nodes.add(id);
                }
                // get all relations with global ids and eliminate inverse relations
                l = RetrievalToolkit.xpathQuery(e, "//Graph/Relation", null);
                for (Object aL1 : l) {
                    Element relation = (Element) aL1;
                    int source = docID2overallID.get(relation.getAttribute("source").getValue().substring(1));
                    int target = docID2overallID.get(relation.getAttribute("target").getValue().substring(1));
                    String type = relation.getAttribute("type").getValue();
                    type = type.substring(type.lastIndexOf(':') + 1);
                    Relation r = eliminateInverse(new Relation(source, target, type));
                    relations.add(r);
                }

                // now create a graph object
                Collections.sort(nodes);
                Collections.sort(relations);
                LinkedList<Node> nodeList = new LinkedList<Node>();
                for (Integer node : nodes) {
                    nodeList.add(new Node(node));
                }
                Graph g = new Graph(nodeList, relations);
                HashSet<String> docs = new HashSet<String>(1);
                docs.add(descriptions[i]);
                graph2document.put(g, docs);

            } catch (JDOMException e1) {
                System.err.println(new StringBuilder().append("Exception in document #").append(i).append(": ")
                        .append(e1.getMessage()).toString());
            }

        HashMap<String, Graph> str2graph = new HashMap<String, Graph>(graph2document.size() / 2);
        HashMap<Graph, HashSet<String>> g2d = new HashMap<Graph, HashSet<String>>(descriptions.length);

        /*
        For now we reduce the number of graphs by identifying and merging duplicates and
        removing redundant entries:
        */
        for (Graph g : graph2document.keySet()) {
            if (str2graph.containsKey(g.toString())) {
                g2d.get(str2graph.get(g.toString())).addAll(graph2document.get(g));
            } else {
                str2graph.put(g.toString(), g);
                g2d.put(g, graph2document.get(g));
            }
        }
        graph2document = g2d;
        System.out.println(graph2document.size() + " non trivial different graphs were found");
        // now put all the available graphs into an index:
        // -----------------------------------------------

        // for now we will store a simple text file:
        if (statusBar != null)
            statusBar.setStatus("Saving index of paths");

        boolean createPathIndexFlag = true;
        String pathIndexDir = parsePathIndexDirectory(pathToIndex);
        IndexWriter pathIndexWriter = new IndexWriter(pathIndexDir, new GraphAnalyzer(), createPathIndexFlag);

        for (Graph graph : graph2document.keySet()) {
            HashSet<String> files = graph2document.get(graph);
            Document idxDocument = new Document();
            // adding the file itself ...
            for (String s : files) {
                idxDocument.add(new Field("file", s, Field.Store.YES, Field.Index.NO));
            }
            // adding the graph ...
            idxDocument.add(new Field("graph", graph.toString(), Field.Store.YES, Field.Index.TOKENIZED));
            //                idxDocument.add(Field.UnIndexed("graph", graph.toString()));
            // adding the paths
            StringBuilder sb = new StringBuilder(256);
            sb.append(graph.toString());
            List<Path> pathList = (new LabeledGraph(graph)).get2Paths();
            if (!pathList.isEmpty())
                sb.append(' ');
            for (Iterator<Path> iterator1 = pathList.iterator(); iterator1.hasNext();) {
                Path path = iterator1.next();
                sb.append(path.toString());
                if (iterator1.hasNext())
                    sb.append(' ');
            }
            idxDocument.add(new Field("paths", sb.toString(), Field.Store.YES, Field.Index.TOKENIZED));
            pathIndexWriter.addDocument(idxDocument);
        }
        // now optimize and close the index:
        pathIndexWriter.optimize();
        pathIndexWriter.close();

    } catch (IOException e) {
        e.printStackTrace();
    }

}

From source file:at.lux.fotoretrieval.retrievalengines.LuceneRetrievalEngine.java

License:Open Source License

/**
 * In general we take the base path of our search as the pathToIndex parameter.
 * We then add the directory "index" and create the index there.
 *
 * @param pathToIndex
 * @param statusBar
 */
public void indexFiles(String pathToIndex, StatusBar statusBar) {
    // parsing and eventually creating the directory for the index ...
    String indexDir = parseFulltextIndexDirectory(pathToIndex);

    Analyzer analyzer = new StandardAnalyzer();
    boolean createFlag = true;
    SAXBuilder builder = new SAXBuilder();
    String prefix = "Creating fulltext index: ";
    try {
        IndexWriter writer = new IndexWriter(indexDir, analyzer, createFlag);
        String[] descriptions = FileOperations.getAllDescriptions(new File(pathToIndex), true);
        if (descriptions == null)
            return;
        float numAllDocsPercent = (float) descriptions.length / 100f;
        DecimalFormat df = (DecimalFormat) NumberFormat.getInstance();
        df.setMaximumFractionDigits(1);

        for (int i = 0; i < descriptions.length; i++) {
            try {
                Element e = builder.build(new FileInputStream(descriptions[i])).getRootElement();
                Document idxDocument = new Document();
                // adding the file itself ...
                idxDocument.add(new Field("file", descriptions[i], Field.Store.YES, Field.Index.NO));
                // adding all given names
                StringBuilder all = new StringBuilder(255);

                List l = RetrievalToolkit.xpathQuery(e, "//Graph/Relation", null);
                //                    System.out.println("NumberOfRelations: " + l.size());

                addToDocument(idxDocument, e, "//Agent/Name/GivenName", "GivenName", all);
                addToDocument(idxDocument, e, "//Agent/Name/FamilyName", "FamilyName", all);
                addToDocument(idxDocument, e, "//Label/Name", "Label", all);
                addToDocument(idxDocument, e, "//FreeTextAnnotation", "FreeTextAnnotation", all);
                addToDocument(idxDocument, e, "//StructuredAnnotation/Who/Name", "Who", all);
                addToDocument(idxDocument, e, "//StructuredAnnotation/Where/Name", "Where", all);
                addToDocument(idxDocument, e, "//StructuredAnnotation/How/Name", "How", all);
                addToDocument(idxDocument, e, "//StructuredAnnotation/Why/Name", "Why", all);
                addToDocument(idxDocument, e, "//StructuredAnnotation/When/Name", "When", all);
                addToDocument(idxDocument, e, "//StructuredAnnotation/WhatObject/Name", "WhatObjects", all);
                addToDocument(idxDocument, e, "//StructuredAnnotation/WhatAction/Name", "WhatAction", all);

                idxDocument.add(new Field("all", all.toString(), Field.Store.NO, Field.Index.TOKENIZED));

                writer.addDocument(idxDocument);

                if (statusBar != null) {
                    StringBuilder status = new StringBuilder(13).append(prefix);
                    status.append(df.format(((float) i) / numAllDocsPercent));
                    status.append('%');
                    statusBar.setStatus(status.toString());
                }

            } catch (Exception e1) {
                System.err.println("Error with file " + descriptions[i] + " (" + e1.getMessage() + ")");
            }
        }
        writer.optimize();
        writer.close();
        if (statusBar != null) {
            statusBar.setStatus("Indexing finished");
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:at.lux.fotoretrieval.retrievalengines.LuceneRetrievalEngine.java

License:Open Source License

public void indexFilesSemantically(String pathToIndex, StatusBar statusBar) {
    if (statusBar != null)
        statusBar.setStatus("Creating index from semantic annotations");

    SAXBuilder builder = new SAXBuilder();
    XMLOutputter outputter = new XMLOutputter(
            Format.getRawFormat().setIndent("").setLineSeparator("").setExpandEmptyElements(false));

    try {
        String[] descriptions = FileOperations.getAllDescriptions(new File(pathToIndex), true);
        if (descriptions == null)
            return;
        float numAllDocsPercent = (float) descriptions.length / 100f;
        DecimalFormat df = (DecimalFormat) NumberFormat.getInstance();
        df.setMaximumFractionDigits(1);

        // Preparing objects for the index:
        HashMap<String, ElementEntry> elementMap = new HashMap<String, ElementEntry>(descriptions.length);
        HashMap<Element, LinkedList<String>> element2document = new HashMap<Element, LinkedList<String>>(
                descriptions.length);

        // in the first run we identify the semantic objects that we want to index and build
        // a table where we can relate them to the documents (identified by their path)
        for (int i = 0; i < descriptions.length; i++) {
            try {
                Element e = builder.build(new FileInputStream(descriptions[i])).getRootElement();
                List l = RetrievalToolkit.xpathQuery(e, "//Semantic/SemanticBase", null);
                for (Iterator iterator = l.iterator(); iterator.hasNext();) {
                    Element semanticElement = (Element) iterator.next();
                    String xmlString = outputter.outputString(semanticElement).trim()
                            .replaceAll("id=\"id_[0-9]*\"", "");
                    // check if the element is already there; the indicator is its string representation.
                    if (!elementMap.keySet().contains(xmlString)) {
                        // it's not here, put it in.
                        elementMap.put(xmlString, new ElementEntry(semanticElement, elementMap.size()));
                        //                            System.out.println(xmlString);
                    }
                    // now get the unified element
                    semanticElement = elementMap.get(xmlString).semanticElement;
                    // and check if there is an entry in the table for where to find the element
                    if (!element2document.keySet().contains(semanticElement)) {
                        element2document.put(semanticElement, new LinkedList<String>());
                    }
                    // and add found document if not already there:
                    List documentList = element2document.get(semanticElement);
                    if (!documentList.contains(descriptions[i]))
                        documentList.add(descriptions[i]);
                }
                if (statusBar != null)
                    statusBar.setStatus(
                            "Parsing documents for nodes: " + df.format((float) i / numAllDocsPercent));
            } catch (JDOMException e1) {
                System.err.println("Exception in document #" + i + ": " + e1.getMessage());
            } catch (IOException e1) {
                e1.printStackTrace();
            }
        }
        // read stats:
        // System.out.println("Got " + countOverallElements + " Elements in " + descriptions.length + " descriptions, " + elementMap.size() + " elements are pairwise different.");

        // Now we can add the nodes to a lucene index:
        // fields: label, id, type, files (separated by '|'), xml, all
        // -------------------------------------------

        // opening the index for writing:
        boolean createFlag = true;
        String indexDir = parseSemanticIndexDirectory(pathToIndex);
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriter writer = new IndexWriter(indexDir, analyzer, createFlag);

        if (statusBar != null)
            statusBar.setStatus("Creating index for " + element2document.size() + " different available nodes");

        // iterating through nodes and storing them:
        for (Iterator<Element> iterator = element2document.keySet().iterator(); iterator.hasNext();) {
            Element semElement = iterator.next();
            // needed for later XPath :( otherwise everything in the whole document is retrieved.

            String fileList = getFileListFromNode(element2document.get(semElement));
            Document idxDocument = new Document();
            // adding the file itself ...
            idxDocument.add(new Field("files", fileList, Field.Store.YES, Field.Index.NO));

            //                System.out.println(((Element) o).getTextTrim());

            StringBuilder all = new StringBuilder(255);
            // adding the label
            //                addToDocument(idxDocument, semElement, "//Label/Name", "label", all);
            String elementLabel = semElement.getChild("Label", semElement.getNamespace())
                    .getChildTextTrim("Name", semElement.getNamespace());
            idxDocument.add(new Field("label", elementLabel, Field.Store.YES, Field.Index.TOKENIZED));

            // adding the type:
            String elementType = semElement.getAttribute("type", xsi).getValue().trim();
            idxDocument.add(new Field("type", elementType, Field.Store.YES, Field.Index.NO));
            // adding the XML contents:
            String xmlString = outputter.outputString(semElement);
            idxDocument.add(new Field("xml", xmlString, Field.Store.YES, Field.Index.NO));
            // adding the id:
            idxDocument.add(
                    new Field("id", elementMap.get(xmlString.trim().replaceAll("id=\"id_[0-9]*\"", "")).id + "",
                            Field.Store.YES, Field.Index.NO));
            // adding all, unstored for retrieval only
            List l = RetrievalToolkit.xpathQuery(semElement, "*//*", null);
            for (Iterator it3 = l.iterator(); it3.hasNext();) {
                Element e = (Element) it3.next();
                all.append(e.getTextTrim());
                all.append(" ");
            }
            idxDocument.add(new Field("all", all.toString(), Field.Store.NO, Field.Index.TOKENIZED));

            writer.addDocument(idxDocument);

        }
        // now optimize and close the index:
        // todo: open index for appending and/or updating
        writer.optimize();
        writer.close();

        // Now we can create the powerset for each existing graph
        // (based on sorted node ids) and store
        // all resulting graphs within an index.
        // ----------------------------------------------------------
        if (statusBar != null)
            statusBar.setStatus("Creating and merging powersets of available graphs");
        HashMap<Graph, HashSet<String>> graph2document = new HashMap<Graph, HashSet<String>>(
                descriptions.length);
        for (int i = 0; i < descriptions.length; i++) {
            try {
                Element e = builder.build(new FileInputStream(descriptions[i])).getRootElement();
                List l = RetrievalToolkit.xpathQuery(e, "//Semantic/SemanticBase", null);
                HashMap<String, Integer> docID2overallID = new HashMap<String, Integer>(l.size());
                LinkedList<Relation> relations = new LinkedList<Relation>();
                LinkedList<Integer> nodes = new LinkedList<Integer>();
                for (Iterator iterator = l.iterator(); iterator.hasNext();) {
                    Element semanticElement = (Element) iterator.next();
                    String xmlString = outputter.outputString(semanticElement);
                    int id = elementMap.get(xmlString.trim().replaceAll("id=\"id_[0-9]*\"", "")).id;
                    String docID = semanticElement.getAttribute("id").getValue();
                    docID2overallID.put(docID, id);
                    nodes.add(id);
                }
                // get all relations with global ids and eliminate inverse relations
                l = RetrievalToolkit.xpathQuery(e, "//Graph/Relation", null);
                for (Iterator iterator = l.iterator(); iterator.hasNext();) {
                    Element relation = (Element) iterator.next();
                    int source = docID2overallID.get(relation.getAttribute("source").getValue().substring(1));
                    int target = docID2overallID.get(relation.getAttribute("target").getValue().substring(1));
                    String type = relation.getAttribute("type").getValue();
                    type = type.substring(type.lastIndexOf(':') + 1);
                    Relation r = eliminateInverse(new Relation(source, target, type));
                    relations.add(r);
                }

                // now create a graph object
                Collections.sort(nodes);
                Collections.sort(relations);
                LinkedList<Node> nodeList = new LinkedList<Node>();
                for (Iterator<Integer> iterator = nodes.iterator(); iterator.hasNext();) {
                    nodeList.add(new Node(iterator.next()));
                }
                Graph g = new Graph(nodeList, relations);
                //                    List<Graph> powerSet = new LinkedList<Graph>();
                //                    powerSet.add(g);
                HashSet<String> docs = new HashSet<String>(1);
                docs.add(descriptions[i]);
                graph2document.put(g, docs);
                /*
                        
                                    // add all these subgraphs and the reference to the document to
                                    // a data structure:
                                    for (Iterator<Graph> iterator = powerSet.iterator(); iterator.hasNext();) {
                Graph graph = iterator.next();
                //                        List<Graph> relationsPowerSet = graph.getPowerSetOfRelations();
                //                        for (Iterator<Graph> iterator1 = relationsPowerSet.iterator(); iterator1.hasNext();) {
                //                            Graph graph1 = iterator1.next();
                //                        }
                // add graph if not trivial:
                if (graph.getNodes().size() > 1) {
                    // containsKey for Graph does not match my needs -
                    // different graph objects reference the same graph!
                    if (string2graph.containsKey(graph.toString())) {
                        graph = string2graph.get(graph.toString());
                        graph2document.get(graph).add(descriptions[i]);
                    } else {
                        HashSet<String> docs = new HashSet<String>(1);
                        docs.add(descriptions[i]);
                        graph2document.put(graph, docs);
                    }
                }
                                    }
                */
            } catch (JDOMException e1) {
                System.err.println("Exception in document #" + i + ": " + e1.getMessage());
            }
        }

        HashMap<String, Graph> str2graph = new HashMap<String, Graph>(graph2document.size() / 2);
        HashMap<Graph, HashSet<String>> g2d = new HashMap<Graph, HashSet<String>>(descriptions.length);

        /*
        For now we reduce the number of graphs by identifying and merging duplicates and
        removing redundant entries:
        */
        for (Iterator<Graph> iterator = graph2document.keySet().iterator(); iterator.hasNext();) {
            Graph g = iterator.next();
            if (str2graph.containsKey(g.toString())) {
                g2d.get(str2graph.get(g.toString())).addAll(graph2document.get(g));
            } else {
                str2graph.put(g.toString(), g);
                g2d.put(g, graph2document.get(g));
            }
        }
        graph2document = g2d;
        System.out.println(graph2document.size() + " non trivial different graphs were found");
        // now put all the available graphs into an index:
        // -----------------------------------------------
        // todo: create real fast storable index of subgraphs instead of file :-) possible candidate a trie

        // for now we will store a simple text file:
        if (statusBar != null)
            statusBar.setStatus("Storing powersets of available graphs as file");
        String indexFile;
        if (!pathToIndex.endsWith(File.separator)) {
            indexFile = pathToIndex + File.separator + "idx_graphs.list";
        } else {
            indexFile = pathToIndex + "idx_graphs.list";
        }
        File f = new File(indexFile);
        BufferedWriter bw = new BufferedWriter(
                new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(f, false))));
        for (Iterator<Graph> iterator = graph2document.keySet().iterator(); iterator.hasNext();) {
            Graph g = iterator.next();
            bw.write(g.toString());
            for (Iterator<String> iterator1 = graph2document.get(g).iterator(); iterator1.hasNext();) {
                String s = iterator1.next();
                bw.write("|" + s);
            }
            bw.write("\n");
        }
        bw.close();
    } catch (IOException e) {
        e.printStackTrace();
    }

}

From source file:au.edu.unimelb.csse.join.JoinFunctionalTest.java

License:Apache License

/**
 * This test is actually commented out. To run the test, match counting has to be enabled in JoinLogic.
 * @throws Exception
 */
public void testNumberOfCallsToMatch() throws Exception {
    String sent = "(NP" + "(NP" + "(DT The)" + "(NN year))" + "(NP" + "(NP(CD 1956))" + "(PP" + "(IN in)"
            + "(NP(JJ rugby)(NN union))" + ")" + ")" + "(. .)" + ")";
    Analyzer analyser = new FastStringAnalyser();
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, analyser, true, IndexWriter.MaxFieldLength.UNLIMITED);

    Document d = new Document();
    d.add(new Field("sent", sent, Field.Store.NO, Field.Index.ANALYZED_NO_NORMS,
            Field.TermVector.WITH_POSITIONS));
    writer.addDocument(d);

    writer.close();

    IndexSearcher searcher = new IndexSearcher(dir);
    assertNumberOfComparisons(searcher, "//NP//NP", TermJoinType.SIMPLE, false, 6);

    assertNumberOfComparisons(searcher, "//NP//NP", TermJoinType.SIMPLE_WITH_FC, false, 1);

    assertNumberOfComparisons(searcher, "//NP//NP", TermJoinType.EARLY_STOP, false, 2);

    assertNumberOfComparisons(searcher, "//NP//NP", TermJoinType.EARLY_STOP_WITH_FC, false, 1);

    assertNumberOfComparisons(searcher, "//NP//NP", TermJoinType.SIMPLE, true, 6);

    assertNumberOfComparisons(searcher, "//NP//NP", TermJoinType.SIMPLE_WITH_FC, true, 5);

    assertNumberOfComparisons(searcher, "//NP//NP", TermJoinType.EARLY_STOP, true, 6);

    assertNumberOfComparisons(searcher, "//NP//NP", TermJoinType.EARLY_STOP_WITH_FC, true, 5);

    assertNumberOfComparisons(searcher, "//NP//NP//NP", TermJoinType.SIMPLE, false, 23);

    assertNumberOfComparisons(searcher, "//NP//NP//NP", TermJoinType.SIMPLE_WITH_FC, false, 10);

    assertNumberOfComparisons(searcher, "//NP//NP//NP", TermJoinType.EARLY_STOP, false, 10);

    assertNumberOfComparisons(searcher, "//NP//NP//NP", TermJoinType.EARLY_STOP_WITH_FC, false, 8);

}

From source file:au.edu.unimelb.csse.join.JoinFunctionalTest.java

License:Apache License

public void testFilterjoin() throws Exception {
    String sent = "(NP" + "(NP" + "(DT The)" + "(NN year))" + "(NP" + "(NP(CD 1956))" + "(PP" + "(IN in)"
            + "(NP(JJ rugby)(NN union))" + ")" + ")" + "(. .)" + ")";
    Analyzer analyser = new FastStringAnalyser();
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, analyser, true, IndexWriter.MaxFieldLength.UNLIMITED);

    Document d = new Document();
    d.add(new Field("sent", sent, Field.Store.NO, Field.Index.ANALYZED_NO_NORMS,
            Field.TermVector.WITH_POSITIONS));
    writer.addDocument(d);

    writer.close();

    IndexSearcher searcher = new IndexSearcher(dir);

    boolean[] lookaheadOptions = new boolean[] { false, true };
    for (TermJoinType type : TermJoinType.values()) {
        for (boolean lookahead : lookaheadOptions) {
            QueryBuilder builder = new QueryBuilder("//PP[/IN AND /NP]");
            TreebankQuery query = builder.parse(type, lookahead);
            SimpleHitCollector hitCollector = new SimpleHitCollector(10);
            searcher.search(query, hitCollector);
            assertEquals(1, hitCollector.totalHits);
        }
    }

    QueryBuilder builder = new QueryBuilder("//PP[/IN AND /NP/JJ/rugby]");
    TreebankQuery query = builder.parse(TermJoinType.SIMPLE, true);
    SimpleHitCollector hitCollector = new SimpleHitCollector(10);
    searcher.search(query, hitCollector);
    assertEquals(1, hitCollector.totalHits);

}

From source file:au.org.ala.names.search.ALANameIndexer.java

License:Open Source License

/**
 * Creates the IRMNG homonym index based on the DWCA and species homonyms supplied from the NSL
 * @param exportsDir
 * @param indexDir
 * @throws Exception
 */
public void createIrmngIndex(String exportsDir, String indexDir) throws Exception {
    Analyzer analyzer = new LowerCaseKeywordAnalyzer();
    IndexWriter irmngWriter = createIndexWriter(new File(indexDir + File.separator + "irmng"), analyzer, true);
    indexIrmngDwcA(irmngWriter, irmngDwcaDirectory);
    indexIRMNG(irmngWriter, exportsDir + File.separator + "ala-species-homonyms.txt", RankType.SPECIES);
    irmngWriter.forceMerge(1);
    irmngWriter.close();
}

From source file:au.org.ala.names.search.ALANameIndexer.java

License:Open Source License

public void createIndex(String exportsDir, String indexDir, String acceptedFile, String synonymFile,
        String irmngDwca, boolean generateSciNames, boolean generateCommonNames) throws Exception {

    Analyzer analyzer = new LowerCaseKeywordAnalyzer();
    //generate the extra id index
    createExtraIdIndex(indexDir + File.separator + "id",
            new File(exportsDir + File.separator + "identifiers.txt"));
    if (generateSciNames) {
        indexALA(createIndexWriter(new File(indexDir + File.separator + "cb"), analyzer, true), acceptedFile,
                synonymFile);//exportsDir + File.separator + "ala_accepted_concepts_dump.txt");//, exportsDir + File.separator + lexFile);
        //IRMNG index to aid in the resolving of homonyms
        IndexWriter irmngWriter = createIndexWriter(new File(indexDir + File.separator + "irmng"), analyzer,
                true);
        indexIrmngDwcA(irmngWriter, irmngDwca);

        indexIRMNG(irmngWriter, exportsDir + File.separator + "ala-species-homonyms.txt", RankType.SPECIES);
        irmngWriter.forceMerge(1);
        irmngWriter.close();
    }
    if (generateCommonNames) {
        //vernacular index to search for common names
        indexCommonNames(createIndexWriter(new File(indexDir + File.separator + "vernacular"),
                new KeywordAnalyzer(), true), exportsDir, indexDir);
    }
}