Example usage for org.apache.lucene.index IndexWriter addDocument

List of usage examples for org.apache.lucene.index IndexWriter addDocument

Introduction

On this page you can find example usages of org.apache.lucene.index IndexWriter addDocument.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException 

Source Link

Document

Adds a document to this index.

Usage

From source file:axiom.db.utils.LuceneManipulator.java

License:Open Source License

/**
 * Rebuilds the index found in {@code dbDir} into a sibling directory named
 * {@code <dbDir>_tmp}, then replaces the plain files of {@code dbDir} with the rebuilt ones.
 *
 * <p>The rebuild is done in two scans over the source index:
 * <ol>
 *   <li>collect the ids of documents flagged as deleted and, for every other id, remember the
 *       document number with the greatest last-modified string;</li>
 *   <li>copy (through {@code convertDocument}) every document that is not flagged as deleted,
 *       whose id is not in the deleted set, and that is the remembered document for its id.</li>
 * </ol>
 * Afterwards the new segments are committed through a JDBC connection and the files are swapped.
 *
 * @param dbDir path of the index directory to rebuild
 * @throws Exception if the temporary directory cannot be removed at the end; failures while
 *         copying or committing are printed and rethrown wrapped in a {@link RuntimeException}
 */
public void compress(String dbDir) throws Exception {
    // Select the FSDirectory implementation through a system property before any directory is opened.
    System.setProperty("org.apache.lucene.FSDirectory.class", "org.apache.lucene.store.TransFSDirectory");

    File dbhome = new File(dbDir);
    String url = getUrl(dbhome);

    // Source index (opened without creating it); configure the DB settings when it is transactional.
    FSDirectory indexDir = FSDirectory.getDirectory(dbhome, false);
    if (indexDir instanceof TransFSDirectory) {
        FSDirectory.setDisableLocks(true);
        TransFSDirectory d = (TransFSDirectory) indexDir;
        d.setDriverClass(DRIVER_CLASS);
        d.setUrl(url);
        d.setUser(null);
        d.setPassword(null);
    }

    // Target index lives next to the source, in "<name>_tmp", and is created fresh.
    File ndbhome = new File(dbhome.getParentFile(), dbhome.getName() + "_tmp");
    // NOTE(review): olddbhome is never used in this method.
    File olddbhome = new File(dbhome.getParentFile(), dbhome.getName() + "_old");
    FSDirectory nindexDir = FSDirectory.getDirectory(ndbhome, true);
    if (nindexDir instanceof TransFSDirectory) {
        FSDirectory.setDisableLocks(true);
        TransFSDirectory d = (TransFSDirectory) nindexDir;
        d.setDriverClass(DRIVER_CLASS);
        d.setUrl(url);
        d.setUser(null);
        d.setPassword(null);
    }

    IndexSearcher searcher = null;
    IndexWriter writer = null;
    // NOTE(review): lmgr is never assigned in this method, so the shutdown branch below never runs.
    LuceneManager lmgr = null;

    try {
        searcher = new IndexSearcher(indexDir);
        PerFieldAnalyzerWrapper a = LuceneManager.buildAnalyzer();
        writer = IndexWriterManager.getWriter(nindexDir, a, true);
        // NOTE(review): document numbers 0..numDocs-1 are visited; if the reader has Lucene-level
        // deletions, numDocs can be smaller than the highest document number - confirm this is intended.
        final int numDocs = searcher.getIndexReader().numDocs();

        // Scan 1: deldocs = ids flagged deleted; infos = id -> document number to keep.
        HashSet deldocs = new HashSet();
        HashMap infos = new HashMap();
        for (int i = 0; i < numDocs; i++) {
            Document doc = searcher.doc(i);
            String delprop = doc.get(DeletedInfos.DELETED);
            // The key combines the document id and its layer, joined by the separator.
            final String id = doc.get(LuceneManager.ID) + DeletedInfos.KEY_SEPERATOR
                    + doc.get(LuceneManager.LAYER_OF_SAVE);
            if (delprop != null && "true".equals(delprop)) {
                deldocs.add(id);
            } else {
                Object v;
                if ((v = infos.get(id)) == null) {
                    // First time this key is seen.
                    infos.put(id, new Integer(i));
                } else {
                    // Key seen before: keep whichever document has the greater last-modified string.
                    // NOTE(review): the previous value is read with the literal "_lastmodified" while
                    // the current one uses LuceneManager.LASTMODIFIED - presumably the same field; verify.
                    final String lmod = doc.get(LuceneManager.LASTMODIFIED);
                    final String lmod_prev = searcher.doc(((Integer) v).intValue()).get("_lastmodified");
                    if (lmod_prev == null || (lmod != null && lmod.compareTo(lmod_prev) > 0)) {
                        infos.put(id, new Integer(i));
                    }
                }
            }
        }

        // NOTE(review): listOfMaps is never read or written after this point in this method.
        ArrayList listOfMaps = new ArrayList();

        // Scan 2: copy the surviving documents into the new index.
        for (int i = 0; i < numDocs; i++) {
            Document doc = searcher.doc(i);
            String delprop = doc.get(DeletedInfos.DELETED);
            String layerStr = doc.get(LuceneManager.LAYER_OF_SAVE);
            // NOTE(review): layer is parsed but not used afterwards in this method.
            int layer = -1;
            try {
                layer = Integer.parseInt(layerStr);
            } catch (Exception ex) {
                layer = -1;
            }
            final String id = doc.get(LuceneManager.ID) + DeletedInfos.KEY_SEPERATOR
                    + doc.get(LuceneManager.LAYER_OF_SAVE);
            if (delprop != null && "true".equals(delprop)) {
                // The document itself is flagged as deleted.
                continue;
            } else if (id != null && deldocs.contains(id)) {
                // Another document with the same key is flagged as deleted
                // (id is a string concatenation, so the null check is always true).
                continue;
            }

            // Skip every duplicate except the one selected during scan 1.
            Integer idx = (Integer) infos.get(id);
            if (idx != null && i != idx.intValue()) {
                continue;
            }

            Document ndoc = convertDocument(doc);

            // convertDocument may return null, in which case nothing is written for this document.
            if (ndoc != null) {
                writer.addDocument(ndoc);
            }
        }

    } catch (Exception ex) {
        ex.printStackTrace();
        throw new RuntimeException(ex);
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (Exception ex) {
                // Failure to close the searcher is ignored.
            }
        }

        if (lmgr != null) {
            lmgr.shutdown();
            lmgr = null;
        }

        // Release the source directory and drop the per-directory state held by IndexObjectsFactory.
        indexDir.close();
        SegmentInfos sinfos = IndexObjectsFactory.getFSSegmentInfos(indexDir);
        sinfos.clear();
        IndexObjectsFactory.removeDeletedInfos(indexDir);
    }

    Connection conn = null;
    boolean exceptionOccured = false;

    // Commit stage: close the writer and commit the new segments using a connection
    // with auto-commit disabled, so the finally block can commit or roll back.
    try {
        if (writer != null) {
            conn = DriverManager.getConnection(url);
            conn.setAutoCommit(false);
            writer.close();
            writer.flushCache();
            LuceneManager.commitSegments(null, conn, dbhome, writer.getDirectory());
            writer.finalizeTrans();
        }
    } catch (Exception ex) {
        ex.printStackTrace();
        exceptionOccured = true;
        throw new RuntimeException(ex);
    } finally {
        if (conn != null) {
            try {
                if (!conn.getAutoCommit()) {
                    // Commit on success, roll back when the try block failed.
                    if (!exceptionOccured) {
                        conn.commit();
                    } else {
                        conn.rollback();
                    }
                }
                conn.close();
            } catch (Exception ex) {
                ex.printStackTrace();
            }
            conn = null;
        }

        // Release the target directory and drop the per-directory state held by IndexObjectsFactory.
        nindexDir.close();
        SegmentInfos sinfos = IndexObjectsFactory.getFSSegmentInfos(nindexDir);
        sinfos.clear();
        IndexObjectsFactory.removeDeletedInfos(nindexDir);
    }

    // Swap stage: remove the plain files of the old index (sub-directories are kept) ...
    // NOTE(review): File.listFiles() can return null and the results of delete()/renameTo()
    // are not checked - confirm that a partial swap is acceptable here.
    File[] files = dbhome.listFiles();
    for (int i = 0; i < files.length; i++) {
        if (!files[i].isDirectory()) {
            files[i].delete();
        }
    }

    // ... and move the plain files of the rebuilt index into place.
    files = ndbhome.listFiles();
    for (int i = 0; i < files.length; i++) {
        if (!files[i].isDirectory()) {
            File nfile = new File(dbhome, files[i].getName());
            files[i].renameTo(nfile);
        }
    }

    if (!FileUtils.deleteDir(ndbhome)) {
        throw new Exception("Could not delete " + ndbhome);
    }
}

From source file:axiom.objectmodel.dom.convert.LuceneConvertor.java

License:Open Source License

/**
 * Converts the index stored in {@code dbhome} by rewriting it into a sibling
 * {@code <dbhome>_tmp} directory and then swapping the directories.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Scan the source index, collecting the keys of documents flagged as deleted and, for every
 *       other key, the document number with the greatest last-modified string.</li>
 *   <li>Copy (through {@code convertDocument}) every surviving document into the new index; when
 *       {@code recordNodes} is set, also rebuild {@code allNodes} from the copied documents.</li>
 *   <li>Make sure the {@code Lucene} table has a {@code version} column, commit the new segments
 *       and run {@code updateSQL} on the same connection.</li>
 *   <li>Rename {@code dbhome} to {@code <dbhome>_old}, rename the new directory to {@code dbhome},
 *       move the {@code blob} sub-directory across and delete the old directory.</li>
 * </ol>
 *
 * @param app    application supplying the transaction source and error logging
 * @param dbhome directory of the index to convert
 * @throws Exception if one of the directory moves or the final delete fails; failures while
 *         copying or committing are printed and rethrown wrapped in a {@link RuntimeException}
 */
public void convert(Application app, File dbhome) throws Exception {
    FSDirectory indexDir = FSDirectory.getDirectory(dbhome, false);
    if (indexDir instanceof TransFSDirectory) {
        FSDirectory.setDisableLocks(true);
        TransFSDirectory d = (TransFSDirectory) indexDir;
        TransSource source = app.getTransSource();
        d.setDriverClass(source.getDriverClass());
        d.setUrl(source.getUrl());
        d.setUser(source.getUser());
        d.setPassword(source.getPassword());
    }
    File ndbhome = new File(dbhome.getParentFile(), dbhome.getName() + "_tmp");
    File olddbhome = new File(dbhome.getParentFile(), dbhome.getName() + "_old");
    FSDirectory nindexDir = FSDirectory.getDirectory(ndbhome, true);
    if (nindexDir instanceof TransFSDirectory) {
        FSDirectory.setDisableLocks(true);
        TransFSDirectory d = (TransFSDirectory) nindexDir;
        TransSource source = app.getTransSource();
        d.setDriverClass(source.getDriverClass());
        d.setUrl(source.getUrl());
        d.setUser(source.getUser());
        d.setPassword(source.getPassword());
    }

    IndexSearcher searcher = null;
    IndexWriter writer = null;
    LuceneManager lmgr = null;

    try {
        searcher = new IndexSearcher(indexDir);
        PerFieldAnalyzerWrapper a = LuceneManager.buildAnalyzer();
        writer = IndexWriterManager.getWriter(nindexDir, a, true);
        final int numDocs = searcher.getIndexReader().numDocs();

        // Scan 1: deldocs = keys flagged deleted; infos = key -> document number to keep.
        // (A restriction to the live layer was sketched here in the past but is not active.)
        HashSet deldocs = new HashSet();
        HashMap infos = new HashMap();
        for (int i = 0; i < numDocs; i++) {
            Document doc = searcher.doc(i);
            String delprop = doc.get(DeletedInfos.DELETED);
            final String id = doc.get(LuceneManager.ID) + DeletedInfos.KEY_SEPERATOR
                    + doc.get(LuceneManager.LAYER_OF_SAVE);
            if (delprop != null && "true".equals(delprop)) {
                deldocs.add(id);
            } else {
                Object v;
                if ((v = infos.get(id)) == null) {
                    infos.put(id, Integer.valueOf(i));
                } else {
                    // Same key seen before: keep the document with the greater last-modified string.
                    final String lmod = doc.get(LuceneManager.LASTMODIFIED);
                    final String lmod_prev = searcher.doc(((Integer) v).intValue()).get("_lastmodified");
                    if (lmod_prev == null || (lmod != null && lmod.compareTo(lmod_prev) > 0)) {
                        infos.put(id, Integer.valueOf(i));
                    }
                }
            }
        }

        ArrayList listOfMaps = new ArrayList();

        // Scan 2: copy the surviving documents into the new index.
        for (int i = 0; i < numDocs; i++) {
            Document doc = searcher.doc(i);
            String delprop = doc.get(DeletedInfos.DELETED);
            final String id = doc.get(LuceneManager.ID) + DeletedInfos.KEY_SEPERATOR
                    + doc.get(LuceneManager.LAYER_OF_SAVE);
            if (delprop != null && "true".equals(delprop)) {
                continue;
            } else if (deldocs.contains(id)) {
                continue;
            }

            // Skip every duplicate except the one selected during scan 1.
            Integer idx = (Integer) infos.get(id);
            if (idx != null && i != idx.intValue()) {
                continue;
            }

            Document ndoc = convertDocument(doc);

            // The original document is recorded even when convertDocument returns null.
            if (this.recordNodes) {
                listOfMaps.add(LuceneManager.luceneDocumentToMap(doc));
            }

            if (ndoc != null) {
                writer.addDocument(ndoc);
            }
        }

        if (this.recordNodes) {
            lmgr = new LuceneManager(this.app, false, true);
            this.allNodes = new HashMap();
            final int size = listOfMaps.size();
            for (int i = 0; i < size; i++) {
                HashMap m = (HashMap) listOfMaps.get(i);
                INode n = lmgr.mapToNode(m);
                this.allNodes.put(n.getID(), getPath(n));
            }
        }

    } catch (Exception ex) {
        ex.printStackTrace();
        throw new RuntimeException(ex);
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (Exception ex) {
                app.logError(ErrorReporter.errorMsg(this.getClass(), "convert"), ex);
            }
        }

        if (lmgr != null) {
            lmgr.shutdown();
            lmgr = null;
        }

        indexDir.close();
        SegmentInfos sinfos = IndexObjectsFactory.getFSSegmentInfos(indexDir);
        sinfos.clear();
        IndexObjectsFactory.removeDeletedInfos(indexDir);
    }

    Connection conn = null;
    boolean exceptionOccured = false;

    try {
        if (writer != null) {
            TransSource ts = app.getTransSource();
            conn = ts.getConnection();

            // Add the "version" column to the Lucene table when it does not exist yet.
            DatabaseMetaData dmd = conn.getMetaData();
            ResultSet rs = dmd.getColumns(null, null, "Lucene", "version");
            try {
                if (!rs.next()) {
                    final String alterTbl = "ALTER TABLE Lucene ADD version INT NOT NULL DEFAULT 1";
                    PreparedStatement pstmt = null;
                    try {
                        pstmt = conn.prepareStatement(alterTbl);
                        pstmt.execute();
                    } catch (SQLException sqle) {
                        // A failed ALTER is logged and the conversion continues.
                        app.logError(ErrorReporter.errorMsg(this.getClass(), "convert"), sqle);
                    } finally {
                        if (pstmt != null) {
                            pstmt.close();
                        }
                    }
                }
            } finally {
                // Close the result set even when closing the statement above throws.
                rs.close();
            }

            writer.close();
            writer.flushCache();//TODO:writer.writeSegmentsFile();
            LuceneManager.commitSegments(conn, app, writer.getDirectory());
            writer.finalizeTrans();

            this.updateSQL(conn);
        }
    } catch (Exception ex) {
        ex.printStackTrace();
        exceptionOccured = true;
        throw new RuntimeException(ex);
    } finally {
        if (conn != null) {
            try {
                if (!conn.getAutoCommit()) {
                    // Commit on success, roll back when the try block failed.
                    if (!exceptionOccured) {
                        conn.commit();
                    } else {
                        conn.rollback();
                    }
                }
                conn.close();
            } catch (Exception ex) {
                app.logError(ErrorReporter.errorMsg(this.getClass(), "convert"), ex);
            }
            conn = null;
        }

        nindexDir.close();
        SegmentInfos sinfos = IndexObjectsFactory.getFSSegmentInfos(nindexDir);
        sinfos.clear();
        IndexObjectsFactory.removeDeletedInfos(nindexDir);
    }

    if (!dbhome.renameTo(olddbhome)) {
        throw new Exception("Could not move the old version of the db into " + olddbhome);
    }

    if (!ndbhome.renameTo(dbhome)) {
        throw new Exception("Could not move the newer version of the db into " + dbhome);
    }

    // The new index now lives at dbhome (the "_tmp" path no longer exists), so the blob
    // directory has to be moved under dbhome. It must be moved before the old directory is
    // deleted, otherwise the blobs are deleted together with it.
    File oldBlobDir = new File(olddbhome, "blob");
    File newBlobDir = new File(dbhome, "blob");
    if (oldBlobDir.exists() && !oldBlobDir.renameTo(newBlobDir)) {
        throw new Exception("Could not move the blob directory into " + newBlobDir);
    }

    if (!FileUtils.deleteDir(olddbhome)) {
        throw new Exception("Could not delete the old version of the db at " + olddbhome);
    }
}

From source file:back.Indexer.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 * /*  w  w  w  .j a v  a 2  s.c  om*/
 * NOTE: This method indexes one document per input file.  This is slow.  For good
 * throughput, put multiple documents into your input file(s).  An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *  
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {

            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with an "access denied" message
                // checking if the file can be read doesn't help
                return;
            }

            try {

                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path".  Use a
                // field that is indexed (i.e. searchable), but don't tokenize 
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);

                // Add the last modified date of the file a field named "modified".
                // Use a LongField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter).  This indexes to milli-second resolution, which
                // is often too fine.  You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down the resolution you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

                // Add the contents of the file to a field named "contents".  Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case searching for special characters will fail.
                doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so 
                    // we use updateDocument instead to replace the old one matching the exact 
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }

            } finally {
                fis.close();
            }
        }
    }
}

From source file:bajavista.IndiceInvertido.java

/**
 * Builds one document from the given values and adds it to the index.
 *
 * <p>All eight values are stored. {@code text} goes into a {@code TextField}; every other value
 * goes into a {@code StringField}. Fields are added in the order of the parameters.
 *
 * @param w          writer that receives the document
 * @param idUser     value of the "idUser" field
 * @param timestamp  value of the "timestamp" field
 * @param text       value of the "text" field
 * @param objective  value of the "objective" field
 * @param subjective value of the "subjective" field
 * @param positive   value of the "positive" field
 * @param negative   value of the "negative" field
 * @param need       value of the "need" field
 * @throws IOException if the writer fails to add the document
 */
private static void agregarDoc(IndexWriter w, String idUser, String timestamp, String text, String objective,
        String subjective, String positive, String negative, String need) throws IOException {
    final Document entry = new Document();

    entry.add(new StringField("idUser", idUser, Field.Store.YES));
    entry.add(new StringField("timestamp", timestamp, Field.Store.YES));
    entry.add(new TextField("text", text, Field.Store.YES));

    // The remaining values are all plain string fields; add them in declaration order.
    final String[][] trailingFields = {
            { "objective", objective },
            { "subjective", subjective },
            { "positive", positive },
            { "negative", negative },
            { "need", need },
    };
    for (String[] nameAndValue : trailingFields) {
        entry.add(new StringField(nameAndValue[0], nameAndValue[1], Field.Store.YES));
    }

    w.addDocument(entry);
}

From source file:be.iRail.BeLaws.Indexer.java

License:Apache License

/**
 * Adds {@code file} to the index, or, when it is a directory, every readable file below it.
 *
 * <p>Unreadable entries and directories whose listing is unavailable are skipped. A file that
 * cannot be opened is skipped after the "adding" line has been printed.
 *
 * @param writer writer that receives the documents
 * @param file   file to index, or directory to walk
 * @throws IOException if the writer reports an I/O error other than a missing file
 */
private void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (!file.canRead()) {
        return;
    }

    if (!file.isDirectory()) {
        System.out.println("adding " + file);
        try {
            writer.addDocument(FileDocument.Document(file));
        } catch (FileNotFoundException ignored) {
            // at least on windows, some temporary files raise this exception with an
            // "access denied" message; checking if the file can be read doesn't help
        }
        return;
    }

    // list() yields null when an IO error occurs
    final String[] children = file.list();
    if (children == null) {
        return;
    }
    for (String child : children) {
        indexDocs(writer, new File(file, child));
    }
}

From source file:BlockBuilding.AbstractBlockBuilding.java

License:Apache License

/**
 * Writes one document per entity profile into {@code index}.
 *
 * <p>Each document stores a running number (starting at 0) under {@code DOC_ID} and one
 * {@code VALUE_LABEL} field for every non-blank, trimmed blocking key of every attribute value.
 * An {@link IOException} stops the loop and is logged at SEVERE level.
 *
 * @param index    writer that receives the documents
 * @param entities profiles to index, in order
 */
protected void indexEntities(IndexWriter index, List<EntityProfile> entities) {
    try {
        int nextDocId = 0;
        for (EntityProfile entity : entities) {
            final Document entityDoc = new Document();
            entityDoc.add(new StoredField(DOC_ID, nextDocId));
            nextDocId++;

            for (Attribute attr : entity.getAttributes()) {
                getBlockingKeys(attr.getValue()).stream()
                        .map(String::trim)
                        .filter(trimmedKey -> !trimmedKey.isEmpty())
                        .forEach(trimmedKey -> entityDoc
                                .add(new StringField(VALUE_LABEL, trimmedKey, Field.Store.YES)));
            }

            index.addDocument(entityDoc);
        }
    } catch (IOException ex) {
        LOGGER.log(Level.SEVERE, null, ex);
    }
}

From source file:BlockBuilding.AbstractIndexBasedMethod.java

License:Open Source License

/**
 * Writes one document per entity profile into {@code index} and counts the indexed keys.
 *
 * <p>Each document stores a running number (starting at 0) under {@code DOC_ID} and one
 * {@code VALUE_LABEL} field for every non-blank, trimmed blocking key of every attribute value;
 * {@code totalWords} is incremented once per field added. An {@link IOException} stops the loop
 * and its stack trace is printed.
 *
 * @param index    writer that receives the documents
 * @param entities profiles to index, in order
 */
protected void indexEntities(IndexWriter index, List<EntityProfile> entities) {
    try {
        int nextDocId = 0;
        for (EntityProfile entity : entities) {
            final Document entityDoc = new Document();
            entityDoc.add(new StoredField(DOC_ID, nextDocId));
            nextDocId++;

            for (Attribute attr : entity.getAttributes()) {
                getBlockingKeys(attr.getValue()).stream()
                        .map(String::trim)
                        .filter(trimmedKey -> !trimmedKey.isEmpty())
                        .forEach(trimmedKey -> {
                            entityDoc.add(new StringField(VALUE_LABEL, trimmedKey, Field.Store.YES));
                            totalWords++;
                        });
            }

            index.addDocument(entityDoc);
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}

From source file:BlockBuilding.AbstractTYPiMatch.java

License:Open Source License

/**
 * Writes one document per entity profile into {@code index}.
 *
 * <p>Each document stores a running number (starting at 0) under {@code DOC_ID} and one
 * {@code VALUE_LABEL} field per non-blank, trimmed token of every attribute value. Unless
 * {@code firstPass} is set, each token is suffixed with
 * {@code CLUSTER_PREFIX + entityTypes[entityCounter] + CLUSTER_SUFFIX} and {@code entityCounter}
 * advances by one per profile. An {@link IOException} stops the loop and its stack trace is printed.
 *
 * @param index    writer that receives the documents
 * @param entities profiles to index, in order
 */
@Override
protected void indexEntities(IndexWriter index, List<EntityProfile> entities) {
    try {
        int nextDocId = 0;
        for (EntityProfile entity : entities) {
            final Document entityDoc = new Document();
            entityDoc.add(new StoredField(DOC_ID, nextDocId++));

            // Only later passes tag tokens with the entity's type.
            final String typeSuffix = firstPass ? ""
                    : CLUSTER_PREFIX + entityTypes[entityCounter++] + CLUSTER_SUFFIX;

            for (Attribute attr : entity.getAttributes()) {
                for (String rawToken : getTokens(attr.getValue())) {
                    final String token = rawToken.trim();
                    if (token.isEmpty()) {
                        continue;
                    }
                    entityDoc.add(new StringField(VALUE_LABEL, token + typeSuffix, Field.Store.YES));
                }
            }

            index.addDocument(entityDoc);
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}

From source file:BlockBuilding.AttributeClusteringBlocking.java

License:Apache License

/**
 * Writes one document per entity profile into {@code index}, tagging every token with the
 * cluster of the attribute it came from.
 *
 * <p>Each document stores a running number (starting at 0) under {@code DOC_ID}. For every
 * attribute the cluster id is looked up by attribute name in {@code attributeClusters[sourceId]};
 * attributes without a cluster id are logged at WARNING level and skipped. Every non-blank,
 * trimmed token of the attribute value is added as a {@code VALUE_LABEL} field with the suffix
 * {@code CLUSTER_PREFIX + clusterId + CLUSTER_SUFFIX}. An {@link IOException} stops the loop and
 * is logged at SEVERE level.
 *
 * @param sourceId index into {@code attributeClusters}
 * @param index    writer that receives the documents
 * @param entities profiles to index, in order
 */
protected void indexEntities(int sourceId, IndexWriter index, List<EntityProfile> entities) {
    try {
        int nextDocId = 0;
        for (EntityProfile entity : entities) {
            final Document entityDoc = new Document();
            entityDoc.add(new StoredField(DOC_ID, nextDocId++));

            for (Attribute attr : entity.getAttributes()) {
                final Integer clusterId = attributeClusters[sourceId].get(attr.getName());
                if (clusterId != null) {
                    final String clusterSuffix = CLUSTER_PREFIX + clusterId + CLUSTER_SUFFIX;
                    for (String rawToken : getTokens(attr.getValue())) {
                        final String token = rawToken.trim();
                        if (!token.isEmpty()) {
                            entityDoc.add(new StringField(VALUE_LABEL, token + clusterSuffix, Field.Store.YES));
                        }
                    }
                } else {
                    LOGGER.log(Level.WARNING,
                            "No cluster id found for attribute name\t:\t{0}"
                                    + ".\nCorresponding attribute value\t:\t{1}",
                            new Object[] { attr.getName(), attr.getValue() });
                }
            }

            index.addDocument(entityDoc);
        }
    } catch (IOException ex) {
        LOGGER.log(Level.SEVERE, null, ex);
    }
}

From source file:BlockBuilding.MemoryBased.SchemaBased.AbstractSchemaBasedMethod.java

License:Open Source License

/**
 * Writes one document per entity profile into {@code index}, after printing how many
 * entities will be indexed.
 *
 * <p>Each profile is first turned into an {@code AbstractProfile}. Its document stores a running
 * number (starting at 0) under {@code DOC_ID} and one {@code VALUE_LABEL} field per non-blank,
 * trimmed key returned by {@code getBlockingKeys} for each id in {@code blockingKeys}. An
 * {@link IOException} stops the loop and its stack trace is printed.
 *
 * @param index    writer that receives the documents
 * @param entities profiles to index, in order
 */
@Override
protected void indexEntities(IndexWriter index, List<EntityProfile> entities) {
    System.out.println("Indexing " + entities.size() + " entities...");
    try {
        int nextDocId = 0;
        for (EntityProfile entity : entities) {
            final AbstractProfile schemaProfile = getAbstractProfile(entity);
            final Document entityDoc = new Document();
            entityDoc.add(new StoredField(DOC_ID, nextDocId));

            for (int keyId : blockingKeys) {
                for (String rawKey : getBlockingKeys(keyId, schemaProfile)) {
                    final String key = rawKey.trim();
                    if (key.isEmpty()) {
                        continue;
                    }
                    entityDoc.add(new StringField(VALUE_LABEL, key, Field.Store.YES));
                }
            }

            index.addDocument(entityDoc);
            nextDocId++;
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}