Usage examples for org.apache.lucene.index.IndexWriter.addDocument
public long addDocument(Iterable<? extends IndexableField> doc) throws IOException
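For orientation before the real-world excerpts below, here is a minimal, self-contained sketch of the call (not taken from any of the source files listed here): it assumes Lucene 6+ on the classpath, and the class name, index path "example-index", and field names are illustrative.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class AddDocumentSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical index location; any writable directory works.
        try (Directory dir = FSDirectory.open(Paths.get("example-index"));
                IndexWriter writer = new IndexWriter(dir,
                        new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            // StringField: indexed as a single token, stored for retrieval.
            doc.add(new StringField("id", "1", Field.Store.YES));
            // TextField: tokenized full-text content, not stored.
            doc.add(new TextField("contents", "hello lucene", Field.Store.NO));
            // addDocument returns a sequence number (a long) in Lucene 6+.
            long seqNo = writer.addDocument(doc);
            writer.commit();
        }
    }
}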
From source file: axiom.db.utils.LuceneManipulator.java
License: Open Source License

public void compress(String dbDir) throws Exception {
    System.setProperty("org.apache.lucene.FSDirectory.class",
            "org.apache.lucene.store.TransFSDirectory");
    File dbhome = new File(dbDir);
    String url = getUrl(dbhome);
    FSDirectory indexDir = FSDirectory.getDirectory(dbhome, false);
    if (indexDir instanceof TransFSDirectory) {
        FSDirectory.setDisableLocks(true);
        TransFSDirectory d = (TransFSDirectory) indexDir;
        d.setDriverClass(DRIVER_CLASS);
        d.setUrl(url);
        d.setUser(null);
        d.setPassword(null);
    }
    File ndbhome = new File(dbhome.getParentFile(), dbhome.getName() + "_tmp");
    File olddbhome = new File(dbhome.getParentFile(), dbhome.getName() + "_old");
    FSDirectory nindexDir = FSDirectory.getDirectory(ndbhome, true);
    if (nindexDir instanceof TransFSDirectory) {
        FSDirectory.setDisableLocks(true);
        TransFSDirectory d = (TransFSDirectory) nindexDir;
        d.setDriverClass(DRIVER_CLASS);
        d.setUrl(url);
        d.setUser(null);
        d.setPassword(null);
    }
    IndexSearcher searcher = null;
    IndexWriter writer = null;
    LuceneManager lmgr = null;
    try {
        searcher = new IndexSearcher(indexDir);
        PerFieldAnalyzerWrapper a = LuceneManager.buildAnalyzer();
        writer = IndexWriterManager.getWriter(nindexDir, a, true);
        final int numDocs = searcher.getIndexReader().numDocs();
        HashSet deldocs = new HashSet();
        HashMap infos = new HashMap();
        for (int i = 0; i < numDocs; i++) {
            Document doc = searcher.doc(i);
            String delprop = doc.get(DeletedInfos.DELETED);
            final String id = doc.get(LuceneManager.ID) + DeletedInfos.KEY_SEPERATOR
                    + doc.get(LuceneManager.LAYER_OF_SAVE);
            if (delprop != null && "true".equals(delprop)) {
                deldocs.add(id);
            } else {
                Object v;
                if ((v = infos.get(id)) == null) {
                    infos.put(id, new Integer(i));
                } else {
                    final String lmod = doc.get(LuceneManager.LASTMODIFIED);
                    final String lmod_prev = searcher.doc(((Integer) v).intValue())
                            .get("_lastmodified");
                    if (lmod_prev == null || (lmod != null && lmod.compareTo(lmod_prev) > 0)) {
                        infos.put(id, new Integer(i));
                    }
                }
            }
        }
        ArrayList listOfMaps = new ArrayList();
        for (int i = 0; i < numDocs; i++) {
            Document doc = searcher.doc(i);
            String delprop = doc.get(DeletedInfos.DELETED);
            String layerStr = doc.get(LuceneManager.LAYER_OF_SAVE);
            int layer = -1;
            try {
                layer = Integer.parseInt(layerStr);
            } catch (Exception ex) {
                layer = -1;
            }
            final String id = doc.get(LuceneManager.ID) + DeletedInfos.KEY_SEPERATOR
                    + doc.get(LuceneManager.LAYER_OF_SAVE);
            if (delprop != null && "true".equals(delprop)) {
                continue;
            } else if (id != null && deldocs.contains(id)) {
                continue;
            }
            Integer idx = (Integer) infos.get(id);
            if (idx != null && i != idx.intValue()) {
                continue;
            }
            Document ndoc = convertDocument(doc);
            if (ndoc != null) {
                writer.addDocument(ndoc);
            }
        }
    } catch (Exception ex) {
        ex.printStackTrace();
        throw new RuntimeException(ex);
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (Exception ex) {
            }
        }
        if (lmgr != null) {
            lmgr.shutdown();
            lmgr = null;
        }
        indexDir.close();
        SegmentInfos sinfos = IndexObjectsFactory.getFSSegmentInfos(indexDir);
        sinfos.clear();
        IndexObjectsFactory.removeDeletedInfos(indexDir);
    }
    Connection conn = null;
    boolean exceptionOccured = false;
    try {
        if (writer != null) {
            conn = DriverManager.getConnection(url);
            conn.setAutoCommit(false);
            writer.close();
            writer.flushCache();
            LuceneManager.commitSegments(null, conn, dbhome, writer.getDirectory());
            writer.finalizeTrans();
        }
    } catch (Exception ex) {
        ex.printStackTrace();
        exceptionOccured = true;
        throw new RuntimeException(ex);
    } finally {
        if (conn != null) {
            try {
                if (!conn.getAutoCommit()) {
                    if (!exceptionOccured) {
                        conn.commit();
                    } else {
                        conn.rollback();
                    }
                }
                conn.close();
            } catch (Exception ex) {
                ex.printStackTrace();
            }
            conn = null;
        }
        nindexDir.close();
        SegmentInfos sinfos = IndexObjectsFactory.getFSSegmentInfos(nindexDir);
        sinfos.clear();
        IndexObjectsFactory.removeDeletedInfos(nindexDir);
    }
    File[] files = dbhome.listFiles();
    for (int i = 0; i < files.length; i++) {
        if (!files[i].isDirectory()) {
            files[i].delete();
        }
    }
    files = ndbhome.listFiles();
    for (int i = 0; i < files.length; i++) {
        if (!files[i].isDirectory()) {
            File nfile = new File(dbhome, files[i].getName());
            files[i].renameTo(nfile);
        }
    }
    if (!FileUtils.deleteDir(ndbhome)) {
        throw new Exception("Could not delete " + ndbhome);
    }
}
From source file: axiom.objectmodel.dom.convert.LuceneConvertor.java
License: Open Source License

public void convert(Application app, File dbhome) throws Exception {
    FSDirectory indexDir = FSDirectory.getDirectory(dbhome, false);
    if (indexDir instanceof TransFSDirectory) {
        FSDirectory.setDisableLocks(true);
        TransFSDirectory d = (TransFSDirectory) indexDir;
        TransSource source = app.getTransSource();
        d.setDriverClass(source.getDriverClass());
        d.setUrl(source.getUrl());
        d.setUser(source.getUser());
        d.setPassword(source.getPassword());
    }
    File ndbhome = new File(dbhome.getParentFile(), dbhome.getName() + "_tmp");
    File olddbhome = new File(dbhome.getParentFile(), dbhome.getName() + "_old");
    FSDirectory nindexDir = FSDirectory.getDirectory(ndbhome, true);
    if (nindexDir instanceof TransFSDirectory) {
        FSDirectory.setDisableLocks(true);
        TransFSDirectory d = (TransFSDirectory) nindexDir;
        TransSource source = app.getTransSource();
        d.setDriverClass(source.getDriverClass());
        d.setUrl(source.getUrl());
        d.setUser(source.getUser());
        d.setPassword(source.getPassword());
    }
    IndexSearcher searcher = null;
    IndexWriter writer = null;
    LuceneManager lmgr = null;
    try {
        searcher = new IndexSearcher(indexDir);
        PerFieldAnalyzerWrapper a = LuceneManager.buildAnalyzer();
        writer = IndexWriterManager.getWriter(nindexDir, a, true);
        final int numDocs = searcher.getIndexReader().numDocs();
        HashSet deldocs = new HashSet();
        HashMap infos = new HashMap();
        for (int i = 0; i < numDocs; i++) {
            Document doc = searcher.doc(i);
            String delprop = doc.get(DeletedInfos.DELETED);
            String layerStr = doc.get(LuceneManager.LAYER_OF_SAVE);
            int layer = -1;
            try {
                layer = Integer.parseInt(layerStr);
            } catch (Exception ex) {
                layer = -1;
            }
            final String id = doc.get(LuceneManager.ID) + DeletedInfos.KEY_SEPERATOR
                    + doc.get(LuceneManager.LAYER_OF_SAVE);
            if (delprop != null && "true".equals(delprop) /* && layer == DbKey.LIVE_LAYER */) {
                deldocs.add(id);
            } else {
                Object v;
                if ((v = infos.get(id)) == null) {
                    infos.put(id, new Integer(i));
                } else {
                    final String lmod = doc.get(LuceneManager.LASTMODIFIED);
                    final String lmod_prev = searcher.doc(((Integer) v).intValue())
                            .get("_lastmodified");
                    if (lmod_prev == null || (lmod != null && lmod.compareTo(lmod_prev) > 0)) {
                        infos.put(id, new Integer(i));
                    }
                }
            }
        }
        ArrayList listOfMaps = new ArrayList();
        for (int i = 0; i < numDocs; i++) {
            Document doc = searcher.doc(i);
            String delprop = doc.get(DeletedInfos.DELETED);
            String layerStr = doc.get(LuceneManager.LAYER_OF_SAVE);
            int layer = -1;
            try {
                layer = Integer.parseInt(layerStr);
            } catch (Exception ex) {
                layer = -1;
            }
            final String id = doc.get(LuceneManager.ID) + DeletedInfos.KEY_SEPERATOR
                    + doc.get(LuceneManager.LAYER_OF_SAVE);
            if (delprop != null && "true".equals(delprop)) {
                continue;
            } else if (id != null && deldocs.contains(id) /* && layer == DbKey.LIVE_LAYER */) {
                continue;
            }
            Integer idx = (Integer) infos.get(id);
            if (idx != null && i != idx.intValue()) {
                continue;
            }
            Document ndoc = convertDocument(doc);
            if (this.recordNodes) {
                listOfMaps.add(LuceneManager.luceneDocumentToMap(doc));
            }
            if (ndoc != null) {
                writer.addDocument(ndoc);
            }
        }
        if (this.recordNodes) {
            lmgr = new LuceneManager(this.app, false, true);
            this.allNodes = new HashMap();
            final int size = listOfMaps.size();
            for (int i = 0; i < size; i++) {
                HashMap m = (HashMap) listOfMaps.get(i);
                INode n = lmgr.mapToNode(m);
                this.allNodes.put(n.getID(), getPath(n));
                n = null;
            }
        }
    } catch (Exception ex) {
        ex.printStackTrace();
        throw new RuntimeException(ex);
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (Exception ex) {
                app.logError(ErrorReporter.errorMsg(this.getClass(), "convert"), ex);
            }
        }
        if (lmgr != null) {
            lmgr.shutdown();
            lmgr = null;
        }
        indexDir.close();
        SegmentInfos sinfos = IndexObjectsFactory.getFSSegmentInfos(indexDir);
        sinfos.clear();
        IndexObjectsFactory.removeDeletedInfos(indexDir);
    }
    Connection conn = null;
    boolean exceptionOccured = false;
    try {
        if (writer != null) {
            TransSource ts = app.getTransSource();
            conn = ts.getConnection();
            DatabaseMetaData dmd = conn.getMetaData();
            ResultSet rs = dmd.getColumns(null, null, "Lucene", "version");
            if (!rs.next()) {
                final String alterTbl = "ALTER TABLE Lucene ADD version INT NOT NULL DEFAULT 1";
                PreparedStatement pstmt = null;
                try {
                    pstmt = conn.prepareStatement(alterTbl);
                    pstmt.execute();
                } catch (SQLException sqle) {
                    app.logError(ErrorReporter.errorMsg(this.getClass(), "convert"), sqle);
                } finally {
                    if (pstmt != null) {
                        pstmt.close();
                        pstmt = null;
                    }
                }
            }
            rs.close();
            rs = null;
            writer.close();
            writer.flushCache(); // TODO: writer.writeSegmentsFile();
            LuceneManager.commitSegments(conn, app, writer.getDirectory());
            writer.finalizeTrans();
            this.updateSQL(conn);
        }
    } catch (Exception ex) {
        ex.printStackTrace();
        exceptionOccured = true;
        throw new RuntimeException(ex);
    } finally {
        if (conn != null) {
            try {
                if (!conn.getAutoCommit()) {
                    if (!exceptionOccured) {
                        conn.commit();
                    } else {
                        conn.rollback();
                    }
                }
                conn.close();
            } catch (Exception ex) {
                app.logError(ErrorReporter.errorMsg(this.getClass(), "convert"), ex);
            }
            conn = null;
        }
        nindexDir.close();
        SegmentInfos sinfos = IndexObjectsFactory.getFSSegmentInfos(nindexDir);
        sinfos.clear();
        IndexObjectsFactory.removeDeletedInfos(nindexDir);
    }
    if (!dbhome.renameTo(olddbhome)) {
        throw new Exception("Could not move the old version of the db into " + olddbhome);
    }
    if (!ndbhome.renameTo(dbhome)) {
        throw new Exception("Could not move the newer version of the db into " + dbhome);
    }
    File oldBlobDir = new File(olddbhome, "blob");
    File newBlobDir = new File(ndbhome, "blob");
    oldBlobDir.renameTo(newBlobDir);
    if (!FileUtils.deleteDir(olddbhome)) {
        throw new Exception("Could not delete the old version of the db at " + olddbhome);
    }
}
From source file: back.Indexer.java
License: Apache License

/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 *
 * NOTE: This method indexes one document per input file. This is slow. For good
 * throughput, put multiple documents into your input file(s). An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on Windows, some temporary files raise this exception with an
                // "access denied" message; checking if the file can be read doesn't help
                return;
            }
            try {
                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path". Use a field that is
                // indexed (i.e. searchable), but don't tokenize the field into separate
                // words and don't index term frequency or positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);

                // Add the last modified date of the file as a field named "modified". Use a
                // LongField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter). This indexes to millisecond resolution, which is
                // often too fine. You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down to the resolution you require.
                // For example, the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

                // Add the contents of the file to a field named "contents". Specify a
                // Reader, so that the text of the file is tokenized and indexed, but not
                // stored. Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case, searching for special characters will fail.
                doc.add(new TextField("contents",
                        new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed), so
                    // we use updateDocument instead to replace the old one matching the exact
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }
            } finally {
                fis.close();
            }
        }
    }
}
From source file: bajavista.IndiceInvertido.java

private static void agregarDoc(IndexWriter w, String idUser, String timestamp, String text,
        String objective, String subjective, String positive, String negative, String need)
        throws IOException {
    Document doc = new Document();
    doc.add(new StringField("idUser", idUser, Field.Store.YES));
    doc.add(new StringField("timestamp", timestamp, Field.Store.YES));
    doc.add(new TextField("text", text, Field.Store.YES));
    doc.add(new StringField("objective", objective, Field.Store.YES));
    doc.add(new StringField("subjective", subjective, Field.Store.YES));
    doc.add(new StringField("positive", positive, Field.Store.YES));
    doc.add(new StringField("negative", negative, Field.Store.YES));
    doc.add(new StringField("need", need, Field.Store.YES));
    w.addDocument(doc);
}
From source file: be.iRail.BeLaws.Indexer.java
License: Apache License

private void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            System.out.println("adding " + file);
            try {
                writer.addDocument(FileDocument.Document(file));
            } catch (FileNotFoundException fnfe) {
                // at least on Windows, some temporary files raise this exception with an
                // "access denied" message; checking if the file can be read doesn't help
            }
        }
    }
}
From source file: BlockBuilding.AbstractBlockBuilding.java
License: Apache License

protected void indexEntities(IndexWriter index, List<EntityProfile> entities) {
    try {
        int counter = 0;
        for (EntityProfile profile : entities) {
            Document doc = new Document();
            doc.add(new StoredField(DOC_ID, counter++));
            for (Attribute attribute : profile.getAttributes()) {
                getBlockingKeys(attribute.getValue()).stream()
                        .filter((key) -> (0 < key.trim().length()))
                        .forEach((key) -> {
                            doc.add(new StringField(VALUE_LABEL, key.trim(), Field.Store.YES));
                        });
            }
            index.addDocument(doc);
        }
    } catch (IOException ex) {
        LOGGER.log(Level.SEVERE, null, ex);
    }
}
From source file: BlockBuilding.AbstractIndexBasedMethod.java
License: Open Source License

protected void indexEntities(IndexWriter index, List<EntityProfile> entities) {
    try {
        int counter = 0;
        for (EntityProfile profile : entities) {
            Document doc = new Document();
            doc.add(new StoredField(DOC_ID, counter++));
            for (Attribute attribute : profile.getAttributes()) {
                getBlockingKeys(attribute.getValue()).stream()
                        .filter((key) -> (0 < key.trim().length()))
                        .forEach((key) -> {
                            doc.add(new StringField(VALUE_LABEL, key.trim(), Field.Store.YES));
                            totalWords++;
                        });
            }
            index.addDocument(doc);
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
From source file: BlockBuilding.AbstractTYPiMatch.java
License: Open Source License

@Override
protected void indexEntities(IndexWriter index, List<EntityProfile> entities) {
    try {
        int counter = 0;
        for (EntityProfile profile : entities) {
            Document doc = new Document();
            doc.add(new StoredField(DOC_ID, counter++));
            String entitySuffix = "";
            if (!firstPass) {
                entitySuffix = CLUSTER_PREFIX + entityTypes[entityCounter++] + CLUSTER_SUFFIX;
            }
            for (Attribute attribute : profile.getAttributes()) {
                for (String token : getTokens(attribute.getValue())) {
                    if (0 < token.trim().length()) {
                        doc.add(new StringField(VALUE_LABEL, token.trim() + entitySuffix,
                                Field.Store.YES));
                    }
                }
            }
            index.addDocument(doc);
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
From source file: BlockBuilding.AttributeClusteringBlocking.java
License: Apache License

protected void indexEntities(int sourceId, IndexWriter index, List<EntityProfile> entities) {
    try {
        int counter = 0;
        for (EntityProfile profile : entities) {
            Document doc = new Document();
            doc.add(new StoredField(DOC_ID, counter++));
            for (Attribute attribute : profile.getAttributes()) {
                Integer clusterId = attributeClusters[sourceId].get(attribute.getName());
                if (clusterId == null) {
                    LOGGER.log(Level.WARNING,
                            "No cluster id found for attribute name\t:\t{0}"
                                    + ".\nCorresponding attribute value\t:\t{1}",
                            new Object[] { attribute.getName(), attribute.getValue() });
                    continue;
                }
                String clusterSuffix = CLUSTER_PREFIX + clusterId + CLUSTER_SUFFIX;
                for (String token : getTokens(attribute.getValue())) {
                    if (0 < token.trim().length()) {
                        doc.add(new StringField(VALUE_LABEL, token.trim() + clusterSuffix,
                                Field.Store.YES));
                    }
                }
            }
            index.addDocument(doc);
        }
    } catch (IOException ex) {
        LOGGER.log(Level.SEVERE, null, ex);
    }
}
From source file: BlockBuilding.MemoryBased.SchemaBased.AbstractSchemaBasedMethod.java
License: Open Source License

@Override
protected void indexEntities(IndexWriter index, List<EntityProfile> entities) {
    System.out.println("Indexing " + entities.size() + " entities...");
    try {
        int counter = 0;
        for (EntityProfile profile : entities) {
            AbstractProfile aProfile = getAbstractProfile(profile);
            Document doc = new Document();
            doc.add(new StoredField(DOC_ID, counter));
            for (int keyId : blockingKeys) {
                for (String key : getBlockingKeys(keyId, aProfile)) {
                    if (0 < key.trim().length()) {
                        doc.add(new StringField(VALUE_LABEL, key.trim(), Field.Store.YES));
                    }
                }
            }
            index.addDocument(doc);
            counter++;
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}