Usage examples for org.apache.lucene.index.IndexWriter#addDocument
public long addDocument(Iterable<? extends IndexableField> doc) throws IOException
From source file:cn.jcenterhome.web.action.CpAction.java
private List<String> getKeyWord(String text) throws IOException { List<String> keywords = new ArrayList<String>(); if (!Common.empty(text)) { Map<String, Integer> words = new HashMap<String, Integer>(); Analyzer analyzer = new IKAnalyzer(true); StringReader reader = new StringReader(text); TokenStream tokenStream = analyzer.tokenStream("*", reader); TermAttribute termAtt = (TermAttribute) tokenStream.getAttribute(TermAttribute.class); while (tokenStream.incrementToken()) { String word = termAtt.term(); if (word.length() > 1 && Common.strlen(word) > 2) { Integer count = words.get(word); if (count == null) { count = 0;//from w w w. j a v a2 s .c o m } words.put(word, count + 1); } } if (words.size() > 0) { Directory dir = null; IndexSearcher searcher = null; try { String fieldName = "text"; dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); Document doc = new Document(); doc.add(new Field(fieldName, text, Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc); writer.close(); searcher = new IndexSearcher(dir); searcher.setSimilarity(new IKSimilarity()); Set<String> keys = words.keySet(); Map<String, Float> temps = new HashMap<String, Float>(); for (String key : keys) { int count = words.get(key); Query query = IKQueryParser.parse(fieldName, key); TopDocs topDocs = searcher.search(query, 1); if (topDocs.totalHits > 0) { temps.put(key, topDocs.getMaxScore() * count); } } Entry<String, Float>[] keywordEntry = getSortedHashtableByValue(temps); for (Entry<String, Float> entry : keywordEntry) { if (keywords.size() < 5) { keywords.add(entry.getKey()); } } } catch (Exception e) { e.printStackTrace(); } finally { try { searcher.close(); } catch (IOException e) { e.printStackTrace(); } try { dir.close(); } catch (IOException e) { e.printStackTrace(); } } } } return keywords; }
From source file:cn.larry.search.book.index.IndexFiles.java
License:Apache License
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField);// w w w. j av a2 s .c o m // Add the last modified date of the file a field named "modified". // Use a LongPoint that is indexed (i.e. efficiently filterable with // PointRangeQuery). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 4 would mean // February 17, 1, 2-3 PM. doc.add(new LongPoint("modified", lastModified)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }
From source file:com.adanac.module.blog.search.LuceneHelper.java
License:Apache License
/**
 * (Re)creates the Lucene index under {@code INDEX_PATH + path} from the given rows.
 *
 * @param path     index subdirectory name, appended to INDEX_PATH
 * @param id       key in each row map whose value is the numeric document id
 * @param title    key in each row map whose value is the title text
 * @param content  key in each row map whose value is the body text
 * @param dataList rows to index; each map must contain the three keys above
 */
private static void generateIndex(String path, String id, String title, String content,
        List<Map<String, String>> dataList) {
    // FIX: try-with-resources — the original leaked the IndexWriter (and held the
    // index write lock) and the Directory whenever an exception escaped before
    // writer.close() was reached.
    try (Directory dir = FSDirectory.open(Paths.get(INDEX_PATH + path))) {
        Analyzer analyzer = new SmartChineseAnalyzer();
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
        indexWriterConfig.setOpenMode(OpenMode.CREATE); // always rebuild from scratch
        try (IndexWriter writer = new IndexWriter(dir, indexWriterConfig)) {
            for (Map<String, String> data : dataList) {
                Document document = new Document();
                Field idField = new IntField("id", Integer.valueOf(data.get(id)), Field.Store.YES);
                // Title and content are indexed together in one searchable field.
                Field indexedContentField = new TextField("indexedContent",
                        data.get(title) + SEPARATOR + data.get(content), Field.Store.YES);
                document.add(idField);
                document.add(indexedContentField);
                writer.addDocument(document);
                if (logger.isInfoEnabled()) {
                    logger.info("add index for : [" + data.get(title) + "]");
                }
            }
        }
    } catch (Exception e) {
        logger.error("add index failed ...", e);
    }
}
From source file:com.agiletec.plugins.jacms.aps.system.services.searchengine.IndexerDAO.java
License:Open Source License
/** * Aggiunge un documento nel db del motore di ricerca. * @param document Il documento da aggiungere. * @throws ApsSystemException In caso di errori in accesso al db. */// w w w . ja v a2 s . c o m private synchronized void add(Document document) throws ApsSystemException { try { IndexWriter writer = new IndexWriter(_dir, this.getAnalyzer(), false, new MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH)); writer.addDocument(document); writer.optimize(); writer.close(); } catch (IOException e) { throw new ApsSystemException("Errore nell'aggiunta di un documento", e); } }
From source file:com.aliasi.lingmed.medline.IndexMedline.java
License:Lingpipe license
private void recordFile(IndexWriter indexWriter, String fileName) throws IOException { // if (mLogger.isDebugEnabled()) // mLogger.debug("record file: " + fileName); Document doc = new Document(); Field tagField = new Field(Fields.MEDLINE_DIST_FIELD, Fields.MEDLINE_DIST_VALUE, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); doc.add(tagField);/* w w w .ja va 2 s . com*/ Field nameField = new Field(Fields.MEDLINE_FILE_FIELD, fileName, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); doc.add(nameField); indexWriter.addDocument(doc); // if (mLogger.isDebugEnabled()) // mLogger.debug("added doc: " + doc.toString()); }
From source file:com.aliasi.lingmed.medline.SearchableMedlineCodec.java
License:Lingpipe license
/**
 * Smoke test: indexes one document with two MeSH minor fields, runs a query
 * against the field, then prints the token stream the analyzer produces for a
 * sample string (term text, offsets, position increments).
 */
public static void main(String[] args) throws Exception {
    org.apache.lucene.store.RAMDirectory directory = new org.apache.lucene.store.RAMDirectory();
    MedlineCodec codec = new MedlineCodec();
    Analyzer analyzer = codec.getAnalyzer();
    org.apache.lucene.index.IndexWriterConfig iwConf = new org.apache.lucene.index.IndexWriterConfig(
            org.apache.lucene.util.Version.LUCENE_36, analyzer);
    iwConf.setOpenMode(org.apache.lucene.index.IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    org.apache.lucene.index.IndexWriter indexWriter = new org.apache.lucene.index.IndexWriter(directory,
            iwConf);
    Document doc = new Document();
    doc.add(new Field(Fields.MESH_MINOR_FIELD, "abc", Field.Store.NO, Field.Index.ANALYZED));
    doc.add(new Field(Fields.MESH_MINOR_FIELD, " xyz efg", Field.Store.NO, Field.Index.ANALYZED));
    indexWriter.addDocument(doc);
    indexWriter.close();

    org.apache.lucene.index.IndexReader reader = org.apache.lucene.index.IndexReader.open(directory);
    org.apache.lucene.search.IndexSearcher searcher = new org.apache.lucene.search.IndexSearcher(reader);
    org.apache.lucene.queryParser.QueryParser qp = new org.apache.lucene.queryParser.QueryParser(
            org.apache.lucene.util.Version.LUCENE_36, "foo", analyzer);
    org.apache.lucene.search.Query query = qp.parse(Fields.MESH_MINOR_FIELD + ":efg");
    org.apache.lucene.search.TopDocs hits = searcher.search(query, 1000);
    System.out.println("hits.length()=" + hits.scoreDocs.length);

    org.apache.lucene.analysis.TokenStream ts = analyzer.tokenStream(Fields.MESH_MINOR_FIELD,
            new java.io.StringReader("abc xyz efg"));
    org.apache.lucene.analysis.tokenattributes.CharTermAttribute terms = ts
            .addAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute.class);
    org.apache.lucene.analysis.tokenattributes.OffsetAttribute offsets = ts
            .addAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute.class);
    org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute positions = ts
            .addAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute.class);
    // FIX: the TokenStream contract calls for reset() before consumption and
    // end()/close() afterwards; reset() is a no-op for streams already positioned
    // at the start in Lucene 3.6, so this is safe.
    ts.reset();
    while (ts.incrementToken()) {
        int increment = positions.getPositionIncrement();
        int start = offsets.startOffset();
        int end = offsets.endOffset();
        String term = terms.toString();
        System.out.println("token=|" + term + "|" + " startOffset=" + start + " endOffset=" + end
                + " positionIncr=" + increment);
    }
    ts.end();
    ts.close();
    // FIX: release the searcher, reader and directory the original leaked.
    searcher.close();
    reader.close();
    directory.close();
}
From source file:com.aperigeek.dropvault.web.service.IndexService.java
License:Open Source License
/**
 * Indexes a document's metadata for the given user.
 * <p>
 * Stores the document id verbatim under field "id"; every non-null metadata
 * entry is indexed (analyzed, not stored) under its own key.
 *
 * @param username account whose index is updated
 * @param password account password, forwarded to getIndexWriter
 * @param id       unique document identifier
 * @param metadata field name to field value; null values are skipped
 * @throws IndexException if the underlying index cannot be written
 */
public void index(String username, String password, String id, Map<String, String> metadata)
        throws IndexException {
    try {
        Document document = new Document();
        document.add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
        for (Map.Entry<String, String> e : metadata.entrySet()) {
            if (e.getValue() != null) {
                document.add(new Field(e.getKey(), e.getValue(), Field.Store.NO, Field.Index.ANALYZED));
            }
        }
        IndexWriter index = getIndexWriter(username, password);
        // FIX: close the writer even when addDocument throws — the original
        // leaked the writer (and its write lock) on failure.
        try {
            index.addDocument(document);
        } finally {
            index.close();
        }
    } catch (IOException ex) {
        throw new IndexException(ex);
    }
}
From source file:com.appeligo.lucene.AddDocumentAction.java
License:Apache License
/**
 * Adds the pre-built document to the given writer.
 * The document {@code doc} is presumably a field populated when this action was
 * constructed — confirm against the enclosing AddDocumentAction class.
 *
 * @param writer the index writer to add the document to
 * @throws IOException if the writer fails to add the document
 */
public void performAction(IndexWriter writer) throws IOException { writer.addDocument(doc); }
From source file:com.appspot.socialinquirer.server.service.impl.AnalysisServiceImpl.java
License:Apache License
/**
 * Builds a frequency-sorted tag list from the term vectors of the given title
 * and body text, using a throwaway in-memory index.
 *
 * @param title document title; indexed with term vectors under "title"
 * @param text  document body (HTML allowed; tags are stripped); indexed under "body"
 * @return tags pulled from both fields, sorted by descending frequency;
 *         empty on indexing failure (the error is logged, not rethrown)
 */
@Override
public List<Tag> getTermVector(String title, String text) {
    RAMDirectory directory = null;
    IndexReader reader = null;
    Map<String, Tag> tagsMap = new HashMap<String, Tag>();
    try {
        directory = new RAMDirectory();
        IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer(Version.LUCENE_CURRENT), true,
                MaxFieldLength.UNLIMITED);
        // FIX: close the writer even if addDocument throws — the original only
        // cleaned up the reader and directory in its finally block, leaking the
        // writer on failure.
        try {
            Document doc = new Document();
            doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.add(new Field("body", stripHtmlTags(text, true), Field.Store.YES, Field.Index.ANALYZED,
                    Field.TermVector.YES));
            writer.addDocument(doc);
        } finally {
            writer.close();
        }
        reader = IndexReader.open(directory, true);
        int numDocs = reader.maxDoc();
        for (int i = 0; i < numDocs; i++) {
            TermFreqVector termFreqVector = reader.getTermFreqVector(i, "title");
            pullTags(termFreqVector, tagsMap);
            termFreqVector = reader.getTermFreqVector(i, "body");
            pullTags(termFreqVector, tagsMap);
        }
    } catch (Exception e) {
        logger.log(Level.SEVERE, "An error occured while pulling tags from text.", e);
    } finally {
        closeIndexReader(reader);
        closeRAMDirectory(directory);
    }
    ArrayList<Tag> tagsList = new ArrayList<Tag>(tagsMap.values());
    Collections.sort(tagsList, new Comparator<Tag>() {
        @Override
        public int compare(Tag o1, Tag o2) {
            // FIX: Integer.compare instead of subtraction — "o2.getFreqency() -
            // o1.getFreqency()" can overflow for extreme values.
            return Integer.compare(o2.getFreqency(), o1.getFreqency());
        }
    });
    return tagsList;
}
From source file:com.aurel.track.lucene.index.associatedFields.AbstractAssociatedFieldIndexer.java
License:Open Source License
/**
 * Reindexes all entries for this associated field from scratch.
 * <p>
 * The index is recreated (the previous one is deleted) via
 * {@code LuceneIndexer.initWriter(true, ...)}; per-document add failures are
 * logged and skipped so one bad entry does not abort the whole rebuild.
 * NOTE(review): the finally block calls {@code initWriter(false, ...)} —
 * presumably this releases/closes the writer; confirm against LuceneIndexer.
 */
@Override
public synchronized void reIndexAll() {
    IndexWriter indexWriter = null;
    try {
        LOGGER.debug("Reindexing " + getLuceneFieldName() + "s started...");
        // initializes the IndexWriter for recreating the index (deletes the previous index)
        indexWriter = LuceneIndexer.initWriter(true, getIndexWriterID());
        if (indexWriter == null) {
            LOGGER.error("IndexWriter null by indexing");
            return;
        }
        List allIndexableEntries = loadAllIndexable();
        if (allIndexableEntries != null) {
            for (Object object : allIndexableEntries) {
                Document doc = createDocument(object);
                try {
                    if (doc != null) {
                        // a failed add is logged but does not stop the rebuild
                        indexWriter.addDocument(doc);
                    }
                } catch (IOException e) {
                    LOGGER.error("Adding entry to the index failed with " + e.getMessage());
                    LOGGER.debug(ExceptionUtils.getStackTrace(e));
                }
            }
            LOGGER.debug(
                    "Reindexing " + allIndexableEntries.size() + " " + getLuceneFieldName() + "s completed.");
        }
    } catch (Exception e) {
        LOGGER.error("Reindexing failed with " + e.getMessage());
        LOGGER.debug(ExceptionUtils.getStackTrace(e));
    } finally {
        LuceneIndexer.initWriter(false, getIndexWriterID());
    }
}