Usage examples for org.apache.lucene.index.IndexWriter#addDocument
public long addDocument(Iterable<? extends IndexableField> doc) throws IOException
From source file:cn.jcenterhome.web.action.CpAction.java
private List<String> getKeyWord(String text) throws IOException { List<String> keywords = new ArrayList<String>(); if (!Common.empty(text)) { Map<String, Integer> words = new HashMap<String, Integer>(); Analyzer analyzer = new IKAnalyzer(true); StringReader reader = new StringReader(text); TokenStream tokenStream = analyzer.tokenStream("*", reader); TermAttribute termAtt = (TermAttribute) tokenStream.getAttribute(TermAttribute.class); while (tokenStream.incrementToken()) { String word = termAtt.term(); if (word.length() > 1 && Common.strlen(word) > 2) { Integer count = words.get(word); if (count == null) { count = 0;//from w w w. j a v a2 s .c o m } words.put(word, count + 1); } } if (words.size() > 0) { Directory dir = null; IndexSearcher searcher = null; try { String fieldName = "text"; dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); Document doc = new Document(); doc.add(new Field(fieldName, text, Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc); writer.close(); searcher = new IndexSearcher(dir); searcher.setSimilarity(new IKSimilarity()); Set<String> keys = words.keySet(); Map<String, Float> temps = new HashMap<String, Float>(); for (String key : keys) { int count = words.get(key); Query query = IKQueryParser.parse(fieldName, key); TopDocs topDocs = searcher.search(query, 1); if (topDocs.totalHits > 0) { temps.put(key, topDocs.getMaxScore() * count); } } Entry<String, Float>[] keywordEntry = getSortedHashtableByValue(temps); for (Entry<String, Float> entry : keywordEntry) { if (keywords.size() < 5) { keywords.add(entry.getKey()); } } } catch (Exception e) { e.printStackTrace(); } finally { try { searcher.close(); } catch (IOException e) { e.printStackTrace(); } try { dir.close(); } catch (IOException e) { e.printStackTrace(); } } } } return keywords; }
From source file:cn.larry.search.book.index.IndexFiles.java
License:Apache License
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField);// w w w. j av a2 s .c o m // Add the last modified date of the file a field named "modified". // Use a LongPoint that is indexed (i.e. efficiently filterable with // PointRangeQuery). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 4 would mean // February 17, 1, 2-3 PM. doc.add(new LongPoint("modified", lastModified)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }
From source file:com.adanac.module.blog.search.LuceneHelper.java
License:Apache License
/**
 * (Re)creates the Lucene index under {@code INDEX_PATH + path} from the given rows.
 *
 * @param path     index subdirectory name, appended to INDEX_PATH
 * @param id       key in each row map whose value is the numeric document id
 * @param title    key in each row map whose value is the title text
 * @param content  key in each row map whose value is the body text
 * @param dataList rows to index; each map must contain the three keys above
 */
private static void generateIndex(String path, String id, String title, String content,
        List<Map<String, String>> dataList) {
    // FIX: try-with-resources — the original leaked the IndexWriter (and held the
    // index write lock) and the Directory whenever an exception escaped before
    // writer.close() was reached.
    try (Directory dir = FSDirectory.open(Paths.get(INDEX_PATH + path))) {
        Analyzer analyzer = new SmartChineseAnalyzer();
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
        indexWriterConfig.setOpenMode(OpenMode.CREATE); // always rebuild from scratch
        try (IndexWriter writer = new IndexWriter(dir, indexWriterConfig)) {
            for (Map<String, String> data : dataList) {
                Document document = new Document();
                Field idField = new IntField("id", Integer.valueOf(data.get(id)), Field.Store.YES);
                // Title and content are indexed together in one searchable field.
                Field indexedContentField = new TextField("indexedContent",
                        data.get(title) + SEPARATOR + data.get(content), Field.Store.YES);
                document.add(idField);
                document.add(indexedContentField);
                writer.addDocument(document);
                if (logger.isInfoEnabled()) {
                    logger.info("add index for : [" + data.get(title) + "]");
                }
            }
        }
    } catch (Exception e) {
        logger.error("add index failed ...", e);
    }
}
From source file:com.agiletec.plugins.jacms.aps.system.services.searchengine.IndexerDAO.java
License:Open Source License
/** * Aggiunge un documento nel db del motore di ricerca. * @param document Il documento da aggiungere. * @throws ApsSystemException In caso di errori in accesso al db. */// w w w . ja v a2 s . c o m private synchronized void add(Document document) throws ApsSystemException { try { IndexWriter writer = new IndexWriter(_dir, this.getAnalyzer(), false, new MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH)); writer.addDocument(document); writer.optimize(); writer.close(); } catch (IOException e) { throw new ApsSystemException("Errore nell'aggiunta di un documento", e); } }
From source file:com.aliasi.lingmed.medline.IndexMedline.java
License:Lingpipe license
private void recordFile(IndexWriter indexWriter, String fileName) throws IOException { // if (mLogger.isDebugEnabled()) // mLogger.debug("record file: " + fileName); Document doc = new Document(); Field tagField = new Field(Fields.MEDLINE_DIST_FIELD, Fields.MEDLINE_DIST_VALUE, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); doc.add(tagField);/* w w w .ja va 2 s . com*/ Field nameField = new Field(Fields.MEDLINE_FILE_FIELD, fileName, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); doc.add(nameField); indexWriter.addDocument(doc); // if (mLogger.isDebugEnabled()) // mLogger.debug("added doc: " + doc.toString()); }
From source file:com.aliasi.lingmed.medline.SearchableMedlineCodec.java
License:Lingpipe license
/**
 * Smoke test: indexes one document with two MeSH minor fields, runs a query
 * against the field, then prints the token stream the analyzer produces for a
 * sample string (term text, offsets, position increments).
 */
public static void main(String[] args) throws Exception {
    org.apache.lucene.store.RAMDirectory directory = new org.apache.lucene.store.RAMDirectory();
    MedlineCodec codec = new MedlineCodec();
    Analyzer analyzer = codec.getAnalyzer();
    org.apache.lucene.index.IndexWriterConfig iwConf = new org.apache.lucene.index.IndexWriterConfig(
            org.apache.lucene.util.Version.LUCENE_36, analyzer);
    iwConf.setOpenMode(org.apache.lucene.index.IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    org.apache.lucene.index.IndexWriter indexWriter = new org.apache.lucene.index.IndexWriter(directory,
            iwConf);
    Document doc = new Document();
    doc.add(new Field(Fields.MESH_MINOR_FIELD, "abc", Field.Store.NO, Field.Index.ANALYZED));
    doc.add(new Field(Fields.MESH_MINOR_FIELD, " xyz efg", Field.Store.NO, Field.Index.ANALYZED));
    indexWriter.addDocument(doc);
    indexWriter.close();

    org.apache.lucene.index.IndexReader reader = org.apache.lucene.index.IndexReader.open(directory);
    org.apache.lucene.search.IndexSearcher searcher = new org.apache.lucene.search.IndexSearcher(reader);
    org.apache.lucene.queryParser.QueryParser qp = new org.apache.lucene.queryParser.QueryParser(
            org.apache.lucene.util.Version.LUCENE_36, "foo", analyzer);
    org.apache.lucene.search.Query query = qp.parse(Fields.MESH_MINOR_FIELD + ":efg");
    org.apache.lucene.search.TopDocs hits = searcher.search(query, 1000);
    System.out.println("hits.length()=" + hits.scoreDocs.length);

    org.apache.lucene.analysis.TokenStream ts = analyzer.tokenStream(Fields.MESH_MINOR_FIELD,
            new java.io.StringReader("abc xyz efg"));
    org.apache.lucene.analysis.tokenattributes.CharTermAttribute terms = ts
            .addAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute.class);
    org.apache.lucene.analysis.tokenattributes.OffsetAttribute offsets = ts
            .addAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute.class);
    org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute positions = ts
            .addAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute.class);
    // FIX: the TokenStream contract calls for reset() before consumption and
    // end()/close() afterwards; reset() is a no-op for streams already positioned
    // at the start in Lucene 3.6, so this is safe.
    ts.reset();
    while (ts.incrementToken()) {
        int increment = positions.getPositionIncrement();
        int start = offsets.startOffset();
        int end = offsets.endOffset();
        String term = terms.toString();
        System.out.println("token=|" + term + "|" + " startOffset=" + start + " endOffset=" + end
                + " positionIncr=" + increment);
    }
    ts.end();
    ts.close();
    // FIX: release the searcher, reader and directory the original leaked.
    searcher.close();
    reader.close();
    directory.close();
}
From source file:com.aperigeek.dropvault.web.service.IndexService.java
License:Open Source License
/**
 * Indexes a document's metadata for the given user.
 * <p>
 * Stores the document id verbatim under field "id"; every non-null metadata
 * entry is indexed (analyzed, not stored) under its own key.
 *
 * @param username account whose index is updated
 * @param password account password, forwarded to getIndexWriter
 * @param id       unique document identifier
 * @param metadata field name to field value; null values are skipped
 * @throws IndexException if the underlying index cannot be written
 */
public void index(String username, String password, String id, Map<String, String> metadata)
        throws IndexException {
    try {
        Document document = new Document();
        document.add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
        for (Map.Entry<String, String> e : metadata.entrySet()) {
            if (e.getValue() != null) {
                document.add(new Field(e.getKey(), e.getValue(), Field.Store.NO, Field.Index.ANALYZED));
            }
        }
        IndexWriter index = getIndexWriter(username, password);
        // FIX: close the writer even when addDocument throws — the original
        // leaked the writer (and its write lock) on failure.
        try {
            index.addDocument(document);
        } finally {
            index.close();
        }
    } catch (IOException ex) {
        throw new IndexException(ex);
    }
}
From source file:com.appeligo.lucene.AddDocumentAction.java
License:Apache License
/**
 * Adds the pre-built document to the given writer.
 * The document {@code doc} is presumably a field populated when this action was
 * constructed — confirm against the enclosing AddDocumentAction class.
 *
 * @param writer the index writer to add the document to
 * @throws IOException if the writer fails to add the document
 */
public void performAction(IndexWriter writer) throws IOException { writer.addDocument(doc); }
From source file:com.appspot.socialinquirer.server.service.impl.AnalysisServiceImpl.java
License:Apache License
/**
 * Builds a frequency-sorted tag list from the term vectors of the given title
 * and body text, using a throwaway in-memory index.
 *
 * @param title document title; indexed with term vectors under "title"
 * @param text  document body (HTML allowed; tags are stripped); indexed under "body"
 * @return tags pulled from both fields, sorted by descending frequency;
 *         empty on indexing failure (the error is logged, not rethrown)
 */
@Override
public List<Tag> getTermVector(String title, String text) {
    RAMDirectory directory = null;
    IndexReader reader = null;
    Map<String, Tag> tagsMap = new HashMap<String, Tag>();
    try {
        directory = new RAMDirectory();
        IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer(Version.LUCENE_CURRENT), true,
                MaxFieldLength.UNLIMITED);
        // FIX: close the writer even if addDocument throws — the original only
        // cleaned up the reader and directory in its finally block, leaking the
        // writer on failure.
        try {
            Document doc = new Document();
            doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.add(new Field("body", stripHtmlTags(text, true), Field.Store.YES, Field.Index.ANALYZED,
                    Field.TermVector.YES));
            writer.addDocument(doc);
        } finally {
            writer.close();
        }
        reader = IndexReader.open(directory, true);
        int numDocs = reader.maxDoc();
        for (int i = 0; i < numDocs; i++) {
            TermFreqVector termFreqVector = reader.getTermFreqVector(i, "title");
            pullTags(termFreqVector, tagsMap);
            termFreqVector = reader.getTermFreqVector(i, "body");
            pullTags(termFreqVector, tagsMap);
        }
    } catch (Exception e) {
        logger.log(Level.SEVERE, "An error occured while pulling tags from text.", e);
    } finally {
        closeIndexReader(reader);
        closeRAMDirectory(directory);
    }
    ArrayList<Tag> tagsList = new ArrayList<Tag>(tagsMap.values());
    Collections.sort(tagsList, new Comparator<Tag>() {
        @Override
        public int compare(Tag o1, Tag o2) {
            // FIX: Integer.compare instead of subtraction — "o2.getFreqency() -
            // o1.getFreqency()" can overflow for extreme values.
            return Integer.compare(o2.getFreqency(), o1.getFreqency());
        }
    });
    return tagsList;
}
From source file:com.aurel.track.lucene.index.associatedFields.AbstractAssociatedFieldIndexer.java
License:Open Source License
/**
 * Reindexes all entries for this associated field from scratch.
 * <p>
 * The index is recreated (the previous one is deleted) via
 * {@code LuceneIndexer.initWriter(true, ...)}; per-document add failures are
 * logged and skipped so one bad entry does not abort the whole rebuild.
 * NOTE(review): the finally block calls {@code initWriter(false, ...)} —
 * presumably this releases/closes the writer; confirm against LuceneIndexer.
 */
@Override
public synchronized void reIndexAll() {
    IndexWriter indexWriter = null;
    try {
        LOGGER.debug("Reindexing " + getLuceneFieldName() + "s started...");
        // initializes the IndexWriter for recreating the index (deletes the previous index)
        indexWriter = LuceneIndexer.initWriter(true, getIndexWriterID());
        if (indexWriter == null) {
            LOGGER.error("IndexWriter null by indexing");
            return;
        }
        List allIndexableEntries = loadAllIndexable();
        if (allIndexableEntries != null) {
            for (Object object : allIndexableEntries) {
                Document doc = createDocument(object);
                try {
                    if (doc != null) {
                        // a failed add is logged but does not stop the rebuild
                        indexWriter.addDocument(doc);
                    }
                } catch (IOException e) {
                    LOGGER.error("Adding entry to the index failed with " + e.getMessage());
                    LOGGER.debug(ExceptionUtils.getStackTrace(e));
                }
            }
            LOGGER.debug(
                    "Reindexing " + allIndexableEntries.size() + " " + getLuceneFieldName() + "s completed.");
        }
    } catch (Exception e) {
        LOGGER.error("Reindexing failed with " + e.getMessage());
        LOGGER.debug(ExceptionUtils.getStackTrace(e));
    } finally {
        LuceneIndexer.initWriter(false, getIndexWriterID());
    }
}