Example usage for org.apache.lucene.index IndexWriter addDocument

List of usage examples for org.apache.lucene.index IndexWriter addDocument

Introduction

On this page you can find example usages of the addDocument method of org.apache.lucene.index.IndexWriter.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException 

Source Link

Document

Adds a document to this index.

Usage

From source file:com.khepry.frackhem.entities.Toxicities.java

License:Apache License

/**
 * Rebuilds the Lucene index and the facet taxonomy for toxicities from a delimited text file.
 * <p>
 * The index and taxonomy folders are emptied (or created) first. The first line of the text
 * file supplies the column headers. Every later line whose column count equals the header's
 * is indexed as one document: each column becomes a stored {@code TextField} named after its
 * header, the non-empty columns listed in {@code indexFields} are concatenated into an
 * unstored {@code "text"} field, and facet categories are derived from columns 0 to 3.
 * Lines whose column count differs from the header are skipped.
 * <p>
 * When the text file does not exist, only the "Start" message is emitted and nothing else
 * happens.
 *
 * @param textFilePath     path of the delimited text file to index
 * @param textColSeparator regular expression handed to {@link String#split(String)} to split
 *                         every line into columns
 * @throws IOException if reading the text file or writing the index/taxonomy fails
 */
public void indexViaLucene(String textFilePath, String textColSeparator) throws IOException {

    publishIndexingMessage("Start Indexing Toxicities via Lucene...");

    File textFile = new File(textFilePath);
    if (!textFile.exists()) {
        return;
    }

    File indexFolder = new File(indexFolderPath);
    resetIndexingFolder(indexFolder);

    File taxonomyFolder = new File(taxonomyFolderPath);
    resetIndexingFolder(taxonomyFolder);

    if (indexFolder.exists() && taxonomyFolder.exists()) {

        List<String> colHeaders = new ArrayList<>();
        Map<String, String> mapIndexFields = new LinkedHashMap<>();
        Map<String, String> mapStatsFields = new LinkedHashMap<>();

        for (String indexField : indexFields.split(",")) {
            mapIndexFields.put(indexField, indexField);
        }

        // each stats entry is "key" or "key:value"; a missing value defaults to the key
        for (String statField : statsFields.split(",")) {
            String[] tuples = statField.split(":");
            mapStatsFields.put(tuples[0], tuples.length > 1 ? tuples[1] : tuples[0]);
        }

        List<CategoryPath> taxonomyCategories = new ArrayList<>();
        StringBuilder sb = new StringBuilder();
        int rcdCount = 0;

        // try-with-resources releases the reader, writers, analyzer and directories even when
        // indexing fails part-way; resources are closed in reverse order of declaration.
        // NOTE(review): closing the IndexWriter on the failure path may commit the documents
        // added so far -- confirm this is acceptable (both folders are wiped on the next run).
        try (SimpleFSDirectory indexDirectory = new SimpleFSDirectory(indexFolder);
                Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
                IndexWriter indexWriter = new IndexWriter(indexDirectory,
                        new IndexWriterConfig(Version.LUCENE_44, analyzer));
                SimpleFSDirectory taxonomyDirectory = new SimpleFSDirectory(taxonomyFolder);
                TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDirectory,
                        OpenMode.CREATE);
                BufferedReader br = new BufferedReader(new FileReader(textFile))) {

            FacetFields facetFields = new FacetFields(taxonomyWriter);

            String line;
            while ((line = br.readLine()) != null) {
                rcdCount++;
                String[] pieces = line.split(textColSeparator);

                // the first record only carries the column headers
                if (rcdCount == 1) {
                    for (String colHeader : pieces) {
                        colHeaders.add(colHeader.trim());
                    }
                    continue;
                }

                // records that do not line up with the header are skipped
                if (pieces.length != colHeaders.size()) {
                    continue;
                }

                sb.setLength(0);
                Document document = new Document();
                for (int i = 0; i < pieces.length; i++) {
                    String colHeader = colHeaders.get(i);
                    String colValue = pieces[i].trim();
                    document.add(new TextField(colHeader, colValue, Store.YES));
                    if (mapIndexFields.containsKey(colHeader) && !colValue.equals("")) {
                        sb.append(colValue);
                        sb.append(" ");
                    }
                }
                document.add(new TextField("text", sb.toString().trim(), Store.NO));

                String toxCasEdfId = pieces[0].trim();
                String toxChemical = pieces[1].trim();

                // categorize recognized toxicities
                addToxicityCategories(taxonomyCategories, "toxRecognized", toxCasEdfId, toxChemical,
                        pieces[2].trim());

                // categorize suspected toxicities
                addToxicityCategories(taxonomyCategories, "toxSuspected", toxCasEdfId, toxChemical,
                        pieces[3].trim());

                // build up "stats" taxonomy categories
                // NOTE(review): mapIndexFields maps each field name to itself, so fieldValue
                // is the field name rather than this record's value -- confirm the intent.
                for (String statsKey : mapStatsFields.keySet()) {
                    if (mapIndexFields.containsKey(statsKey)) {
                        String fieldValue = mapIndexFields.get(statsKey);
                        if (!statsKey.trim().equals("") && !fieldValue.trim().equals("")) {
                            taxonomyCategories.add(new CategoryPath("Toxicities", statsKey, fieldValue));
                        }
                    }
                }

                if (taxonomyCategories.size() > 0) {
                    facetFields.addFields(document, taxonomyCategories);
                }

                indexWriter.addDocument(document);
                if (progressInterval > 0 && rcdCount % progressInterval == 0) {
                    publishIndexingMessage("Records indexed: " + rcdCount);
                }

                taxonomyCategories.clear();
            }

            publishIndexingMessage("Records indexed: " + rcdCount);

            indexWriter.commit();
            indexWriter.forceMerge(1);
            taxonomyWriter.commit();
        }
    } else {
        String message = "Lucene Index Folder: " + indexFolder + " or Lucene Taxonomy folder: " + taxonomyFolder
                + " does not exist!";
        if (outputToSystemErr) {
            System.err.println(message);
        }
    }

    publishIndexingMessage("Ended Indexing Toxicities via Lucene!");
}

/** Writes a progress message to System.out and/or the progress message queue, as configured. */
private void publishIndexingMessage(String message) throws IOException {
    if (outputToSystemOut) {
        System.out.println(message);
    }
    if (outputToMsgQueue) {
        progressMessageQueue.send(new MessageInput(message));
    }
}

/** Deletes the folder if it exists, then creates it again if it is (now) missing. */
private void resetIndexingFolder(File folder) throws IOException {
    if (folder.exists()) {
        deleteFolder(folder);
    }
    if (!folder.exists()) {
        folder.mkdir();
    }
}

/** Appends the CasEdfId, Chemical and per-toxicity facet paths under rootCategory; no-op when toxicities is empty. */
private static void addToxicityCategories(List<CategoryPath> categories, String rootCategory,
        String casEdfId, String chemical, String toxicities) {
    if (toxicities.equals("")) {
        return;
    }
    categories.add(new CategoryPath(rootCategory, "CasEdfId", casEdfId));
    // "/" is replaced in the chemical name before it is used as a category component
    categories.add(new CategoryPath(rootCategory, "Chemical", chemical.replace("/", "|")));
    // toxicity codes may be separated by spaces and/or commas
    for (String value : toxicities.replace(" ", ",").split(",")) {
        if (!value.trim().equals("")) {
            categories.add(new CategoryPath(rootCategory, "Toxicity", value));
        }
    }
}

From source file:com.knowgate.lucene.BugIndexer.java

License:Open Source License

/**
 * Add bug to index/*from ww w . j av a2  s .co  m*/
 * @param oIWrt IndexWriter
 * @param sGuid String Bug GUID
 * @param iNumber int Bug Number
 * @param sWorkArea String GUID of WorkArea to which bug belongs
 * @param sProject String GUID of project to which bug belongs
 * @param sTitle String Title
 * @param sReportedBy String Author
 * @param dtCreated Date Created
 * @param sComments String Comments
 * @param sText String Bug Description
 * @throws ClassNotFoundException
 * @throws IOException
 * @throws IllegalArgumentException
 * @throws NoSuchFieldException
 * @throws IllegalAccessException
 * @throws InstantiationException
 * @throws NullPointerException
 */
public static void addBug(IndexWriter oIWrt, String sGuid, int iNumber, String sWorkArea, String sProject,
        String sTitle, String sWriter, String sReportedBy, Date dtCreated, String sType, Short oPriority,
        Short oSeverity, String sStatus, String sComments, String sText)
        throws ClassNotFoundException, IOException, IllegalArgumentException, NoSuchFieldException,
        IllegalAccessException, InstantiationException, NullPointerException {

    Document oDoc = new Document();
    oDoc.add(new Field("workarea", sWorkArea, Field.Store.YES, Field.Index.NOT_ANALYZED));
    oDoc.add(new Field("container", sProject, Field.Store.YES, Field.Index.NOT_ANALYZED));
    oDoc.add(new Field("guid", sGuid, Field.Store.YES, Field.Index.NOT_ANALYZED));
    oDoc.add(new Field("number", String.valueOf(iNumber), Field.Store.YES, Field.Index.NOT_ANALYZED));
    oDoc.add(new Field("title", sTitle, Field.Store.YES, Field.Index.ANALYZED));
    oDoc.add(new Field("created", DateTools.dateToString(dtCreated, DateTools.Resolution.SECOND),
            Field.Store.YES, Field.Index.NOT_ANALYZED));
    oDoc.add(new Field("writer", sWriter, Field.Store.YES, Field.Index.NOT_ANALYZED));
    if (null != sStatus)
        oDoc.add(new Field("status", sStatus, Field.Store.YES, Field.Index.NOT_ANALYZED));
    if (null != sType)
        oDoc.add(new Field("type", sType, Field.Store.YES, Field.Index.NOT_ANALYZED));
    if (null != oPriority)
        oDoc.add(new Field("priority", oPriority.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
    if (null != oSeverity)
        oDoc.add(new Field("severity", oSeverity.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
    if (null != sReportedBy)
        oDoc.add(new Field("author", sReportedBy, Field.Store.YES, Field.Index.ANALYZED));
    if (null == sComments)
        oDoc.add(new Field("comments", "", Field.Store.NO, Field.Index.ANALYZED));
    else
        oDoc.add(new Field("comments", sComments, Field.Store.NO, Field.Index.ANALYZED));
    if (null == sText) {
        oDoc.add(new Field("text", "", Field.Store.NO, Field.Index.ANALYZED));
        oDoc.add(new Field("abstract", "", Field.Store.YES, Field.Index.ANALYZED));
    } else {
        oDoc.add(new Field("text", sText, Field.Store.NO, Field.Index.ANALYZED));
        if (sText.length() > 80)
            oDoc.add(new Field("abstract", sText.substring(0, 80).replace('\n', ' ').replace('\r', ' '),
                    Field.Store.YES, Field.Index.ANALYZED));
        else
            oDoc.add(new Field("abstract", sText.replace('\n', ' ').replace('\r', ' '), Field.Store.YES,
                    Field.Index.ANALYZED));
    }
    oIWrt.addDocument(oDoc);
}

From source file:com.knowgate.lucene.ContactIndexer.java

License:Open Source License

/**
 * Add contact to index
 * <p>
 * The document carries four stored fields taken from the record: "workarea", "guid" and
 * "author" (not analyzed) and "value" (analyzed).
 *
 * @param oIWrt IndexWriter the document is added to
 * @param contact ContactRecord supplying the work area, GUID, author and value
 * @throws ClassNotFoundException
 * @throws IOException
 * @throws IllegalArgumentException
 * @throws NoSuchFieldException
 * @throws IllegalAccessException
 * @throws InstantiationException
 * @throws NullPointerException
 */
public static void addDocument(IndexWriter oIWrt, ContactRecord contact)
        throws ClassNotFoundException, IOException, IllegalArgumentException, NoSuchFieldException,
        IllegalAccessException, InstantiationException, NullPointerException {

    Document oDoc = new Document();

    // Identifiers are indexed verbatim so that they can be matched exactly
    oDoc.add(new Field("workarea", contact.getWorkarea(), Field.Store.YES, Field.Index.NOT_ANALYZED));
    oDoc.add(new Field("guid", contact.getGui(), Field.Store.YES, Field.Index.NOT_ANALYZED));
    oDoc.add(new Field("author", contact.getAuthor(), Field.Store.YES, Field.Index.NOT_ANALYZED));

    // Only the value is run through the analyzer
    oDoc.add(new Field("value", contact.getValue(), Field.Store.YES, Field.Index.ANALYZED));

    oIWrt.addDocument(oDoc);
}

From source file:com.knowgate.lucene.Crawler.java

License:Open Source License

/**
 * Recursively index the HTML files found under a directory.
 * <p>
 * Sub-directories are crawled depth-first. Files whose lower-cased name ends with
 * .htm, .html, .shtml or .shtm and whose length is greater than zero are read fully and
 * added to the index as the document built by makeHTMLDocument().
 *
 * @param oIWrt IndexWriter the documents are added to
 * @param sBasePath String path of the directory to crawl
 * @param iBasePathlen int number of leading characters removed from the directory path
 * before it is passed to makeHTMLDocument()
 * @param oFileFilter RegExpFilter handed down to recursive calls; it is not otherwise
 * used by this method
 * @throws FileNotFoundException if sBasePath does not exist
 * @throws IOException if sBasePath is not a directory, cannot be listed, or a file
 * cannot be read or indexed
 */
private void crawlDir(IndexWriter oIWrt, String sBasePath, int iBasePathlen, RegExpFilter oFileFilter)
        throws IOException, FileNotFoundException {

    if (DebugFile.trace) {
        DebugFile.writeln("Begin Crawler.crawlDir(" + sBasePath + ")");
        DebugFile.incIdent();
    }

    File oBaseDir = new File(sBasePath);

    if (!oBaseDir.exists())
        throw new FileNotFoundException(sBasePath + " directory does not exist");

    if (!oBaseDir.isDirectory())
        throw new IOException(sBasePath + " is not a directory");

    File[] aFiles = oBaseDir.listFiles();

    // listFiles() returns null when the directory cannot be read
    if (null == aFiles)
        throw new IOException(sBasePath + " could not be listed");

    sBasePath += sSeparator;

    for (int f = 0; f < aFiles.length; f++) {

        File oFile = aFiles[f];

        if (oFile.isDirectory()) {

            crawlDir(oIWrt, sBasePath + oFile.getName(), iBasePathlen, oFileFilter);
        }

        else {

            String sName = oFile.getName().toLowerCase();

            if (sName.endsWith(".htm") || sName.endsWith(".html") || sName.endsWith(".shtml")
                    || sName.endsWith(".shtm")) {

                // the file length in bytes is an upper bound for its length in characters
                int iBuffer = (int) oFile.length();

                if (iBuffer > 0) {
                    char[] aBuffer = new char[iBuffer];
                    int iChars = 0;
                    FileReader oReader = new FileReader(oFile);
                    try {
                        // a single read() call is not guaranteed to fill the buffer
                        while (iChars < iBuffer) {
                            int iRead = oReader.read(aBuffer, iChars, iBuffer - iChars);
                            if (iRead < 0)
                                break;
                            iChars += iRead;
                        }
                    } finally {
                        oReader.close();
                    }

                    // keep only the characters actually read, not the unused tail of the buffer
                    String sBuffer = new String(aBuffer, 0, iChars);

                    oIWrt.addDocument(
                            makeHTMLDocument(sBasePath.substring(iBasePathlen), oFile.getName(), sBuffer));
                } // fi (iBuffer>0)
            } // fi (sName.endsWith(".htm") || sName.endsWith(".html"))
        }
    } // next

    if (DebugFile.trace) {
        DebugFile.decIdent();
        DebugFile.writeln("End Crawler.crawlDir()");
    }
}

From source file:com.knowgate.lucene.Indexer.java

License:Open Source License

/**
 * Add a document built from three field maps to the index
 * <p>
 * Map keys are the field names and are cast to String. Null values are indexed as empty
 * strings. Values other than dates are cast to String.
 *
 * @param oIWrt IndexWriter
 * @param oKeywords Map of fields that are stored and not analyzed; Date values (including
 * subclasses of java.util.Date) are converted with DateTools at second resolution
 * @param oTexts Map of fields that are stored and analyzed
 * @param oUnStored Map of fields that are analyzed but not stored
 * @throws ClassNotFoundException
 * @throws IOException
 * @throws IllegalArgumentException
 * @throws NoSuchFieldException
 * @throws IllegalAccessException
 * @throws InstantiationException
 * @throws NullPointerException
 */
public static void add(IndexWriter oIWrt, Map oKeywords, Map oTexts, Map oUnStored)
        throws ClassNotFoundException, IOException, IllegalArgumentException, NoSuchFieldException,
        IllegalAccessException, InstantiationException, NullPointerException {

    String sFieldName;
    Object oFieldValue;
    String sFieldText;
    Document oDoc = new Document();

    // *******************************************
    // Index keywords as stored untokenized fields

    for (Iterator oKeys = oKeywords.keySet().iterator(); oKeys.hasNext();) {
        sFieldName = (String) oKeys.next();
        oFieldValue = oKeywords.get(sFieldName);

        if (null == oFieldValue)
            sFieldText = "";
        else if (oFieldValue instanceof Date)
            // instanceof also accepts subclasses such as java.sql.Timestamp, which an exact
            // class name comparison would send to the String cast below
            sFieldText = DateTools.dateToString((Date) oFieldValue, DateTools.Resolution.SECOND);
        else
            sFieldText = (String) oFieldValue;

        oDoc.add(new Field(sFieldName, sFieldText, Field.Store.YES, Field.Index.NOT_ANALYZED));
    } // next

    // ******************************************************
    // Index titles, authors, etc. as stored tokenized fields

    for (Iterator oTxts = oTexts.keySet().iterator(); oTxts.hasNext();) {
        sFieldName = (String) oTxts.next();
        oFieldValue = oTexts.get(sFieldName);
        sFieldText = null == oFieldValue ? "" : (String) oFieldValue;
        oDoc.add(new Field(sFieldName, sFieldText, Field.Store.YES, Field.Index.ANALYZED));
    } // next

    // *********************************************
    // Index full texts as unstored tokenized fields

    for (Iterator oUnStor = oUnStored.keySet().iterator(); oUnStor.hasNext();) {
        sFieldName = (String) oUnStor.next();
        oFieldValue = oUnStored.get(sFieldName);
        sFieldText = null == oFieldValue ? "" : (String) oFieldValue;
        oDoc.add(new Field(sFieldName, sFieldText, Field.Store.NO, Field.Index.ANALYZED));
    } // next

    oIWrt.addDocument(oDoc);
}

From source file:com.knowgate.lucene.MailIndexer.java

License:Open Source License

/**
 * Add a single mail message to the index
 * @param oIWrt IndexWriter/*w  w w.jav  a2s.  c  om*/
 * @param sGuid String GUID of mime message to be indexed (from gu_mimemsg field of table k_mime_msgs)
 * @param dNumber BigDecimal mime message number (from pg_message field of table k_mime_msgs)
 * @param sWorkArea String GUID of WorkArea (from gu_workarea field of table k_mime_msgs)
 * @param sContainer String Name of Category (Folder) where message is stored.
 * This is nm_category field at k_categories table record corresponding to gu_category from k_mime_msgs
 * @param sSubject String Subject
 * @param sAuthor String Display name of message sender
 * @param sRecipients String Recipients list (both display name and e-mails)
 * @param dtSent Date
 * @param sComments String
 * @param oStrm InputStream Full mime message body as an InputStream (from by_content field of table k_mime_msgs)
 * @throws ClassNotFoundException
 * @throws IOException
 * @throws IllegalArgumentException
 * @throws NoSuchFieldException
 * @throws IllegalAccessException
 * @throws InstantiationException
 * @throws NullPointerException
 */
public static void addMail(IndexWriter oIWrt, String sGuid, BigDecimal dNumber, String sWorkArea,
        String sContainer, String sSubject, String sAuthor, String sRecipients, Date dtSent, String sComments,
        InputStream oStrm, int iSize) throws ClassNotFoundException, IOException, IllegalArgumentException,
        NoSuchFieldException, IllegalAccessException, InstantiationException, NullPointerException {

    if (DebugFile.trace) {
        DebugFile.writeln("Begin MailIndexer.addMail([IndexWriter], " + sGuid + ", " + dNumber + ", "
                + sWorkArea + ", " + sContainer + ", " + sSubject + ", " + sAuthor + ", " + sRecipients + ", "
                + dtSent + ", " + sComments + ", [InputStream], " + String.valueOf(iSize) + ")");
        DebugFile.incIdent();
    }

    String sText;
    String sAbstract = sGuid + "" + sSubject + "" + sAuthor + "" + oFmt.format(dtSent) + ""
            + String.valueOf(iSize) + "" + dNumber.toString();
    sSubject = Gadgets.ASCIIEncode(sSubject);
    sAuthor = Gadgets.ASCIIEncode(sAuthor);

    if (null != oStrm) {
        StringBuffer oStrBuff = new StringBuffer();
        try {
            MimeBodyPart oMsgText = new MimeBodyPart(oStrm);
            DBMimePart.parseMimePart(oStrBuff, null, sContainer, "", oMsgText, 0);
        } catch (MessagingException xcpt) {
            if (DebugFile.trace)
                DebugFile.writeln(xcpt.getClass().getName() + " " + xcpt.getMessage() + " indexing message "
                        + sGuid + " - " + sSubject);
        }
        if (oStrBuff.length() > 0) {
            if (Gadgets.indexOfIgnoreCase(oStrBuff.toString(), "<html>") >= 0) {
                Parser oPrsr = Parser.createParser(oStrBuff.toString(), null);
                StringBean oStrs = new StringBean();
                try {
                    oPrsr.visitAllNodesWith(oStrs);
                } catch (ParserException pe) {
                    if (DebugFile.trace)
                        DebugFile.decIdent();
                    throw new IOException(pe.getMessage());
                }

                if (DebugFile.trace)
                    DebugFile.writeln("Gadgets.ASCIIEncode(StringBean.getStrings())");
                sText = Gadgets.ASCIIEncode(oStrs.getStrings());
                if (DebugFile.trace)
                    DebugFile.writeln("StringBean.getStrings() done");
            } // fi (oStrBuff contains <html>)
            else {
                if (DebugFile.trace)
                    DebugFile.writeln("Gadgets.ASCIIEncode(StringBuffer.toString())");
                sText = Gadgets.ASCIIEncode(oStrBuff.toString());
                if (null == sText)
                    sText = "";
                if (DebugFile.trace)
                    DebugFile.writeln("StringBuffer.toString() done");
            }
        } else {
            sText = "";
        }
    } // fi (oStrm)
    else {
        sText = "";
    }

    Document oDoc = new Document();
    oDoc.add(new Field("workarea", sWorkArea, Field.Store.YES, Field.Index.NOT_ANALYZED));
    oDoc.add(new Field("container", sContainer, Field.Store.YES, Field.Index.NOT_ANALYZED));
    oDoc.add(new Field("guid", sGuid, Field.Store.YES, Field.Index.NOT_ANALYZED));
    oDoc.add(new Field("number", dNumber.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
    oDoc.add(new Field("created", DateTools.dateToString(dtSent, DateTools.Resolution.SECOND), Field.Store.YES,
            Field.Index.NOT_ANALYZED));
    oDoc.add(new Field("size", Gadgets.leftPad(String.valueOf(iSize), '0', 10), Field.Store.YES,
            Field.Index.NOT_ANALYZED));
    oDoc.add(new Field("title", Gadgets.ASCIIEncode(sSubject), Field.Store.YES, Field.Index.ANALYZED));
    oDoc.add(new Field("author", Gadgets.ASCIIEncode(sAuthor), Field.Store.YES, Field.Index.ANALYZED));
    oDoc.add(new Field("abstract", sAbstract, Field.Store.YES, Field.Index.ANALYZED));
    oDoc.add(new Field("recipients", sRecipients, Field.Store.YES, Field.Index.ANALYZED));
    oDoc.add(new Field("comments", sComments, Field.Store.NO, Field.Index.ANALYZED));
    oDoc.add(new Field("text", sText, Field.Store.NO, Field.Index.ANALYZED));

    if (DebugFile.trace)
        DebugFile.writeln("IndexWriter.addDocument([Document])");

    oIWrt.addDocument(oDoc);

    if (DebugFile.trace) {
        DebugFile.writeln("End MailIndexer.addMail()");
        DebugFile.decIdent();
    }
}

From source file:com.knowgate.lucene.NewsMessageIndexer.java

License:Open Source License

/**
 * Add a news message to the index
 * <p>
 * A null title or author is indexed as an empty string. A null text yields empty "text" and
 * "abstract" fields; otherwise the ASCII encoded, lower-cased text is indexed without being
 * stored and the first 80 characters of the original text are stored as the abstract.
 *
 * @param oIWrt IndexWriter
 * @param sGuid String Message GUID, may not be null
 * @param sThread String value indexed, not analyzed, as the "thread" field
 * @param sWorkArea String Message WorkArea, may not be null
 * @param sContainer String Message Container, may not be null
 * @param sTitle String Title
 * @param sAuthor String Author
 * @param dtCreated Date Message Creation Date, may not be null
 * @param sText String Message text
 * @throws NullPointerException if sGuid, sWorkArea, sContainer or dtCreated is null
 * @throws IOException
 */
public static void addNewsMessage(IndexWriter oIWrt, String sGuid, String sThread, String sWorkArea,
        String sContainer, String sTitle, String sAuthor, Date dtCreated, String sText)
        throws ClassNotFoundException, IOException, IllegalArgumentException, NoSuchFieldException,
        IllegalAccessException, InstantiationException, NullPointerException {

    // Reject messages lacking any of the mandatory values
    if (sGuid == null) {
        throw new NullPointerException("NewsMessageIndexer.addNewsMessage() Message GUID may not be null");
    }
    if (sWorkArea == null) {
        throw new NullPointerException("NewsMessageIndexer.addNewsMessage() Message WorkArea may not be null");
    }
    if (sContainer == null) {
        throw new NullPointerException("NewsMessageIndexer.addNewsMessage() Message Container may not be null");
    }
    if (dtCreated == null) {
        throw new NullPointerException(
                "NewsMessageIndexer.addNewsMessage() Message Creation Date may not be null");
    }

    String sShownTitle = sTitle;
    if (sShownTitle == null) {
        sShownTitle = "";
    }

    String sShownAuthor = sAuthor;
    if (sShownAuthor == null) {
        sShownAuthor = "";
    }

    Document oDoc = new Document();
    oDoc.add(new Field("workarea", sWorkArea, Field.Store.YES, Field.Index.NOT_ANALYZED));
    oDoc.add(new Field("container", sContainer, Field.Store.YES, Field.Index.NOT_ANALYZED));
    oDoc.add(new Field("guid", sGuid, Field.Store.YES, Field.Index.NOT_ANALYZED));
    oDoc.add(new Field("thread", sThread, Field.Store.YES, Field.Index.NOT_ANALYZED));

    String sCreated = DateTools.dateToString(dtCreated, DateTools.Resolution.SECOND);
    oDoc.add(new Field("created", sCreated, Field.Store.YES, Field.Index.NOT_ANALYZED));

    oDoc.add(new Field("title", sShownTitle, Field.Store.YES, Field.Index.ANALYZED));
    oDoc.add(new Field("author", sShownAuthor, Field.Store.YES, Field.Index.ANALYZED));

    // Searchable text and stored abstract both default to empty for a message without text
    String sIndexedText = "";
    String sAbstract = "";
    if (sText != null) {
        sIndexedText = Gadgets.ASCIIEncode(sText).toLowerCase();
        sAbstract = sText.length() > 80 ? sText.substring(0, 80) : sText;
    }
    oDoc.add(new Field("text", sIndexedText, Field.Store.NO, Field.Index.ANALYZED));
    oDoc.add(new Field("abstract", sAbstract, Field.Store.YES, Field.Index.ANALYZED));

    oIWrt.addDocument(oDoc);
}

From source file:com.knowledgetree.indexer.IndexerManager.java

/**
 * This adds a lucene document
 * <p>
 * An IndexWriter is opened on the existing index for this single document and is always
 * closed again, even when adding the document fails. An IOException is logged and not
 * rethrown. The index is reopened afterwards in every case.
 *
 * @param documentId id of the document, indexed in the form produced by longToString()
 * @param content text stored and tokenized as the "Content" field
 * @param discussion text stored and tokenized as the "Discussion" field
 * @param title text stored and tokenized as the "Title" field
 * @param version text stored untokenized as the "Version" field
 * @throws Exception
 */
private void addLuceneDocument(int documentId, String content, String discussion, String title, String version)
        throws Exception {
    // create the lucene document

    Document document = new Document();
    document.add(new Field("DocumentID", IndexerManager.longToString(documentId), Field.Store.YES,
            Field.Index.TOKENIZED));
    document.add(new Field("Content", content, Field.Store.YES, Field.Index.TOKENIZED));
    document.add(new Field("Discussion", discussion, Field.Store.YES, Field.Index.TOKENIZED));
    document.add(new Field("Title", title, Field.Store.YES, Field.Index.TOKENIZED));
    document.add(new Field("Version", version, Field.Store.YES, Field.Index.UN_TOKENIZED));

    // add the document to lucene index
    try {
        this.logger.debug("Opening index writer: documentid=" + documentId);
        this.logger.debug("DocumentID: " + IndexerManager.longToString(documentId));
        this.logger.debug("Content: " + content);
        this.logger.debug("Discussion: " + discussion);
        IndexWriter writer = new IndexWriter(this.indexDirectory, this.analyzer, false);
        try {
            writer.addDocument(document);
        } finally {
            // always close the writer so that a failed add does not leave it open
            writer.close();
        }
        this.logger.debug("Closing index writer: documentid=" + documentId);
    } catch (IOException ex) {
        // pass the exception itself so that the stack trace is not lost
        logger.error(
                "Problem indexing document: documentid=" + documentId + " with exception: " + ex.getMessage(),
                ex);
    }

    this.reopenIndex();
}

From source file:com.krawler.esp.indexer.KrawlerIndexCreator.java

License:Open Source License

/**
 * Adds one document, built from the given fields, to the index at this.indexPath.
 * <p>
 * Every field is stored and tokenized. The index is created when no "segments" file is
 * found in the index path and appended to otherwise.
 * NOTE(review): confirm that "segments" is the file name written by the Lucene version in
 * use; if it never exists, every call re-creates the index.
 *
 * @param DocFields name/value pairs of the fields of the document
 * @return 1 when the document was added, 0 when opening, writing or closing the index failed
 */
public int CreateIndex(ArrayList<DocumentFields> DocFields) {
    Document doc = new Document();

    for (DocumentFields tempfield : DocFields) {
        doc.add(new Field(tempfield.GetFieldName(), tempfield.GetFieldValue(), Field.Store.YES,
                Field.Index.TOKENIZED));
    }

    try {
        boolean createNew = !new File(this.indexPath + "/segments").exists();

        IndexWriter indWriter = new IndexWriter(this.indexPath, this.KWLAnalyzer, createNew);
        try {
            indWriter.addDocument(doc);
        } finally {
            // always close the writer so that a failed add does not leave it open
            indWriter.close();
        }
    } catch (Exception ex) {
        // Failures are reported to the caller only through the return code.
        // NOTE(review): the exception is discarded; consider logging it.
        return 0;
    }

    return 1;
}

From source file:com.krawler.luceneSearchService.LuceneSearchImpl.java

License:Open Source License

/**
 * Adds the given documents to the index at indexPath and optimizes it.
 * <p>
 * The index is created when indexPath does not exist and appended to otherwise. The writer
 * is always closed, even when adding a document or optimizing fails.
 *
 * @param lucendDocs documents to add
 * @param indexPath path of the index
 * @return 1 on success, 0 when any step failed (the exception is logged at SEVERE level)
 */
@Override
public int writeIndex(List<Document> lucendDocs, String indexPath) {
    try {
        boolean createNew = !new File(indexPath).exists();
        IndexWriter indWriter = new IndexWriter(indexPath, this.KWLAnalyzer, createNew);
        try {
            for (Document luceneDoc : lucendDocs) {
                indWriter.addDocument(luceneDoc);
            }

            indWriter.optimize();
        } finally {
            // always close the writer so that a failure does not leave it open
            indWriter.close();
        }
    } catch (Exception ex) {
        Logger.getLogger(LuceneSearchImpl.class.getName()).log(Level.SEVERE, null, ex);
        return 0;
    }
    return 1;
}