Example usage for org.apache.lucene.index IndexWriter addDocument

List of usage examples for org.apache.lucene.index IndexWriter addDocument

Introduction

This page collects example usages of the addDocument method of org.apache.lucene.index.IndexWriter.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException 

Source Link

Document

Adds a document to this index.

Usage

From source file:com.pjaol.search.test.UnitTests.TestCartesian.java

License:Apache License

/**
 * Builds one document for a named point and adds it to the index.
 *
 * The document stores the name, the latitude and longitude encoded with
 * {@code NumberUtils.double2sortableStr}, a constant "metafile" marker
 * field, and one tier-box field for every plotter in {@code ctps}.
 *
 * @param writer index writer that receives the document
 * @param name   value of the "name" field
 * @param lat    latitude of the point
 * @param lng    longitude of the point
 * @throws IOException if the writer fails to add the document
 */
private void addPoint(IndexWriter writer, String name, double lat, double lng) throws IOException {
    Document point = new Document();
    point.add(new Field("name", name, Field.Store.YES, Field.Index.TOKENIZED));

    // Latitude / longitude are stored as sortable strings, not tokenized.
    String sortableLat = NumberUtils.double2sortableStr(lat);
    point.add(new Field(latField, sortableLat, Field.Store.YES, Field.Index.UN_TOKENIZED));
    String sortableLng = NumberUtils.double2sortableStr(lng);
    point.add(new Field(lngField, sortableLng, Field.Store.YES, Field.Index.UN_TOKENIZED));

    // Constant field shared by all documents, so a single term matches everything.
    point.add(new Field("metafile", "doc", Field.Store.YES, Field.Index.TOKENIZED));

    // One field per cartesian tier, holding the id of the box the point falls in.
    for (CartesianTierPlotter plotter : ctps) {
        String tierField = plotter.getTierFieldName();
        String boxId = NumberUtils.double2sortableStr(plotter.getTierBoxId(lat, lng));
        point.add(new Field(tierField, boxId, Field.Store.YES, Field.Index.NO_NORMS));
    }

    writer.addDocument(point);
}

From source file:com.pjaol.search.test.UnitTests.TestDistance.java

License:Apache License

/**
 * Builds one document for a named point and adds it to the index.
 *
 * The document stores the name, the latitude and longitude encoded with
 * {@code NumberUtils.double2sortableStr}, and a constant "metafile"
 * marker field.
 *
 * @param writer index writer that receives the document
 * @param name   value of the "name" field
 * @param lat    latitude of the point
 * @param lng    longitude of the point
 * @throws IOException if the writer fails to add the document
 */
private void addPoint(IndexWriter writer, String name, double lat, double lng) throws IOException {
    Document point = new Document();
    point.add(new Field("name", name, Field.Store.YES, Field.Index.TOKENIZED));

    // Latitude / longitude are stored as sortable strings, not tokenized.
    String sortableLat = NumberUtils.double2sortableStr(lat);
    point.add(new Field(latField, sortableLat, Field.Store.YES, Field.Index.UN_TOKENIZED));
    String sortableLng = NumberUtils.double2sortableStr(lng);
    point.add(new Field(lngField, sortableLng, Field.Store.YES, Field.Index.UN_TOKENIZED));

    // Constant field shared by all documents, so a single term matches everything.
    point.add(new Field("metafile", "doc", Field.Store.YES, Field.Index.TOKENIZED));

    writer.addDocument(point);
}

From source file:com.plug.Plug_8_5_2.java

License:Apache License

/**
 * Rebuilds the on-disk Lucene index of every termbase returned by
 * {@code TermbaseHandler.SQL}.
 *
 * Termbases owned by {@code LuceneConstants.SUPER_COMPANY_ID} and termbases
 * for which {@code isIndexedBefore} reports an earlier run are skipped. For
 * each remaining termbase the old index directory is deleted and recreated,
 * every "fuzzy" index of its definition is built in a {@link RAMDirectory}
 * and then merged into the disk index, and finally a tag file is written.
 *
 * Side effects: reassigns the fields {@code m_analyzer}, {@code m_directory}
 * and {@code m_fsDir}.
 *
 * @param dbServer database access used to load termbases and their terms
 * @param companys lookup keyed by company id; the value is used as the
 *                 parent directory name (presumably the company name)
 * @throws Exception if a database query or an index operation fails
 */
private void reindexTermbase(DbServer dbServer, HashMap<String, String> companys) throws Exception {
    log.info("Start upgrading Lucene index for termbase");

    TermbaseHandler h = new TermbaseHandler();
    List<Termbase> tbs = dbServer.getDbUtil().query(TermbaseHandler.SQL, h);
    m_analyzer = new NgramAnalyzer(3);

    for (Termbase tb : tbs) {
        if (tb.getCOMPANYID().equals(LuceneConstants.SUPER_COMPANY_ID)) {
            continue;
        }

        String cname = companys.get(tb.getCOMPANYID());
        File termDir = new File(fileStorageDir, cname + "/TB-" + tb.getTB_NAME());
        // check re-indexed
        if (isIndexedBefore(termDir, tb.getTB_NAME())) {
            logAlreadyIndex(tb.getTB_NAME());
            continue;
        }

        showMsg(cname, tb.getTB_NAME(), false);

        // 1 delete old term base indexes
        logDeleteFile(termDir.getAbsolutePath());
        deleteFile(termDir.getAbsolutePath());
        // 2 create new empty dir
        termDir.mkdirs();

        Definition dif = new Definition(tb.getTB_DEFINITION());
        List<Index> indexs = dif.getIndexes();

        for (Index index : indexs) {
            // Only fuzzy indexes are rebuilt. Checking this first avoids opening
            // (and never closing) a RAM writer for indexes that are skipped.
            if (index == null || !"fuzzy".equalsIgnoreCase(index.getType())) {
                continue;
            }

            String folder = index.getLanguageName() + "-" + index.getLocale() + "-TERM";
            File indexFolder = new File(termDir, folder);
            m_directory = indexFolder.getAbsolutePath();
            m_fsDir = new SimpleFSDirectory(indexFolder);

            String sql = TermHandler.generateSQL(tb.getTBID(), index.getLanguageName());
            TermHandler termH = new TermHandler();
            List<Document> docs = dbServer.getDbUtil().query(sql, termH);
            boolean hasDocs = docs != null && !docs.isEmpty();

            RAMDirectory ramdir = new RAMDirectory();
            IndexWriter ramIndexWriter = null;
            IndexWriter diskwriter = null;
            try {
                // 3 write index into ram
                IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_44, m_analyzer);
                config.setOpenMode(OpenMode.CREATE_OR_APPEND);
                ramIndexWriter = new IndexWriter(ramdir, config);

                if (hasDocs) {
                    for (Document doc : docs) {
                        ramIndexWriter.addDocument(doc);
                    }
                    // A single commit is enough: the RAM index is only read
                    // once, by addIndexes below.
                    ramIndexWriter.commit();
                }

                // 4 write index from ram into disk
                diskwriter = getIndexWriter(true);
                diskwriter.commit();
                if (hasDocs) {
                    Directory[] ds = new Directory[] { ramdir };
                    diskwriter.addIndexes(ds);
                    diskwriter.commit();
                }
            } finally {
                // 5 close index writers, also when indexing failed half-way
                IOUtils.closeWhileHandlingException(ramIndexWriter);
                IOUtils.closeWhileHandlingException(diskwriter);
                IOUtils.closeWhileHandlingException(ramdir);
            }
        }

        writeTagFile(termDir, tb.getTB_NAME());
    }

    log.info("End upgrading Lucene index for termbase");
}

From source file:com.plug.Plug_8_5_2.java

License:Apache License

/**
 * Runs {@code selectSQL} and indexes every returned segment into a
 * per-locale Lucene index below {@code tmDir}.
 *
 * A new analyzer and a new disk writer are created for the first row and
 * whenever a row's locale differs from the previous row's locale, so one
 * writer is open at a time. Rows whose {@code segment_string} is null or
 * empty are skipped. When {@code isTM3} is set, the locales of the other
 * tuvs of the same tu are read from {@code tuvTable} and passed to the
 * document; when additionally {@code fuzzyTable} is non-null, trigram
 * fingerprints of the segment are inserted into that table.
 *
 * Side effects: reassigns the fields {@code m_analyzer}, {@code m_directory}
 * and {@code m_fsDir}, and closes {@code m_fsDir} before returning.
 *
 * @param dbServer   database access; also used to release JDBC resources
 * @param locales    locale lookup keyed by the locale id as a string
 * @param tmDir      parent directory of the per-locale index folders
 * @param tmId       tm id passed to every indexed document
 * @param selectSQL  query producing the rows to index; reads the columns
 *                   locale_id, tuv_id, segment_string, source_locale_id,
 *                   type, format and tu_id
 * @param isTM3      whether TM3-specific handling is applied
 * @param tm3Id      tm id used in the TM3 target-locale query
 * @param tuvTable   table queried for target locales (TM3 only)
 * @param fuzzyTable table receiving fingerprints, or null to skip that step
 * @param tm3Type    compared against {@code TM3TmType} ids to pick the insert layout
 * @throws SQLException if a database operation fails
 * @throws IOException  if an index operation fails
 * @throws Exception    for any other failure raised by the helpers
 */
private void queryAndIndexTmdata(DbServer dbServer, HashMap<String, GlobalSightLocale> locales, File tmDir,
        long tmId, String selectSQL, boolean isTM3, String tm3Id, String tuvTable, String fuzzyTable,
        int tm3Type) throws SQLException, IOException, Exception {
    log.info("SQL : " + selectSQL);
    Connection conn = dbServer.getDbUtil().getConnection();
    PreparedStatement preSta = null;
    ResultSet rs = null;
    IndexWriter diskwriter = null;
    m_analyzer = null;
    GlobalSightLocale lastLocale = null;
    try {
        // Acquired inside the try so the connection is released even when
        // preparing or executing the query fails.
        preSta = conn.prepareStatement(selectSQL);
        rs = preSta.executeQuery();

        while (rs.next()) {
            long locale_id = rs.getLong("locale_id");
            GlobalSightLocale locale = locales.get(locale_id + "");

            // First row, or the locale changed: switch analyzer and writer.
            boolean createNew = m_analyzer == null || !locale.equals(lastLocale);

            if (createNew) {
                m_analyzer = new GsAnalyzer(locale);
                if (isTM3) {
                    m_analyzer = new GsPerFieldAnalyzer(locale);
                }

                lastLocale = locale;

                if (diskwriter != null) {
                    diskwriter.commit();
                    // Close the writer before the directory it writes to.
                    IOUtils.closeWhileHandlingException(diskwriter);
                    IOUtils.closeWhileHandlingException(m_fsDir);
                    // Forget the closed writer so the finally block does not
                    // commit it again if opening the next one fails.
                    diskwriter = null;
                }

                File indexFolder = new File(tmDir, locale.toString());
                m_directory = indexFolder.getAbsolutePath();
                m_fsDir = new SimpleFSDirectory(indexFolder);
                diskwriter = getIndexWriter(true);

                log.info("Create new IndexWriter for dir: " + m_directory);
            }

            long tuvId = rs.getLong("tuv_id");
            String segment = rs.getString("segment_string");
            // ignore segment_clob ?
            if (segment == null || segment.length() == 0) {
                continue;
            }

            segment = LuceneUtil.normalizeTuvData(segment, locale);
            GlobalSightLocale srcLocale = locales.get(rs.getString("source_locale_id"));
            String type = rs.getString("type");
            String format = rs.getString("format");
            long tuId = rs.getLong("tu_id");
            boolean isSource = srcLocale.equals(locale);

            Set<String> targetLocales = null;
            if (isTM3) {
                // Locales of all other tuvs that belong to the same tu.
                targetLocales = new HashSet<String>();
                PreparedStatement psSelectLocales = null;
                ResultSet rsSelectLocales = null;
                try {
                    String sql = "select localeId from " + tuvTable + " where tmId=" + tm3Id + " and tuId="
                            + tuId + " and id<>" + tuvId;
                    psSelectLocales = conn.prepareStatement(sql);
                    rsSelectLocales = psSelectLocales.executeQuery();

                    while (rsSelectLocales.next()) {
                        long targetlocale_id = rsSelectLocales.getLong("localeId");
                        GlobalSightLocale targetlocale = locales.get(targetlocale_id + "");
                        targetLocales.add(targetlocale.toString());
                    }
                } finally {
                    // Result set first, then the statement that produced it.
                    dbServer.getDbUtil().closeResultSet(rsSelectLocales);
                    dbServer.getDbUtil().closeStatement(psSelectLocales);
                }
            }

            TuvDocument tuvdoc = new TuvDocument(segment, tuvId, tuId, tmId, isSource, targetLocales,
                    m_analyzer);
            Document doc = tuvdoc.getDocument();
            diskwriter.addDocument(doc);

            if (isTM3 && fuzzyTable != null) {
                // Token fingerprints framed by a BOUNDARY marker on both ends.
                List<String> tokens = LuceneUtil.createTm3Tokens(segment, locale);
                List<Long> fingerprints = new ArrayList<Long>();
                fingerprints.add(BOUNDARY);
                for (String tok : tokens) {
                    fingerprints.add(Fingerprint.fromString(tok));
                }
                fingerprints.add(BOUNDARY);

                // One trigram value per window of three consecutive fingerprints.
                List<Long> tset = new ArrayList<Long>();
                for (int i = 0; i + 2 < fingerprints.size(); i++) {
                    Trigram t = new Trigram(fingerprints.get(i), fingerprints.get(i + 1),
                            fingerprints.get(i + 2));
                    tset.add(t.getValue());
                }

                // add index into database
                PreparedStatement ps2 = null;
                try {
                    // Keys already batched for this row; a set keeps the
                    // duplicate check constant-time.
                    Set<String> keys = new HashSet<String>();
                    String sql = null;
                    StringBuilder sb = new StringBuilder("INSERT INTO ").append(fuzzyTable);

                    if (tm3Type == TM3TmType.MULTILINGUAL_SHARED.getId()) {
                        sb.append(" (fingerprint, tuvId, tuId, localeId, tuvCount, isSource) ")
                                .append("VALUES (?, ?, ?, ?, ?, ?)");
                        sql = sb.toString();

                        ps2 = conn.prepareStatement(sql, Statement.NO_GENERATED_KEYS);
                        int tuvCount = tset.size();
                        for (Long fp : tset) {
                            String key = fp + "-" + tuvCount + "-" + locale_id + "-" + (isSource ? "1" : "0")
                                    + "-" + tuvId;

                            // add() returns false when the key was batched before
                            if (!keys.add(key)) {
                                continue;
                            }

                            ps2.setObject(1, fp);
                            ps2.setObject(2, tuvId);
                            ps2.setObject(3, tuId);
                            ps2.setObject(4, locale_id);
                            ps2.setObject(5, tuvCount);
                            ps2.setObject(6, isSource);

                            ps2.addBatch();
                        }
                    } else if (tm3Type == TM3TmType.BILINGUAL.getId()) {
                        sb.append(" (fingerprint, tuvId, tuId, tuvCount, isSource) ")
                                .append("VALUES (?, ?, ?, ?, ?)");
                        sql = sb.toString();

                        ps2 = conn.prepareStatement(sql, Statement.NO_GENERATED_KEYS);
                        int tuvCount = tset.size();
                        for (Long fp : tset) {
                            String key = fp + "-" + tuvCount + "-" + (isSource ? "1" : "0") + "-" + tuvId;

                            // add() returns false when the key was batched before
                            if (!keys.add(key)) {
                                continue;
                            }

                            ps2.setObject(1, fp);
                            ps2.setObject(2, tuvId);
                            ps2.setObject(3, tuId);
                            ps2.setObject(4, tuvCount);
                            ps2.setObject(5, isSource);

                            ps2.addBatch();
                        }
                    }

                    // ps2 stays null for any other tm3Type: nothing to insert.
                    if (ps2 != null) {
                        ps2.executeBatch();
                    }
                } finally {
                    dbServer.getDbUtil().closeStatement(ps2);
                }
            }
        }
    } finally {
        try {
            if (diskwriter != null) {
                diskwriter.commit();
            }
        } finally {
            // Runs even when the final commit fails. Writer before directory,
            // and JDBC resources in reverse order of acquisition.
            IOUtils.closeWhileHandlingException(diskwriter);
            IOUtils.closeWhileHandlingException(m_fsDir);

            if (rs != null) {
                dbServer.getDbUtil().closeResultSet(rs);
            }
            if (preSta != null) {
                dbServer.getDbUtil().closeStatement(preSta);
            }
            dbServer.getDbUtil().closeConn(conn);
        }
    }
}

From source file:com.ponysdk.sample.client.page.addon.SelectizeAddon.java

License:Apache License

/**
 * Creates the addon on a new input element, fills an in-memory Lucene index
 * with sample documents and runs a sample fuzzy query against it.
 *
 * The query results are only printed to {@code System.err}. Failures while
 * indexing or searching are printed and do not abort construction.
 */
public SelectizeAddon() {
    super(Element.newInput());
    setTerminalHandler(this);

    final Analyzer analyzer = new StandardAnalyzer();
    final Directory directory = new RAMDirectory();
    final IndexWriterConfig config = new IndexWriterConfig(analyzer);

    // try-with-resources closes the writer even when adding a document fails.
    try (IndexWriter writer = new IndexWriter(directory, config)) {
        final Document doc = new Document();
        final String text = "Test de ouf";

        // The id is stored for retrieval only; it is not indexed.
        final FieldType fieldType = new FieldType();
        fieldType.setIndexOptions(IndexOptions.NONE);
        fieldType.setStored(true);
        fieldType.setTokenized(false);
        doc.add(new Field("id", "12", fieldType));
        doc.add(new Field("fieldname", text, TextField.TYPE_STORED));

        writer.addDocument(doc);

        addAssetsType(writer);
        addTenor(writer);
        addClients(writer);
        addSide(writer);
    } catch (final IOException e1) {
        e1.printStackTrace();
    }

    try {
        // Now search the index. The reader is deliberately not closed here:
        // isearcher is built on top of it and is kept after this constructor.
        final DirectoryReader ireader = DirectoryReader.open(directory);
        isearcher = new IndexSearcher(ireader);

        final Term term = new Term("fieldname", "indesfed");
        final Query query = new FuzzyQuery(term);

        final ScoreDoc[] hits = isearcher.search(query, 1000).scoreDocs;
        // Iterate through the results:
        for (final ScoreDoc hit : hits) {
            System.err.println("Score : " + hit.score);
            final Document hitDoc = isearcher.doc(hit.doc);
            System.err.println("Found document" + hitDoc.getField("fieldname").stringValue());
        }
    } catch (final Exception exception) {
        exception.printStackTrace();
    }

    // <input type="text" id="input-tags3" class="demo-default"
    // value="science,biology,chemistry,physics">
}

From source file:com.ponysdk.sample.client.page.addon.SelectizeAddon.java

License:Apache License

/**
 * Adds the two trade-side documents ("sell" and "buy") to the index.
 *
 * Each document carries a stored, non-indexed id, the long and the short
 * label under "fieldname", the type {@code Type.SIDE} and the description
 * "side".
 *
 * @param writer index writer that receives the documents
 * @throws IOException if the writer fails to add a document
 */
private void addSide(final IndexWriter writer) throws IOException {
    // { id, long label, short label }
    final String[][] sides = { { "sell", "Sell", "S" }, { "buy", "Buy", "B" } };

    for (final String[] side : sides) {
        // Stored for retrieval only; not indexed.
        final FieldType idType = new FieldType();
        idType.setStored(true);
        idType.setTokenized(false);
        idType.setIndexOptions(IndexOptions.NONE);

        final Document sideDoc = new Document();
        sideDoc.add(new Field("id", side[0], idType));
        sideDoc.add(new Field("fieldname", side[1], TextField.TYPE_STORED));
        sideDoc.add(new Field("fieldname", side[2], TextField.TYPE_STORED));
        sideDoc.add(new Field("type", Type.SIDE.name(), TextField.TYPE_STORED));
        sideDoc.add(new Field("desc", "side", TextField.TYPE_STORED));
        writer.addDocument(sideDoc);
    }
}

From source file:com.ponysdk.sample.client.page.addon.SelectizeAddon.java

License:Apache License

/**
 * Adds the single sample client document to the index.
 *
 * The document carries a stored, non-indexed id plus the login, the display
 * name under "fieldname", the description "client" and the type
 * {@code Type.CLIENT}.
 *
 * @param writer index writer that receives the document
 * @throws IOException if the writer fails to add the document
 */
private void addClients(final IndexWriter writer) throws IOException {
    // Stored for retrieval only; not indexed.
    final FieldType idType = new FieldType();
    idType.setStored(true);
    idType.setTokenized(false);
    idType.setIndexOptions(IndexOptions.NONE);

    final Document client = new Document();
    client.add(new Field("id", "pt", idType));
    client.add(new Field("login", "p.task", TextField.TYPE_STORED));
    client.add(new Field("fieldname", "Peter Task", TextField.TYPE_STORED));
    client.add(new Field("desc", "client", TextField.TYPE_STORED));
    client.add(new Field("type", Type.CLIENT.name(), TextField.TYPE_STORED));

    writer.addDocument(client);
}

From source file:com.ponysdk.sample.client.page.addon.SelectizeAddon.java

License:Apache License

/**
 * Adds one document per tenor ("Y1" to "Y8") to the index.
 *
 * The tenor is stored as a non-indexed id and, under "fieldname", as an
 * untokenized value indexed with {@code IndexOptions.DOCS}. Each document
 * also carries the description "tenor" and the type {@code Type.TENOR}.
 *
 * @param writer index writer that receives the documents
 * @throws IOException if the writer fails to add a document
 */
private void addTenor(final IndexWriter writer) throws IOException {
    final String[] tenors = { "Y1", "Y2", "Y3", "Y4", "Y5", "Y6", "Y7", "Y8" };

    for (final String tenor : tenors) {
        // Stored for retrieval only; not indexed.
        final FieldType idType = new FieldType();
        idType.setStored(true);
        idType.setTokenized(false);
        idType.setIndexOptions(IndexOptions.NONE);

        // Stored and indexed as a single untokenized term.
        final FieldType nameType = new FieldType();
        nameType.setStored(true);
        nameType.setTokenized(false);
        nameType.setIndexOptions(IndexOptions.DOCS);

        final Document tenorDoc = new Document();
        tenorDoc.add(new Field("id", tenor, idType));
        tenorDoc.add(new Field("fieldname", tenor, nameType));
        tenorDoc.add(new Field("desc", "tenor", TextField.TYPE_STORED));
        tenorDoc.add(new Field("type", Type.TENOR.name(), TextField.TYPE_STORED));

        writer.addDocument(tenorDoc);
    }
}

From source file:com.ponysdk.sample.client.page.addon.SelectizeAddon.java

License:Apache License

/**
 * Adds the two asset-class documents ("Asset SWAP" and "Single IRS") to the
 * index.
 *
 * Each document carries a stored, non-indexed id, the long and the short
 * label under "fieldname", the description "asset class" and the type
 * {@code Type.CLASS}.
 *
 * @param iwriter index writer that receives the documents
 * @throws IOException if the writer fails to add a document
 */
private void addAssetsType(final IndexWriter iwriter) throws IOException {
    // { id, long label, short label }
    final String[][] assetClasses = { { "-1", "Asset SWAP", "SWAP" }, { "-2", "Single IRS", "IRS" } };

    for (final String[] assetClass : assetClasses) {
        // Stored for retrieval only; not indexed.
        final FieldType idType = new FieldType();
        idType.setStored(true);
        idType.setTokenized(false);
        idType.setIndexOptions(IndexOptions.NONE);

        final Document assetDoc = new Document();
        assetDoc.add(new Field("id", assetClass[0], idType));
        assetDoc.add(new Field("fieldname", assetClass[1], TextField.TYPE_STORED));
        assetDoc.add(new Field("fieldname", assetClass[2], TextField.TYPE_STORED));
        assetDoc.add(new Field("desc", "asset class", TextField.TYPE_STORED));
        assetDoc.add(new Field("type", Type.CLASS.name(), TextField.TYPE_STORED));
        iwriter.addDocument(assetDoc);
    }
}

From source file:com.redhat.satellite.search.index.IndexManager.java

License:Open Source License

/**
 * Adds a document to an index/*from w  w  w.j  ava 2  s.co  m*/
 *
 * @param indexName index to use
 * @param doc Document to be indexed.
 * @param lang language.
 * @throws IndexingException something went wrong adding the document
 */
public void addToIndex(String indexName, Document doc, String lang) throws IndexingException {

    try {
        IndexWriter writer = getIndexWriter(indexName, lang);
        try {
            writer.addDocument(doc);
            writer.flush();
        } finally {
            try {
                writer.close();
            } finally {
                // unlock it if it is locked.
                unlockIndex(indexName);
            }
        }
    } catch (CorruptIndexException e) {
        throw new IndexingException(e);
    } catch (LockObtainFailedException e) {
        throw new IndexingException(e);
    } catch (IOException e) {
        throw new IndexingException(e);
    }
}