Example usage for org.apache.lucene.index IndexWriter addDocument

List of usage examples for org.apache.lucene.index IndexWriter addDocument

Introduction

On this page you can find example usages of org.apache.lucene.index.IndexWriter.addDocument.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException 

Source Link

Document

Adds a document to this index.

Usage

From source file:com.bah.lucene.BaseDirectoryTestSuite.java

License:Apache License

/**
 * Adds {@code numDocs} documents to the given writer, obtaining each one from
 * {@code getDoc} with the indexes 0 through {@code numDocs - 1} in order.
 *
 * @param writer  the writer that receives the documents
 * @param numDocs how many documents to add; nothing is added when it is zero or negative
 * @throws IOException if the writer fails to add a document
 */
private void addDocuments(IndexWriter writer, int numDocs) throws IOException {
    int index = 0;
    while (index < numDocs) {
        writer.addDocument(getDoc(index));
        index++;
    }
}

From source file:com.bah.lucene.blockcache_v2.CacheDirectoryTest.java

License:Apache License

/**
 * Writes 100000 documents through the cache directory, then reopens the index and checks
 * both the reader's document count and the hit count of a term query on field "test"
 * against the number of documents written.
 *
 * The writer and the reader are closed in {@code finally} blocks so that a failed write
 * or a failed assertion does not leave them open on the shared directory.
 */
@Test
public void test3() throws IOException, InterruptedException {
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
    IndexWriter writer = new IndexWriter(_cacheDirectory, conf);
    int docs = 100000;
    try {
        for (int i = 0; i < docs; i++) {
            if (i % 500 == 0) {
                // Progress output every 500 documents.
                System.out.println(i);
            }
            writer.addDocument(newDoc());
        }
    } finally {
        writer.close();
    }
    System.out.println("done writing");

    DirectoryReader reader = DirectoryReader.open(_cacheDirectory);
    try {
        System.out.println("done opening");
        assertEquals(docs, reader.numDocs());

        Document document = reader.document(0);
        System.out.println("done fetching");
        System.out.println(document);

        IndexSearcher searcher = new IndexSearcher(reader);
        TopDocs topDocs = searcher.search(new TermQuery(new Term("test", "test")), 10);
        System.out.println("done searching");
        assertEquals(docs, topDocs.totalHits);
    } finally {
        reader.close();
    }
}

From source file:com.baidu.rigel.biplatform.tesseract.isservice.netty.service.IndexServerHandler.java

License:Open Source License

/**
 * Handles one incoming {@code IndexMessage}: optionally resets the working index path,
 * writes the rows of the message's data body into a Lucene index obtained from
 * {@code IndexWriterFactory.getIndexWriterWithSingleSlot}, and answers with an
 * index-feedback message before closing the channel.
 *
 * @param ctx channel context used to send the feedback message and close the channel
 * @param msg the received message; cast to {@code IndexMessage} without a type check
 * @throws Exception propagated from any of the file, index, or channel operations
 */
public void messageReceived_00(ChannelHandlerContext ctx, Object msg) throws Exception {
    logger.info(String.format(LogInfoConstants.INFO_PATTERN_MESSAGE_RECEIVED_BEGIN, "IndexServerHandler"));
    IndexMessage indexMsg = (IndexMessage) msg;
    // Working index path and service index path carried by the message.
    File idxFile = new File(indexMsg.getIdxPath());
    File idxServiceFile = new File(indexMsg.getIdxServicePath());

    if (indexMsg.getMessageHeader().getAction().equals(NettyAction.NETTY_ACTION_UPDATE)
            || indexMsg.getMessageHeader().getAction().equals(NettyAction.NETTY_ACTION_INITINDEX)) {
        // UPDATE and INITINDEX both start by deleting whatever is at the working index path.
        FileUtils.deleteFile(idxFile);
        if (indexMsg.getMessageHeader().getAction().equals(NettyAction.NETTY_ACTION_UPDATE)
                && idxServiceFile.exists()) {
            // UPDATE with an existing service index: copy it to the working path first.
            FileUtils.copyFolder(indexMsg.getIdxServicePath(), indexMsg.getIdxPath());
        }
    }

    IndexWriter idxWriter = IndexWriterFactory.getIndexWriterWithSingleSlot(indexMsg.getIdxPath());

    TesseractResultSet data = indexMsg.getDataBody();
    long currDiskSize = FileUtils.getDiskSize(indexMsg.getIdxPath());
    // Last value read from the id column (indexMsg.getIdName()); stays null if no row is indexed.
    BigDecimal currMaxId = null;
    // Rows are only indexed while the size measured above is below the block size.
    // NOTE(review): currDiskSize is never reassigned inside the loop, so the size part of
    // the loop condition cannot become false once the loop has started.
    if (currDiskSize < indexMsg.getBlockSize()) {
        while (data.next() && currDiskSize < indexMsg.getBlockSize()) {
            Document doc = new Document();
            String[] fieldNameArr = data.getFieldNameArray();
            // Every column of the current row becomes an unstored StringField.
            for (String select : fieldNameArr) {
                if (select.equals(indexMsg.getIdName())) {
                    currMaxId = data.getBigDecimal(select);
                }

                doc.add(new StringField(select, data.getString(select), Field.Store.NO));
            }

            idxWriter.addDocument(doc);
        }
        idxWriter.commit();
        // NOTE(review): the writer is closed only on this branch; when the index is already
        // at or above the block size, nothing in this method closes it directly.
        idxWriter.close();

    }

    String feedBackIndexServicePath = null;
    String feedBackIndexFilePath = null;

    // If the index has grown past the block size, or this is the last piece, call
    // destoryWriters for the path and report the two paths swapped; otherwise report them as received.
    long totalDiskSize = FileUtils.getDiskSize(indexMsg.getIdxPath());
    if (totalDiskSize > indexMsg.getBlockSize() || indexMsg.isLastPiece()) {
        IndexWriterFactory.destoryWriters(indexMsg.getIdxPath());
        feedBackIndexServicePath = indexMsg.getIdxPath();
        feedBackIndexFilePath = indexMsg.getIdxServicePath();
    } else {
        feedBackIndexServicePath = indexMsg.getIdxServicePath();
        feedBackIndexFilePath = indexMsg.getIdxPath();
    }

    MessageHeader messageHeader = new MessageHeader(NettyAction.NETTY_ACTION_INDEX_FEEDBACK);

    // The feedback reuses the incoming data body and reports the size, paths, and last id seen.
    IndexMessage indexFeedbackMsg = new IndexMessage(messageHeader, indexMsg.getDataBody());
    indexFeedbackMsg.setBlockSize(indexMsg.getBlockSize());
    indexFeedbackMsg.setDiskSize(totalDiskSize);
    indexFeedbackMsg.setIdxServicePath(feedBackIndexServicePath);
    indexFeedbackMsg.setIdxPath(feedBackIndexFilePath);
    indexFeedbackMsg.setIdName(indexMsg.getIdName());
    indexFeedbackMsg.setMaxId(currMaxId);
    ctx.writeAndFlush(indexFeedbackMsg);
    ctx.channel().close();
    logger.info(String.format(LogInfoConstants.INFO_PATTERN_MESSAGE_RECEIVED_END, "IndexServerHandler"));
}

From source file:com.baidu.rigel.biplatform.tesseract.isservice.netty.service.IndexServerHandler.java

License:Open Source License

/**
 * Handles one incoming {@code IndexMessage}: optionally resets the working index path,
 * writes the rows of the message's data body into a Lucene index obtained from
 * {@code IndexWriterFactory.getIndexWriter}, and answers with an index-feedback message
 * before closing the channel.
 *
 * @param ctx channel context used to send the feedback message and close the channel
 * @param msg the received message; cast to {@code IndexMessage} without a type check
 * @throws Exception propagated from any of the file, index, or channel operations
 */
@Override
public void messageReceived(ChannelHandlerContext ctx, Object msg) throws Exception {
    logger.info(String.format(LogInfoConstants.INFO_PATTERN_MESSAGE_RECEIVED_BEGIN, "IndexServerHandler"));
    IndexMessage indexMsg = (IndexMessage) msg;
    // Working index path and service index path carried by the message.
    File idxFile = new File(indexMsg.getIdxPath());
    File idxServiceFile = new File(indexMsg.getIdxServicePath());

    if (indexMsg.getMessageHeader().getAction().equals(NettyAction.NETTY_ACTION_UPDATE)
            || indexMsg.getMessageHeader().getAction().equals(NettyAction.NETTY_ACTION_INITINDEX)) {
        // UPDATE and INITINDEX both start by deleting whatever is at the working index path.
        FileUtils.deleteFile(idxFile);
        if (indexMsg.getMessageHeader().getAction().equals(NettyAction.NETTY_ACTION_UPDATE)
                && idxServiceFile.exists()) {
            // UPDATE with an existing service index: copy it to the working path first.
            FileUtils.copyFolder(indexMsg.getIdxServicePath(), indexMsg.getIdxPath());
        }
    }

    IndexWriter idxWriter = IndexWriterFactory.getIndexWriter(indexMsg.getIdxPath());

    TesseractResultSet data = indexMsg.getDataBody();
    long currDiskSize = FileUtils.getDiskSize(indexMsg.getIdxPath());
    // Last value read from the id column (indexMsg.getIdName()); stays null if no row is indexed.
    BigDecimal currMaxId = null;
    // Rows are only indexed while the measured on-disk size is below the block size.
    if (currDiskSize < indexMsg.getBlockSize()) {
        while (data.next() && currDiskSize < indexMsg.getBlockSize()) {
            Document doc = new Document();
            String[] fieldNameArr = data.getFieldNameArray();
            // Every column of the current row becomes an unstored StringField.
            for (String select : fieldNameArr) {
                if (select.equals(indexMsg.getIdName())) {
                    currMaxId = data.getBigDecimal(select);
                }

                doc.add(new StringField(select, data.getString(select), Field.Store.NO));
            }

            idxWriter.addDocument(doc);

            if ((currDiskSize + idxWriter.ramBytesUsed()) > indexMsg.getBlockSize()) {
                // Disk size plus the writer's RAM usage exceeds the block size: commit so the
                // buffered documents reach disk,
                idxWriter.commit();
                // then re-measure the directory so the loop condition sees the new size.
                currDiskSize = FileUtils.getDiskSize(indexMsg.getIdxPath());
            }
        }
        idxWriter.commit();
        // NOTE(review): unlike messageReceived_00, the writer is not closed here; the only
        // writer-related cleanup in this method is the destoryWriters call below.

    }

    String feedBackIndexServicePath = null;
    String feedBackIndexFilePath = null;
    // If the index has grown past the block size, or this is the last piece, call
    // destoryWriters for the path and report the two paths swapped; otherwise report them as received.
    long totalDiskSize = FileUtils.getDiskSize(indexMsg.getIdxPath());
    if (totalDiskSize > indexMsg.getBlockSize() || indexMsg.isLastPiece()) {
        IndexWriterFactory.destoryWriters(indexMsg.getIdxPath());
        feedBackIndexServicePath = indexMsg.getIdxPath();
        feedBackIndexFilePath = indexMsg.getIdxServicePath();
    } else {
        feedBackIndexServicePath = indexMsg.getIdxServicePath();
        feedBackIndexFilePath = indexMsg.getIdxPath();
    }

    MessageHeader messageHeader = new MessageHeader(NettyAction.NETTY_ACTION_INDEX_FEEDBACK);

    // The feedback reuses the incoming data body and reports the size, paths, and last id seen.
    IndexMessage indexFeedbackMsg = new IndexMessage(messageHeader, indexMsg.getDataBody());
    indexFeedbackMsg.setBlockSize(indexMsg.getBlockSize());
    indexFeedbackMsg.setDiskSize(totalDiskSize);
    indexFeedbackMsg.setIdxServicePath(feedBackIndexServicePath);
    indexFeedbackMsg.setIdxPath(feedBackIndexFilePath);
    indexFeedbackMsg.setIdName(indexMsg.getIdName());
    indexFeedbackMsg.setMaxId(currMaxId);
    ctx.writeAndFlush(indexFeedbackMsg);
    ctx.channel().close();
    logger.info(String.format(LogInfoConstants.INFO_PATTERN_MESSAGE_RECEIVED_END, "IndexServerHandler"));
}

From source file:com.basistech.lucene.tools.LuceneQueryToolTest.java

License:Apache License

/**
 * Builds an in-memory index once for the whole class and opens the class-level
 * {@code reader} on it. Four documents are written: three with "longest-mention" and
 * "context" fields, and one with two "bbb" values, an "aaa" value, and a "zzz" field
 * whose type is a stored string field with indexing switched off.
 */
@BeforeClass
public static void oneTimeSetup() throws IOException, ParseException {
    LuceneQueryToolTest.showOutput = false; // switch on when debugging tests
    Directory directory = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(new StandardAnalyzer()));

    Document clinton = new Document();
    clinton.add(new Field("longest-mention", "Bill Clinton", StringField.TYPE_STORED));
    clinton.add(new Field("context", "Hillary Clinton Arkansas", TextField.TYPE_NOT_STORED));
    indexWriter.addDocument(clinton);

    Document bushJunior = new Document();
    bushJunior.add(new Field("longest-mention", "George W. Bush", StringField.TYPE_STORED));
    bushJunior.add(new Field("context", "Texas Laura Bush", TextField.TYPE_NOT_STORED));
    indexWriter.addDocument(bushJunior);

    Document bushSenior = new Document();
    bushSenior.add(new Field("longest-mention", "George H. W. Bush", StringField.TYPE_STORED));
    bushSenior.add(new Field("context", "Barbara Bush Texas", TextField.TYPE_NOT_STORED));
    indexWriter.addDocument(bushSenior);

    // Copy of the stored string type with index options set to NONE.
    FieldType storedOnly = new FieldType(StringField.TYPE_STORED);
    storedOnly.setIndexOptions(IndexOptions.NONE);

    Document multiValued = new Document();
    multiValued.add(new Field("bbb", "foo", StringField.TYPE_STORED));
    multiValued.add(new Field("bbb", "bar", StringField.TYPE_STORED));
    multiValued.add(new Field("aaa", "foo", StringField.TYPE_STORED));
    multiValued.add(new Field("zzz", "foo", storedOnly));
    indexWriter.addDocument(multiValued);

    indexWriter.close();
    reader = DirectoryReader.open(directory);
}

From source file:com.basistech.lucene.tools.LuceneQueryToolTest.java

License:Apache License

/**
 * Verifies that a stored binary field is printed by the query tool as a hex string:
 * the bytes of "ABC" must appear in the output as {@code 0x414243}.
 *
 * The index reader opened here is held in a local variable and closed when the test
 * finishes. Assigning it to the class-level {@code reader} would replace the index that
 * {@code oneTimeSetup} prepared for the other tests and would never be closed.
 */
@Test
public void testBinaryField() throws IOException, ParseException {
    Directory dir = new RAMDirectory();
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    IndexWriter writer = new IndexWriter(dir, config);
    try {
        Document doc = new Document();
        doc.add(new Field("id", "1", StringField.TYPE_STORED));
        doc.add(new Field("binary-field", "ABC".getBytes(Charsets.UTF_8), StoredField.TYPE));
        writer.addDocument(doc);
    } finally {
        // Close even when adding the document fails so the writer is not left open.
        writer.close();
    }

    DirectoryReader binaryReader = DirectoryReader.open(dir);
    try {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream out = new PrintStream(bytes);
        LuceneQueryTool lqt = new LuceneQueryTool(binaryReader, out);
        lqt.run(new String[] { "id:1" });
        String result = Joiner.on('\n').join(getOutput(bytes));
        assertTrue(result.contains("0x414243")); // binary rep of "ABC"
    } finally {
        binaryReader.close();
    }
}

From source file:com.bdaum.zoom.lal.internal.lire.IndexingJob.java

License:Open Source License

/**
 * Indexes the thumbnail of one asset: optionally deletes the asset's existing index
 * entry, decodes the JPEG thumbnail, builds a document from it, and adds that document
 * to the index writer.
 *
 * @param monitor progress monitor; advanced by 1, 1, 7 and 1 work units along the normal path
 * @param create  when {@code false}, documents matching the asset id are deleted first
 * @param iw      index writer that receives the delete and the new document
 * @param builder builds the document from the decoded thumbnail and the asset id
 * @param asset   asset whose JPEG thumbnail is indexed
 * @param done    receives the asset id after the document has been added successfully
 * @return {@code true} on the error paths (the first one is commented "Abort" in the
 *         code); otherwise {@code monitor.isCanceled()}
 */
private boolean doUpdateDocument(IProgressMonitor monitor, boolean create, IndexWriter iw,
        DocumentBuilder builder, Asset asset, List<String> done) {
    String assetId = asset.getStringId();
    if (!create)
        try {
            iw.deleteDocuments(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, assetId));
        } catch (CorruptIndexException e) {
            // ignore for now
        } catch (IOException e) {
            addErrorStatus(Messages.IndexingJob_ioerror_updating_lucene_index, e);
            // Abort
            return true;
        }
    monitor.worked(1);
    try (ByteArrayInputStream in = new ByteArrayInputStream(asset.getJpegThumbnail())) {
        BufferedImage bi = null;
        // Up to 15 decode attempts, one second apart, all reading from the same stream.
        // NOTE(review): the stream is not reset between attempts — confirm a retry can succeed.
        for (int i = 0; i < 15; i++) {
            bi = ImageIO.read(in);
            if (bi != null)
                break;
            try {
                Thread.sleep(1000L);
            } catch (InterruptedException e) {
                // An interrupt ends the retries; the interrupt flag is not restored.
                break;
            }
        }
        monitor.worked(1);
        if (bi == null) {
            // No image could be decoded: record a warning and skip this asset without aborting.
            addWarningStatus(NLS.bind(Messages.IndexingJob_thumbnail_corrupt, asset.getName()));
            return monitor.isCanceled();
        }
        Document doc;
        try {
            doc = builder.createDocument(bi, assetId);
        } catch (ProviderException e) {
            addErrorStatus(NLS.bind(Messages.IndexingJob_indexing_stopped, e.getMessage()), e);
            return true;
        }
        monitor.worked(7);
        try {
            iw.addDocument(doc);
            // Only assets whose document was actually added are recorded as done.
            done.add(assetId);
        } catch (CorruptIndexException e) {
            addErrorStatus(NLS.bind(Messages.IndexingJob_lucene_index_is_corrupt, dbManager.getIndexPath()), e);
            return true;
        } catch (IOException e) {
            addErrorStatus(Messages.IndexingJob_ioerror_updating_lucene_index, e);
            return true;
        }
        try {
            // Short pause after each added document; presumably to yield to other work — TODO confirm.
            Thread.sleep(30);
        } catch (InterruptedException e) {
            // do nothing
        }
    } catch (IOException e) {
        // Unlike the other error paths this one does not return true; execution falls
        // through to the final monitor update and cancel check.
        addErrorStatus(NLS.bind(Messages.IndexingJob_io_error_when_generating_index_data, asset.getName()), e);
    } catch (Exception e) {
        addErrorStatus(NLS.bind(Messages.IndexingJob_internal_error, asset.getName()), e);
        return true;
    }
    monitor.worked(1);
    return monitor.isCanceled();
}

From source file:com.bdaum.zoom.lal.internal.lucene.Lucene.java

License:Open Source License

/**
 * Adds a document built from the given image to the index writer registered under
 * {@code writerToken}. Does nothing when no writer is registered for the token. The
 * document builder for the token is created on first use and kept in {@code builderMap}.
 *
 * @param writerToken key identifying the index writer (and its document builder)
 * @param image       image the document is created from
 * @param assetid     asset id passed to the document builder
 * @throws IOException if the index writer fails to add the document
 */
public void addDocument(Object writerToken, BufferedImage image, String assetid) throws IOException {
    IndexWriter writer = writerMap.get(writerToken);
    if (writer == null) {
        return;
    }
    DocumentBuilder builder = builderMap.get(writerToken);
    if (builder == null) {
        // First document for this token: create the builder and remember it.
        builder = LireActivator.getDefault().constructFullDocumentBuilder();
        builderMap.put(writerToken, builder);
    }
    writer.addDocument(builder.createDocument(image, assetid));
}

From source file:com.berico.clavin.index.IndexDirectoryBuilder.java

License:Apache License

/**
 * Adds entries to the Lucene index for each unique name associated
 * with a {@link GeoName} object.
 *
 * One document is added for the primary name, one for the ASCII name when it differs
 * from the primary name, and one for every alternate name that differs from both.
 * Empty names are skipped.
 *
 * @param indexWriter   the object that actually builds the Lucene index
 * @param geonameEntry  single record from GeoNames gazetteer
 * @throws IOException  if the index writer fails to add a document
 */
private static void addToIndex(IndexWriter indexWriter, String geonameEntry) throws IOException {

    // create a GeoName object from a single gazetteer record
    GeoName geoname = GeoName.parseFromGeoNamesRecord(geonameEntry);

    // add the primary (UTF-8) name for this location
    if (geoname.name.length() > 0) {
        indexWriter.addDocument(buildDoc(geoname.name, geonameEntry, geoname.geonameID, geoname.population));
    }

    // add the ASCII name if it's different from the primary name
    if (geoname.asciiName.length() > 0 && !geoname.asciiName.equals(geoname.name)) {
        indexWriter
                .addDocument(buildDoc(geoname.asciiName, geonameEntry, geoname.geonameID, geoname.population));
    }

    // add alternate names (if any) if they differ from the primary
    // and ASCII names; comparing against the ASCII name as well keeps an
    // alternate name equal to it from being indexed a second time
    for (String altName : geoname.alternateNames) {
        if (altName.length() > 0 && !altName.equals(geoname.name) && !altName.equals(geoname.asciiName)) {
            indexWriter.addDocument(buildDoc(altName, geonameEntry, geoname.geonameID, geoname.population));
        }
    }
}

From source file:com.bewsia.script.LuceneHandler.java

License:Open Source License

/**
 * Writes the given entity into the Lucene index at {@code dirIndex} as a new document.
 *
 * Entities with an empty id or an empty kind are ignored. For every kind other than
 * {@code KIND_QUOTA} the entity is only written when {@code quotaCreate} accepts it.
 * The entity is passed to {@code backup} before the index is opened.
 *
 * The operation is best-effort: any exception is swallowed and the method returns
 * normally. The index writer is always closed, so a failure while building or adding
 * the document does not leave the writer open on the index directory.
 *
 * @param src the entity to store
 */
protected void createEntity(SEntity src) {
    if (src.getId().length() == 0)
        return;
    if (src.getKind().length() == 0)
        return;

    try {
        if (!src.getKind().equals(KIND_QUOTA)) {
            if (!quotaCreate(src))
                return;
        }
        backup(src);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        IndexWriter writer = new IndexWriter(FSDirectory.open(new File(dirIndex)), iwc);
        try {
            Document doc = new Document();
            write(src, doc);
            writer.addDocument(doc);
        } finally {
            // Close on every path; previously a failure above skipped close().
            writer.close();
        }
    } catch (Exception ignored) {
        // Deliberately best-effort: callers get no signal when the entity could not be stored.
        // NOTE(review): consider reporting this failure once a logging facility is available here.
    }
}