List of usage examples for org.apache.lucene.index IndexWriter addDocument
public long addDocument(Iterable<? extends IndexableField> doc) throws IOException
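The examples below come from real projects and span several Lucene versions. As a quick orientation, here is a minimal, self-contained sketch of the basic addDocument workflow; the index path, field names, and values are illustrative, and the single-argument IndexWriterConfig constructor plus the long sequence number returned by addDocument assume Lucene 6.x or later.

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class AddDocumentSketch {
    public static void main(String[] args) throws IOException {
        // Open (or create) an index directory on disk; the path is illustrative.
        Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        try (IndexWriter writer = new IndexWriter(dir, config)) {
            Document doc = new Document();
            // StringField: indexed as a single token and stored for retrieval.
            doc.add(new StringField("id", "42", Field.Store.YES));
            // TextField: analyzed full text, not stored here.
            doc.add(new TextField("body", "hello lucene", Field.Store.NO));
            // Returns a sequence number in Lucene 6.x+; earlier versions return void.
            writer.addDocument(doc);
            writer.commit();
        }
    }
}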
From source file:com.bah.lucene.BaseDirectoryTestSuite.java
License:Apache License
private void addDocuments(IndexWriter writer, int numDocs) throws IOException {
    for (int i = 0; i < numDocs; i++) {
        writer.addDocument(getDoc(i));
    }
}
From source file:com.bah.lucene.blockcache_v2.CacheDirectoryTest.java
License:Apache License
@Test
public void test3() throws IOException, InterruptedException {
    // Thread.sleep(30000);
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
    IndexWriter writer = new IndexWriter(_cacheDirectory, conf);
    int docs = 100000;
    for (int i = 0; i < docs; i++) {
        if (i % 500 == 0) {
            System.out.println(i);
        }
        writer.addDocument(newDoc());
        // Thread.sleep(1);
    }
    writer.close();
    System.out.println("done writing");

    DirectoryReader reader = DirectoryReader.open(_cacheDirectory);
    System.out.println("done opening");
    assertEquals(docs, reader.numDocs());
    Document document = reader.document(0);
    System.out.println("done fetching");
    System.out.println(document);

    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("test", "test")), 10);
    System.out.println("done searching");
    assertEquals(docs, topDocs.totalHits);
    reader.close();
}
From source file:com.baidu.rigel.biplatform.tesseract.isservice.netty.service.IndexServerHandler.java
License:Open Source License
public void messageReceived_00(ChannelHandlerContext ctx, Object msg) throws Exception {
    logger.info(String.format(LogInfoConstants.INFO_PATTERN_MESSAGE_RECEIVED_BEGIN, "IndexServerHandler"));
    IndexMessage indexMsg = (IndexMessage) msg;
    File idxFile = new File(indexMsg.getIdxPath());
    File idxServiceFile = new File(indexMsg.getIdxServicePath());
    if (indexMsg.getMessageHeader().getAction().equals(NettyAction.NETTY_ACTION_UPDATE)
            || indexMsg.getMessageHeader().getAction().equals(NettyAction.NETTY_ACTION_INITINDEX)) {
        // Clear the current index files before an update or initial indexing run.
        FileUtils.deleteFile(idxFile);
        if (indexMsg.getMessageHeader().getAction().equals(NettyAction.NETTY_ACTION_UPDATE)
                && idxServiceFile.exists()) {
            // For updates, start from a copy of the index currently in service.
            FileUtils.copyFolder(indexMsg.getIdxServicePath(), indexMsg.getIdxPath());
        }
    }
    IndexWriter idxWriter = IndexWriterFactory.getIndexWriterWithSingleSlot(indexMsg.getIdxPath());
    TesseractResultSet data = indexMsg.getDataBody();
    long currDiskSize = FileUtils.getDiskSize(indexMsg.getIdxPath());
    BigDecimal currMaxId = null;
    // Add documents until the data is exhausted or the block size limit is reached.
    if (currDiskSize < indexMsg.getBlockSize()) {
        while (data.next() && currDiskSize < indexMsg.getBlockSize()) {
            Document doc = new Document();
            String[] fieldNameArr = data.getFieldNameArray();
            for (String select : fieldNameArr) {
                if (select.equals(indexMsg.getIdName())) {
                    currMaxId = data.getBigDecimal(select);
                }
                doc.add(new StringField(select, data.getString(select), Field.Store.NO));
            }
            idxWriter.addDocument(doc);
        }
        idxWriter.commit();
        idxWriter.close();
    }
    String feedBackIndexServicePath = null;
    String feedBackIndexFilePath = null;
    // If this block is full, or this is the last piece, release its writers and swap the paths
    // reported back to the caller.
    long totalDiskSize = FileUtils.getDiskSize(indexMsg.getIdxPath());
    if (totalDiskSize > indexMsg.getBlockSize() || indexMsg.isLastPiece()) {
        IndexWriterFactory.destoryWriters(indexMsg.getIdxPath());
        feedBackIndexServicePath = indexMsg.getIdxPath();
        feedBackIndexFilePath = indexMsg.getIdxServicePath();
    } else {
        feedBackIndexServicePath = indexMsg.getIdxServicePath();
        feedBackIndexFilePath = indexMsg.getIdxPath();
    }
    MessageHeader messageHeader = new MessageHeader(NettyAction.NETTY_ACTION_INDEX_FEEDBACK);
    IndexMessage indexFeedbackMsg = new IndexMessage(messageHeader, indexMsg.getDataBody());
    indexFeedbackMsg.setBlockSize(indexMsg.getBlockSize());
    indexFeedbackMsg.setDiskSize(totalDiskSize);
    indexFeedbackMsg.setIdxServicePath(feedBackIndexServicePath);
    indexFeedbackMsg.setIdxPath(feedBackIndexFilePath);
    indexFeedbackMsg.setIdName(indexMsg.getIdName());
    indexFeedbackMsg.setMaxId(currMaxId);
    ctx.writeAndFlush(indexFeedbackMsg);
    ctx.channel().close();
    logger.info(String.format(LogInfoConstants.INFO_PATTERN_MESSAGE_RECEIVED_END, "IndexServerHandler"));
}
From source file:com.baidu.rigel.biplatform.tesseract.isservice.netty.service.IndexServerHandler.java
License:Open Source License
@Override
public void messageReceived(ChannelHandlerContext ctx, Object msg) throws Exception {
    logger.info(String.format(LogInfoConstants.INFO_PATTERN_MESSAGE_RECEIVED_BEGIN, "IndexServerHandler"));
    IndexMessage indexMsg = (IndexMessage) msg;
    File idxFile = new File(indexMsg.getIdxPath());
    File idxServiceFile = new File(indexMsg.getIdxServicePath());
    if (indexMsg.getMessageHeader().getAction().equals(NettyAction.NETTY_ACTION_UPDATE)
            || indexMsg.getMessageHeader().getAction().equals(NettyAction.NETTY_ACTION_INITINDEX)) {
        // Clear the current index files before an update or initial indexing run.
        FileUtils.deleteFile(idxFile);
        if (indexMsg.getMessageHeader().getAction().equals(NettyAction.NETTY_ACTION_UPDATE)
                && idxServiceFile.exists()) {
            // For updates, start from a copy of the index currently in service.
            FileUtils.copyFolder(indexMsg.getIdxServicePath(), indexMsg.getIdxPath());
        }
    }
    IndexWriter idxWriter = IndexWriterFactory.getIndexWriter(indexMsg.getIdxPath());
    TesseractResultSet data = indexMsg.getDataBody();
    long currDiskSize = FileUtils.getDiskSize(indexMsg.getIdxPath());
    BigDecimal currMaxId = null;
    // Add documents until the data is exhausted or the block size limit is reached.
    if (currDiskSize < indexMsg.getBlockSize()) {
        while (data.next() && currDiskSize < indexMsg.getBlockSize()) {
            Document doc = new Document();
            String[] fieldNameArr = data.getFieldNameArray();
            for (String select : fieldNameArr) {
                if (select.equals(indexMsg.getIdName())) {
                    currMaxId = data.getBigDecimal(select);
                }
                doc.add(new StringField(select, data.getString(select), Field.Store.NO));
            }
            idxWriter.addDocument(doc);
            if ((currDiskSize + idxWriter.ramBytesUsed()) > indexMsg.getBlockSize()) {
                // Flush buffered documents to disk and re-measure the on-disk size.
                idxWriter.commit();
                currDiskSize = FileUtils.getDiskSize(indexMsg.getIdxPath());
            }
        }
        idxWriter.commit();
    }
    String feedBackIndexServicePath = null;
    String feedBackIndexFilePath = null;
    // If this block is full, or this is the last piece, release its writers and swap the paths
    // reported back to the caller.
    long totalDiskSize = FileUtils.getDiskSize(indexMsg.getIdxPath());
    if (totalDiskSize > indexMsg.getBlockSize() || indexMsg.isLastPiece()) {
        IndexWriterFactory.destoryWriters(indexMsg.getIdxPath());
        feedBackIndexServicePath = indexMsg.getIdxPath();
        feedBackIndexFilePath = indexMsg.getIdxServicePath();
    } else {
        feedBackIndexServicePath = indexMsg.getIdxServicePath();
        feedBackIndexFilePath = indexMsg.getIdxPath();
    }
    MessageHeader messageHeader = new MessageHeader(NettyAction.NETTY_ACTION_INDEX_FEEDBACK);
    IndexMessage indexFeedbackMsg = new IndexMessage(messageHeader, indexMsg.getDataBody());
    indexFeedbackMsg.setBlockSize(indexMsg.getBlockSize());
    indexFeedbackMsg.setDiskSize(totalDiskSize);
    indexFeedbackMsg.setIdxServicePath(feedBackIndexServicePath);
    indexFeedbackMsg.setIdxPath(feedBackIndexFilePath);
    indexFeedbackMsg.setIdName(indexMsg.getIdName());
    indexFeedbackMsg.setMaxId(currMaxId);
    ctx.writeAndFlush(indexFeedbackMsg);
    ctx.channel().close();
    logger.info(String.format(LogInfoConstants.INFO_PATTERN_MESSAGE_RECEIVED_END, "IndexServerHandler"));
}
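Note that the manual ramBytesUsed check in the loop above exists to cap the on-disk block size, not just memory use. If the goal were only to bound the writer's memory, IndexWriter can flush its in-memory buffer by size on its own; a minimal sketch, where the analyzer, directory, and 64 MB threshold are illustrative:

// Illustrative: let IndexWriter flush buffered documents automatically
// once they use roughly 64 MB of heap, instead of committing by hand.
IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());
conf.setRAMBufferSizeMB(64.0);
IndexWriter writer = new IndexWriter(directory, conf);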
From source file:com.basistech.lucene.tools.LuceneQueryToolTest.java
License:Apache License
@BeforeClass
public static void oneTimeSetup() throws IOException, ParseException {
    LuceneQueryToolTest.showOutput = false; // for debugging tests
    Directory dir = new RAMDirectory();
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    IndexWriter writer = new IndexWriter(dir, config);

    Document doc = new Document();
    doc.add(new Field("longest-mention", "Bill Clinton", StringField.TYPE_STORED));
    doc.add(new Field("context", "Hillary Clinton Arkansas", TextField.TYPE_NOT_STORED));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new Field("longest-mention", "George W. Bush", StringField.TYPE_STORED));
    doc.add(new Field("context", "Texas Laura Bush", TextField.TYPE_NOT_STORED));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new Field("longest-mention", "George H. W. Bush", StringField.TYPE_STORED));
    doc.add(new Field("context", "Barbara Bush Texas", TextField.TYPE_NOT_STORED));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new Field("bbb", "foo", StringField.TYPE_STORED));
    doc.add(new Field("bbb", "bar", StringField.TYPE_STORED));
    doc.add(new Field("aaa", "foo", StringField.TYPE_STORED));
    FieldType typeUnindexed = new FieldType(StringField.TYPE_STORED);
    typeUnindexed.setIndexOptions(IndexOptions.NONE);
    doc.add(new Field("zzz", "foo", typeUnindexed));
    writer.addDocument(doc);

    writer.close();
    reader = DirectoryReader.open(dir);
}
From source file:com.basistech.lucene.tools.LuceneQueryToolTest.java
License:Apache License
@Test
public void testBinaryField() throws IOException, ParseException {
    Directory dir = new RAMDirectory();
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    IndexWriter writer = new IndexWriter(dir, config);
    Document doc = new Document();
    doc.add(new Field("id", "1", StringField.TYPE_STORED));
    doc.add(new Field("binary-field", "ABC".getBytes(Charsets.UTF_8), StoredField.TYPE));
    writer.addDocument(doc);
    writer.close();
    reader = DirectoryReader.open(dir);

    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    PrintStream out = new PrintStream(bytes);
    LuceneQueryTool lqt = new LuceneQueryTool(reader, out);
    lqt.run(new String[] { "id:1" });
    String result = Joiner.on('\n').join(getOutput(bytes));
    assertTrue(result.contains("0x414243")); // binary rep of "ABC"
}
From source file:com.bdaum.zoom.lal.internal.lire.IndexingJob.java
License:Open Source License
private boolean doUpdateDocument(IProgressMonitor monitor, boolean create, IndexWriter iw,
        DocumentBuilder builder, Asset asset, List<String> done) {
    String assetId = asset.getStringId();
    if (!create)
        try {
            iw.deleteDocuments(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, assetId));
        } catch (CorruptIndexException e) {
            // ignore for now
        } catch (IOException e) {
            addErrorStatus(Messages.IndexingJob_ioerror_updating_lucene_index, e);
            // Abort
            return true;
        }
    monitor.worked(1);
    try (ByteArrayInputStream in = new ByteArrayInputStream(asset.getJpegThumbnail())) {
        BufferedImage bi = null;
        for (int i = 0; i < 15; i++) {
            bi = ImageIO.read(in);
            if (bi != null)
                break;
            try {
                Thread.sleep(1000L);
            } catch (InterruptedException e) {
                break;
            }
        }
        monitor.worked(1);
        if (bi == null) {
            addWarningStatus(NLS.bind(Messages.IndexingJob_thumbnail_corrupt, asset.getName()));
            return monitor.isCanceled();
        }
        Document doc;
        try {
            doc = builder.createDocument(bi, assetId);
        } catch (ProviderException e) {
            addErrorStatus(NLS.bind(Messages.IndexingJob_indexing_stopped, e.getMessage()), e);
            return true;
        }
        monitor.worked(7);
        try {
            iw.addDocument(doc);
            done.add(assetId);
        } catch (CorruptIndexException e) {
            addErrorStatus(NLS.bind(Messages.IndexingJob_lucene_index_is_corrupt, dbManager.getIndexPath()), e);
            return true;
        } catch (IOException e) {
            addErrorStatus(Messages.IndexingJob_ioerror_updating_lucene_index, e);
            return true;
        }
        try {
            Thread.sleep(30);
        } catch (InterruptedException e) {
            // do nothing
        }
    } catch (IOException e) {
        addErrorStatus(NLS.bind(Messages.IndexingJob_io_error_when_generating_index_data, asset.getName()), e);
    } catch (Exception e) {
        addErrorStatus(NLS.bind(Messages.IndexingJob_internal_error, asset.getName()), e);
        return true;
    }
    monitor.worked(1);
    return monitor.isCanceled();
}
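The delete-then-add sequence above can also be expressed with IndexWriter.updateDocument, which deletes all documents matching a term and then adds the new document in a single call. A minimal sketch reusing the identifiers from the example above, with error handling omitted:

// Sketch: replace any existing document carrying this asset id in one call,
// equivalent in effect to the deleteDocuments(...) followed by addDocument(...) above.
Term idTerm = new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, assetId);
iw.updateDocument(idTerm, doc);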
From source file:com.bdaum.zoom.lal.internal.lucene.Lucene.java
License:Open Source License
public void addDocument(Object writerToken, BufferedImage image, String assetid) throws IOException {
    IndexWriter indexWriter = writerMap.get(writerToken);
    if (indexWriter != null) {
        DocumentBuilder documentBuilder = builderMap.get(writerToken);
        if (documentBuilder == null)
            builderMap.put(writerToken,
                    documentBuilder = LireActivator.getDefault().constructFullDocumentBuilder());
        indexWriter.addDocument(documentBuilder.createDocument(image, assetid));
    }
}
From source file:com.berico.clavin.index.IndexDirectoryBuilder.java
License:Apache License
/**
 * Adds entries to the Lucene index for each unique name associated
 * with a {@link GeoName} object.
 *
 * @param indexWriter the object that actually builds the Lucene index
 * @param geonameEntry single record from GeoNames gazetteer
 * @throws IOException
 */
private static void addToIndex(IndexWriter indexWriter, String geonameEntry) throws IOException {
    // create a GeoName object from a single gazetteer record
    GeoName geoname = GeoName.parseFromGeoNamesRecord(geonameEntry);

    // add the primary (UTF-8) name for this location
    if (geoname.name.length() > 0)
        indexWriter.addDocument(buildDoc(geoname.name, geonameEntry, geoname.geonameID, geoname.population));

    // add the ASCII name if it's different from the primary name
    if (geoname.asciiName.length() > 0 && !geoname.asciiName.equals(geoname.name))
        indexWriter
                .addDocument(buildDoc(geoname.asciiName, geonameEntry, geoname.geonameID, geoname.population));

    // add alternate names (if any) if they differ from the primary
    // and ASCII names
    for (String altName : geoname.alternateNames)
        if (altName.length() > 0 && !altName.equals(geoname.name) && !altName.equals(geoname.asciiName))
            indexWriter.addDocument(buildDoc(altName, geonameEntry, geoname.geonameID, geoname.population));
}
From source file:com.bewsia.script.LuceneHandler.java
License:Open Source License
protected void createEntity(SEntity src) {
    if (src.getId().length() == 0)
        return;
    if (src.getKind().length() == 0)
        return;
    try {
        if (!src.getKind().equals(KIND_QUOTA)) {
            if (!quotaCreate(src))
                return;
        }
        backup(src);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        IndexWriter writer = new IndexWriter(FSDirectory.open(new File(dirIndex)), iwc);
        Document doc = new Document();
        write(src, doc);
        writer.addDocument(doc);
        writer.close();
    } catch (Exception e) {
        // errors are silently ignored here
    }
}