List of usage examples for org.apache.lucene.index.IndexWriter.addDocument
public long addDocument(Iterable<? extends IndexableField> doc) throws IOException
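Before the collected examples, here is a minimal, self-contained sketch of the typical call pattern. This assumes a recent Lucene release (6.0 or later, where addDocument returns a sequence number); the index path and field names are illustrative, not taken from any of the sources below.

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class AddDocumentExample {
    public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(Paths.get("example-index"));
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        try (IndexWriter writer = new IndexWriter(dir, config)) {
            Document doc = new Document();
            // TextField is analyzed (tokenized); Field.Store.YES keeps the raw value retrievable
            doc.add(new TextField("title", "Hello Lucene", Field.Store.YES));
            long seqNo = writer.addDocument(doc); // sequence number, returned since Lucene 6
            writer.commit();
        }
    }
}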
From source file:Demo2.MyServlet.java
private static void addDoc(IndexWriter w, String name, String age, String gender, String race)
        throws IOException {
    Document doc = new Document();
    // A text field will be tokenized
    doc.add(new TextField("Classes", name, Field.Store.YES));
    // We use string fields for the remaining values because we don't want them tokenized
    doc.add(new StringField("Number", gender, Field.Store.YES));
    doc.add(new StringField("Time", age, Field.Store.YES));
    doc.add(new StringField("Department", race, Field.Store.YES));
    w.addDocument(doc);
}
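The field-type distinction above is the key point of this example: TextField values are run through the analyzer and tokenized, so they can be matched by full-text queries, while StringField values are indexed as a single unanalyzed token and only match exact-value term queries.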
From source file:di.uniba.it.nlpita.index.BuildSeoDwarfIndex.java
private static void indexOntologyElement(IndexWriter writer, OntologyElementToken e,
        Collection<String> domainOf, Collection<String> rangeOf,
        Collection<String> extendedDomain) throws Exception {
    Document doc = new Document();
    doc.add(new Field("label", e.getLabel(), TextField.TYPE_NOT_STORED));
    doc.add(new IntField("id", e.getId(), IntField.TYPE_STORED));
    doc.add(new Field("type", e.getType(), StringField.TYPE_NOT_STORED));
    if (domainOf != null) {
        for (String d : domainOf) {
            // the first element is the URI
            doc.add(new Field("domainOfProperty", d, StringField.TYPE_NOT_STORED));
        }
    }
    if (rangeOf != null) {
        for (String r : rangeOf) {
            // the first element is the URI
            doc.add(new Field("rangeOfProperty", r, StringField.TYPE_NOT_STORED));
        }
    }
    if (extendedDomain != null) {
        for (String d : extendedDomain) {
            // the first element is the URI
            doc.add(new Field("propertyDomain", d, StringField.TYPE_NOT_STORED));
        }
    }
    writer.addDocument(doc);
}
From source file:di.uniba.it.tri.TemporalSpaceUtils.java
License:Open Source License
public static IndexReader index(VectorReader vreader) throws IOException {
    Iterator<String> keys = vreader.getKeys();
    RAMDirectory ramDir = new RAMDirectory();
    IndexWriterConfig iwconfig = new IndexWriterConfig(Version.LUCENE_36,
            new StandardAnalyzer(Version.LUCENE_36));
    IndexWriter writer = new IndexWriter(ramDir, iwconfig);
    while (keys.hasNext()) {
        String word = keys.next();
        Document doc = new Document();
        doc.add(new Field("word", word, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
        writer.addDocument(doc);
    }
    writer.close();
    return IndexReader.open(ramDir);
}
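Note that this example targets the Lucene 3.6 API: the Field constructor taking Field.Store/Field.Index/Field.TermVector, IndexReader.open, and RAMDirectory were all deprecated and later removed in newer Lucene releases, where typed fields (TextField, StringField), DirectoryReader.open, and ByteBuffersDirectory take their place.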
From source file:dk.dbc.opensearch.fedora.search.PidCollectorTest.java
License:Open Source License
private AtomicReader populateIndexAndGetIndexReader(Document... docs) throws IOException {
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_41,
            new SimpleAnalyzer(Version.LUCENE_41));
    IndexWriter indexWriter = new IndexWriter(index, config);
    for (Document doc : docs) {
        indexWriter.addDocument(doc);
    }
    indexWriter.commit();
    indexWriter.close();
    return SlowCompositeReaderWrapper.wrap(DirectoryReader.open(index));
}
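SlowCompositeReaderWrapper.wrap flattens a possibly multi-segment DirectoryReader into a single AtomicReader view so the test can use the per-segment (atomic) API directly; as the class name warns, this carries a performance cost that is acceptable in tests but discouraged in production code.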
From source file:dk.dma.msinm.lucene.AbstractLuceneIndex.java
License:Open Source License
/**
 * Adds the given entity to the index
 *
 * @param entity the entity to add
 */
protected void addEntityToIndex(IndexWriter writer, T entity) {
    Document doc = new Document();
    // ID field
    doc.add(new StringField(ID_FIELD, entity.getId().toString(), Field.Store.YES));
    // Add the entity specific fields
    addEntityToDocument(doc, entity);
    // Add the document to the index
    try {
        writer.addDocument(doc);
    } catch (IOException ex) {
        log.error("Error adding entity " + entity.getId() + " to the Lucene index: " + ex.getMessage(), ex);
    }
}
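A design choice worth noting here: the helper catches and logs the IOException instead of propagating it, so one failed addDocument will not abort a bulk re-index; callers that need stronger guarantees would have to rethrow or otherwise track the failure.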
From source file:dk.dma.msinm.lucene.CommitUserDataTest.java
License:Open Source License
@Test
public void test() throws IOException {
    File indexFolder = Files.createTempDir();
    Directory directory = FSDirectory.open(indexFolder);

    // Create an index writer
    IndexWriterConfig iwc = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION,
            new StandardAnalyzer(LuceneUtils.LUCENE_VERSION));
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    IndexWriter indexWriter = new IndexWriter(directory, iwc);

    // Write a document
    Document doc = new Document();
    doc.add(new IntField("id", 100, Field.Store.YES));
    indexWriter.addDocument(doc);

    // Add user data
    Map<String, String> userData = new HashMap<>();
    userData.put("A", "B");
    indexWriter.setCommitData(userData);
    indexWriter.close();

    // Check if we can read user data
    DirectoryReader indexReader = DirectoryReader.open(FSDirectory.open(indexFolder));
    assertEquals("B", indexReader.getIndexCommit().getUserData().get("A"));
}
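setCommitData attaches arbitrary key/value metadata to the next commit point, retrievable from any reader via getIndexCommit().getUserData(); this is commonly used to record checkpoint state alongside the index. In later Lucene releases this API was replaced by setLiveCommitData.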
From source file:dk.dma.msinm.lucene.SpatialLuceneTest.java
License:Open Source License
@Test
public void testSpatialSearch() throws IOException, ParseException {
    int maxLevels = 11; // results in sub-meter precision for geohash
    SpatialPrefixTree grid = new GeohashPrefixTree(ctx, maxLevels);
    strategy = new RecursivePrefixTreeStrategy(grid, "myGeoField");

    Directory directory = new RAMDirectory();
    IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_47, null);
    IndexWriter indexWriter = new IndexWriter(directory, iwConfig);
    indexWriter.addDocument(newSampleDocument(2, ctx.makePoint(-80.93, 33.77)));
    indexWriter.addDocument(newSampleDocument(4, ctx.readShapeFromWkt("POINT(60.9289094 -50.7693246)")));
    indexWriter.addDocument(newSampleDocument(20, ctx.makePoint(0.1, 0.1), ctx.makePoint(0, 0)));
    indexWriter.addDocument(newSampleDocument(30,
            JtsSpatialContext.GEO.readShapeFromWkt("POLYGON((0 0, -90 0, -90 40, 0 40, 0 0))")));
    indexWriter.close();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    Sort idSort = new Sort(new SortField("id", SortField.Type.INT));

    // Search 1
    SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,
            ctx.makeCircle(-80.0, 33.0, DistanceUtils.dist2Degrees(200, DistanceUtils.EARTH_MEAN_RADIUS_KM)));
    TopDocs docs = indexSearcher.search(new MatchAllDocsQuery(), strategy.makeFilter(args), 10, idSort);
    assertDocMatchedIds(indexSearcher, docs, 2, 30);

    // Search 2
    args = new SpatialArgs(SpatialOperation.Intersects,
            JtsSpatialContext.GEO.readShapeFromWkt("POLYGON((-10 10, -20 0, -20 20, -10 20, -10 10))"));
    docs = indexSearcher.search(new MatchAllDocsQuery(), strategy.makeFilter(args), 10, idSort);
    assertDocMatchedIds(indexSearcher, docs, 30);
}
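This example shows the Lucene 4.x spatial pattern: RecursivePrefixTreeStrategy decomposes each shape into geohash grid cells and indexes those cells as terms, so spatial constraints are expressed as a filter (strategy.makeFilter) combined with an ordinary query, here a MatchAllDocsQuery, rather than as a dedicated spatial query type.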
From source file:dk.statsbiblioteket.netark.dvenabler.DVReaderTest.java
License:Apache License
private static File generateIndex(int documents) throws IOException {
    final File INDEX = new File("target/testindex.deletefreely." + documents);
    final long seed = new Random().nextLong();
    Random random = new Random(seed);
    log.info("Testing with random seed " + seed);
    Analyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);

    final FieldType SINGLE_F = new FieldType();
    SINGLE_F.setIndexed(true);
    SINGLE_F.setStored(true);
    final FieldType MULTI_F = new FieldType();
    MULTI_F.setIndexed(true);
    MULTI_F.setStored(true);
    final FieldType SEARCH_F = new FieldType();
    SEARCH_F.setIndexed(true);
    final FieldType LONG_F = new FieldType();
    LONG_F.setIndexed(true);
    LONG_F.setStored(true);
    LONG_F.setNumericType(FieldType.NumericType.LONG);
    final FieldType DOUBLE_F = new FieldType();
    DOUBLE_F.setIndexed(true);
    DOUBLE_F.setStored(true);
    DOUBLE_F.setNumericType(FieldType.NumericType.DOUBLE);

    IndexWriter indexWriter = new IndexWriter(MMapDirectory.open(INDEX),
            new IndexWriterConfig(LUCENE_VERSION, analyzer));
    for (int docID = 0; docID < documents; docID++) {
        Document document = new Document();
        document.add(new Field(ID, Integer.toString(docID), SINGLE_F));
        document.add(new Field(SEARCH, SEARCH_CONTENT + "_" + docID, SEARCH_F));
        if (random.nextInt(5) > 0) {
            document.add(new Field(SINGLE, SINGLE_CONTENT + "_r" + random.nextInt(), SINGLE_F));
        }
        if (random.nextInt(5) > 0) {
            document.add(new Field(MULTI, MULTI_CONTENT_1 + "_" + docID, MULTI_F));
            if (random.nextInt(3) > 0) {
                document.add(new Field(MULTI, MULTI_CONTENT_2 + "_random" + random.nextInt(5), MULTI_F));
            }
        }
        if (random.nextInt(5) > 0) {
            document.add(new LongField(LONG, random.nextLong(), LONG_F));
        }
        if (random.nextInt(5) > 0) {
            document.add(new DoubleField(DOUBLE, random.nextDouble(), DOUBLE_F));
        }
        indexWriter.addDocument(document);
        if (docID == documents / 3) {
            indexWriter.commit(); // Ensure multi-segment
        }
    }
    indexWriter.commit();
    indexWriter.close();
    return INDEX;
}
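The intermediate commit a third of the way through is deliberate: it flushes a segment early so the resulting test index contains more than one segment, exercising multi-segment code paths in the reader under test.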
From source file:dk.statsbiblioteket.netark.dvenabler.DVReaderTest.java
License:Apache License
public static File generateIndex() throws IOException {
    final File INDEX = new File("target/testindex.deletefreely");
    Analyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);

    final FieldType SINGLE_F = new FieldType();
    SINGLE_F.setIndexed(true);
    SINGLE_F.setStored(true);
    final FieldType MULTI_F = new FieldType();
    MULTI_F.setIndexed(true);
    MULTI_F.setStored(true);
    final FieldType SEARCH_F = new FieldType();
    SEARCH_F.setIndexed(true);
    final FieldType LONG_F = new FieldType();
    LONG_F.setIndexed(true);
    LONG_F.setStored(true);
    LONG_F.setNumericType(FieldType.NumericType.LONG);
    /*
    final FieldType DOUBLE_F = new FieldType();
    DOUBLE_F.setIndexed(true);
    DOUBLE_F.setStored(true);
    DOUBLE_F.setNumericType(FieldType.NumericType.DOUBLE);
    final FieldType FLOAT_F = new FieldType();
    FLOAT_F.setIndexed(true);
    FLOAT_F.setStored(true);
    FLOAT_F.setNumericType(FieldType.NumericType.FLOAT);
    */
    /*
    final FieldType STR_DV = new FieldType();
    STR_DV.setIndexed(true);
    STR_DV.setStored(true);
    STR_DV.setDocValueType(FieldInfo.DocValuesType.SORTED);
    */

    IndexWriter indexWriter = new IndexWriter(MMapDirectory.open(INDEX),
            new IndexWriterConfig(LUCENE_VERSION, analyzer));
    {
        Document document = new Document();
        document.add(new Field(ID, "1", MULTI_F));
        document.add(new Field(SEARCH, SEARCH_CONTENT, SEARCH_F));
        document.add(new Field(SINGLE, SINGLE_CONTENT, MULTI_F));
        document.add(new Field(MULTI, MULTI_CONTENT_1, MULTI_F));
        document.add(new Field(MULTI, MULTI_CONTENT_2, MULTI_F));
        document.add(new LongField(LONG, LONG_CONTENT, LONG_F));
        // document.add(new DoubleField(DOUBLE, DOUBLE_CONTENT, DOUBLE_F));
        // document.add(new FloatField(FLOAT, FLOAT_CONTENT, FLOAT_F));
        document.add(new SortedDocValuesField(DV, new BytesRef(DV_CONTENT)));
        indexWriter.addDocument(document);
    }
    indexWriter.commit();
    indexWriter.close();
    return INDEX;
}
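The SortedDocValuesField in this document is the detail the surrounding test cares about: doc values are stored in a column-stride format used for sorting and faceting, independent of whether the same field is also indexed or stored.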
From source file:Dl4j.Doc2VecWithAutoEncoder.java
public static void main(String[] args) throws FileNotFoundException, IOException {
    if (args.length < 1) {
        args = new String[1];
        args[0] = "/home/procheta/NetBeansProjects/Dl4jTest/src/dl4jtest/init.properties";
    }
    String[] docs = {
        "The cat sat on the mat",
        "The dog sat on the mat",
        "The chicken ate the corn",
        "The corn was sweet",
        "The milk was sweet",
        "The dog sat on the mat",
        "The cat drank the milk",
        "The dog ate the bone"
    };
    try {
        Properties prop = new Properties();
        prop.load(new FileReader(args[0]));
        LuceneDocFetcher luceneDocFetcher;

        // test loading a simple collection of docs...
        // Create in-memory index
        RAMDirectory ramdir = new RAMDirectory();
        IndexWriterConfig iwcfg = new IndexWriterConfig(new EnglishAnalyzer());
        iwcfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        IndexWriter writer = new IndexWriter(ramdir, iwcfg);
        for (String doc : docs) {
            try {
                Document lDoc = new Document();
                lDoc.add(new Field(LuceneDocFetcher.CONTENET_FIELD_NAME, doc,
                        Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
                writer.addDocument(lDoc);
            } catch (Exception e) {
            }
        }
        writer.close();

        Path path = Paths.get(prop.getProperty("index"));
        Directory dir = FSDirectory.open(path);
        Doc2VecWithAutoEncoder dva = new Doc2VecWithAutoEncoder();
        System.out.println(prop.getProperty("depth"));
        ArrayList<String> docIds;
        dva.getDocIds(prop.getProperty("qid"), prop.getProperty("qrel"));
        // docIds = dva.subsample(Integer.parseInt(prop.getProperty("depth")), prop.getProperty("fileList"), prop.getProperty("qid"), prop.getProperty("folderPath"));
        // dva.saveSampleDocId(docIds, prop.getProperty("sampleOutput"));

        // pass the index reader to the vectorizer
        // luceneDocFetcher = new LuceneDocFetcher(dir, dva.docIds);
        luceneDocFetcher = new LuceneDocFetcher(dir, dva.docIds, dva.labels);
        DataSetIterator iter = new BaseDatasetIterator(1, 50, luceneDocFetcher);
        while (iter.hasNext()) {
            DataSet v = iter.next();
            System.out.println(v.getFeatures());
        }

        // test auto-encoding
        final int vocabSize = luceneDocFetcher.getDimension();
        // int seed = Random.nextInt(vocabSize);
        int iterations = 2;
        int listenerFreq = iterations / 5;
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                //.seed(seed)
                .iterations(iterations)
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .list(2)
                .layer(0, new RBM.Builder().nIn(vocabSize).nOut(5)
                        .lossFunction(LossFunctions.LossFunction.RMSE_XENT).build())
                .layer(1, new RBM.Builder().nIn(5).nOut(10)
                        .lossFunction(LossFunctions.LossFunction.RMSE_XENT).build())
                //.pretrain(true)
                //.backprop(true)
                //.layer(2, new RBM.Builder().nIn(500).nOut(250).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build())
                //.layer(3, new RBM.Builder().nIn(250).nOut(100).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build())
                //.layer(4, new RBM.Builder().nIn(100).nOut(30).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build())
                /*
                // encoding stops
                .layer(5, new RBM.Builder().nIn(30).nOut(100).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build())
                // decoding starts
                .layer(6, new RBM.Builder().nIn(100).nOut(250).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build())
                .layer(7, new RBM.Builder().nIn(250).nOut(500).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build())
                .layer(8, new RBM.Builder().nIn(500).nOut(1000).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build())
                .layer(9, new OutputLayer.Builder(LossFunctions.LossFunction.RMSE_XENT).nIn(1000).nOut(vocabSize).build())
                .pretrain(true).backprop(true)
                */
                .build();

        MultiLayerNetwork model = new MultiLayerNetwork(conf);
        model.init();
        model.setListeners(Arrays.asList((IterationListener) new ScoreIterationListener(listenerFreq)));
        model.fit(iter);

        System.out.println("Output layer: ");
        iter.reset();
        while (iter.hasNext()) {
            DataSet v = iter.next();
            // System.out.println(model.output(v.getFeatures()));
        }
        //++Procheta
        iter.reset();
        dva.saveModel(iter, prop.getProperty("output"), model);
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}
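Two details of this example are worth flagging: per-document indexing failures are silently swallowed by the empty catch block inside the loop, and the in-memory RAMDirectory index built from the toy sentences is closed but never queried; the vectorizer is instead fed the on-disk index named by the "index" property.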