Example usage for org.apache.lucene.index IndexWriter addDocument

Introduction

This page collects example usages of org.apache.lucene.index IndexWriter addDocument.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException 

Document

Adds a document to this index.
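For orientation, here is a minimal, self-contained sketch of a typical call. It assumes a Lucene 6+ style API, matching the long return type in the prototype above (earlier releases, used by several examples below, declared addDocument as void); the index path and field values are illustrative only.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class AddDocumentSketch {
    public static void main(String[] args) throws Exception {
        // Open (or create) an index in a local directory; the path is hypothetical
        Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));

        // Build a document and add it; the returned long is the operation's sequence number
        Document doc = new Document();
        doc.add(new TextField("title", "Hello Lucene", Field.Store.YES));
        long seqNo = writer.addDocument(doc);

        writer.close();
        System.out.println("Added document with sequence number " + seqNo);
    }
}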

Usage

From source file:Demo2.MyServlet.java

private static void addDoc(IndexWriter w, String Name, String age, String Gender, String Race)
        throws IOException {
    Document doc = new Document();
    // A text field will be tokenized
    doc.add(new TextField("Classes", Name, Field.Store.YES));
    // We use string fields for the other values because we don't want them tokenized
    doc.add(new StringField("Number", Gender, Field.Store.YES));
    doc.add(new StringField("Time", age, Field.Store.YES));
    doc.add(new StringField("Department", Race, Field.Store.YES));
    w.addDocument(doc);
}

From source file:di.uniba.it.nlpita.index.BuildSeoDwarfIndex.java

private static void indexOntologyElement(IndexWriter writer, OntologyElementToken e,
        Collection<String> domainOf, Collection<String> rangeOf, Collection<String> extendedDomain)
        throws Exception {
    Document doc = new Document();
    doc.add(new Field("label", e.getLabel(), TextField.TYPE_NOT_STORED));
    doc.add(new IntField("id", e.getId(), IntField.TYPE_STORED));
    doc.add(new Field("type", e.getType(), StringField.TYPE_NOT_STORED));
    if (domainOf != null) {
        for (String d : domainOf) { //the first element is the URI
            doc.add(new Field("domainOfProperty", d, StringField.TYPE_NOT_STORED));
        }
    }
    if (rangeOf != null) {
        for (String r : rangeOf) { //the first element is the URI
            doc.add(new Field("rangeOfProperty", r, StringField.TYPE_NOT_STORED));
        }
    }
    if (extendedDomain != null) {
        for (String d : extendedDomain) { //the first element is the URI
            doc.add(new Field("propertyDomain", d, StringField.TYPE_NOT_STORED));
        }
    }
    writer.addDocument(doc);
}

From source file:di.uniba.it.tri.TemporalSpaceUtils.java

License:Open Source License

public static IndexReader index(VectorReader vreader) throws IOException {
    Iterator<String> keys = vreader.getKeys();
    RAMDirectory ramDir = new RAMDirectory();
    IndexWriterConfig iwconfig = new IndexWriterConfig(Version.LUCENE_36,
            new StandardAnalyzer(Version.LUCENE_36));
    IndexWriter writer = new IndexWriter(ramDir, iwconfig);
    while (keys.hasNext()) {
        String word = keys.next();
        Document doc = new Document();
        doc.add(new Field("word", word, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
        writer.addDocument(doc);
    }
    writer.close();
    return IndexReader.open(ramDir);
}

From source file:dk.dbc.opensearch.fedora.search.PidCollectorTest.java

License:Open Source License

private AtomicReader populateIndexAndGetIndexReader(Document... docs) throws IOException {
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_41, new SimpleAnalyzer(Version.LUCENE_41));
    IndexWriter indexWriter = new IndexWriter(index, config);
    for (Document doc : docs) {
        indexWriter.addDocument(doc);
    }
    indexWriter.commit();
    indexWriter.close();
    return SlowCompositeReaderWrapper.wrap(DirectoryReader.open(index));
}

From source file:dk.dma.msinm.lucene.AbstractLuceneIndex.java

License:Open Source License

/**
 * Adds the given entity to the index
 *
 * @param entity the entity to add
 */
protected void addEntityToIndex(IndexWriter writer, T entity) {
    Document doc = new Document();

    // ID field
    doc.add(new StringField(ID_FIELD, entity.getId().toString(), Field.Store.YES));

    // Add the entity specific fields
    addEntityToDocument(doc, entity);

    // Add the document to the index
    try {
        writer.addDocument(doc);
    } catch (IOException ex) {
        log.error("Error adding entity " + entity.getId() + " to the Lucene index: " + ex.getMessage(), ex);
    }
}
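Note that addEntityToDocument is the subclass hook that supplies the entity-specific fields; it is not shown in this excerpt. A hypothetical override, with an invented Message entity and getter purely for illustration:

@Override
protected void addEntityToDocument(Document doc, Message entity) {
    // Hypothetical field mapping; real subclasses define their own fields
    doc.add(new TextField("title", entity.getTitle(), Field.Store.NO));
}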

From source file:dk.dma.msinm.lucene.CommitUserDataTest.java

License:Open Source License

@Test
public void test() throws IOException {

    File indexFolder = Files.createTempDir();
    Directory directory = FSDirectory.open(indexFolder);

    // Create an index writer
    IndexWriterConfig iwc = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION,
            new StandardAnalyzer(LuceneUtils.LUCENE_VERSION));
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    IndexWriter indexWriter = new IndexWriter(directory, iwc);

    // Write a document
    Document doc = new Document();
    doc.add(new IntField("id", 100, Field.Store.YES));
    indexWriter.addDocument(doc);

    // Add user data
    Map<String, String> userData = new HashMap<>();
    userData.put("A", "B");
    indexWriter.setCommitData(userData);
    indexWriter.close();

    // Check if we can read user data
    DirectoryReader indexReader = DirectoryReader.open(FSDirectory.open(indexFolder));
    assertEquals("B", indexReader.getIndexCommit().getUserData().get("A"));

}

From source file:dk.dma.msinm.lucene.SpatialLuceneTest.java

License:Open Source License

@Test
public void testSpatialSearch() throws IOException, ParseException {

    int maxLevels = 11; // results in sub-meter precision for geohash
    SpatialPrefixTree grid = new GeohashPrefixTree(ctx, maxLevels);

    strategy = new RecursivePrefixTreeStrategy(grid, "myGeoField");
    Directory directory = new RAMDirectory();

    IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_47, null);
    IndexWriter indexWriter = new IndexWriter(directory, iwConfig);
    indexWriter.addDocument(newSampleDocument(2, ctx.makePoint(-80.93, 33.77)));
    indexWriter.addDocument(newSampleDocument(4, ctx.readShapeFromWkt("POINT(60.9289094 -50.7693246)")));
    indexWriter.addDocument(newSampleDocument(20, ctx.makePoint(0.1, 0.1), ctx.makePoint(0, 0)));
    indexWriter.addDocument(newSampleDocument(30,
            JtsSpatialContext.GEO.readShapeFromWkt("POLYGON((0 0, -90 0, -90 40, 0 40, 0 0))")));
    indexWriter.close();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    Sort idSort = new Sort(new SortField("id", SortField.Type.INT));

    // Search 1
    SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,
            ctx.makeCircle(-80.0, 33.0, DistanceUtils.dist2Degrees(200, DistanceUtils.EARTH_MEAN_RADIUS_KM)));
    TopDocs docs = indexSearcher.search(new MatchAllDocsQuery(), strategy.makeFilter(args), 10, idSort);
    assertDocMatchedIds(indexSearcher, docs, 2, 30);

    // Search 2
    args = new SpatialArgs(SpatialOperation.Intersects,
            JtsSpatialContext.GEO.readShapeFromWkt("POLYGON((-10 10, -20 0, -20 20, -10 20, -10 10))"));
    docs = indexSearcher.search(new MatchAllDocsQuery(), strategy.makeFilter(args), 10, idSort);
    assertDocMatchedIds(indexSearcher, docs, 30);
}
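The newSampleDocument helper is not included in this excerpt. A plausible sketch, modeled on Lucene's own SpatialExample for the 4.x API (the exact field types in the original project may differ):

private Document newSampleDocument(int id, Shape... shapes) {
    Document doc = new Document();
    doc.add(new IntField("id", id, Field.Store.YES));
    // Each shape expands into the indexable fields defined by the spatial strategy
    for (Shape shape : shapes) {
        for (Field f : strategy.createIndexableFields(shape)) {
            doc.add(f);
        }
    }
    return doc;
}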

From source file:dk.statsbiblioteket.netark.dvenabler.DVReaderTest.java

License:Apache License

private static File generateIndex(int documents) throws IOException {
    final File INDEX = new File("target/testindex.deletefreely." + documents);
    final long seed = new Random().nextLong();
    Random random = new Random(seed);
    log.info("Testing with random seed" + seed);
    Analyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);

    final FieldType SINGLE_F = new FieldType();
    SINGLE_F.setIndexed(true);
    SINGLE_F.setStored(true);

    final FieldType MULTI_F = new FieldType();
    MULTI_F.setIndexed(true);
    MULTI_F.setStored(true);

    final FieldType SEARCH_F = new FieldType();
    SEARCH_F.setIndexed(true);

    final FieldType LONG_F = new FieldType();
    LONG_F.setIndexed(true);
    LONG_F.setStored(true);
    LONG_F.setNumericType(FieldType.NumericType.LONG);

    final FieldType DOUBLE_F = new FieldType();
    DOUBLE_F.setIndexed(true);
    DOUBLE_F.setStored(true);
    DOUBLE_F.setNumericType(FieldType.NumericType.DOUBLE);

    IndexWriter indexWriter = new IndexWriter(MMapDirectory.open(INDEX),
            new IndexWriterConfig(LUCENE_VERSION, analyzer));
    for (int docID = 0; docID < documents; docID++) {
        Document document = new Document();
        document.add(new Field(ID, Integer.toString(docID), SINGLE_F));
        document.add(new Field(SEARCH, SEARCH_CONTENT + "_" + docID, SEARCH_F));
        if (random.nextInt(5) > 0) {
            document.add(new Field(SINGLE, SINGLE_CONTENT + "_r" + random.nextInt(), SINGLE_F));
        }
        if (random.nextInt(5) > 0) {
            document.add(new Field(MULTI, MULTI_CONTENT_1 + "_" + docID, MULTI_F));
            if (random.nextInt(3) > 0) {
                document.add(new Field(MULTI, MULTI_CONTENT_2 + "_random" + random.nextInt(5), MULTI_F));
            }
        }
        if (random.nextInt(5) > 0) {
            document.add(new LongField(LONG, random.nextLong(), LONG_F));
        }
        if (random.nextInt(5) > 0) {
            document.add(new DoubleField(DOUBLE, random.nextDouble(), DOUBLE_F));
        }
        indexWriter.addDocument(document);
        if (docID == documents / 3) {
            indexWriter.commit(); // Ensure multi-segment
        }
    }
    indexWriter.commit();
    indexWriter.close();
    return INDEX;
}

From source file:dk.statsbiblioteket.netark.dvenabler.DVReaderTest.java

License:Apache License

public static File generateIndex() throws IOException {
    final File INDEX = new File("target/testindex.deletefreely");
    Analyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);

    final FieldType SINGLE_F = new FieldType();
    SINGLE_F.setIndexed(true);
    SINGLE_F.setStored(true);

    final FieldType MULTI_F = new FieldType();
    MULTI_F.setIndexed(true);
    MULTI_F.setStored(true);

    final FieldType SEARCH_F = new FieldType();
    SEARCH_F.setIndexed(true);

    final FieldType LONG_F = new FieldType();
    LONG_F.setIndexed(true);
    LONG_F.setStored(true);
    LONG_F.setNumericType(FieldType.NumericType.LONG);

    /*        final FieldType DOUBLE_F = new FieldType();
            DOUBLE_F.setIndexed(true);
            DOUBLE_F.setStored(true);
            DOUBLE_F.setNumericType(FieldType.NumericType.DOUBLE);
            
            final FieldType FLOAT_F = new FieldType();
            FLOAT_F.setIndexed(true);
            FLOAT_F.setStored(true);
            FLOAT_F.setNumericType(FieldType.NumericType.FLOAT);
      */

    /*        final FieldType STR_DV = new FieldType();
            STR_DV.setIndexed(true);
            STR_DV.setStored(true);
            STR_DV.setDocValueType(FieldInfo.DocValuesType.SORTED);*/

    IndexWriter indexWriter = new IndexWriter(MMapDirectory.open(INDEX),
            new IndexWriterConfig(LUCENE_VERSION, analyzer));
    {
        Document document = new Document();
        document.add(new Field(ID, "1", MULTI_F));
        document.add(new Field(SEARCH, SEARCH_CONTENT, SEARCH_F));
        document.add(new Field(SINGLE, SINGLE_CONTENT, MULTI_F));
        document.add(new Field(MULTI, MULTI_CONTENT_1, MULTI_F));
        document.add(new Field(MULTI, MULTI_CONTENT_2, MULTI_F));
        document.add(new LongField(LONG, LONG_CONTENT, LONG_F));
        //            document.add(new DoubleField(DOUBLE, DOUBLE_CONTENT, DOUBLE_F));
        //            document.add(new FloatField(FLOAT, FLOAT_CONTENT, FLOAT_F));
        document.add(new SortedDocValuesField(DV, new BytesRef(DV_CONTENT)));
        indexWriter.addDocument(document);
    }
    indexWriter.commit();
    indexWriter.close();
    return INDEX;
}

From source file:Dl4j.Doc2VecWithAutoEncoder.java

public static void main(String[] args) throws FileNotFoundException, IOException {

    if (args.length < 1) {
        args = new String[1];
        args[0] = "/home/procheta/NetBeansProjects/Dl4jTest/src/dl4jtest/init.properties";
    }
    String[] docs = { "The cat sat on the mat", "The dog sat on the mat", "The chicken ate the corn",
            "The corn was sweet", "The milk was sweet", "The dog sat on the mat", "The cat drank the milk",
            "The dog ate the bone" };

    try {
        Properties prop = new Properties();
        prop.load(new FileReader(args[0]));
        LuceneDocFetcher luceneDocFetcher;

        // test loading a simple collection of docs...
        // Create in-memory index
        RAMDirectory ramdir = new RAMDirectory();

        IndexWriterConfig iwcfg = new IndexWriterConfig(new EnglishAnalyzer());
        iwcfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        IndexWriter writer = new IndexWriter(ramdir, iwcfg);
        for (String doc : docs) {
            try {
                Document lDoc = new Document();
                lDoc.add(new Field(LuceneDocFetcher.CONTENET_FIELD_NAME, doc, Field.Store.NO,
                        Field.Index.ANALYZED, Field.TermVector.YES));
                writer.addDocument(lDoc);
            } catch (Exception e) {
                // NOTE: indexing failures are silently swallowed here
            }
        }
        writer.close();
        Path path = Paths.get(prop.getProperty("index"));
        Directory dir = FSDirectory.open(path);

        Doc2VecWithAutoEncoder dva = new Doc2VecWithAutoEncoder();
        System.out.println(prop.getProperty("depth"));
        ArrayList<String> docIds;
        dva.getDocIds(prop.getProperty("qid"), prop.getProperty("qrel"));
        //   docIds = dva.subsample(Integer.parseInt(prop.getProperty("depth")), prop.getProperty("fileList"), prop.getProperty("qid"), prop.getProperty("folderPath"));
        //  dva.saveSampleDocId(docIds, prop.getProperty("sampleOutput"));
        // pass the in-mem index reader to the vectorizer
        //  luceneDocFetcher = new LuceneDocFetcher(dir, dva.docIds);
        luceneDocFetcher = new LuceneDocFetcher(dir, dva.docIds, dva.labels);

        DataSetIterator iter = new BaseDatasetIterator(1, 50, luceneDocFetcher);
        while (iter.hasNext()) {
            DataSet v = iter.next();

            System.out.println(v.getFeatures());
        }

        // test auto-encoding
        final int vocabSize = luceneDocFetcher.getDimension();
        //int seed = Random.nextInt(vocabSize);
        int iterations = 2;
        int listenerFreq = iterations / 5;

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                //.seed(seed)
                .iterations(iterations).optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .list(2)
                .layer(0,
                        new RBM.Builder().nIn(vocabSize).nOut(5)
                                .lossFunction(LossFunctions.LossFunction.RMSE_XENT).build())
                .layer(1,
                        new RBM.Builder().nIn(5).nOut(10).lossFunction(LossFunctions.LossFunction.RMSE_XENT)
                                .build())
                //.pretrain(true)
                //.backprop(true)

                //.layer(2, new RBM.Builder().nIn(500).nOut(250).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build())
                //.layer(3, new RBM.Builder().nIn(250).nOut(100).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build())
                //.layer(4, new RBM.Builder().nIn(100).nOut(30).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) 

                /*
                 //encoding stops
                 .layer(5, new RBM.Builder().nIn(30).nOut(100).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build())    
                        
                 //decoding starts
                 .layer(6, new RBM.Builder().nIn(100).nOut(250).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build())
                 .layer(7, new RBM.Builder().nIn(250).nOut(500).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build())
                 .layer(8, new RBM.Builder().nIn(500).nOut(1000).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build())
                 .layer(9, new OutputLayer.Builder(LossFunctions.LossFunction.RMSE_XENT).nIn(1000).nOut(vocabSize).build())
                 .pretrain(true).backprop(true)
                 */
                .build();

        MultiLayerNetwork model = new MultiLayerNetwork(conf);
        model.init();

        model.setListeners(Arrays.asList((IterationListener) new ScoreIterationListener(listenerFreq)));
        model.fit(iter);

        System.out.println("Output layer: ");
        iter.reset();
        while (iter.hasNext()) {
            DataSet v = iter.next();

            // System.out.println(model.output(v.getFeatures()));
        }
        //++Procheta
        iter.reset();
        dva.saveModel(iter, prop.getProperty("output"), model);
    } catch (Exception ex) {
        ex.printStackTrace();
    }

}