Example usage for org.apache.lucene.index IndexWriter addDocument

List of usage examples for org.apache.lucene.index IndexWriter addDocument

Introduction

This page collects example usages of the org.apache.lucene.index.IndexWriter method addDocument.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException 

Document

Adds a document to this index.
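
Before diving into the project examples, here is a minimal, self-contained sketch of the call. It is not taken from any of the projects below and assumes a Lucene 8+ classpath, where ByteBuffersDirectory replaces the older RAMDirectory and addDocument returns a sequence number:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class AddDocumentSketch {
    public static void main(String[] args) throws Exception {
        Directory dir = new ByteBuffersDirectory(); // in-memory; use FSDirectory.open(path) for disk
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        try (IndexWriter writer = new IndexWriter(dir, config)) {
            Document doc = new Document();
            doc.add(new StringField("id", "42", Field.Store.YES));              // exact-match key
            doc.add(new TextField("contents", "hello lucene", Field.Store.NO)); // analyzed body
            long seqNo = writer.addDocument(doc); // sequence number (Lucene 6+)
            writer.commit();
            System.out.println("added document, sequence number " + seqNo);
        }
    }
}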

Usage

From source file:com.browseengine.bobo.test.TestPathMultiVal.java

License:Apache License

@Override
protected void setUp() throws Exception {
    directory = new RAMDirectory();
    analyzer = new WhitespaceAnalyzer();
    IndexWriter writer = new IndexWriter(directory, analyzer, true, MaxFieldLength.UNLIMITED);
    Document doc = new Document();
    addMetaDataField(doc, PathHandlerName, new String[] { "/a/b/c", "/a/b/d" });
    writer.addDocument(doc);
    writer.commit();

    PathFacetHandler pathHandler = new PathFacetHandler("path", true);
    facetHandlers.add(pathHandler);
}
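
Note that this test is written against the pre-4.0 IndexWriter constructor (directory, analyzer, create flag, MaxFieldLength). On later versions the same setup goes through IndexWriterConfig; roughly, in Lucene 5+ style (a sketch, not part of the original test):

IndexWriterConfig config = new IndexWriterConfig(new WhitespaceAnalyzer())
        .setOpenMode(IndexWriterConfig.OpenMode.CREATE); // the old "true" argument meant "create"
IndexWriter writer = new IndexWriter(directory, config);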

From source file:com.bsiag.smartfield.server.services.custom.lucene.indexwriter.IndexWriterService.java

License:Open Source License

public void createZipIndex(RemoteFile file) {
    try {
        Set<String> zipCache = new HashSet<String>();

        String path = BundleContextUtility
                .resolve("${workspace_loc}/com.bsiag.smartfield.server/resources/index");
        IOUtility.deleteDirectory(path);
        Directory index = new SimpleFSDirectory(new File(path));
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_34,
                new StandardAnalyzer(Version.LUCENE_34));

        IndexWriter w = new IndexWriter(index, config);

        file.setCharsetName("UTF-8");
        BufferedReader reader = new BufferedReader(file.getDecompressedReader());
        String line;
        while ((line = reader.readLine()) != null) {
            String[] tokens = line.split("\t");
            if (tokens.length > 1) {
                String zip = tokens[0].trim();
                String city = tokens[1].trim();
                if (StringUtility.hasText(city) && StringUtility.hasText(zip)) {
                    w.addDocument(createDocoment(zip, city));
                    zipCache.add(zip + city);
                }
            }
        }

        w.close();

    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:com.bugull.mongo.lucene.backend.IndexInsertJob.java

License:Apache License

@Override
public void doJob() {
    Class<?> clazz = obj.getClass();
    String name = MapperUtil.getEntityName(clazz);
    IndexWriterHolder holder = IndexWriterHolder.getInstance();
    IndexWriter writer = holder.get(name);
    Document doc = new Document();
    IndexCreator creator = new IndexCreator(obj, "");
    creator.create(doc);
    try {
        writer.addDocument(doc);
    } catch (CorruptIndexException ex) {
        logger.error("IndexWriter can not add a document to the lucene index", ex);
    } catch (IOException ex) {
        logger.error("IndexWriter can not add a document to the lucene index", ex);
    }
}

From source file:com.bugull.mongo.lucene.backend.IndexInsertTask.java

License:Apache License

@Override
public void run() {
    Class<?> clazz = obj.getClass();
    String name = MapperUtil.getEntityName(clazz);
    IndexWriterCache cache = IndexWriterCache.getInstance();
    IndexWriter writer = cache.get(name);
    Document doc = new Document();
    IndexCreator creator = new IndexCreator(obj, "");
    creator.create(doc);
    try {
        writer.addDocument(doc);
    } catch (CorruptIndexException ex) {
        logger.error("IndexWriter can not add a document to the lucene index", ex);
    } catch (IOException ex) {
        logger.error("IndexWriter can not add a document to the lucene index", ex);
    }
}
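
These two jobs are nearly identical, and both catch CorruptIndexException and IOException separately with the same log message. Since CorruptIndexException extends IOException, a single catch (IOException ex) block would cover both; the split is only worthwhile if the two cases should log differently.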

From source file:com.burkeware.search.api.internal.lucene.DefaultIndexer.java

License:Open Source License

/**
 * Write the JSON representation of a single object as a single document entry inside the Lucene index.
 *
 * @param jsonObject the JSON object to be written to the index
 * @param resource   the configuration to transform JSON to a Lucene document
 * @param writer     the Lucene index writer
 * @throws java.io.IOException when writing the document fails
 */
private void writeObject(final Object jsonObject, final Resource resource, final IndexWriter writer)
        throws IOException {

    Document document = new Document();
    document.add(new Field(DEFAULT_FIELD_JSON, jsonObject.toString(), Field.Store.YES, Field.Index.NO));
    document.add(new Field(DEFAULT_FIELD_UUID, UUID.randomUUID().toString(), Field.Store.YES,
            Field.Index.ANALYZED_NO_NORMS));
    document.add(new Field(DEFAULT_FIELD_CLASS, resource.getResourceObject().getName(), Field.Store.YES,
            Field.Index.ANALYZED_NO_NORMS));
    document.add(new Field(DEFAULT_FIELD_RESOURCE, resource.getName(), Field.Store.YES,
            Field.Index.ANALYZED_NO_NORMS));

    for (SearchableField searchableField : resource.getSearchableFields()) {
        Object value = JsonPath.read(jsonObject, searchableField.getExpression());
        document.add(new Field(searchableField.getName(), String.valueOf(value), Field.Store.YES,
                Field.Index.ANALYZED_NO_NORMS));
    }

    if (getLogger().isDebugEnabled())
        getLogger().debug(this.getClass().getSimpleName(), "Writing document: " + document);

    writer.addDocument(document);
}

From source file:com.cep.darkstar.onramp.djnews.IndexFiles.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 *
 * NOTE: This method indexes one document per input file.  This is slow.  For good
 * throughput, put multiple documents into your input file(s).  An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *  
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {

            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with an "access denied" message
                // checking if the file can be read doesn't help
                return;
            }

            try {

                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path".  Use a
                // field that is indexed (i.e. searchable), but don't tokenize 
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new Field("path", file.getPath(), Field.Store.YES,
                        Field.Index.NOT_ANALYZED_NO_NORMS);
                pathField.setOmitTermFreqAndPositions(true);
                doc.add(pathField);

                // Add the last modified date of the file as a field named "modified".
                // Use a NumericField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter).  This indexes to milli-second resolution, which
                // is often too fine.  You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down to the resolution you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                NumericField modifiedField = new NumericField("modified");
                modifiedField.setLongValue(file.lastModified());
                doc.add(modifiedField);

                // Add the contents of the file to a field named "contents".  Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case, searching for special characters will fail.
                doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so 
                    // we use updateDocument instead to replace the old one matching the exact 
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }

            } finally {
                fis.close();
            }
        }
    }
}
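
The next example implements the same add-or-update pattern against the newer 4.x/5.x-era API: java.nio.file.Path instead of File, StringField/LongField/TextField instead of the raw Field constructors, and StandardCharsets.UTF_8 for the reader.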

From source file:com.chenyi.langeasy.lucene.IndexFiles.java

License:Apache License

/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        count++;
        if (count % 500 == 499) {
            System.out.println(count + "/" + new Date());
        }
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path". Use a
        // field that is indexed (i.e. searchable), but don't tokenize
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file as a field named "modified".
        // Use a LongField that is indexed (i.e. efficiently filterable with
        // NumericRangeFilter). This indexes to milli-second resolution, which is
        // often too fine. You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down to the resolution you
        // require. For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongField("modified", lastModified, Field.Store.NO));

        // Add the contents of the file to a field named "contents". Specify a
        // Reader, so that the text of the file is tokenized and indexed, but
        // not stored. Note that FileReader expects the file to be in UTF-8
        // encoding. If that's not the case, searching for special characters
        // will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been
            // indexed), so we use updateDocument instead to replace the old
            // one matching the exact path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}
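
If you are on Lucene 6 or later, note that LongField was removed there. The equivalent of the "modified" field above is a LongPoint for range queries, optionally paired with a doc-values field for sorting (a sketch, not part of this source file):

doc.add(new LongPoint("modified", lastModified));             // indexed for range queries
doc.add(new NumericDocValuesField("modified", lastModified)); // optional: enables sorting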

From source file:com.chimpler.example.FacetLuceneIndexer.java

License:Apache License

public static void main(String args[]) throws Exception {
    //      if (args.length != 3) {
    //         System.err.println("Parameters: [index directory] [taxonomy directory] [json file]");
    //         System.exit(1);
    //      }

    String indexDirectory = "index";
    String taxonomyDirectory = "taxonomy";
    String jsonFileName = "/home/qiuqiang/workspace/facet-lucene-example/books.json";

    IndexWriterConfig writerConfig = new IndexWriterConfig(LUCENE_VERSION,
            new WhitespaceAnalyzer(LUCENE_VERSION));
    writerConfig.setOpenMode(OpenMode.APPEND);
    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File(indexDirectory)), writerConfig);

    TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(MMapDirectory.open(new File(taxonomyDirectory)),
            OpenMode.APPEND);

    TaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(FSDirectory.open(new File(taxonomyDirectory)));

    String content = IOUtils.toString(new FileInputStream(jsonFileName));
    JSONArray bookArray = new JSONArray(content);

    Field idField = new IntField("id", 0, Store.YES);
    Field titleField = new TextField("title", "", Store.YES);
    Field authorsField = new TextField("authors", "", Store.YES);
    Field bookCategoryField = new TextField("book_category", "", Store.YES);

    indexWriter.deleteAll();

    FacetFields facetFields = new FacetFields(taxonomyWriter);

    for (int i = 0; i < bookArray.length(); i++) {
        Document document = new Document();

        JSONObject book = bookArray.getJSONObject(i);
        int id = book.getInt("id");
        String title = book.getString("title");
        String bookCategory = book.getString("book_category");

        List<CategoryPath> categoryPaths = new ArrayList<CategoryPath>();

        String authorsString = "";
        JSONArray authors = book.getJSONArray("authors");
        for (int j = 0; j < authors.length(); j++) {
            String author = authors.getString(j);
            if (j > 0) {
                authorsString += ", ";
            }
            categoryPaths.add(new CategoryPath("author", author));
            authorsString += author;
        }
        categoryPaths.add(new CategoryPath("book_category" + bookCategory, '/'));

        idField.setIntValue(id);
        titleField.setStringValue(title);
        authorsField.setStringValue(authorsString);
        bookCategoryField.setStringValue(bookCategory);

        facetFields.addFields(document, categoryPaths);

        document.add(idField);
        document.add(titleField);
        document.add(authorsField);
        document.add(bookCategoryField);

        indexWriter.addDocument(document);

        System.out.printf("Book: id=%d, title=%s, book_category=%s, authors=%s\n", id, title, bookCategory,
                authors);
    }

    taxonomyWriter.prepareCommit();
    try {
        taxonomyWriter.commit();
    } catch (Exception e) {
        taxonomyWriter.rollback();
    }

    //      taxonomyWriter.close();
    //      
    //      indexWriter.commit();
    //      indexWriter.close();

    String query = "story";

    IndexReader indexReader = DirectoryReader.open(indexWriter, false);
    IndexReader indexReader2 = DirectoryReader.open(indexWriter, false);
    System.out.println(indexReader == indexReader2);

    IndexSearcher indexSearcher = new IndexSearcher(indexReader);

    TaxonomyReader newTaxonomyReader = DirectoryTaxonomyReader.openIfChanged(taxonomyReader);
    if (newTaxonomyReader != null) {
        TaxonomyReader tmp = taxonomyReader;
        taxonomyReader = newTaxonomyReader;
        tmp.close();
    } else {
        System.out.println("null");
    }

    ArrayList<FacetRequest> facetRequests = new ArrayList<FacetRequest>();
    facetRequests.add(new CountFacetRequest(new CategoryPath("author"), 100));
    facetRequests.add(new CountFacetRequest(new CategoryPath("book_category"), 100));

    FacetSearchParams searchParams = new FacetSearchParams(facetRequests);

    ComplexPhraseQueryParser queryParser = new ComplexPhraseQueryParser(LUCENE_VERSION, "title",
            new StandardAnalyzer(LUCENE_VERSION));
    Query luceneQuery = queryParser.parse(query);

    // Collectors to get top results and facets
    TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, true);
    FacetsCollector facetsCollector = FacetsCollector.create(searchParams, indexReader, taxonomyReader);
    indexSearcher.search(luceneQuery, MultiCollector.wrap(topScoreDocCollector, facetsCollector));
    System.out.println("Found:");

    for (ScoreDoc scoreDoc : topScoreDocCollector.topDocs().scoreDocs) {
        Document document = indexReader.document(scoreDoc.doc);
        System.out.printf("- book: id=%s, title=%s, book_category=%s, authors=%s, score=%f\n",
                document.get("id"), document.get("title"), document.get("book_category"),
                document.get("authors"), scoreDoc.score);
    }

    System.out.println("Facets:");
    for (FacetResult facetResult : facetsCollector.getFacetResults()) {
        System.out.println("- " + facetResult.getFacetResultNode().label);
        for (FacetResultNode facetResultNode : facetResult.getFacetResultNode().subResults) {
            System.out.printf("    - %s (%f)\n", facetResultNode.label.toString(), facetResultNode.value);
            for (FacetResultNode subFacetResultNode : facetResultNode.subResults) {
                System.out.printf("        - %s (%f)\n", subFacetResultNode.label.toString(),
                        subFacetResultNode.value);
            }
        }
    }
    taxonomyReader.close();
    indexReader.close();

    taxonomyWriter.commit();
    taxonomyWriter.close();

    indexWriter.commit();
    indexWriter.close();

}
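
Two details in this example are easy to miss. First, DirectoryReader.open(indexWriter, false) opens a near-real-time reader over the writer's uncommitted state, and each call returns a distinct reader instance, which is why the indexReader == indexReader2 check prints false. Second, the prepareCommit()/commit() pair on the taxonomy writer is Lucene's two-phase commit, with rollback() as the recovery path if the second phase fails.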

From source file:com.codecrate.shard.search.ObjectIndexer.java

License:Apache License

public void save(Serializable id, Object entity) {
    removeDocuments(id);

    IndexWriter writer = null;
    try {
        writer = new IndexWriter(directory, analyzer, DO_NOT_CREATE_INDEX);
        Document document = new Document();
        document.add(Field.Keyword(HibernateObjectSearcher.FIELD_CLASS, entity.getClass().getName()));
        document.add(Field.Keyword(HibernateObjectSearcher.FIELD_ID, id.toString()));
        document.add(Field.Text(HibernateObjectSearcher.FIELD_TEXT, entity.toString()));

        LOG.debug("saving " + document);
        writer.addDocument(document);
    } catch (IOException e) {
        LOG.error("Error updating index for object " + entity, e);
    } finally {
        closeWriter(writer);
    }
}
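
Field.Keyword and Field.Text are factory methods from the earliest Lucene releases: Keyword meant stored, indexed, and not tokenized; Text(String, String) meant stored, indexed, and tokenized. A rough Lucene 4+ equivalent of the three calls above, keeping the original field names (a sketch, not part of this source):

document.add(new StringField(HibernateObjectSearcher.FIELD_CLASS, entity.getClass().getName(), Field.Store.YES));
document.add(new StringField(HibernateObjectSearcher.FIELD_ID, id.toString(), Field.Store.YES));
document.add(new TextField(HibernateObjectSearcher.FIELD_TEXT, entity.toString(), Field.Store.YES));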

From source file:com.codenvy.test.lucene.DeleteFilesWithSameName.java

License:Open Source License

private static void indexDocs(IndexWriter writer, Path file) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        Document doc = new Document();

        System.out.println("file path " + file.toAbsolutePath().toString());
        Field pathField = new StringField(PATH, file.toAbsolutePath().toString(), Field.Store.YES);
        doc.add(pathField);

        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {

            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            System.out.println("updating " + file);
            // use the absolute path so the term matches the stored PATH field
            writer.updateDocument(new Term(PATH, file.toAbsolutePath().toString()), doc);
        }
    }
}
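
A pattern worth taking away from several of these examples: addDocument appends unconditionally, while updateDocument(term, doc) atomically deletes any existing documents matching the term and then adds the new one. Keying the term on a unique field (the "path" field here) is what makes re-indexing idempotent.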