Example usage for org.apache.lucene.document Document Document

List of usage examples for org.apache.lucene.document Document Document

Introduction

On this page you can find example usage for org.apache.lucene.document Document Document.

Prototype

public Document() 

Source Link

Document

Constructs a new document with no fields.

Usage

From source file:com.aliasi.lingmed.medline.IndexMedline.java

License:Lingpipe license

/**
 * Records that a MEDLINE distribution file has been processed by adding a
 * small marker document (distribution tag + file name) to the index.
 *
 * @param indexWriter writer for the target index
 * @param fileName name of the distribution file being recorded
 * @throws IOException if the document cannot be added to the index
 */
private void recordFile(IndexWriter indexWriter, String fileName) throws IOException {
    Document doc = new Document();
    // Both fields are stored and indexed as single untokenized terms
    // (no norms) so they behave as exact-match keywords.
    doc.add(new Field(Fields.MEDLINE_DIST_FIELD, Fields.MEDLINE_DIST_VALUE, Field.Store.YES,
            Field.Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new Field(Fields.MEDLINE_FILE_FIELD, fileName, Field.Store.YES,
            Field.Index.NOT_ANALYZED_NO_NORMS));
    indexWriter.addDocument(doc);
}

From source file:com.aliasi.lingmed.medline.MedlineCodec.java

License:Lingpipe license

/**
 * Converts a MEDLINE citation into a Lucene document consisting of the
 * PubMed id (as an exact-match keyword) and the compressed raw XML.
 *
 * @param citation citation to convert
 * @return Lucene document holding the citation's id and compressed XML
 */
public Document toDocument(MedlineCitation citation) {
    Document doc = new Document();

    // PubMed id: stored and indexed untokenized, no norms, so it can be
    // looked up as an exact keyword.
    doc.add(new Field(Fields.ID_FIELD, citation.pmid(), Field.Store.YES,
            Field.Index.NOT_ANALYZED_NO_NORMS));

    // Raw XML: compressed into a binary field; binary fields are stored
    // only, never indexed.
    byte[] compressedXml = CompressionTools.compressString(citation.xmlString());
    doc.add(new Field(Fields.XML_FIELD, compressedXml, Field.Store.YES));

    return doc;
}

From source file:com.aliasi.lingmed.medline.SearchableMedlineCodec.java

License:Lingpipe license

/**
 * Smoke test: indexes one document with two MESH_MINOR_FIELD values into an
 * in-memory index, runs a query against the second value, and dumps the
 * token stream the analyzer produces for a sample string.
 *
 * <p>Fixes over the original: the IndexWriter is closed even if indexing
 * throws, the IndexReader/IndexSearcher are closed after searching, and the
 * TokenStream is consumed per the Lucene contract
 * (reset() before incrementToken(), then end() and close()).
 *
 * @param args unused
 * @throws Exception on any indexing, parsing, or analysis failure
 */
public static void main(String[] args) throws Exception {
    org.apache.lucene.store.RAMDirectory directory = new org.apache.lucene.store.RAMDirectory();

    MedlineCodec codec = new MedlineCodec();
    Analyzer analyzer = codec.getAnalyzer();

    org.apache.lucene.index.IndexWriterConfig iwConf = new org.apache.lucene.index.IndexWriterConfig(
            org.apache.lucene.util.Version.LUCENE_36, analyzer);
    iwConf.setOpenMode(org.apache.lucene.index.IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    org.apache.lucene.index.IndexWriter indexWriter = new org.apache.lucene.index.IndexWriter(directory,
            iwConf);
    try {
        Document doc = new Document();
        doc.add(new Field(Fields.MESH_MINOR_FIELD, "abc", Field.Store.NO, Field.Index.ANALYZED));
        doc.add(new Field(Fields.MESH_MINOR_FIELD, " xyz efg", Field.Store.NO, Field.Index.ANALYZED));
        indexWriter.addDocument(doc);
    } finally {
        // Original leaked the writer if addDocument threw.
        indexWriter.close();
    }

    org.apache.lucene.index.IndexReader reader = org.apache.lucene.index.IndexReader.open(directory);
    org.apache.lucene.search.IndexSearcher searcher = new org.apache.lucene.search.IndexSearcher(reader);
    try {
        org.apache.lucene.queryParser.QueryParser qp = new org.apache.lucene.queryParser.QueryParser(
                org.apache.lucene.util.Version.LUCENE_36, "foo", analyzer);
        org.apache.lucene.search.Query query = qp.parse(Fields.MESH_MINOR_FIELD + ":efg");

        org.apache.lucene.search.TopDocs hits = searcher.search(query, 1000);
        System.out.println("hits.length()=" + hits.scoreDocs.length);
    } finally {
        // Original never closed the searcher or reader.
        searcher.close();
        reader.close();
    }

    org.apache.lucene.analysis.TokenStream ts = analyzer.tokenStream(Fields.MESH_MINOR_FIELD,
            new java.io.StringReader("abc xyz efg"));
    org.apache.lucene.analysis.tokenattributes.CharTermAttribute terms = ts
            .addAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute.class);
    org.apache.lucene.analysis.tokenattributes.OffsetAttribute offsets = ts
            .addAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute.class);
    org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute positions = ts
            .addAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute.class);

    // TokenStream contract: reset() before consuming, end()/close() after.
    ts.reset();
    while (ts.incrementToken()) {
        int increment = positions.getPositionIncrement();
        int start = offsets.startOffset();
        int end = offsets.endOffset();
        String term = terms.toString();
        System.out.println("token=|" + term + "|" + " startOffset=" + start + " endOffset=" + end
                + " positionIncr=" + increment);
    }
    ts.end();
    ts.close();
}

From source file:com.aliasi.lingmed.omim.OmimCodec.java

License:Lingpipe license

/**
 * Converts an OMIM record into a Lucene document holding the tokenized MIM
 * id and the record's raw text (stored compressed, not indexed).
 *
 * @param rec OMIM record to convert
 * @return Lucene document for the record
 */
public Document toDocument(OmimRecord rec) {
    Document doc = new Document();
    // MIM id: stored and tokenized so it participates in full-text queries.
    String mimId = String.valueOf(rec.getMimId());
    doc.add(new Field(Fields.ID_FIELD, mimId, Field.Store.YES, Field.Index.TOKENIZED));
    // Raw text: stored compressed for retrieval only; not searchable.
    doc.add(new Field(Fields.RAW_TEXT_FIELD, rec.getRawText(), Field.Store.COMPRESS,
            Field.Index.NO));
    return doc;
}

From source file:com.andreig.jetty.WriteServlet.java

License:GNU General Public License

@Override
/**
 * Handles an update request: the request body carries two JSON lines — the
 * query on the first line and the update object on the second — which are
 * applied to the target collection via {@code col.update}. When search
 * indexing is enabled, matching documents' searchable fields are mirrored
 * into the search index first.
 *
 * <p>Bug fix: the original tested {@code wr.getError() == null} when
 * deciding to report SC_BAD_REQUEST, i.e. it flagged <em>successful</em>
 * writes as bad requests and let failed writes fall through to SC_CREATED.
 * The condition is now {@code != null}.
 *
 * @param req servlet request; params: dbname, colname, upsert, multi, user, passwd
 * @param res servlet response; status SC_CREATED on success
 * @throws ServletException on servlet-level failure
 * @throws IOException on I/O failure reading the request or writing the response
 */
@Override
protected void doPost(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException {

    log.fine("doPost()");

    if (!can_write(req)) {
        res.sendError(SC_UNAUTHORIZED);
        return;
    }

    InputStream is = req.getInputStream();
    // Target db/collection: explicit params first, then names derived from
    // the request; missing names are a client error.
    String db_name = req.getParameter("dbname");
    String col_name = req.getParameter("colname");
    if (db_name == null || col_name == null) {
        String names[] = req2mongonames(req);
        if (names != null) {
            db_name = names[0];
            col_name = names[1];
        }
        if (db_name == null || col_name == null) {
            error(res, SC_BAD_REQUEST, Status.get("param name missing"));
            return;
        }
    }

    boolean upsert = Boolean.parseBoolean(req.getParameter("upsert"));
    boolean multi = Boolean.parseBoolean(req.getParameter("multi"));

    DB db = mongo.getDB(db_name);

    // Optional mongo auth; only attempted when credentials are supplied and
    // the connection is not already authenticated.
    String user = req.getParameter("user");
    String passwd = req.getParameter("passwd");
    if (user != null && passwd != null && (!db.isAuthenticated())) {
        boolean auth = db.authenticate(user, passwd.toCharArray());
        if (!auth) {
            res.sendError(SC_UNAUTHORIZED);
            return;
        }
    }

    DBCollection col = db.getCollection(col_name);

    // Body format: line 1 = query JSON, line 2 = update-object JSON.
    BufferedReader r = null;
    DBObject q = null, o = null;
    try {

        r = new BufferedReader(new InputStreamReader(is));
        String q_s = r.readLine();
        if (q_s == null) {
            error(res, SC_BAD_REQUEST, Status.get("no data"));
            return;
        }
        String o_s = r.readLine();
        if (o_s == null) {
            error(res, SC_BAD_REQUEST, Status.get("obj to update missing"));
            return;
        }
        try {
            q = (DBObject) JSON.parse(q_s);
            o = (DBObject) JSON.parse(o_s);
        } catch (JSONParseException e) {
            error(res, SC_BAD_REQUEST, Status.get("can not parse data"));
            return;
        }

    } finally {
        if (r != null)
            r.close();
    }

    // Mirror the update into the search index for collections that have
    // configured searchable fields.
    if (do_search) {

        String fn = col.getFullName();
        DBCursor c = col.find(q);
        int cnt = c.count();
        if (!multi)
            c.limit(1);
        long l = multi ? cnt : 1;
        String toupdate[] = new String[(int) l];
        int n = 0;
        boolean insert = false;

        // An upsert that matched nothing will insert a new document.
        if (upsert && !multi && cnt == 0)
            insert = true;

        // Collect the ids of the documents the update will touch.
        while (c.hasNext()) {

            DBObject _o = c.next();
            ObjectId oid = (ObjectId) _o.get("_id");
            String id = oid.toStringMongod();
            toupdate[n++] = id;

        }
        c.close();

        List<String> flds = Config.search_index_fields.get(fn);
        boolean commit = false;
        Document doc = null;
        Search _writer = search.get_writer();
        if (flds != null && flds.size() > 0) {
            doc = new Document();
            try {
                for (String fld : flds) {
                    String val = (String) o.get(fld);
                    if (val == null)
                        continue;
                    Search.add_searchable_s(doc, fld, val);
                    commit = true;
                }
                if (commit)
                    _writer.commit(doc);
            } catch (ClassCastException e) {
                error(res, SC_BAD_REQUEST, Status.get("searchable fields must be type String"));
                return;
            } catch (CorruptIndexException e) {
                error(res, SC_BAD_REQUEST, Status.get("Search corrupt index" + e));
                return;
            }
        }
        if (commit && insert)
            log.warning("upsert with search not implemented yet");
        else
            _writer.update(toupdate, doc);

    }

    WriteResult wr = col.update(q, o, upsert, multi, write_concern);

    // Return operation status; a mongo-reported error maps to 400.
    if (do_return) {
        out_str(req, wr.toString());
        // FIX: was "== null", which reported errors on successful writes.
        if (wr.getError() != null) {
            res.setStatus(SC_BAD_REQUEST);
            return;
        }
    }

    res.setStatus(SC_CREATED);

}

From source file:com.andreig.jetty.WriteServlet.java

License:GNU General Public License

@Override
/**
 * Handles an insert request: each line of the request body is a JSON
 * document, all of which are inserted into the target collection in one
 * batch. When search indexing is enabled, each inserted document's
 * searchable fields are added to the search index along with its id.
 *
 * <p>Bug fix: the original tested {@code wr.getError() == null} when
 * deciding to report SC_BAD_REQUEST, i.e. it flagged <em>successful</em>
 * inserts as bad requests and let failed inserts fall through to
 * SC_CREATED. The condition is now {@code != null}. Also removes a
 * redundant null check inside the read loop (the loop condition already
 * guarantees the line is non-null).
 *
 * @param req servlet request; params: dbname, colname, user, passwd
 * @param res servlet response; status SC_CREATED on success
 * @throws ServletException on servlet-level failure
 * @throws IOException on I/O failure reading the request or writing the response
 */
@Override
protected void doPut(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException {

    log.fine("doPut()");

    if (!can_write(req)) {
        res.sendError(SC_UNAUTHORIZED);
        return;
    }

    InputStream is = req.getInputStream();
    // Target db/collection: explicit params first, then names derived from
    // the request; missing names are a client error.
    String db_name = req.getParameter("dbname");
    String col_name = req.getParameter("colname");
    if (db_name == null || col_name == null) {
        String names[] = req2mongonames(req);
        if (names != null) {
            db_name = names[0];
            col_name = names[1];
        }
        if (db_name == null || col_name == null) {
            error(res, SC_BAD_REQUEST, Status.get("param name missing"));
            return;
        }
    }
    DB db = mongo.getDB(db_name);

    // Optional mongo auth; only attempted when credentials are supplied and
    // the connection is not already authenticated.
    String user = req.getParameter("user");
    String passwd = req.getParameter("passwd");
    if (user != null && passwd != null && (!db.isAuthenticated())) {
        boolean auth = db.authenticate(user, passwd.toCharArray());
        if (!auth) {
            res.sendError(SC_UNAUTHORIZED);
            return;
        }
    }

    DBCollection col = db.getCollection(col_name);

    // Parse one JSON document per body line.
    BufferedReader r = null;
    ArrayList<DBObject> ar = new ArrayList<DBObject>();
    try {

        r = new BufferedReader(new InputStreamReader(is));
        String data;
        while ((data = r.readLine()) != null) {
            try {
                ar.add((DBObject) JSON.parse(data));
            } catch (JSONParseException e) {
                error(res, SC_BAD_REQUEST, Status.get("can not parse data"));
                return;
            }
        }

    } finally {
        if (r != null)
            r.close();
    }

    if (ar.size() == 0) {
        error(res, SC_BAD_REQUEST, Status.get("can not parse data"));
        return;
    }

    WriteResult wr = col.insert(ar, write_concern);

    // Mirror inserted documents into the search index for collections that
    // have configured searchable fields.
    if (do_search) {
        String fn = col.getFullName();
        List<String> flds = Config.search_index_fields.get(fn);
        if (flds != null && flds.size() > 0) {
            Search _writer = search.get_writer();
            try {
                for (DBObject o : ar) {
                    boolean commit = false;
                    Document doc = new Document();
                    for (String fld : flds) {
                        String val = (String) o.get(fld);
                        if (val == null)
                            continue;
                        Search.add_searchable_s(doc, fld, val);
                        commit = true;
                    }
                    if (commit) {
                        // Tie the search document back to the mongo _id so
                        // later updates/deletes can find it.
                        ObjectId id = (ObjectId) o.get("_id");
                        String sid = id.toStringMongod();
                        Search.add_storable(doc, "_id", sid);
                        Search.add_searchable_s(doc, "_dbid_", fn);
                        _writer.commit(doc);
                    }
                }
            } catch (ClassCastException e) {
                error(res, SC_BAD_REQUEST, Status.get("searchable fields must be type String"));
                return;
            } catch (CorruptIndexException e) {
                error(res, SC_BAD_REQUEST, Status.get("Search corrupt index" + e));
                return;
            }
        }
    }

    // Return operation status; a mongo-reported error maps to 400.
    if (do_return) {
        out_str(req, wr.toString());
        // FIX: was "== null", which reported errors on successful inserts.
        if (wr.getError() != null) {
            res.setStatus(SC_BAD_REQUEST);
            return;
        }
    }

    res.setStatus(SC_CREATED);

}

From source file:com.aperigeek.dropvault.web.service.IndexService.java

License:Open Source License

/**
 * Indexes a resource's metadata for the given user: the id is stored as an
 * exact-match keyword and every non-null metadata value is indexed
 * (analyzed, not stored) under its key.
 *
 * <p>Fix over the original: the IndexWriter is now closed in a finally
 * block, so it is no longer leaked when {@code addDocument} throws.
 *
 * @param username user whose index to write to
 * @param password user's password (used to open the index)
 * @param id resource identifier, stored for retrieval
 * @param metadata field-name to field-value map; null values are skipped
 * @throws IndexException wrapping any underlying IOException
 */
public void index(String username, String password, String id, Map<String, String> metadata)
        throws IndexException {
    try {
        Document document = new Document();
        document.add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
        for (Map.Entry<String, String> e : metadata.entrySet()) {
            if (e.getValue() != null) {
                document.add(new Field(e.getKey(), e.getValue(), Field.Store.NO, Field.Index.ANALYZED));
            }
        }

        IndexWriter index = getIndexWriter(username, password);
        try {
            index.addDocument(document);
        } finally {
            // Ensure the writer (and its lock) is released even on failure.
            index.close();
        }
    } catch (IOException ex) {
        throw new IndexException(ex);
    }
}

From source file:com.appeligo.amazon.AmazonIndexer.java

License:Apache License

/**
 * Queues a search document for an Amazon item associated with a program.
 * The ASIN, program id, and store time are indexed as exact keywords;
 * the title is full-text searchable; URLs and image dimensions are stored
 * only. Optional fields are skipped when absent.
 *
 * @param item Amazon item to index
 * @param programId id of the program the item is associated with
 */
public void addAmazonItem(AmazonItem item, String programId) {
    Document doc = new Document();
    doc.add(new Field("asin", item.getId(), Store.YES, Index.UN_TOKENIZED));

    String title = item.getTitle();
    if (title != null) {
        doc.add(new Field("title", title, Store.YES, Index.TOKENIZED));
    }

    String detailsUrl = item.getDetailsUrl();
    if (detailsUrl != null) {
        doc.add(new Field("detailsUrl", detailsUrl, Store.YES, Index.NO));
    }

    String smallImageUrl = item.getSmallImageUrl();
    if (smallImageUrl != null) {
        doc.add(new Field("smallImageUrl", smallImageUrl, Store.YES, Index.NO));
        doc.add(new Field("smallImageWidth", String.valueOf(item.getSmallImageWidth()), Store.YES, Index.NO));
        doc.add(new Field("smallImageHeight", String.valueOf(item.getSmallImageHeight()), Store.YES, Index.NO));
    }

    doc.add(new Field("programId", programId, Store.YES, Index.UN_TOKENIZED));
    // Timestamp at minute resolution so stale entries can be expired later.
    String storeTime = DateTools.dateToString(new Date(), Resolution.MINUTE);
    doc.add(new Field("storeTime", storeTime, Store.YES, Index.UN_TOKENIZED));

    queue.addDocument(doc);
}

From source file:com.appeligo.amazon.ProgramIndexer.java

License:Apache License

/**
 * Adds a marker program so it won't query for this programId until the time expires.
 * @param programId the programId to add a marker for
 * @throws IOException/*from  ww  w. j  a  v  a  2s. c o m*/
 */
private void addMarkerProgram(String programId) throws IOException {
    Document doc = new Document();
    doc.add(new Field("type", "marker", Store.NO, Index.UN_TOKENIZED));
    doc.add(new Field("programId", programId, Store.YES, Index.UN_TOKENIZED));
    doc.add(new Field("storeTime", DateTools.dateToString(new Date(), Resolution.DAY), Store.YES,
            Index.UN_TOKENIZED));
    writer.addDocument(doc);
}

From source file:com.appeligo.amazon.ProgramIndexer.java

License:Apache License

/**
 * Builds a search document for an Amazon product associated with a program.
 * The ASIN, program id, and store time are indexed as exact keywords; the
 * title is full-text searchable; URLs and image dimensions are stored only.
 * Optional fields are skipped when absent.
 *
 * @param item Amazon item to convert
 * @param programId id of the program the item is associated with
 * @return the populated Lucene document
 */
protected Document createProductDocument(AmazonItem item, String programId) {
    Document doc = new Document();
    doc.add(new Field("type", "product", Store.NO, Index.UN_TOKENIZED));
    doc.add(new Field("asin", item.getId(), Store.YES, Index.UN_TOKENIZED));

    String title = item.getTitle();
    if (title != null) {
        doc.add(new Field("title", title, Store.YES, Index.TOKENIZED));
    }

    String detailsUrl = item.getDetailsUrl();
    if (detailsUrl != null) {
        doc.add(new Field("detailsUrl", detailsUrl, Store.YES, Index.NO));
    }

    String smallImageUrl = item.getSmallImageUrl();
    if (smallImageUrl != null) {
        doc.add(new Field("smallImageUrl", smallImageUrl, Store.YES, Index.NO));
        doc.add(new Field("smallImageWidth", String.valueOf(item.getSmallImageWidth()), Store.YES, Index.NO));
        doc.add(new Field("smallImageHeight", String.valueOf(item.getSmallImageHeight()), Store.YES, Index.NO));
    }

    doc.add(new Field("programId", programId, Store.YES, Index.UN_TOKENIZED));
    // Day resolution matches the marker documents written alongside products.
    String storeTime = DateTools.dateToString(new Date(), Resolution.DAY);
    doc.add(new Field("storeTime", storeTime, Store.YES, Index.UN_TOKENIZED));

    return doc;
}