Example usage for org.apache.lucene.document Document Document

List of usage examples for org.apache.lucene.document Document Document

Introduction

On this page you can find example usage for org.apache.lucene.document Document Document.

Prototype

public Document() 

Source Link

Document

Constructs a new document with no fields.

Usage

From source file:com.aliasi.lingmed.medline.IndexMedline.java

License:Lingpipe license

/**
 * Records that a MEDLINE distribution file has been processed by adding a
 * small marker document (distribution tag + file name) to the index.
 *
 * @param indexWriter writer for the target index
 * @param fileName name of the distribution file being recorded
 * @throws IOException if the document cannot be added to the index
 */
private void recordFile(IndexWriter indexWriter, String fileName) throws IOException {
    Document doc = new Document();
    // Both fields are stored and indexed as single untokenized terms
    // (no norms) so they behave as exact-match keywords.
    doc.add(new Field(Fields.MEDLINE_DIST_FIELD, Fields.MEDLINE_DIST_VALUE, Field.Store.YES,
            Field.Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new Field(Fields.MEDLINE_FILE_FIELD, fileName, Field.Store.YES,
            Field.Index.NOT_ANALYZED_NO_NORMS));
    indexWriter.addDocument(doc);
}

From source file:com.aliasi.lingmed.medline.MedlineCodec.java

License:Lingpipe license

/**
 * Converts a MEDLINE citation into a Lucene document consisting of the
 * PubMed id (as an exact-match keyword) and the compressed raw XML.
 *
 * @param citation citation to convert
 * @return Lucene document holding the citation's id and compressed XML
 */
public Document toDocument(MedlineCitation citation) {
    Document doc = new Document();

    // PubMed id: stored and indexed untokenized, no norms, so it can be
    // looked up as an exact keyword.
    doc.add(new Field(Fields.ID_FIELD, citation.pmid(), Field.Store.YES,
            Field.Index.NOT_ANALYZED_NO_NORMS));

    // Raw XML: compressed into a binary field; binary fields are stored
    // only, never indexed.
    byte[] compressedXml = CompressionTools.compressString(citation.xmlString());
    doc.add(new Field(Fields.XML_FIELD, compressedXml, Field.Store.YES));

    return doc;
}

From source file:com.aliasi.lingmed.medline.SearchableMedlineCodec.java

License:Lingpipe license

/**
 * Smoke test: indexes one document with two MESH_MINOR_FIELD values into an
 * in-memory index, runs a query against the second value, and dumps the
 * token stream the analyzer produces for a sample string.
 *
 * <p>Fixes over the original: the IndexWriter is closed even if indexing
 * throws, the IndexReader/IndexSearcher are closed after searching, and the
 * TokenStream is consumed per the Lucene contract
 * (reset() before incrementToken(), then end() and close()).
 *
 * @param args unused
 * @throws Exception on any indexing, parsing, or analysis failure
 */
public static void main(String[] args) throws Exception {
    org.apache.lucene.store.RAMDirectory directory = new org.apache.lucene.store.RAMDirectory();

    MedlineCodec codec = new MedlineCodec();
    Analyzer analyzer = codec.getAnalyzer();

    org.apache.lucene.index.IndexWriterConfig iwConf = new org.apache.lucene.index.IndexWriterConfig(
            org.apache.lucene.util.Version.LUCENE_36, analyzer);
    iwConf.setOpenMode(org.apache.lucene.index.IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    org.apache.lucene.index.IndexWriter indexWriter = new org.apache.lucene.index.IndexWriter(directory,
            iwConf);
    try {
        Document doc = new Document();
        doc.add(new Field(Fields.MESH_MINOR_FIELD, "abc", Field.Store.NO, Field.Index.ANALYZED));
        doc.add(new Field(Fields.MESH_MINOR_FIELD, " xyz efg", Field.Store.NO, Field.Index.ANALYZED));
        indexWriter.addDocument(doc);
    } finally {
        // Original leaked the writer if addDocument threw.
        indexWriter.close();
    }

    org.apache.lucene.index.IndexReader reader = org.apache.lucene.index.IndexReader.open(directory);
    org.apache.lucene.search.IndexSearcher searcher = new org.apache.lucene.search.IndexSearcher(reader);
    try {
        org.apache.lucene.queryParser.QueryParser qp = new org.apache.lucene.queryParser.QueryParser(
                org.apache.lucene.util.Version.LUCENE_36, "foo", analyzer);
        org.apache.lucene.search.Query query = qp.parse(Fields.MESH_MINOR_FIELD + ":efg");

        org.apache.lucene.search.TopDocs hits = searcher.search(query, 1000);
        System.out.println("hits.length()=" + hits.scoreDocs.length);
    } finally {
        // Original never closed the searcher or reader.
        searcher.close();
        reader.close();
    }

    org.apache.lucene.analysis.TokenStream ts = analyzer.tokenStream(Fields.MESH_MINOR_FIELD,
            new java.io.StringReader("abc xyz efg"));
    org.apache.lucene.analysis.tokenattributes.CharTermAttribute terms = ts
            .addAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute.class);
    org.apache.lucene.analysis.tokenattributes.OffsetAttribute offsets = ts
            .addAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute.class);
    org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute positions = ts
            .addAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute.class);

    // TokenStream contract: reset() before consuming, end()/close() after.
    ts.reset();
    while (ts.incrementToken()) {
        int increment = positions.getPositionIncrement();
        int start = offsets.startOffset();
        int end = offsets.endOffset();
        String term = terms.toString();
        System.out.println("token=|" + term + "|" + " startOffset=" + start + " endOffset=" + end
                + " positionIncr=" + increment);
    }
    ts.end();
    ts.close();
}

From source file:com.aliasi.lingmed.omim.OmimCodec.java

License:Lingpipe license

/**
 * Converts an OMIM record into a Lucene document holding the tokenized MIM
 * id and the record's raw text (stored compressed, not indexed).
 *
 * @param rec OMIM record to convert
 * @return Lucene document for the record
 */
public Document toDocument(OmimRecord rec) {
    Document doc = new Document();
    // MIM id: stored and tokenized so it participates in full-text queries.
    String mimId = String.valueOf(rec.getMimId());
    doc.add(new Field(Fields.ID_FIELD, mimId, Field.Store.YES, Field.Index.TOKENIZED));
    // Raw text: stored compressed for retrieval only; not searchable.
    doc.add(new Field(Fields.RAW_TEXT_FIELD, rec.getRawText(), Field.Store.COMPRESS,
            Field.Index.NO));
    return doc;
}

From source file:com.andreig.jetty.WriteServlet.java

License:GNU General Public License

@Override
/**
 * Handles an update request: the request body carries two JSON lines — the
 * query on the first line and the update object on the second — which are
 * applied to the target collection via {@code col.update}. When search
 * indexing is enabled, matching documents' searchable fields are mirrored
 * into the search index first.
 *
 * <p>Bug fix: the original tested {@code wr.getError() == null} when
 * deciding to report SC_BAD_REQUEST, i.e. it flagged <em>successful</em>
 * writes as bad requests and let failed writes fall through to SC_CREATED.
 * The condition is now {@code != null}.
 *
 * @param req servlet request; params: dbname, colname, upsert, multi, user, passwd
 * @param res servlet response; status SC_CREATED on success
 * @throws ServletException on servlet-level failure
 * @throws IOException on I/O failure reading the request or writing the response
 */
@Override
protected void doPost(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException {

    log.fine("doPost()");

    if (!can_write(req)) {
        res.sendError(SC_UNAUTHORIZED);
        return;
    }

    InputStream is = req.getInputStream();
    // Target db/collection: explicit params first, then names derived from
    // the request; missing names are a client error.
    String db_name = req.getParameter("dbname");
    String col_name = req.getParameter("colname");
    if (db_name == null || col_name == null) {
        String names[] = req2mongonames(req);
        if (names != null) {
            db_name = names[0];
            col_name = names[1];
        }
        if (db_name == null || col_name == null) {
            error(res, SC_BAD_REQUEST, Status.get("param name missing"));
            return;
        }
    }

    boolean upsert = Boolean.parseBoolean(req.getParameter("upsert"));
    boolean multi = Boolean.parseBoolean(req.getParameter("multi"));

    DB db = mongo.getDB(db_name);

    // Optional mongo auth; only attempted when credentials are supplied and
    // the connection is not already authenticated.
    String user = req.getParameter("user");
    String passwd = req.getParameter("passwd");
    if (user != null && passwd != null && (!db.isAuthenticated())) {
        boolean auth = db.authenticate(user, passwd.toCharArray());
        if (!auth) {
            res.sendError(SC_UNAUTHORIZED);
            return;
        }
    }

    DBCollection col = db.getCollection(col_name);

    // Body format: line 1 = query JSON, line 2 = update-object JSON.
    BufferedReader r = null;
    DBObject q = null, o = null;
    try {

        r = new BufferedReader(new InputStreamReader(is));
        String q_s = r.readLine();
        if (q_s == null) {
            error(res, SC_BAD_REQUEST, Status.get("no data"));
            return;
        }
        String o_s = r.readLine();
        if (o_s == null) {
            error(res, SC_BAD_REQUEST, Status.get("obj to update missing"));
            return;
        }
        try {
            q = (DBObject) JSON.parse(q_s);
            o = (DBObject) JSON.parse(o_s);
        } catch (JSONParseException e) {
            error(res, SC_BAD_REQUEST, Status.get("can not parse data"));
            return;
        }

    } finally {
        if (r != null)
            r.close();
    }

    // Mirror the update into the search index for collections that have
    // configured searchable fields.
    if (do_search) {

        String fn = col.getFullName();
        DBCursor c = col.find(q);
        int cnt = c.count();
        if (!multi)
            c.limit(1);
        long l = multi ? cnt : 1;
        String toupdate[] = new String[(int) l];
        int n = 0;
        boolean insert = false;

        // An upsert that matched nothing will insert a new document.
        if (upsert && !multi && cnt == 0)
            insert = true;

        // Collect the ids of the documents the update will touch.
        while (c.hasNext()) {

            DBObject _o = c.next();
            ObjectId oid = (ObjectId) _o.get("_id");
            String id = oid.toStringMongod();
            toupdate[n++] = id;

        }
        c.close();

        List<String> flds = Config.search_index_fields.get(fn);
        boolean commit = false;
        Document doc = null;
        Search _writer = search.get_writer();
        if (flds != null && flds.size() > 0) {
            doc = new Document();
            try {
                for (String fld : flds) {
                    String val = (String) o.get(fld);
                    if (val == null)
                        continue;
                    Search.add_searchable_s(doc, fld, val);
                    commit = true;
                }
                if (commit)
                    _writer.commit(doc);
            } catch (ClassCastException e) {
                error(res, SC_BAD_REQUEST, Status.get("searchable fields must be type String"));
                return;
            } catch (CorruptIndexException e) {
                error(res, SC_BAD_REQUEST, Status.get("Search corrupt index" + e));
                return;
            }
        }
        if (commit && insert)
            log.warning("upsert with search not implemented yet");
        else
            _writer.update(toupdate, doc);

    }

    WriteResult wr = col.update(q, o, upsert, multi, write_concern);

    // Return operation status; a mongo-reported error maps to 400.
    if (do_return) {
        out_str(req, wr.toString());
        // FIX: was "== null", which reported errors on successful writes.
        if (wr.getError() != null) {
            res.setStatus(SC_BAD_REQUEST);
            return;
        }
    }

    res.setStatus(SC_CREATED);

}

From source file:com.andreig.jetty.WriteServlet.java

License:GNU General Public License

@Override
/**
 * Handles an insert request: each line of the request body is a JSON
 * document, all of which are inserted into the target collection in one
 * batch. When search indexing is enabled, each inserted document's
 * searchable fields are added to the search index along with its id.
 *
 * <p>Bug fix: the original tested {@code wr.getError() == null} when
 * deciding to report SC_BAD_REQUEST, i.e. it flagged <em>successful</em>
 * inserts as bad requests and let failed inserts fall through to
 * SC_CREATED. The condition is now {@code != null}. Also removes a
 * redundant null check inside the read loop (the loop condition already
 * guarantees the line is non-null).
 *
 * @param req servlet request; params: dbname, colname, user, passwd
 * @param res servlet response; status SC_CREATED on success
 * @throws ServletException on servlet-level failure
 * @throws IOException on I/O failure reading the request or writing the response
 */
@Override
protected void doPut(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException {

    log.fine("doPut()");

    if (!can_write(req)) {
        res.sendError(SC_UNAUTHORIZED);
        return;
    }

    InputStream is = req.getInputStream();
    // Target db/collection: explicit params first, then names derived from
    // the request; missing names are a client error.
    String db_name = req.getParameter("dbname");
    String col_name = req.getParameter("colname");
    if (db_name == null || col_name == null) {
        String names[] = req2mongonames(req);
        if (names != null) {
            db_name = names[0];
            col_name = names[1];
        }
        if (db_name == null || col_name == null) {
            error(res, SC_BAD_REQUEST, Status.get("param name missing"));
            return;
        }
    }
    DB db = mongo.getDB(db_name);

    // Optional mongo auth; only attempted when credentials are supplied and
    // the connection is not already authenticated.
    String user = req.getParameter("user");
    String passwd = req.getParameter("passwd");
    if (user != null && passwd != null && (!db.isAuthenticated())) {
        boolean auth = db.authenticate(user, passwd.toCharArray());
        if (!auth) {
            res.sendError(SC_UNAUTHORIZED);
            return;
        }
    }

    DBCollection col = db.getCollection(col_name);

    // Parse one JSON document per body line.
    BufferedReader r = null;
    ArrayList<DBObject> ar = new ArrayList<DBObject>();
    try {

        r = new BufferedReader(new InputStreamReader(is));
        String data;
        while ((data = r.readLine()) != null) {
            try {
                ar.add((DBObject) JSON.parse(data));
            } catch (JSONParseException e) {
                error(res, SC_BAD_REQUEST, Status.get("can not parse data"));
                return;
            }
        }

    } finally {
        if (r != null)
            r.close();
    }

    if (ar.size() == 0) {
        error(res, SC_BAD_REQUEST, Status.get("can not parse data"));
        return;
    }

    WriteResult wr = col.insert(ar, write_concern);

    // Mirror inserted documents into the search index for collections that
    // have configured searchable fields.
    if (do_search) {
        String fn = col.getFullName();
        List<String> flds = Config.search_index_fields.get(fn);
        if (flds != null && flds.size() > 0) {
            Search _writer = search.get_writer();
            try {
                for (DBObject o : ar) {
                    boolean commit = false;
                    Document doc = new Document();
                    for (String fld : flds) {
                        String val = (String) o.get(fld);
                        if (val == null)
                            continue;
                        Search.add_searchable_s(doc, fld, val);
                        commit = true;
                    }
                    if (commit) {
                        // Tie the search document back to the mongo _id so
                        // later updates/deletes can find it.
                        ObjectId id = (ObjectId) o.get("_id");
                        String sid = id.toStringMongod();
                        Search.add_storable(doc, "_id", sid);
                        Search.add_searchable_s(doc, "_dbid_", fn);
                        _writer.commit(doc);
                    }
                }
            } catch (ClassCastException e) {
                error(res, SC_BAD_REQUEST, Status.get("searchable fields must be type String"));
                return;
            } catch (CorruptIndexException e) {
                error(res, SC_BAD_REQUEST, Status.get("Search corrupt index" + e));
                return;
            }
        }
    }

    // Return operation status; a mongo-reported error maps to 400.
    if (do_return) {
        out_str(req, wr.toString());
        // FIX: was "== null", which reported errors on successful inserts.
        if (wr.getError() != null) {
            res.setStatus(SC_BAD_REQUEST);
            return;
        }
    }

    res.setStatus(SC_CREATED);

}

From source file:com.aperigeek.dropvault.web.service.IndexService.java

License:Open Source License

/**
 * Indexes a resource's metadata for the given user: the id is stored as an
 * exact-match keyword and every non-null metadata value is indexed
 * (analyzed, not stored) under its key.
 *
 * <p>Fix over the original: the IndexWriter is now closed in a finally
 * block, so it is no longer leaked when {@code addDocument} throws.
 *
 * @param username user whose index to write to
 * @param password user's password (used to open the index)
 * @param id resource identifier, stored for retrieval
 * @param metadata field-name to field-value map; null values are skipped
 * @throws IndexException wrapping any underlying IOException
 */
public void index(String username, String password, String id, Map<String, String> metadata)
        throws IndexException {
    try {
        Document document = new Document();
        document.add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
        for (Map.Entry<String, String> e : metadata.entrySet()) {
            if (e.getValue() != null) {
                document.add(new Field(e.getKey(), e.getValue(), Field.Store.NO, Field.Index.ANALYZED));
            }
        }

        IndexWriter index = getIndexWriter(username, password);
        try {
            index.addDocument(document);
        } finally {
            // Ensure the writer (and its lock) is released even on failure.
            index.close();
        }
    } catch (IOException ex) {
        throw new IndexException(ex);
    }
}

From source file:com.appeligo.amazon.AmazonIndexer.java

License:Apache License

/**
 * Queues a search document for an Amazon item associated with a program.
 * The ASIN, program id, and store time are indexed as exact keywords;
 * the title is full-text searchable; URLs and image dimensions are stored
 * only. Optional fields are skipped when absent.
 *
 * @param item Amazon item to index
 * @param programId id of the program the item is associated with
 */
public void addAmazonItem(AmazonItem item, String programId) {
    Document doc = new Document();
    doc.add(new Field("asin", item.getId(), Store.YES, Index.UN_TOKENIZED));

    String title = item.getTitle();
    if (title != null) {
        doc.add(new Field("title", title, Store.YES, Index.TOKENIZED));
    }

    String detailsUrl = item.getDetailsUrl();
    if (detailsUrl != null) {
        doc.add(new Field("detailsUrl", detailsUrl, Store.YES, Index.NO));
    }

    String smallImageUrl = item.getSmallImageUrl();
    if (smallImageUrl != null) {
        doc.add(new Field("smallImageUrl", smallImageUrl, Store.YES, Index.NO));
        doc.add(new Field("smallImageWidth", String.valueOf(item.getSmallImageWidth()), Store.YES, Index.NO));
        doc.add(new Field("smallImageHeight", String.valueOf(item.getSmallImageHeight()), Store.YES, Index.NO));
    }

    doc.add(new Field("programId", programId, Store.YES, Index.UN_TOKENIZED));
    // Timestamp at minute resolution so stale entries can be expired later.
    String storeTime = DateTools.dateToString(new Date(), Resolution.MINUTE);
    doc.add(new Field("storeTime", storeTime, Store.YES, Index.UN_TOKENIZED));

    queue.addDocument(doc);
}

From source file:com.appeligo.amazon.ProgramIndexer.java

License:Apache License

/**
 * Adds a marker program so it won't query for this programId until the time expires.
 * @param programId the programId to add a marker for
 * @throws IOException/*from  ww  w. j  a  v  a  2s. c o m*/
 */
private void addMarkerProgram(String programId) throws IOException {
    Document doc = new Document();
    doc.add(new Field("type", "marker", Store.NO, Index.UN_TOKENIZED));
    doc.add(new Field("programId", programId, Store.YES, Index.UN_TOKENIZED));
    doc.add(new Field("storeTime", DateTools.dateToString(new Date(), Resolution.DAY), Store.YES,
            Index.UN_TOKENIZED));
    writer.addDocument(doc);
}

From source file:com.appeligo.amazon.ProgramIndexer.java

License:Apache License

/**
 * Builds a search document for an Amazon product associated with a program.
 * The ASIN, program id, and store time are indexed as exact keywords; the
 * title is full-text searchable; URLs and image dimensions are stored only.
 * Optional fields are skipped when absent.
 *
 * @param item Amazon item to convert
 * @param programId id of the program the item is associated with
 * @return the populated Lucene document
 */
protected Document createProductDocument(AmazonItem item, String programId) {
    Document doc = new Document();
    doc.add(new Field("type", "product", Store.NO, Index.UN_TOKENIZED));
    doc.add(new Field("asin", item.getId(), Store.YES, Index.UN_TOKENIZED));

    String title = item.getTitle();
    if (title != null) {
        doc.add(new Field("title", title, Store.YES, Index.TOKENIZED));
    }

    String detailsUrl = item.getDetailsUrl();
    if (detailsUrl != null) {
        doc.add(new Field("detailsUrl", detailsUrl, Store.YES, Index.NO));
    }

    String smallImageUrl = item.getSmallImageUrl();
    if (smallImageUrl != null) {
        doc.add(new Field("smallImageUrl", smallImageUrl, Store.YES, Index.NO));
        doc.add(new Field("smallImageWidth", String.valueOf(item.getSmallImageWidth()), Store.YES, Index.NO));
        doc.add(new Field("smallImageHeight", String.valueOf(item.getSmallImageHeight()), Store.YES, Index.NO));
    }

    doc.add(new Field("programId", programId, Store.YES, Index.UN_TOKENIZED));
    // Day resolution matches the marker documents written alongside products.
    String storeTime = DateTools.dateToString(new Date(), Resolution.DAY);
    doc.add(new Field("storeTime", storeTime, Store.YES, Index.UN_TOKENIZED));

    return doc;
}