1   /*
2    *  LuceneSearch.java
3    *
4    *  Copyright (c) 1998-2005, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Rosen Marinov, 19/Apr/2002
12   *
13   */
14  
15  package gate.creole.ir.lucene;
16  
17  import java.util.List;
18  import java.util.Vector;
19  
20  import org.apache.lucene.analysis.SimpleAnalyzer;
21  import org.apache.lucene.queryParser.QueryParser;
22  import org.apache.lucene.search.*;
23  
24  import gate.creole.ir.*;
25  
26  /** This class represents Lucene implementation of serching in index. */
27  public class LuceneSearch implements Search {
28  
29    /** An instance of indexed corpus*/
30    private IndexedCorpus indexedCorpus;
31  
32    /** Set the indexed corpus resource for searching. */
33    public void setCorpus(IndexedCorpus ic){
34      this.indexedCorpus = ic;
35    }
36  
37    /** Search in corpus with this query. Unlimited result length.*/
38    public QueryResultList search(String query)
39                                           throws IndexException, SearchException{
40      return search(query, -1);
41    }
42  
43    /** Search in corpus with this query.
44     *  Size of the result list is limited. */
45    public QueryResultList search(String query, int limit)
46                                           throws IndexException, SearchException{
47      return search(query, limit, null);
48    }
49  
50    /** Search in corpus with this query.
51     *  In each QueryResult will be added values of theise fields.
52     *  Result length is unlimited. */
53    public QueryResultList search(String query, List fieldNames)
54                                           throws IndexException, SearchException{
55      return search(query, -1, fieldNames);
56    }
57  
58    /** Search in corpus with this query.
59     *  In each QueryResult will be added values of theise fields.
60     *  Result length is limited. */
61    public QueryResultList search(String query, int limit, List fieldNames)
62                                           throws IndexException, SearchException{
63      Vector result = new Vector();
64  
65      try {
66        IndexSearcher searcher = new IndexSearcher(indexedCorpus.getIndexDefinition().getIndexLocation());
67        Query luceneQuery = QueryParser.parse(query, "body", new SimpleAnalyzer());
68  
69        Hits hits = searcher.search(luceneQuery);
70        int resultlength = hits.length();
71        if (limit>-1) {
72          resultlength = Math.min(limit,resultlength);
73        }
74  
75        Vector fieldValues = null;
76        for (int i=0; i<resultlength; i++) {
77  
78          if (fieldNames != null){
79            fieldValues = new Vector();
80            for (int j=0; j<fieldNames.size(); j++){
81              fieldValues.add(new gate.creole.ir.Term( fieldNames.get(j).toString(), hits.doc(i).get(fieldNames.get(j).toString())));
82            }
83          }
84  
85          result.add(new QueryResult(hits.doc(i).get(LuceneIndexManager.DOCUMENT_ID),hits.score(i),fieldValues));
86        }// for (all search hints)
87  
88        searcher.close();
89  
90        return new QueryResultList(query, indexedCorpus, result);
91      }
92      catch (java.io.IOException ioe) {
93        throw new IndexException(ioe.getMessage());
94      }
95      catch (org.apache.lucene.queryParser.ParseException pe) {
96        throw new SearchException(pe.getMessage());
97      }
98    }
99  }