1   /*
2    *  LuceneIndexManager.java
3    *
4    *  Copyright (c) 1998-2005, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Rosen Marinov, 19/Apr/2002
12   *
13   */
14  
15  package gate.creole.ir.lucene;
16  
17  import java.io.File;
18  import java.util.Iterator;
19  import java.util.List;
20  
21  import org.apache.lucene.analysis.SimpleAnalyzer;
22  import org.apache.lucene.document.Field;
23  import org.apache.lucene.index.IndexReader;
24  import org.apache.lucene.index.IndexWriter;
25  
26  import gate.Corpus;
27  import gate.creole.ir.*;
28  import gate.util.GateRuntimeException;
29  
30  /** This class represents Lucene implementation of IndexManeager interface.*/
31  public class LuceneIndexManager implements IndexManager{
32  
33    /** used in Lucene Documents as a key for gate document ID value. */
34    public final static String DOCUMENT_ID = "DOCUMENT_ID";
35  
36    /** IndexDefinition - location, type, fields, etc.*/
37    private IndexDefinition indexDefinition;
38  
39    /** An corpus for indexing*/
40    private Corpus corpus;
41  
42    /* Niraj */
43    /** constant that ensures that corpus is indexed with IR plugin */
44    public final static String CORPUS_INDEX_FEATURE = "CorpusIndexFeature";
45    public final static String CORPUS_INDEX_FEATURE_VALUE = "IR";
46    /* End */
47  
48    /** Constructor of the class. */
49    public LuceneIndexManager(){
50    }
51  
52    /** Creates index directory and indexing all
53     *  documents in the corpus. */
54    public void createIndex() throws IndexException{
55      if(indexDefinition == null)
56        throw new GateRuntimeException("Index definition is null!");
57      if(corpus == null)
58        throw new GateRuntimeException("Corpus is null!");
59  
60      String location = indexDefinition.getIndexLocation();
61      try {
62        File file = new File(location);
63        if (file.exists()){
64          if (file.isDirectory() && file.listFiles().length>0) {
65            throw new IndexException(location+ " is not empty directory");
66          }
67          if (!file.isDirectory()){
68            throw new IndexException("Only empty directory can be index path");
69          }
70        }
71  
72        /* Niraj */
73        // ok so lets put the corpus index feature
74        corpus.getFeatures().put(CORPUS_INDEX_FEATURE, CORPUS_INDEX_FEATURE_VALUE);
75        /* End */
76  
77        IndexWriter writer = new IndexWriter(location,
78                                             new SimpleAnalyzer(), true);
79  
80        for(int i = 0; i<corpus.size(); i++) {
81          boolean isLoaded = corpus.isDocumentLoaded(i);
82          gate.Document gateDoc = (gate.Document) corpus.get(i);
83          writer.addDocument(getLuceneDoc(gateDoc));
84          if (!isLoaded) {
85            corpus.unloadDocument(gateDoc);
86            gate.Factory.deleteResource(gateDoc);
87          }
88        }//for (all documents)
89  
90        writer.close();
91        corpus.sync();
92      } catch (java.io.IOException ioe){
93        throw new IndexException(ioe.getMessage());
94      } catch (gate.persist.PersistenceException pe){
95        pe.printStackTrace();
96      } catch (gate.security.SecurityException se){
97        se.printStackTrace();
98      }
99    }
100 
101   /** Optimize existing index. */
102   public void optimizeIndex() throws IndexException{
103     if(indexDefinition == null)
104       throw new GateRuntimeException("Index definition is null!");
105     try {
106       IndexWriter writer = new IndexWriter(indexDefinition.getIndexLocation(),
107                                          new SimpleAnalyzer(), false);
108       writer.optimize();
109       writer.close();
110     } catch (java.io.IOException ioe){
111       throw new IndexException(ioe.getMessage());
112     }
113   }
114 
115   /** Delete index. */
116   public void deleteIndex() throws IndexException{
117     if(indexDefinition == null)
118       throw new GateRuntimeException("Index definition is null!");
119     boolean isDeleted = true;
120     File dir = new File(indexDefinition.getIndexLocation());
121     if (dir.exists() && dir.isDirectory()) {
122       File[] files = dir.listFiles();
123       for (int i =0; i<files.length; i++){
124         File f = files[i];
125         isDeleted = f.delete();
126       }
127     }
128     dir.delete();
129     if (!isDeleted) {
130       throw new IndexException("Can't delete directory"
131                                + indexDefinition.getIndexLocation());
132     }
133   }
134 
135   /** Reindexing changed documents, removing removed documents and
136    *  add to the index new corpus documents. */
137   public void sync(List added, List removedIDs, List changed) throws IndexException{
138     String location = indexDefinition.getIndexLocation();
139     try {
140 
141       IndexReader reader = IndexReader.open(location);
142 
143       for (int i = 0; i<removedIDs.size(); i++) {
144         String id = removedIDs.get(i).toString();
145         org.apache.lucene.index.Term term =
146                                new org.apache.lucene.index.Term(DOCUMENT_ID,id);
147         reader.delete(term);
148       }//for (remove all removed documents)
149 
150       for (int i = 0; i<changed.size(); i++) {
151         gate.Document gateDoc = (gate.Document) changed.get(i);
152         String id = gateDoc.getLRPersistenceId().toString();
153         org.apache.lucene.index.Term term =
154                                new org.apache.lucene.index.Term(DOCUMENT_ID,id);
155         reader.delete(term);
156       }//for (remove all changed documents)
157 
158       reader.close();
159 
160       IndexWriter writer = new IndexWriter(location,
161                                           new SimpleAnalyzer(), false);
162 
163       for(int i = 0; i<added.size(); i++) {
164         gate.Document gateDoc = (gate.Document) added.get(i);
165         writer.addDocument(getLuceneDoc(gateDoc));
166       }//for (add all added documents)
167 
168       for(int i = 0; i<changed.size(); i++) {
169         gate.Document gateDoc = (gate.Document) changed.get(i);
170         writer.addDocument(getLuceneDoc(gateDoc));
171       }//for (add all changed documents)
172 
173       writer.close();
174     } catch (java.io.IOException ioe) {
175       throw new IndexException(ioe.getMessage());
176     }
177   }
178 
179   private org.apache.lucene.document.Document getLuceneDoc(gate.Document gateDoc){
180     org.apache.lucene.document.Document luceneDoc =
181                                      new org.apache.lucene.document.Document();
182     Iterator fields = indexDefinition.getIndexFields();
183 
184     luceneDoc.add(Field.Keyword(DOCUMENT_ID,
185                                 gateDoc.getLRPersistenceId().toString()));
186 
187     while (fields.hasNext()) {
188       IndexField field = (IndexField) fields.next();
189       String valueForIndexing;
190 
191       if (field.getReader() == null){
192         valueForIndexing = gateDoc.getFeatures().get(field.getName()).toString();
193       } else {
194         valueForIndexing = field.getReader().getPropertyValue(gateDoc);
195       } //if-else reader or feature
196 
197       if (field.isPreseved()) {
198         luceneDoc.add(Field.Keyword(field.getName(),valueForIndexing));
199       } else {
200         luceneDoc.add(Field.UnStored(field.getName(),valueForIndexing));
201       } // if-else keyword or text
202 
203     }// while (add all fields)
204 
205     return luceneDoc;
206   }
207 
208   public Corpus getCorpus() {
209     return corpus;
210   }
211   public void setCorpus(Corpus corpus) {
212     this.corpus = corpus;
213   }
214   public IndexDefinition getIndexDefinition() {
215     return indexDefinition;
216   }
217   public void setIndexDefinition(IndexDefinition indexDefinition) {
218     this.indexDefinition = indexDefinition;
219   }
220 
221 }