1
14
15 package gate.creole.ir.lucene;
16
17 import java.io.File;
18 import java.util.Iterator;
19 import java.util.List;
20
21 import org.apache.lucene.analysis.SimpleAnalyzer;
22 import org.apache.lucene.document.Field;
23 import org.apache.lucene.index.IndexReader;
24 import org.apache.lucene.index.IndexWriter;
25
26 import gate.Corpus;
27 import gate.creole.ir.*;
28 import gate.util.GateRuntimeException;
29
30
31 public class LuceneIndexManager implements IndexManager{
32
33
34 public final static String DOCUMENT_ID = "DOCUMENT_ID";
35
36
37 private IndexDefinition indexDefinition;
38
39
40 private Corpus corpus;
41
42
43
44 public final static String CORPUS_INDEX_FEATURE = "CorpusIndexFeature";
45 public final static String CORPUS_INDEX_FEATURE_VALUE = "IR";
46
47
48
49 public LuceneIndexManager(){
50 }
51
52
54 public void createIndex() throws IndexException{
55 if(indexDefinition == null)
56 throw new GateRuntimeException("Index definition is null!");
57 if(corpus == null)
58 throw new GateRuntimeException("Corpus is null!");
59
60 String location = indexDefinition.getIndexLocation();
61 try {
62 File file = new File(location);
63 if (file.exists()){
64 if (file.isDirectory() && file.listFiles().length>0) {
65 throw new IndexException(location+ " is not empty directory");
66 }
67 if (!file.isDirectory()){
68 throw new IndexException("Only empty directory can be index path");
69 }
70 }
71
72
73 corpus.getFeatures().put(CORPUS_INDEX_FEATURE, CORPUS_INDEX_FEATURE_VALUE);
75
76
77 IndexWriter writer = new IndexWriter(location,
78 new SimpleAnalyzer(), true);
79
80 for(int i = 0; i<corpus.size(); i++) {
81 boolean isLoaded = corpus.isDocumentLoaded(i);
82 gate.Document gateDoc = (gate.Document) corpus.get(i);
83 writer.addDocument(getLuceneDoc(gateDoc));
84 if (!isLoaded) {
85 corpus.unloadDocument(gateDoc);
86 gate.Factory.deleteResource(gateDoc);
87 }
88 }
90 writer.close();
91 corpus.sync();
92 } catch (java.io.IOException ioe){
93 throw new IndexException(ioe.getMessage());
94 } catch (gate.persist.PersistenceException pe){
95 pe.printStackTrace();
96 } catch (gate.security.SecurityException se){
97 se.printStackTrace();
98 }
99 }
100
101
102 public void optimizeIndex() throws IndexException{
103 if(indexDefinition == null)
104 throw new GateRuntimeException("Index definition is null!");
105 try {
106 IndexWriter writer = new IndexWriter(indexDefinition.getIndexLocation(),
107 new SimpleAnalyzer(), false);
108 writer.optimize();
109 writer.close();
110 } catch (java.io.IOException ioe){
111 throw new IndexException(ioe.getMessage());
112 }
113 }
114
115
116 public void deleteIndex() throws IndexException{
117 if(indexDefinition == null)
118 throw new GateRuntimeException("Index definition is null!");
119 boolean isDeleted = true;
120 File dir = new File(indexDefinition.getIndexLocation());
121 if (dir.exists() && dir.isDirectory()) {
122 File[] files = dir.listFiles();
123 for (int i =0; i<files.length; i++){
124 File f = files[i];
125 isDeleted = f.delete();
126 }
127 }
128 dir.delete();
129 if (!isDeleted) {
130 throw new IndexException("Can't delete directory"
131 + indexDefinition.getIndexLocation());
132 }
133 }
134
135
137 public void sync(List added, List removedIDs, List changed) throws IndexException{
138 String location = indexDefinition.getIndexLocation();
139 try {
140
141 IndexReader reader = IndexReader.open(location);
142
143 for (int i = 0; i<removedIDs.size(); i++) {
144 String id = removedIDs.get(i).toString();
145 org.apache.lucene.index.Term term =
146 new org.apache.lucene.index.Term(DOCUMENT_ID,id);
147 reader.delete(term);
148 }
150 for (int i = 0; i<changed.size(); i++) {
151 gate.Document gateDoc = (gate.Document) changed.get(i);
152 String id = gateDoc.getLRPersistenceId().toString();
153 org.apache.lucene.index.Term term =
154 new org.apache.lucene.index.Term(DOCUMENT_ID,id);
155 reader.delete(term);
156 }
158 reader.close();
159
160 IndexWriter writer = new IndexWriter(location,
161 new SimpleAnalyzer(), false);
162
163 for(int i = 0; i<added.size(); i++) {
164 gate.Document gateDoc = (gate.Document) added.get(i);
165 writer.addDocument(getLuceneDoc(gateDoc));
166 }
168 for(int i = 0; i<changed.size(); i++) {
169 gate.Document gateDoc = (gate.Document) changed.get(i);
170 writer.addDocument(getLuceneDoc(gateDoc));
171 }
173 writer.close();
174 } catch (java.io.IOException ioe) {
175 throw new IndexException(ioe.getMessage());
176 }
177 }
178
179 private org.apache.lucene.document.Document getLuceneDoc(gate.Document gateDoc){
180 org.apache.lucene.document.Document luceneDoc =
181 new org.apache.lucene.document.Document();
182 Iterator fields = indexDefinition.getIndexFields();
183
184 luceneDoc.add(Field.Keyword(DOCUMENT_ID,
185 gateDoc.getLRPersistenceId().toString()));
186
187 while (fields.hasNext()) {
188 IndexField field = (IndexField) fields.next();
189 String valueForIndexing;
190
191 if (field.getReader() == null){
192 valueForIndexing = gateDoc.getFeatures().get(field.getName()).toString();
193 } else {
194 valueForIndexing = field.getReader().getPropertyValue(gateDoc);
195 }
197 if (field.isPreseved()) {
198 luceneDoc.add(Field.Keyword(field.getName(),valueForIndexing));
199 } else {
200 luceneDoc.add(Field.UnStored(field.getName(),valueForIndexing));
201 }
203 }
205 return luceneDoc;
206 }
207
208 public Corpus getCorpus() {
209 return corpus;
210 }
211 public void setCorpus(Corpus corpus) {
212 this.corpus = corpus;
213 }
214 public IndexDefinition getIndexDefinition() {
215 return indexDefinition;
216 }
217 public void setIndexDefinition(IndexDefinition indexDefinition) {
218 this.indexDefinition = indexDefinition;
219 }
220
221 }