1
15
16 package gate.corpora;
17
18 import java.io.*;
19 import java.net.URL;
20 import java.util.*;
21
22 import gate.*;
23 import gate.creole.AbstractLanguageResource;
24 import gate.creole.ResourceInstantiationException;
25 import gate.event.*;
26 import gate.util.Err;
27 import gate.util.Strings;
28
29
32 public class CorpusImpl extends AbstractLanguageResource
33 implements Corpus, CreoleListener {
34
35
/** Debug switch; fixed to {@code false} and not referenced by the visible code of this class. */
private static final boolean DEBUG = false;
37
/**
 * Creates an empty corpus: builds the synchronized, event-firing backing
 * list and registers this corpus as a listener on the CREOLE register.
 */
public CorpusImpl(){
  List eventFiringDocs = new VerboseList();
  supportList = Collections.synchronizedList(eventFiringDocs);
  Gate.getCreoleRegister().addCreoleListener(this);
}
42
43
44
49 public List getDocumentNames(){
50 ArrayList res = new ArrayList(supportList.size());
51 Iterator docIter = supportList.iterator();
52 while(docIter.hasNext()){
53 res.add(((Document)docIter.next()).getName());
54 }
55 return res;
56 }
57
58
64 public String getDocumentName(int index){
65 return ((Document)supportList.get(index)).getName();
66 }
67
68
72 public void unloadDocument(Document doc) {
73 return;
74 }
75
76
77
/**
 * Backing list holding the corpus members. The constructor assigns it a
 * synchronized wrapper around a {@link VerboseList}.
 */
protected List supportList = null;
81
82
89 protected class VerboseList extends AbstractList implements Serializable{
90
91
92
93 VerboseList(){
94 data = new ArrayList();
95 }
96
97 public Object get(int index){
98 return data.get(index);
99 }
100
101 public int size(){
102 return data.size();
103 }
104
105 public Object set(int index, Object element){
106 if(element instanceof Document){
107 Document oldDoc = (Document)data.set(index, element);
108 Document newDoc = (Document)element;
109
110 fireDocumentRemoved(new CorpusEvent(CorpusImpl.this,
112 oldDoc,
113 index,
114 CorpusEvent.DOCUMENT_REMOVED));
115 fireDocumentAdded(new CorpusEvent(CorpusImpl.this,
116 newDoc,
117 index,
118 CorpusEvent.DOCUMENT_ADDED));
119 return oldDoc;
120 }else{
121 throw new UnsupportedOperationException(
122 getClass().getName() +
123 " only accepts gate.Document values as members!\n" +
124 element.getClass().getName() + " is not a gate.Document");
125 }
126 }
127
128 public void add(int index, Object element){
129 if(element instanceof Document){
130 data.add(index, element);
131
132 fireDocumentAdded(new CorpusEvent(CorpusImpl.this,
134 (Document)element,
135 index,
136 CorpusEvent.DOCUMENT_ADDED));
137 }else{
138 throw new UnsupportedOperationException(
139 getClass().getName() +
140 " only accepts gate.Document values as members!\n" +
141 element.getClass().getName() + " is not a gate.Document");
142 }
143 }
144
145 public Object remove(int index){
146 Document oldDoc = (Document)data.remove(index);
147
148 fireDocumentRemoved(new CorpusEvent(CorpusImpl.this,
149 oldDoc,
150 index,
151 CorpusEvent.DOCUMENT_REMOVED));
152 return oldDoc;
153 }
154
155
158 ArrayList data;
159 }
160
161
164 public boolean isDocumentLoaded(int index) {
165 return true;
166 }
167
168
169 protected void clearDocList() {
170 if (supportList == null)
171 return;
172 supportList.clear();
173 }
174
175
176
179 public int size() {
180 return supportList.size();
181 }
182
183 public boolean isEmpty() {
184 return supportList.isEmpty();
185 }
186
187 public boolean contains(Object o){
188 return supportList.contains(o);
189 }
190
191 public Iterator iterator(){
192 return supportList.iterator();
193 }
194
195 public Object[] toArray(){
196 return supportList.toArray();
197 }
198
199 public Object[] toArray(Object[] a){
200 return supportList.toArray(a);
201 }
202
203 public boolean add(Object o){
204 return supportList.add(o);
205 }
206
207 public boolean remove(Object o){
208 return supportList.remove(o);
209 }
210
211 public boolean containsAll(Collection c){
212 return supportList.containsAll(c);
213 }
214
215 public boolean addAll(Collection c){
216 return supportList.addAll(c);
217 }
218
219 public boolean addAll(int index, Collection c){
220 return supportList.addAll(index, c);
221 }
222
223 public boolean removeAll(Collection c){
224 return supportList.removeAll(c);
225 }
226
227 public boolean retainAll(Collection c){
228 return supportList.retainAll(c);
229 }
230
231 public void clear(){
232 supportList.clear();
233 }
234
235 public boolean equals(Object o){
236 if (! (o instanceof CorpusImpl))
237 return false;
238
239 return supportList.equals(o);
240 }
241
242 public int hashCode(){
243 return supportList.hashCode();
244 }
245
246 public Object get(int index){
247 return supportList.get(index);
248 }
249
250 public Object set(int index, Object element){
251 return supportList.set(index, element);
252 }
253
254 public void add(int index, Object element){
255 supportList.add(index, element);
256 }
257
258 public Object remove(int index){
259 return supportList.remove(index);
260 }
261
262 public int indexOf(Object o){
263 return supportList.indexOf(o);
264 }
265
266 public int lastIndexOf(Object o){
267 return lastIndexOf(o);
268 }
269
270 public ListIterator listIterator(){
271 return supportList.listIterator();
272 }
273
274 public ListIterator listIterator(int index){
275 return supportList.listIterator(index);
276 }
277
278 public List subList(int fromIndex, int toIndex){
279 return supportList.subList(fromIndex, toIndex);
280 }
281
282
283
284
285 public void cleanup(){
286 Gate.getCreoleRegister().removeCreoleListener(this);
287 }
288
289
290 public Resource init() {
291 if(documentsList != null && !documentsList.isEmpty()){
292 addAll(documentsList);
293 }
294 return this;
295 }
297
298
316 public static void populate(Corpus corpus, URL directory, FileFilter filter,
317 String encoding, boolean recurseDirectories)
318 throws IOException {
319 if(!directory.getProtocol().equalsIgnoreCase("file"))
321 throw new IllegalArgumentException(
322 "The URL provided is not of type \"file:\"!");
323
324 File dir = new File(directory.getPath());
325 if(!dir.exists())
326 throw new FileNotFoundException(dir.toString());
327
328 if(!dir.isDirectory())
329 throw new IllegalArgumentException(
330 dir.getAbsolutePath() + " is not a directory!");
331
332 File[] files = dir.listFiles(filter);
334 if(files != null){
335 for(int i = 0; i < files.length; i++){
336 File aFile = files[i];
337 if(aFile.isDirectory()){
338 if(recurseDirectories){
340 populate(corpus, aFile.toURL(), filter,
341 encoding, recurseDirectories);
342 }
343 }else{
344 StatusListener sListener = (StatusListener)
346 gate.gui.MainFrame.getListeners().
347 get("gate.event.StatusListener");
348 if(sListener != null) sListener.statusChanged(
349 "Reading: " + aFile.getName());
350 String docName = aFile.getName() + "_" + Gate.genSym();
351 FeatureMap params = Factory.newFeatureMap();
352 params.put(Document.DOCUMENT_URL_PARAMETER_NAME, aFile.toURL());
353 if(encoding != null)
354 params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, encoding);
355
356 try {
357 Document doc = (Document)Factory.createResource(
358 DocumentImpl.class.getName(), params, null, docName
359 );
360 corpus.add(doc);
361 if(corpus.getLRPersistenceId() != null){
362 corpus.unloadDocument(doc);
364 Factory.deleteResource(doc);
365 }
366 } catch(ResourceInstantiationException e) {
367 String nl = Strings.getNl();
368 Err.prln(
369 "WARNING: Corpus.populate could not intantiate document" + nl +
370 " Document name was: " + docName + nl +
371 " Exception was: " + e + nl + nl
372 );
373 }
374 if(sListener != null) sListener.statusChanged(
375 aFile.getName() + " read");
376 }
377 }
378 }
379 }
381
396 public void populate(URL directory, FileFilter filter, String encoding,
397 boolean recurseDirectories)
398 throws IOException, ResourceInstantiationException{
399 populate(this, directory, filter, encoding, recurseDirectories);
400 }
401
402 public synchronized void removeCorpusListener(CorpusListener l) {
403 if (corpusListeners != null && corpusListeners.contains(l)) {
404 Vector v = (Vector) corpusListeners.clone();
405 v.removeElement(l);
406 corpusListeners = v;
407 }
408 }
409 public synchronized void addCorpusListener(CorpusListener l) {
410 Vector v = corpusListeners == null ? new Vector(2) : (Vector) corpusListeners.clone();
411 if (!v.contains(l)) {
412 v.addElement(l);
413 corpusListeners = v;
414 }
415 }
416
417
/** Fixed serialization version identifier of this class. */
static final long serialVersionUID = -1113142759053898456L;
/** Registered corpus listeners; replaced by a modified copy on every add/remove. Transient. */
private transient Vector corpusListeners;
/** Optional initial members; {@code init()} adds them to the corpus when non-empty. Transient. */
protected transient java.util.List documentsList;
421
422
423 protected void fireDocumentAdded(CorpusEvent e) {
424 if (corpusListeners != null) {
425 Vector listeners = corpusListeners;
426 int count = listeners.size();
427 for (int i = 0; i < count; i++) {
428 ((CorpusListener) listeners.elementAt(i)).documentAdded(e);
429 }
430 }
431 }
432 protected void fireDocumentRemoved(CorpusEvent e) {
433 if (corpusListeners != null) {
434 Vector listeners = corpusListeners;
435 int count = listeners.size();
436 for (int i = 0; i < count; i++) {
437 ((CorpusListener) listeners.elementAt(i)).documentRemoved(e);
438 }
439 }
440 }
441 public void setDocumentsList(java.util.List documentsList) {
442 this.documentsList = documentsList;
443 }
444 public java.util.List getDocumentsList() {
445 return documentsList;
446 }
447 public void resourceLoaded(CreoleEvent e) {
448 }
449 public void resourceUnloaded(CreoleEvent e) {
450 Resource res = e.getResource();
451 if(res instanceof Document) while(contains(res)) remove(res);
453 }
454
455 public void resourceRenamed(Resource resource, String oldName,
456 String newName){
457 }
458
459 public void datastoreOpened(CreoleEvent e) {
460 }
461 public void datastoreCreated(CreoleEvent e) {
462 }
463 public void datastoreClosed(CreoleEvent e) {
464 }
465 }