1   /*
2    *  SerialCorpusImpl.java
3    *
4    *  Copyright (c) 1998-2005, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Kalina Bontcheva, 19/Oct/2001
12   *
13   *  $Id: SerialCorpusImpl.java,v 1.34 2006/03/09 13:33:19 ian_roberts Exp $
14   */
15  
16  package gate.corpora;
17  
18  import java.io.*;
19  import java.net.URL;
20  import java.util.*;
21  
22  import gate.*;
23  import gate.creole.AbstractLanguageResource;
24  import gate.creole.ResourceInstantiationException;
25  import gate.creole.ir.*;
26  import gate.event.*;
27  import gate.persist.PersistenceException;
28  import gate.security.SecurityException;
29  import gate.util.*;
30  
31  //The initial design was to implement this on the basis of a WeakValueHashMap.
32  //However this creates problems, because the user might e.g., add a transient
33  //document to the corpus and then if the Document variable goes out of scope
34  //before sync() is called, nothing will be saved of the new document. Bad!
35  //Instead, to cope with the unloading for memory saving use, I implemented
36  //a documentUnload() method, which sets the in-memory copy to null but can
37  //always restore the doc, because it has its persistence ID.
38  
39  public class SerialCorpusImpl extends
40            AbstractLanguageResource
41                        implements Corpus, CreoleListener,
42                                   DatastoreListener, IndexedCorpus {
43  
44    /** Debug flag */
45    private static final boolean DEBUG = false;
46  
47    static final long serialVersionUID = 3632609241787241616L;
48  
49    protected transient Vector corpusListeners;
50    protected java.util.List docDataList = null;
51  
52    //here I keep document index as key (same as the index in docDataList
53    //which defines the document order) and Documents as value
54    protected transient List documents = null;
55  
56    protected transient IndexManager indexManager= null;
57    protected transient List addedDocs = null;
58    protected transient List removedDocIDs = null;
59    protected transient List changedDocs = null;
60  
61    public SerialCorpusImpl() {
62    }
63  
64    /**
65     * Constructor to create a SerialCorpus from a transient one.
66     * This is called by adopt() to store the transient corpus
67     * and re-route the methods calls to it, until the corpus is
68     * sync-ed on disk. After that, the transientCorpus will always
69     * be null, so the new functionality will be used instead.
70     */
71    protected SerialCorpusImpl(Corpus tCorpus){
72      //copy the corpus name and features from the one in memory
73      this.setName(tCorpus.getName());
74      this.setFeatures(tCorpus.getFeatures());
75  
76      docDataList = new ArrayList();
77      //now cache the names of all docs for future use
78      Iterator iter = tCorpus.getDocumentNames().iterator();
79      while (iter.hasNext())
80        docDataList.add(new DocumentData((String) iter.next(), null));
81  
82      //copy all the documents from the transient corpus
83      documents = new ArrayList();
84      documents.addAll(tCorpus);
85  
86      //make sure we fire events when docs are added/removed/etc
87      Gate.getCreoleRegister().addCreoleListener(this);
88    }
89  
90    /**
91     * Gets the names of the documents in this corpus.
92     * @return a {@link List} of Strings representing the names of the documents
93     * in this corpus.
94     */
95    public List getDocumentNames(){
96      List docsNames = new ArrayList();
97      if(docDataList == null)
98        return docsNames;
99      Iterator iter = docDataList.iterator();
100     while (iter.hasNext()) {
101       DocumentData data = (DocumentData) iter.next();
102       docsNames.add(data.getDocumentName());
103     }
104     return docsNames;
105   }
106 
107   /**
108    * This method should only be used by the Serial Datastore to set
109    */
110   public void setDocumentPersistentID(int index, Object persID){
111     if (index >= docDataList.size()) return;
112     ((DocumentData)docDataList.get(index)).setPersistentID(persID);
113     if (DEBUG) Out.prln("IDs are now: " + docDataList);
114   }
115 
116   /**
117    * Gets the name of a document in this corpus.
118    * @param index the index of the document
119    * @return a String value representing the name of the document at
120    * <tt>index</tt> in this corpus.<P>
121    */
122   public String getDocumentName(int index){
123     if (index >= docDataList.size()) return "No such document";
124 
125     return ((DocumentData) docDataList.get(index)).getDocumentName();
126   }
127 
128   /**
129    * Unloads the document from memory, but calls sync() first, to store the
130    * changes
131    */
132   public void unloadDocument(int index) {
133     //1. check whether its been loaded and is a persistent one
134     // if a persistent doc is not loaded, there's nothing we need to do
135     if ( (! isDocumentLoaded(index)) && isPersistentDocument(index))
136       return;
137 
138     //2. sync the document before releasing it from memory, because the
139     //creole register garbage collects all LRs which are not used any more
140     Document doc = (Document) documents.get(index);
141     try {
142       //if the document is not already adopted, we need to do that first
143       if (doc.getLRPersistenceId() == null) {
144         doc = (Document) this.getDataStore().adopt(doc, null);
145         this.getDataStore().sync(doc);
146         this.setDocumentPersistentID(index, doc.getLRPersistenceId());
147       } else //if it is adopted, just sync it
148         this.getDataStore().sync(doc);
149 
150       //3. remove the document from the memory
151       //do this, only if the saving has succeeded
152       documents.set(index, null);
153 
154     } catch (PersistenceException ex) {
155         throw new GateRuntimeException("Error unloading document from corpus"
156                       + "because document sync failed: " + ex.getMessage());
157     } catch (gate.security.SecurityException ex1) {
158         throw new GateRuntimeException("Error unloading document from corpus"
159                       + "because of document access error: " + ex1.getMessage());
160     }
161 
162   }
163 
164   /**
165    * Unloads a document from memory
166    */
167   public void unloadDocument(Document doc) {
168     if (DEBUG) Out.prln("Document to be unloaded :" + doc.getName());
169     //1. determine the index of the document; if not there, do nothing
170     int index = findDocument(doc);
171     if (index == -1)
172       return;
173     if (DEBUG) Out.prln("Index of doc: " + index);
174     if (DEBUG) Out.prln("Size of corpus: " + documents.size());
175     unloadDocument(index);
176 //    documents.remove(new Integer(index));
177   }
178 
179   /**
180    * This method returns true when the document is already loaded in memory
181    */
182   public boolean isDocumentLoaded(int index) {
183     if (documents == null || documents.isEmpty()) return false;
184     return documents.get(index) != null;
185   }
186 
187   /**
188    * This method returns true when the document is already stored on disk
189    * i.e., is not transient
190    */
191   public boolean isPersistentDocument(int index) {
192     if (documents == null || documents.isEmpty()) return false;
193     return (((DocumentData)docDataList.get(index)).getPersistentID() != null);
194   }
195 
196   /**
197    * Every LR that is a CreoleListener (and other Listeners too) must
198    * override this method and make sure it removes itself from the
199    * objects which it has been listening to. Otherwise, the object will
200    * not be released from memory (memory leak!).
201    */
202   public void cleanup() {
203     if (DEBUG) Out.prln("serial corpus cleanup called");
204     if (corpusListeners != null)
205       corpusListeners = null;
206     if (documents != null)
207       documents.clear();
208     docDataList.clear();
209     Gate.getCreoleRegister().removeCreoleListener(this);
210     if (this.dataStore != null) {
211       this.dataStore.removeDatastoreListener(this);
212     }
213   }
214 
215   /**
216    * Fills this corpus with documents created from files in a directory.
217    * @param filter the file filter used to select files from the target
218    * directory. If the filter is <tt>null</tt> all the files will be accepted.
219    * @param directory the directory from which the files will be picked. This
220    * parameter is an URL for uniformity. It needs to be a URL of type file
221    * otherwise an InvalidArgumentException will be thrown.
222    * An implementation for this method is provided as a static method at
223    * {@link gate.corpora.CorpusImpl#populate(Corpus, URL, FileFilter, String, boolean)}.
224    * @param encoding the encoding to be used for reading the documents
225    * @param recurseDirectories should the directory be parsed recursively?. If
226    * <tt>true</tt> all the files from the provided directory and all its
227    * children directories (on as many levels as necessary) will be picked if
228    * accepted by the filter otherwise the children directories will be ignored.
229    */
230   public void populate(URL directory, FileFilter filter, String encoding,
231                        boolean recurseDirectories)
232               throws IOException, ResourceInstantiationException{
233     CorpusImpl.populate(this, directory, filter, encoding, recurseDirectories);
234   }
235 
236 
237   public synchronized void removeCorpusListener(CorpusListener l) {
238     if (corpusListeners != null && corpusListeners.contains(l)) {
239       Vector v = (Vector) corpusListeners.clone();
240       v.removeElement(l);
241       corpusListeners = v;
242     }
243   }
244   public synchronized void addCorpusListener(CorpusListener l) {
245     Vector v = corpusListeners == null ? new Vector(2) : (Vector) corpusListeners.clone();
246     if (!v.contains(l)) {
247       v.addElement(l);
248       corpusListeners = v;
249     }
250   }
251   protected void fireDocumentAdded(CorpusEvent e) {
252     if (corpusListeners != null) {
253       Vector listeners = corpusListeners;
254       int count = listeners.size();
255       for (int i = 0; i < count; i++) {
256         ((CorpusListener) listeners.elementAt(i)).documentAdded(e);
257       }
258     }
259   }
260   protected void fireDocumentRemoved(CorpusEvent e) {
261     if (corpusListeners != null) {
262       Vector listeners = corpusListeners;
263       int count = listeners.size();
264       for (int i = 0; i < count; i++) {
265         ((CorpusListener) listeners.elementAt(i)).documentRemoved(e);
266       }
267     }
268   }
269   public void resourceLoaded(CreoleEvent e) {
270   }
271 
272   public void resourceRenamed(Resource resource, String oldName,
273                               String newName){}
274 
275   public void resourceUnloaded(CreoleEvent e) {
276     Resource res = e.getResource();
277     if (res instanceof Document) {
278       Document doc = (Document) res;
279       if (DEBUG)
280         Out.prln("resource Unloaded called ");
281       //remove from the corpus too, if a transient one
282       if (doc.getDataStore() != this.getDataStore()) {
283         this.remove(doc);
284       } else {
285         //unload all occurences
286         int index = indexOf(res);
287         if (index < 0)
288           return;
289         documents.set(index, null);
290         if (DEBUG)
291           Out.prln("corpus: document "+ index + " unloaded and set to null");
292       } //if
293     }
294   }
295   public void datastoreOpened(CreoleEvent e) {
296   }
297   public void datastoreCreated(CreoleEvent e) {
298   }
299   public void datastoreClosed(CreoleEvent e) {
300     if (! e.getDatastore().equals(this.getDataStore()))
301       return;
302     if (this.getDataStore() != null)
303       this.getDataStore().removeDatastoreListener(this);
304     //close this corpus, since it cannot stay open when the DS it comes from
305     //is closed
306     Factory.deleteResource(this);
307   }
308   /**
309    * Called by a datastore when a new resource has been adopted
310    */
311   public void resourceAdopted(DatastoreEvent evt){
312   }
313 
314   /**
315    * Called by a datastore when a resource has been deleted
316    */
317   public void resourceDeleted(DatastoreEvent evt){
318     DataStore ds = (DataStore)evt.getSource();
319     //1. check whether this datastore fired the event. If not, return.
320     if (!ds.equals(this.dataStore))
321       return;
322 
323     Object docID = evt.getResourceID();
324     if (docID == null)
325       return;
326 
327     if (DEBUG) Out.prln("Resource deleted called for: " + docID);
328     //first check if it is this corpus that's been deleted, it must be
329     //unloaded immediately
330     if (docID.equals(this.getLRPersistenceId())) {
331       Factory.deleteResource(this);
332       return;
333     }//if
334 
335     boolean isDirty=false;
336     //the problem here is that I only have the doc persistent ID
337     //and nothing else, so I need to determine the index of the doc first
338     for (int i=0; i< docDataList.size(); i++) {
339       DocumentData docData = (DocumentData)docDataList.get(i);
340       //we've found the correct document
341       //don't break the loop, because it might appear more than once
342       if (docID.equals(docData.getPersistentID())) {
343         remove(i);
344         isDirty = true;
345       }//if
346     }//for loop through the doc data
347 
348     if (isDirty)
349       try {
350         this.dataStore.sync(this);
351       } catch (PersistenceException ex) {
352         throw new GateRuntimeException("SerialCorpusImpl: " + ex.getMessage());
353       } catch (SecurityException sex) {
354         throw new GateRuntimeException("SerialCorpusImpl: " + sex.getMessage());
355       }
356   }//resourceDeleted
357 
358   /**
359    * Called by a datastore when a resource has been wrote into the datastore
360    */
361   public void resourceWritten(DatastoreEvent evt){
362     if (evt.getResourceID().equals(this.getLRPersistenceId())) {
363       thisResourceWritten();
364     }
365   }
366 
367 
368 
369   //List methods
370   //java docs will be automatically copied from the List interface.
371 
372   public int size() {
373     return docDataList.size();
374   }
375 
376   public boolean isEmpty() {
377     return docDataList.isEmpty();
378   }
379 
380   public boolean contains(Object o){
381     //return true if:
382     // - the document data list contains a document with such a name
383     //   and persistent id
384 
385     if(! (o instanceof Document))
386       return false;
387 
388     int index = findDocument((Document) o);
389     if (index < 0)
390       return false;
391     else
392       return true;
393   }
394 
395   public Iterator iterator(){
396     return new Iterator(){
397       Iterator docDataIter = docDataList.iterator();
398 
399       public boolean hasNext() {
400         return docDataIter.hasNext();
401       }
402 
403       public Object next(){
404 
405         //try finding a document with the same name and persistent ID
406         DocumentData docData = (DocumentData) docDataIter.next();
407         int index = docDataList.indexOf(docData);
408         return SerialCorpusImpl.this.get(index);
409       }
410 
411       public void remove() {
412         throw new UnsupportedOperationException("SerialCorpusImpl does not " +
413                     "support remove in the iterators");
414       }
415     }; //return
416 
417   }//iterator
418 
419   public String toString() {
420     return "document data " + docDataList.toString() + " documents " + documents;
421   }
422 
423   public Object[] toArray(){
424     //there is a problem here, because some docs might not be instantiated
425     throw new MethodNotImplementedException(
426                 "toArray() is not implemented for SerialCorpusImpl");
427   }
428 
429   public Object[] toArray(Object[] a){
430     //there is a problem here, because some docs might not be instantiated
431     throw new MethodNotImplementedException(
432                 "toArray(Object[] a) is not implemented for SerialCorpusImpl");
433   }
434 
435   public boolean add(Object o){
436     if (! (o instanceof Document) || o == null)
437       return false;
438     Document doc = (Document) o;
439 
440     //make it accept only docs from its own datastore
441     if (doc.getDataStore() != null
442         && !this.dataStore.equals(doc.getDataStore())) {
443       Err.prln("Error: Persistent corpus can only accept documents " +
444                "from its own datastore!");
445       return false;
446     }//if
447 
448     //add the document with its index in the docDataList
449     //in this case, since it's going to be added to the end
450     //the index will be the size of the docDataList before
451     //the addition
452     DocumentData docData = new DocumentData(doc.getName(),
453                                             doc.getLRPersistenceId());
454     boolean result = docDataList.add(docData);
455     documents.add(doc);
456     documentAdded(doc);
457     fireDocumentAdded(new CorpusEvent(SerialCorpusImpl.this,
458                                       doc,
459                                       docDataList.size()-1,
460                                       CorpusEvent.DOCUMENT_ADDED));
461 
462     return result;
463   }
464 
465   public boolean remove(Object o){
466     if (DEBUG) Out.prln("SerialCorpus:Remove object called");
467     if (! (o instanceof Document))
468       return false;
469     Document doc = (Document) o;
470 
471     //see if we can find it first. If not, then judt return
472     int index = findDocument(doc);
473     if (index == -1)
474       return false;
475 
476     if(index < docDataList.size()) { //we found it, so remove it
477       docDataList.remove(index);
478       Document oldDoc =  (Document) documents.remove(index);
479       if (DEBUG) Out.prln("documents after remove of " + oldDoc.getName()
480                           + " are " + documents);
481       documentRemoved(oldDoc.getLRPersistenceId().toString());
482       fireDocumentRemoved(new CorpusEvent(SerialCorpusImpl.this,
483                                           oldDoc,
484                                           index,
485                                           CorpusEvent.DOCUMENT_REMOVED));
486     }
487 
488     return true;
489   }
490 
491   public int findDocument(Document doc) {
492     boolean found = false;
493     DocumentData docData = null;
494 
495     //first try finding the document in memory
496     int index = documents.indexOf(doc);
497     if (index > -1 && index < docDataList.size())
498       return index;
499 
500     //else try finding a document with the same name and persistent ID
501     Iterator iter = docDataList.iterator();
502     for (index = 0;  iter.hasNext(); index++) {
503       docData = (DocumentData) iter.next();
504       if (docData.getDocumentName().equals(doc.getName()) &&
505           docData.getPersistentID().equals(doc.getLRPersistenceId())) {
506         found = true;
507         break;
508       }
509     }
510     if (found && index < docDataList.size())
511       return index;
512     else
513       return -1;
514   }//findDocument
515 
516   public boolean containsAll(Collection c){
517     Iterator iter = c.iterator();
518     while (iter.hasNext()) {
519       if (! contains(iter.next()))
520         return false;
521     }
522     return true;
523   }
524 
525   public boolean addAll(Collection c){
526     boolean allAdded = true;
527     Iterator iter = c.iterator();
528     while (iter.hasNext()) {
529       if (! add(iter.next()))
530         allAdded = false;
531     }
532     return allAdded;
533   }
534 
535   public boolean addAll(int index, Collection c){
536     throw new UnsupportedOperationException();
537   }
538 
539   public boolean removeAll(Collection c){
540     boolean allRemoved = true;
541     Iterator iter = c.iterator();
542     while (iter.hasNext()) {
543       if (! remove(iter.next()))
544         allRemoved = false;
545     }
546     return allRemoved;
547 
548   }
549 
550   public boolean retainAll(Collection c){
551     throw new UnsupportedOperationException();
552   }
553 
554   public void clear(){
555     documents.clear();
556     docDataList.clear();
557   }
558 
559   public boolean equals(Object o){
560     if (! (o instanceof SerialCorpusImpl))
561       return false;
562     SerialCorpusImpl oCorpus = (SerialCorpusImpl) o;
563     if ((this == null && oCorpus != null) || (oCorpus == null && this != null))
564       return false;
565     if (oCorpus == this)
566       return true;
567     if ((oCorpus.lrPersistentId == this.lrPersistentId ||
568           ( this.lrPersistentId != null &&
569             this.lrPersistentId.equals(oCorpus.lrPersistentId))
570           )
571         &&
572         oCorpus.name.equals(this.name)
573         &&
574         (oCorpus.dataStore == this.dataStore
575           || oCorpus.dataStore.equals(this.dataStore))
576         &&
577         oCorpus.docDataList.equals(docDataList))
578       return true;
579     return false;
580   }
581 
582   public int hashCode(){
583     return docDataList.hashCode();
584   }
585 
586   public Object get(int index){
587       if (index >= docDataList.size())
588         return null;
589 
590       Object res = documents.get(index);
591 
592       if (DEBUG)
593         Out.prln("SerialCorpusImpl: get(): index " + index + "result: " + res);
594 
595       //if the document is null, then I must get it from the DS
596       if (res == null) {
597         FeatureMap features = Factory.newFeatureMap();
598         features.put(DataStore.DATASTORE_FEATURE_NAME, this.dataStore);
599         try {
600           features.put(DataStore.LR_ID_FEATURE_NAME,
601                       ((DocumentData)docDataList.get(index)).getPersistentID());
602           Resource lr = Factory.createResource( "gate.corpora.DocumentImpl",
603                                                 features);
604           if (DEBUG)
605             Out.prln("Loaded document :" + lr.getName());
606           //change the result to the newly loaded doc
607           res = lr;
608 
609           //finally replace the doc with the instantiated version
610           documents.set(index, lr);
611         } catch (ResourceInstantiationException ex) {
612           Err.prln("Error reading document inside a serialised corpus.");
613           throw new GateRuntimeException(ex.getMessage());
614         }
615       }
616 
617       return res;
618   }
619 
620   public Object set(int index, Object element){
621     throw new gate.util.MethodNotImplementedException();
622         //fire the 2 events
623 /*        fireDocumentRemoved(new CorpusEvent(SerialCorpusImpl.this,
624                                             oldDoc,
625                                             ((Integer) key).intValue(),
626                                             CorpusEvent.DOCUMENT_REMOVED));
627         fireDocumentAdded(new CorpusEvent(SerialCorpusImpl.this,
628                                           newDoc,
629                                           ((Integer) key).intValue(),
630                                           CorpusEvent.DOCUMENT_ADDED));
631 */
632   }
633 
634   public void add(int index, Object o){
635     if (! (o instanceof Document) || o == null)
636       return;
637     Document doc = (Document) o;
638 
639     DocumentData docData = new DocumentData(doc.getName(),
640                                             doc.getLRPersistenceId());
641     docDataList.add(index, docData);
642 
643     documents.add(index, doc);
644     documentAdded(doc);
645     fireDocumentAdded(new CorpusEvent(SerialCorpusImpl.this,
646                                       doc,
647                                       index,
648                                       CorpusEvent.DOCUMENT_ADDED));
649 
650   }
651 
652   public Object remove(int index){
653     if (DEBUG) Out.prln("Remove index called");
654 
655     boolean isLoaded = isDocumentLoaded(index);
656     Document removed = (Document) get(index);
657     documentRemoved(removed.getLRPersistenceId().toString());
658     if (!isLoaded){
659       unloadDocument(removed);
660     }
661 
662     docDataList.remove(index);
663     Document res = (Document) documents.remove(index);
664     fireDocumentRemoved(new CorpusEvent(SerialCorpusImpl.this,
665                                         res,
666                                         index,
667                                         CorpusEvent.DOCUMENT_REMOVED));
668     return res;
669 
670   }
671 
672   public int indexOf(Object o){
673     if (o instanceof Document)
674       return findDocument((Document) o);
675 
676     return -1;
677   }
678 
679   public int lastIndexOf(Object o){
680     throw new gate.util.MethodNotImplementedException();
681   }
682 
683   public ListIterator listIterator(){
684     throw new gate.util.MethodNotImplementedException();
685   }
686 
687   public ListIterator listIterator(int index){
688     throw new gate.util.MethodNotImplementedException();
689   }
690 
691   /**
692    * persistent Corpus does not support this method as all
693    * the documents might no be in memory
694    */
695   public List subList(int fromIndex, int toIndex){
696     throw new gate.util.MethodNotImplementedException();
697   }
698 
699   public void setDataStore(DataStore dataStore)
700                 throws gate.persist.PersistenceException {
701     super.setDataStore( dataStore);
702     if (this.dataStore != null)
703       this.dataStore.addDatastoreListener(this);
704   }
705 
706   public void setTransientSource(Object source) {
707     if (! (source instanceof Corpus))
708       return;
709 
710     //the following initialisation is only valid when we're constructing
711     //this object from a transient one. If it has already been stored in
712     //a datastore, then the initialisation is done in readObject() since
713     //this method is the one called by serialisation, when objects
714     //are restored.
715     if (this.dataStore != null && this.lrPersistentId != null)
716       return;
717 
718     Corpus tCorpus = (Corpus) source;
719 
720     //copy the corpus name and features from the one in memory
721     this.setName(tCorpus.getName());
722     this.setFeatures(tCorpus.getFeatures());
723 
724     docDataList = new ArrayList();
725     //now cache the names of all docs for future use
726     Iterator iter = tCorpus.getDocumentNames().iterator();
727     while (iter.hasNext())
728       docDataList.add(new DocumentData((String) iter.next(), null));
729 
730     //copy all the documents from the transient corpus
731     documents = new ArrayList();
732     documents.addAll(tCorpus);
733 
734     this.addedDocs = new Vector();
735     this.removedDocIDs = new Vector();
736     this.changedDocs = new Vector();
737 
738     //make sure we fire events when docs are added/removed/etc
739     Gate.getCreoleRegister().addCreoleListener(this);
740 
741   }
742 
743   //we don't keep the transient source, so always return null
744   //Sill this must be implemented, coz of the GUI and Factory
745   public Object getTransientSource() {
746     return null;
747   }
748 
749 
750   public Resource init() throws gate.creole.ResourceInstantiationException {
751     super.init();
752 
753     return this;
754 
755   }
756 
757 
758   /**
759    * readObject - calls the default readObject() and then initialises the
760    * transient data
761    *
762    * @serialData Read serializable fields. No optional data read.
763    */
764   private void readObject(ObjectInputStream s)
765       throws IOException, ClassNotFoundException {
766     s.defaultReadObject();
767     documents = new ArrayList(docDataList.size());
768     for (int i = 0; i < docDataList.size(); i++)
769       documents.add(null);
770     corpusListeners = new Vector();
771     //finally set the creole listeners if the LR is like that
772     Gate.getCreoleRegister().addCreoleListener(this);
773     if (this.dataStore != null)
774       this.dataStore.addDatastoreListener(this);
775 
776     //if indexed construct the manager.
777     IndexDefinition  definition = (IndexDefinition) this.getFeatures().get(
778                 GateConstants.CORPUS_INDEX_DEFINITION_FEATURE_KEY);
779     if (definition != null){
780       String className = definition.getIrEngineClassName();
781       try{
782         //Class aClass = Class.forName(className);
783         Class aClass = Class.forName(className, true, Gate.getClassLoader());
784         IREngine engine = (IREngine)aClass.newInstance();
785         this.indexManager = engine.getIndexmanager();
786         this.indexManager.setIndexDefinition(definition);
787         this.indexManager.setCorpus(this);
788       }catch(Exception e){
789         e.printStackTrace(Err.getPrintWriter());
790       }
791 //      switch (definition.getIndexType()) {
792 //        case GateConstants.IR_LUCENE_INVFILE:
793 //          this.indexManager = new LuceneIndexManager();
794 //          this.indexManager.setIndexDefinition(definition);
795 //          this.indexManager.setCorpus(this);
796 //          break;
797 //      }
798       this.addedDocs = new Vector();
799       this.removedDocIDs = new Vector();
800       this.changedDocs = new Vector();
801     }
802   }//readObject
803 
804   public void setIndexDefinition(IndexDefinition definition) {
805     if (definition != null){
806       this.getFeatures().put(GateConstants.CORPUS_INDEX_DEFINITION_FEATURE_KEY,
807                               definition);
808 
809       String className = definition.getIrEngineClassName();
810       try{
811         //Class aClass = Class.forName(className);
812         Class aClass = Class.forName(className, true, Gate.getClassLoader());
813         IREngine engine = (IREngine)aClass.newInstance();
814         this.indexManager = engine.getIndexmanager();
815         this.indexManager.setIndexDefinition(definition);
816         this.indexManager.setCorpus(this);
817       }catch(Exception e){
818         e.printStackTrace(Err.getPrintWriter());
819       }
820 //    switch (definition.getIndexType()) {
821 //      case GateConstants.IR_LUCENE_INVFILE:
822 //        this.indexManager = new LuceneIndexManager();
823 //        this.indexManager.setIndexDefinition(definition);
824 //        this.indexManager.setCorpus(this);
825 //        break;
826 //    }
827       this.addedDocs = new Vector();
828       this.removedDocIDs = new Vector();
829       this.changedDocs = new Vector();
830     }
831   }
832 
833   public IndexDefinition getIndexDefinition() {
834     return (IndexDefinition) this.getFeatures().get(
835                            GateConstants.CORPUS_INDEX_DEFINITION_FEATURE_KEY);
836   }
837 
838   public IndexManager getIndexManager() {
839     return this.indexManager;
840   }
841 
842   public IndexStatistics getIndexStatistics(){
843     return (IndexStatistics) this.getFeatures().get(
844                            GateConstants.CORPUS_INDEX_STATISTICS_FEATURE_KEY);
845   }
846 
847   private void documentAdded(Document doc) {
848     if (indexManager != null){
849       addedDocs.add(doc);
850     }
851   }
852 
853   private void documentRemoved(String lrID) {
854     if (indexManager != null) {
855       removedDocIDs.add(lrID);
856     }
857   }
858 
859   private void thisResourceWritten() {
860     if (indexManager != null) {
861       try {
862         for (int i = 0; i<documents.size(); i++) {
863           if (documents.get(i) != null) {
864             Document doc = (Document) documents.get(i);
865             if (!addedDocs.contains(doc) && doc.isModified()) {
866               changedDocs.add(doc);
867             }
868           }
869         }
870         indexManager.sync(addedDocs, removedDocIDs, changedDocs);
871       } catch (IndexException ie) {
872         ie.printStackTrace();
873       }
874     }
875   }
876 
877 }
878