GATE
Version 3.1-2270

gate.corpora
Class SerialCorpusImpl

java.lang.Object
  extended by gate.util.AbstractFeatureBearer
      extended by gate.creole.AbstractResource
          extended by gate.creole.AbstractLanguageResource
              extended by gate.corpora.SerialCorpusImpl
All Implemented Interfaces:
Corpus, IndexedCorpus, CreoleListener, DatastoreListener, LanguageResource, Resource, SimpleCorpus, FeatureBearer, NameBearer, Serializable, Iterable, Collection, EventListener, List

public class SerialCorpusImpl
extends AbstractLanguageResource
implements Corpus, CreoleListener, DatastoreListener, IndexedCorpus

See Also:
Serialized Form

Field Summary
protected  List addedDocs
           
protected  List changedDocs
           
protected  Vector corpusListeners
           
protected  List docDataList
           
protected  List documents
           
protected  IndexManager indexManager
           
protected  List removedDocIDs
           
 
Fields inherited from class gate.creole.AbstractLanguageResource
dataStore, lrPersistentId
 
Fields inherited from class gate.creole.AbstractResource
name
 
Fields inherited from class gate.util.AbstractFeatureBearer
features
 
Constructor Summary
  SerialCorpusImpl()
           
protected SerialCorpusImpl(Corpus tCorpus)
          Constructor to create a SerialCorpus from a transient one.
 
Method Summary
 void add(int index, Object o)
           
 boolean add(Object o)
           
 boolean addAll(Collection c)
           
 boolean addAll(int index, Collection c)
           
 void addCorpusListener(CorpusListener l)
          Registers a new CorpusListener with this corpus.
 void cleanup()
          Every LR that is a CreoleListener (and other Listeners too) must override this method and make sure it removes itself from the objects which it has been listening to.
 void clear()
           
 boolean contains(Object o)
           
 boolean containsAll(Collection c)
           
 void datastoreClosed(CreoleEvent e)
          Called when a DataStore has been closed
 void datastoreCreated(CreoleEvent e)
          Called when a DataStore has been created
 void datastoreOpened(CreoleEvent e)
          Called when a DataStore has been opened
 boolean equals(Object o)
           
 int findDocument(Document doc)
           
protected  void fireDocumentAdded(CorpusEvent e)
           
protected  void fireDocumentRemoved(CorpusEvent e)
           
 Object get(int index)
           
 String getDocumentName(int index)
          Gets the name of a document in this corpus.
 List getDocumentNames()
          Gets the names of the documents in this corpus.
 IndexDefinition getIndexDefinition()
           
 IndexManager getIndexManager()
           
 IndexStatistics getIndexStatistics()
           
 Object getTransientSource()
           
 int hashCode()
           
 int indexOf(Object o)
           
 Resource init()
          Initialise this resource, and return it.
 boolean isDocumentLoaded(int index)
          This method returns true when the document is already loaded in memory
 boolean isEmpty()
           
 boolean isPersistentDocument(int index)
          This method returns true when the document is already stored on disk i.e., is not transient
 Iterator iterator()
           
 int lastIndexOf(Object o)
           
 ListIterator listIterator()
           
 ListIterator listIterator(int index)
           
 void populate(URL directory, FileFilter filter, String encoding, boolean recurseDirectories)
          Fills this corpus with documents created from files in a directory.
 Object remove(int index)
           
 boolean remove(Object o)
           
 boolean removeAll(Collection c)
           
 void removeCorpusListener(CorpusListener l)
          Removes one of the listeners registered with this corpus.
 void resourceAdopted(DatastoreEvent evt)
          Called by a datastore when a new resource has been adopted
 void resourceDeleted(DatastoreEvent evt)
          Called by a datastore when a resource has been deleted
 void resourceLoaded(CreoleEvent e)
          Called when a new Resource has been loaded into the system
 void resourceRenamed(Resource resource, String oldName, String newName)
          Called when the creole register has renamed a resource.
 void resourceUnloaded(CreoleEvent e)
          Called when a Resource has been removed from the system
 void resourceWritten(DatastoreEvent evt)
          Called by a datastore when a resource has been written to the datastore
 boolean retainAll(Collection c)
           
 Object set(int index, Object element)
           
 void setDataStore(DataStore dataStore)
          Set the data store that this LR lives in.
 void setDocumentPersistentID(int index, Object persID)
          This method should only be used by the Serial Datastore to set the persistent ID of the document at the given index.
 void setIndexDefinition(IndexDefinition definition)
          Sets the definition to this corpus.
 void setTransientSource(Object source)
           
 int size()
           
 List subList(int fromIndex, int toIndex)
          A persistent Corpus does not support this method, as not all of the documents might be in memory.
 Object[] toArray()
           
 Object[] toArray(Object[] a)
           
 String toString()
           
 void unloadDocument(Document doc)
          Unloads a document from memory
 void unloadDocument(int index)
          Unloads the document from memory, but calls sync() first, to store the changes
 
Methods inherited from class gate.creole.AbstractLanguageResource
getDataStore, getLRPersistenceId, getParent, isModified, setLRPersistenceId, setParent, sync
 
Methods inherited from class gate.creole.AbstractResource
checkParameterValues, getBeanInfo, getName, getParameterValue, getParameterValue, removeResourceListeners, setName, setParameterValue, setParameterValue, setParameterValues, setParameterValues, setResourceListeners
 
Methods inherited from class gate.util.AbstractFeatureBearer
getFeatures, setFeatures
 
Methods inherited from class java.lang.Object
clone, finalize, getClass, notify, notifyAll, wait, wait, wait
 

Field Detail

corpusListeners

protected transient Vector corpusListeners

docDataList

protected List docDataList

documents

protected transient List documents

indexManager

protected transient IndexManager indexManager

addedDocs

protected transient List addedDocs

removedDocIDs

protected transient List removedDocIDs

changedDocs

protected transient List changedDocs
Constructor Detail

SerialCorpusImpl

public SerialCorpusImpl()

SerialCorpusImpl

protected SerialCorpusImpl(Corpus tCorpus)
Constructor to create a SerialCorpus from a transient one. This is called by adopt() to store the transient corpus and re-route the method calls to it, until the corpus is synced to disk. After that, the transientCorpus will always be null, so the new functionality will be used instead.

Method Detail

getDocumentNames

public List getDocumentNames()
Gets the names of the documents in this corpus.

Specified by:
getDocumentNames in interface SimpleCorpus
Returns:
a List of Strings representing the names of the documents in this corpus.

setDocumentPersistentID

public void setDocumentPersistentID(int index,
                                    Object persID)
This method should only be used by the Serial Datastore to set the persistent ID of the document at the given index.


getDocumentName

public String getDocumentName(int index)
Gets the name of a document in this corpus.

Specified by:
getDocumentName in interface SimpleCorpus
Parameters:
index - the index of the document
Returns:
a String value representing the name of the document at index in this corpus.


unloadDocument

public void unloadDocument(int index)
Unloads the document from memory, but calls sync() first, to store the changes


unloadDocument

public void unloadDocument(Document doc)
Unloads a document from memory

Specified by:
unloadDocument in interface Corpus
Parameters:
doc - Document to be unloaded from memory.

isDocumentLoaded

public boolean isDocumentLoaded(int index)
This method returns true when the document is already loaded in memory

Specified by:
isDocumentLoaded in interface Corpus

isPersistentDocument

public boolean isPersistentDocument(int index)
This method returns true when the document is already stored on disk i.e., is not transient


cleanup

public void cleanup()
Every LR that is a CreoleListener (and other Listeners too) must override this method and make sure it removes itself from the objects which it has been listening to. Otherwise, the object will not be released from memory (memory leak!).

Specified by:
cleanup in interface Resource
Overrides:
cleanup in class AbstractLanguageResource

populate

public void populate(URL directory,
                     FileFilter filter,
                     String encoding,
                     boolean recurseDirectories)
              throws IOException,
                     ResourceInstantiationException
Fills this corpus with documents created from files in a directory.

Specified by:
populate in interface SimpleCorpus
Parameters:
filter - the file filter used to select files from the target directory. If the filter is null all the files will be accepted.
directory - the directory from which the files will be picked. This parameter is a URL for uniformity. It needs to be a URL of type file, otherwise an InvalidArgumentException will be thrown. An implementation for this method is provided as a static method at CorpusImpl.populate(Corpus, URL, FileFilter, String, boolean).
encoding - the encoding to be used for reading the documents
recurseDirectories - should the directory be parsed recursively? If true, all the files from the provided directory and all its children directories (on as many levels as necessary) will be picked if accepted by the filter; otherwise the children directories will be ignored.
Throws:
IOException
ResourceInstantiationException

removeCorpusListener

public void removeCorpusListener(CorpusListener l)
Description copied from interface: Corpus
Removes one of the listeners registered with this corpus.

Specified by:
removeCorpusListener in interface Corpus
Parameters:
l - the listener to be removed.

addCorpusListener

public void addCorpusListener(CorpusListener l)
Description copied from interface: Corpus
Registers a new CorpusListener with this corpus.

Specified by:
addCorpusListener in interface Corpus
Parameters:
l - the listener to be added.

fireDocumentAdded

protected void fireDocumentAdded(CorpusEvent e)

fireDocumentRemoved

protected void fireDocumentRemoved(CorpusEvent e)

resourceLoaded

public void resourceLoaded(CreoleEvent e)
Description copied from interface: CreoleListener
Called when a new Resource has been loaded into the system

Specified by:
resourceLoaded in interface CreoleListener

resourceRenamed

public void resourceRenamed(Resource resource,
                            String oldName,
                            String newName)
Description copied from interface: CreoleListener
Called when the creole register has renamed a resource.

Specified by:
resourceRenamed in interface CreoleListener

resourceUnloaded

public void resourceUnloaded(CreoleEvent e)
Description copied from interface: CreoleListener
Called when a Resource has been removed from the system

Specified by:
resourceUnloaded in interface CreoleListener

datastoreOpened

public void datastoreOpened(CreoleEvent e)
Description copied from interface: CreoleListener
Called when a DataStore has been opened

Specified by:
datastoreOpened in interface CreoleListener

datastoreCreated

public void datastoreCreated(CreoleEvent e)
Description copied from interface: CreoleListener
Called when a DataStore has been created

Specified by:
datastoreCreated in interface CreoleListener

datastoreClosed

public void datastoreClosed(CreoleEvent e)
Description copied from interface: CreoleListener
Called when a DataStore has been closed

Specified by:
datastoreClosed in interface CreoleListener

resourceAdopted

public void resourceAdopted(DatastoreEvent evt)
Called by a datastore when a new resource has been adopted

Specified by:
resourceAdopted in interface DatastoreListener

resourceDeleted

public void resourceDeleted(DatastoreEvent evt)
Called by a datastore when a resource has been deleted

Specified by:
resourceDeleted in interface DatastoreListener

resourceWritten

public void resourceWritten(DatastoreEvent evt)
Called by a datastore when a resource has been written to the datastore

Specified by:
resourceWritten in interface DatastoreListener

size

public int size()
Specified by:
size in interface Collection
Specified by:
size in interface List

isEmpty

public boolean isEmpty()
Specified by:
isEmpty in interface Collection
Specified by:
isEmpty in interface List

contains

public boolean contains(Object o)
Specified by:
contains in interface Collection
Specified by:
contains in interface List

iterator

public Iterator iterator()
Specified by:
iterator in interface Iterable
Specified by:
iterator in interface Collection
Specified by:
iterator in interface List

toString

public String toString()
Overrides:
toString in class Object

toArray

public Object[] toArray()
Specified by:
toArray in interface Collection
Specified by:
toArray in interface List

toArray

public Object[] toArray(Object[] a)
Specified by:
toArray in interface Collection
Specified by:
toArray in interface List

add

public boolean add(Object o)
Specified by:
add in interface Collection
Specified by:
add in interface List

remove

public boolean remove(Object o)
Specified by:
remove in interface Collection
Specified by:
remove in interface List

findDocument

public int findDocument(Document doc)

containsAll

public boolean containsAll(Collection c)
Specified by:
containsAll in interface Collection
Specified by:
containsAll in interface List

addAll

public boolean addAll(Collection c)
Specified by:
addAll in interface Collection
Specified by:
addAll in interface List

addAll

public boolean addAll(int index,
                      Collection c)
Specified by:
addAll in interface List

removeAll

public boolean removeAll(Collection c)
Specified by:
removeAll in interface Collection
Specified by:
removeAll in interface List

retainAll

public boolean retainAll(Collection c)
Specified by:
retainAll in interface Collection
Specified by:
retainAll in interface List

clear

public void clear()
Specified by:
clear in interface Collection
Specified by:
clear in interface List

equals

public boolean equals(Object o)
Specified by:
equals in interface Collection
Specified by:
equals in interface List
Overrides:
equals in class Object

hashCode

public int hashCode()
Specified by:
hashCode in interface Collection
Specified by:
hashCode in interface List
Overrides:
hashCode in class Object

get

public Object get(int index)
Specified by:
get in interface List

set

public Object set(int index,
                  Object element)
Specified by:
set in interface List

add

public void add(int index,
                Object o)
Specified by:
add in interface List

remove

public Object remove(int index)
Specified by:
remove in interface List

indexOf

public int indexOf(Object o)
Specified by:
indexOf in interface List

lastIndexOf

public int lastIndexOf(Object o)
Specified by:
lastIndexOf in interface List

listIterator

public ListIterator listIterator()
Specified by:
listIterator in interface List

listIterator

public ListIterator listIterator(int index)
Specified by:
listIterator in interface List

subList

public List subList(int fromIndex,
                    int toIndex)
A persistent Corpus does not support this method, as not all of the documents might be in memory.

Specified by:
subList in interface List

setDataStore

public void setDataStore(DataStore dataStore)
                  throws PersistenceException
Description copied from class: AbstractLanguageResource
Set the data store that this LR lives in.

Specified by:
setDataStore in interface LanguageResource
Overrides:
setDataStore in class AbstractLanguageResource
Throws:
PersistenceException

setTransientSource

public void setTransientSource(Object source)

getTransientSource

public Object getTransientSource()

init

public Resource init()
              throws ResourceInstantiationException
Description copied from class: AbstractResource
Initialise this resource, and return it.

Specified by:
init in interface Resource
Overrides:
init in class AbstractResource
Throws:
ResourceInstantiationException

setIndexDefinition

public void setIndexDefinition(IndexDefinition definition)
Description copied from interface: IndexedCorpus
Sets the definition to this corpus.

Specified by:
setIndexDefinition in interface IndexedCorpus
Parameters:
definition - of index for this corpus

getIndexDefinition

public IndexDefinition getIndexDefinition()
Specified by:
getIndexDefinition in interface IndexedCorpus
Returns:
IndexDefinition definition of index for this corpus.

getIndexManager

public IndexManager getIndexManager()
Specified by:
getIndexManager in interface IndexedCorpus
Returns:
IndexManager manager object for this corpus. It is created after the IndexDefinition has been set, according to the indexType property.

getIndexStatistics

public IndexStatistics getIndexStatistics()
Specified by:
getIndexStatistics in interface IndexedCorpus
Returns:
IndexStatistics statistics for this index.

GATE
Version 3.1-2270