public class TeiReader extends ResourceCollectionReaderBase
Modifier and Type | Class and Description |
---|---|
protected static class |
TeiReader.Handler |
class |
TeiReader.TeiHandler |
ResourceCollectionReaderBase.Resource
Modifier and Type | Field and Description |
---|---|
protected String |
mappingPosLocation |
static String |
PARAM_OMIT_IGNORABLE_WHITESPACE
Do not write ignorable whitespace from the XML file to the CAS.
|
static String |
PARAM_POS_MAPPING_LOCATION
Location of the mapping file for part-of-speech tags to UIMA types.
|
static String |
PARAM_POS_TAG_SET
Use this part-of-speech tag set to resolve the tag set mapping instead of using the
tag set defined as part of the model meta data.
|
static String |
PARAM_READ_CONSTITUENT
Write constituent annotations to the CAS.
|
static String |
PARAM_READ_LEMMA
Write lemma annotations to the CAS.
|
static String |
PARAM_READ_NAMED_ENTITY
Write named entity annotations to the CAS.
|
static String |
PARAM_READ_PARAGRAPH
Write paragraph annotations to the CAS.
|
static String |
PARAM_READ_POS
Write part-of-speech annotations to the CAS.
|
static String |
PARAM_READ_SENTENCE
Write sentence annotations to the CAS.
|
static String |
PARAM_READ_TOKEN
Write token annotations to the CAS.
|
static String |
PARAM_USE_FILENAME_ID
When not using the XML ID, use only the filename instead of the whole URL as ID.
|
static String |
PARAM_USE_XML_ID
Use the xml:id attribute on the TEI elements as document ID.
|
static String |
PARAM_UTTERANCES_AS_SENTENCES
Interpret utterances "u" as sentences "s".
|
protected String |
posTagset |
EXCLUDE_PREFIX, INCLUDE_PREFIX, JAR_PREFIX, KEY_RESOURCE_RESOLVER, PARAM_INCLUDE_HIDDEN, PARAM_LANGUAGE, PARAM_LOG_FREQ, PARAM_PATH, PARAM_PATTERNS, PARAM_SOURCE_LOCATION, PARAM_USE_DEFAULT_EXCLUDES
Constructor and Description |
---|
TeiReader() |
Modifier and Type | Method and Description |
---|---|
void |
getNext(org.apache.uima.cas.CAS aCAS) |
boolean |
hasNext() |
void |
initialize(org.apache.uima.UimaContext aContext) |
protected TeiReader.Handler |
newSaxHandler() |
getBase, getBase, getDefaultExcludes, getLanguage, getProgress, getResolver, getResourceIterator, getResources, getSourceLocation, initCas, initCas, isSingleLocation, locationToUrl, nextFile, scan
close, getLogger, initialize
destroy, getCasInitializer, getProcessingResourceMetaData, initialize, isConsuming, reconfigure, setCasInitializer, typeSystemInit
getConfigParameterValue, getConfigParameterValue, setConfigParameterValue, setConfigParameterValue
getCasManager, getMetaData, getRelativePathResolver, getResourceManager, getUimaContext, getUimaContextAdmin, setLogger, setMetaData
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
public static final String PARAM_READ_TOKEN
public static final String PARAM_READ_POS
public static final String PARAM_READ_LEMMA
public static final String PARAM_READ_SENTENCE
public static final String PARAM_READ_CONSTITUENT
public static final String PARAM_READ_NAMED_ENTITY
public static final String PARAM_READ_PARAGRAPH
public static final String PARAM_USE_XML_ID
public static final String PARAM_USE_FILENAME_ID
public static final String PARAM_OMIT_IGNORABLE_WHITESPACE
public static final String PARAM_POS_MAPPING_LOCATION
protected String mappingPosLocation
public static final String PARAM_POS_TAG_SET
protected String posTagset
public static final String PARAM_UTTERANCES_AS_SENTENCES
public void initialize(org.apache.uima.UimaContext aContext) throws org.apache.uima.resource.ResourceInitializationException
initialize
in class ResourceCollectionReaderBase
org.apache.uima.resource.ResourceInitializationException
public boolean hasNext() throws IOException, org.apache.uima.collection.CollectionException
hasNext
in interface org.apache.uima.collection.base_cpm.BaseCollectionReader
hasNext
in class ResourceCollectionReaderBase
IOException
org.apache.uima.collection.CollectionException
public void getNext(org.apache.uima.cas.CAS aCAS) throws IOException, org.apache.uima.collection.CollectionException
IOException
org.apache.uima.collection.CollectionException
protected TeiReader.Handler newSaxHandler()
Copyright © 2007–2018 Ubiquitous Knowledge Processing (UKP) Lab, Technische Universität Darmstadt. All rights reserved.