public abstract class SegmenterBase
extends org.apache.uima.fit.component.JCasAnnotator_ImplBase
Modifier and Type | Field and Description |
---|---|
static String |
PARAM_LANGUAGE
The language.
|
static String |
PARAM_STRICT_ZONING
Strict zoning causes the segmentation to be applied only within the
boundaries of a zone annotation.
|
static String |
PARAM_WRITE_SENTENCE
Create Sentence annotations. |
static String |
PARAM_WRITE_TOKEN
Create Token annotations. |
static String |
PARAM_ZONE_TYPES
A list of type names used for zoning.
|
Constructor and Description |
---|
SegmenterBase() |
Modifier and Type | Method and Description |
---|---|
protected Sentence |
createSentence(org.apache.uima.jcas.JCas aJCas,
int aBegin,
int aEnd) |
protected Token |
createToken(org.apache.uima.jcas.JCas aJCas,
int aBegin,
int aEnd) |
protected Token |
createToken(org.apache.uima.jcas.JCas aJCas,
int aBegin,
int aEnd,
int aIndex) |
String |
getLanguage(org.apache.uima.jcas.JCas aJCas) |
Locale |
getLocale(org.apache.uima.jcas.JCas aJCas)
Get the locale from the parameter, then from the document if available.
|
String[] |
getZoneTypes() |
boolean |
isEmpty(int aBegin,
int aEnd) |
boolean |
isStrictZoning() |
boolean |
isWriteSentence() |
boolean |
isWriteToken() |
protected int[] |
limit(String text,
int aBegin,
int aEnd)
Adjust the values in the two numeric arguments to be within the limits
of the specified text.
|
void |
process(org.apache.uima.jcas.JCas jcas) |
protected abstract void |
process(org.apache.uima.jcas.JCas aJCas,
String text,
int zoneBegin) |
void |
trim(String aText,
int[] aSpan)
Remove trailing or leading whitespace from the annotation.
|
boolean |
trimChar(char aChar) |
getLogger, initialize
getRequiredCasInterface, process
getCasInstancesRequired, hasNext, next
public static final String PARAM_ZONE_TYPES
public static final String PARAM_STRICT_ZONING
public static final String PARAM_LANGUAGE
public static final String PARAM_WRITE_TOKEN
Create Token annotations.
public static final String PARAM_WRITE_SENTENCE
Create Sentence annotations.
public boolean isStrictZoning()
public boolean isWriteSentence()
public boolean isWriteToken()
public String[] getZoneTypes()
public void process(org.apache.uima.jcas.JCas jcas) throws org.apache.uima.analysis_engine.AnalysisEngineProcessException
Specified by: process in class org.apache.uima.analysis_component.JCasAnnotator_ImplBase
Throws: org.apache.uima.analysis_engine.AnalysisEngineProcessException
protected int[] limit(String text, int aBegin, int aEnd)
Parameters:
text - the text.
aBegin - the zone begin.
aEnd - the zone end.
protected Sentence createSentence(org.apache.uima.jcas.JCas aJCas, int aBegin, int aEnd)
protected Token createToken(org.apache.uima.jcas.JCas aJCas, int aBegin, int aEnd)
protected Token createToken(org.apache.uima.jcas.JCas aJCas, int aBegin, int aEnd, int aIndex)
protected abstract void process(org.apache.uima.jcas.JCas aJCas, String text, int zoneBegin) throws org.apache.uima.analysis_engine.AnalysisEngineProcessException
Throws: org.apache.uima.analysis_engine.AnalysisEngineProcessException
public void trim(String aText, int[] aSpan)
Parameters:
aText - the text.
aSpan - the offsets.
public boolean isEmpty(int aBegin, int aEnd)
public boolean trimChar(char aChar)
public String getLanguage(org.apache.uima.jcas.JCas aJCas)
public Locale getLocale(org.apache.uima.jcas.JCas aJCas)
Parameters:
aJCas - the JCas.
Copyright © 2011–2015. All rights reserved.