public class WikipediaLinkReader extends WikipediaStandardReaderBase
Modifier and Type | Field and Description |
---|---|
static String | PARAM_ALLOWED_LINK_TYPES: Which types of links are allowed? |
currentArticleIndex, nrOfArticles, outputPlainText, pageBuffer, pageIdFile, pageIdParamArray, pageIter, pageNameFile, pageNameParamArray, PARAM_OUTPUT_PLAIN_TEXT, PARAM_PAGE_BUFFER, PARAM_PAGE_ID_LIST, PARAM_PAGE_TITLE_LIST, PARAM_PATH_TO_PAGE_ID_LIST, PARAM_PATH_TO_PAGE_TITLE_LIST, parser
dbconfig, PARAM_CREATE_DATABASE_CONFIG_ANNOTATION, PARAM_DB, PARAM_HOST, PARAM_LANGUAGE, PARAM_PASSWORD, PARAM_USER, wiki
Constructor and Description |
---|
WikipediaLinkReader() |
Modifier and Type | Method and Description |
---|---|
void | getNext(org.apache.uima.jcas.JCas jcas) |
protected String | getPlainDocumentText(de.tudarmstadt.ukp.wikipedia.api.Page page) |
protected boolean | isValidPage(de.tudarmstadt.ukp.wikipedia.api.Page page) |
getDocumentText, getPage, getProgress, hasNext, initialize
close, getLogger, getNext, initialize
destroy, getCasInitializer, getProcessingResourceMetaData, initialize, isConsuming, reconfigure, setCasInitializer, typeSystemInit
getConfigParameterValue, getConfigParameterValue, setConfigParameterValue, setConfigParameterValue
getCasManager, getMetaData, getResourceManager, getUimaContext, getUimaContextAdmin, setLogger, setMetaData
Methods inherited from class java.lang.Object: clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
public static final String PARAM_ALLOWED_LINK_TYPES
protected boolean isValidPage(de.tudarmstadt.ukp.wikipedia.api.Page page) throws de.tudarmstadt.ukp.wikipedia.api.exception.WikiTitleParsingException
isValidPage in class WikipediaStandardReaderBase
Throws: de.tudarmstadt.ukp.wikipedia.api.exception.WikiTitleParsingException
protected String getPlainDocumentText(de.tudarmstadt.ukp.wikipedia.api.Page page)
getPlainDocumentText in class WikipediaStandardReaderBase
public void getNext(org.apache.uima.jcas.JCas jcas) throws IOException, org.apache.uima.collection.CollectionException
getNext in class WikipediaStandardReaderBase
Throws: IOException, org.apache.uima.collection.CollectionException
Copyright © 2011–2015. All rights reserved.