public class WikipediaPageReader extends WikipediaStandardReaderBase
Modifier and Type | Field and Description |
---|---|
static String | PARAM_ONLY_FIRST_PARAGRAPH: If set to true, only the first paragraph of the article is used instead of the whole article. |
Inherited fields (one list per superclass):
currentArticleIndex, nrOfArticles, outputPlainText, pageBuffer, pageIdFile, pageIdParamArray, pageIter, pageNameFile, pageNameParamArray, PARAM_OUTPUT_PLAIN_TEXT, PARAM_PAGE_BUFFER, PARAM_PAGE_ID_LIST, PARAM_PAGE_TITLE_LIST, PARAM_PATH_TO_PAGE_ID_LIST, PARAM_PATH_TO_PAGE_TITLE_LIST, parser
dbconfig, PARAM_CREATE_DATABASE_CONFIG_ANNOTATION, PARAM_DB, PARAM_HOST, PARAM_LANGUAGE, PARAM_PASSWORD, PARAM_USER, wiki
Constructor and Description |
---|
WikipediaPageReader() |
Modifier and Type | Method and Description |
---|---|
protected String | getPlainDocumentText(de.tudarmstadt.ukp.wikipedia.api.Page page) |
void | initialize(org.apache.uima.UimaContext context) |
protected boolean | isValidPage(de.tudarmstadt.ukp.wikipedia.api.Page page) |
Inherited methods (one list per superclass or interface):
getDocumentText, getNext, getPage, getProgress, hasNext
close, getLogger, getNext, initialize
destroy, getCasInitializer, getProcessingResourceMetaData, initialize, isConsuming, reconfigure, setCasInitializer, typeSystemInit
getConfigParameterValue, getConfigParameterValue, setConfigParameterValue, setConfigParameterValue
getCasManager, getMetaData, getRelativePathResolver, getResourceManager, getUimaContext, getUimaContextAdmin, setLogger, setMetaData
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
public static final String PARAM_ONLY_FIRST_PARAGRAPH
public void initialize(org.apache.uima.UimaContext context) throws org.apache.uima.resource.ResourceInitializationException
Superclass method: initialize in class WikipediaStandardReaderBase
Throws: org.apache.uima.resource.ResourceInitializationException
protected String getPlainDocumentText(de.tudarmstadt.ukp.wikipedia.api.Page page)
Superclass method: getPlainDocumentText in class WikipediaStandardReaderBase
protected boolean isValidPage(de.tudarmstadt.ukp.wikipedia.api.Page page) throws de.tudarmstadt.ukp.wikipedia.api.exception.WikiTitleParsingException
Superclass method: isValidPage in class WikipediaStandardReaderBase
Throws: de.tudarmstadt.ukp.wikipedia.api.exception.WikiTitleParsingException
Copyright © 2007–2018 Ubiquitous Knowledge Processing (UKP) Lab, Technische Universität Darmstadt. All rights reserved.