public abstract class WikipediaRevisionReaderBase extends WikipediaReaderBase
Modifier and Type | Field and Description |
---|---|
protected de.tudarmstadt.ukp.wikipedia.api.Page | currentArticle |
protected long | currentArticleIndex |
protected long | currentRevisionIndex |
protected long | nrOfArticles |
protected boolean | outputPlainText |
protected int | pageBuffer |
protected Iterator<de.tudarmstadt.ukp.wikipedia.api.Page> | pageIter |
static String | PARAM_OUTPUT_PLAIN_TEXT: Whether the reader outputs plain text or wiki markup. |
static String | PARAM_PAGE_BUFFER: The page buffer size (#pages) of the page iterator. |
static String | PARAM_PATH_TO_REVISION_ID_LIST: Defines the path to a file containing a line-separated list of revision ids of the revisions that should be retrieved. |
static String | PARAM_REVISION_ID_LIST: Defines an array of revision ids of the revisions that should be retrieved. |
protected de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParser | parser |
protected Iterator<String> | revIdIterator |
protected de.tudarmstadt.ukp.wikipedia.revisionmachine.api.RevisionApi | revisionApi |
protected String | revisionIdFile |
protected String[] | revisionIdParamArray |
protected Set<String> | revisionIds |
protected Iterator<Timestamp> | timestampIter |
Fields inherited from class WikipediaReaderBase: dbconfig, PARAM_CREATE_DATABASE_CONFIG_ANNOTATION, PARAM_DB, PARAM_HOST, PARAM_LANGUAGE, PARAM_PASSWORD, PARAM_USER, wiki
Constructor and Description |
---|
WikipediaRevisionReaderBase() |
Modifier and Type | Method and Description |
---|---|
protected void | addDocumentMetaData(org.apache.uima.jcas.JCas jcas, int pageId, int revisionId) |
protected void | addRevisionAnnotation(org.apache.uima.jcas.JCas jcas, de.tudarmstadt.ukp.wikipedia.revisionmachine.api.Revision revision) |
org.apache.uima.util.Progress[] | getProgress() |
protected Iterator<Timestamp> | getTimestampIter(int pageId) |
boolean | hasNext() |
void | initialize(org.apache.uima.UimaContext context) |
getNext
close, getLogger, getNext, initialize
destroy, getCasInitializer, getProcessingResourceMetaData, initialize, isConsuming, reconfigure, setCasInitializer, typeSystemInit
getConfigParameterValue, getConfigParameterValue, setConfigParameterValue, setConfigParameterValue
getCasManager, getMetaData, getRelativePathResolver, getResourceManager, getUimaContext, getUimaContextAdmin, setLogger, setMetaData
Methods inherited from class java.lang.Object: clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
public static final String PARAM_OUTPUT_PLAIN_TEXT
protected boolean outputPlainText
public static final String PARAM_PAGE_BUFFER
protected int pageBuffer
public static final String PARAM_PATH_TO_REVISION_ID_LIST
protected String revisionIdFile
public static final String PARAM_REVISION_ID_LIST
protected String[] revisionIdParamArray
protected de.tudarmstadt.ukp.wikipedia.api.Page currentArticle
protected de.tudarmstadt.ukp.wikipedia.revisionmachine.api.RevisionApi revisionApi
protected Iterator<de.tudarmstadt.ukp.wikipedia.api.Page> pageIter
protected long currentArticleIndex
protected long currentRevisionIndex
protected long nrOfArticles
protected de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParser parser
public void initialize(org.apache.uima.UimaContext context) throws org.apache.uima.resource.ResourceInitializationException
Overrides: initialize in class WikipediaReaderBase
Throws: org.apache.uima.resource.ResourceInitializationException
public boolean hasNext() throws IOException, org.apache.uima.collection.CollectionException
Throws: IOException
Throws: org.apache.uima.collection.CollectionException
public org.apache.uima.util.Progress[] getProgress()
Specified by: getProgress in interface org.apache.uima.collection.base_cpm.BaseCollectionReader
Overrides: getProgress in class WikipediaReaderBase
protected Iterator<Timestamp> getTimestampIter(int pageId) throws IOException
Throws: IOException
protected void addRevisionAnnotation(org.apache.uima.jcas.JCas jcas, de.tudarmstadt.ukp.wikipedia.revisionmachine.api.Revision revision)
protected void addDocumentMetaData(org.apache.uima.jcas.JCas jcas, int pageId, int revisionId) throws de.tudarmstadt.ukp.wikipedia.api.exception.WikiTitleParsingException, de.tudarmstadt.ukp.wikipedia.api.exception.WikiApiException
Throws: de.tudarmstadt.ukp.wikipedia.api.exception.WikiTitleParsingException
Throws: de.tudarmstadt.ukp.wikipedia.api.exception.WikiApiException
Copyright © 2007–2018 Ubiquitous Knowledge Processing (UKP) Lab, Technische Universität Darmstadt. All rights reserved.