public class StringWordIndexer extends Object implements WordIndexer<String>
WordIndexer.StaticMethods
Constructor and Description |
---|
StringWordIndexer() |
Modifier and Type | Method and Description |
---|---|
String |
getEndSymbol()
Returns the end symbol (usually something like </s>).
|
int |
getIndexPossiblyUnk(String word)
Should never add to the vocabulary, and should return the index of the unk symbol
(see getUnkSymbol()) if the word is not in the vocabulary.
|
int |
getOrAddIndex(String word)
Gets the index for a word, adding if necessary.
|
int |
getOrAddIndexFromString(String word) |
String |
getStartSymbol()
Returns the start symbol (usually something like <s>).
|
String |
getUnkSymbol()
Returns the unk symbol (usually something like <unk>).
|
String |
getWord(int index)
Gets the word object for an index.
|
int |
numWords()
Number of words that have been added so far.
|
void |
setEndSymbol(String sym) |
void |
setStartSymbol(String sym) |
void |
setUnkSymbol(String sym) |
void |
trimAndLock()
Informs the implementation that no more words can be added to the
vocabulary.
|
public int getOrAddIndex(String word)
WordIndexer
getOrAddIndex
in interface WordIndexer<String>
public String getWord(int index)
WordIndexer
getWord
in interface WordIndexer<String>
public int numWords()
WordIndexer
numWords
in interface WordIndexer<String>
public String getStartSymbol()
WordIndexer
getStartSymbol
in interface WordIndexer<String>
public String getEndSymbol()
WordIndexer
getEndSymbol
in interface WordIndexer<String>
public String getUnkSymbol()
WordIndexer
getUnkSymbol
in interface WordIndexer<String>
public int getOrAddIndexFromString(String word)
getOrAddIndexFromString
in interface WordIndexer<String>
public void setStartSymbol(String sym)
setStartSymbol
in interface WordIndexer<String>
public void setEndSymbol(String sym)
setEndSymbol
in interface WordIndexer<String>
public void setUnkSymbol(String sym)
setUnkSymbol
in interface WordIndexer<String>
public void trimAndLock()
WordIndexer
trimAndLock
in interface WordIndexer<String>
public int getIndexPossiblyUnk(String word)
WordIndexer
getIndexPossiblyUnk
in interface WordIndexer<String>