public class WordNetUtilities
extends java.lang.Object
Modifier and Type | Field and Description |
---|---|
static int |
errorCount |
static java.util.HashMap<java.lang.String,java.lang.String> |
mappings
POS-prefixed mappings from a new synset number to the old
one.
|
static int |
TPTPidCounter |
static boolean |
withThoughtEmotion |
protected static java.util.ArrayList<java.lang.String> |
WordNetRelations |
Constructor and Description |
---|
WordNetUtilities() |
Modifier and Type | Method and Description |
---|---|
static java.util.HashMap<java.lang.String,java.util.HashSet<java.lang.String>> |
collapseSenses() |
static void |
commentSentiment(java.lang.String fileWithPath) |
static java.util.ArrayList<java.lang.String> |
convertTermList(java.lang.String termList)
Convert a list of Terms in the format "&%term1 &%term2" to an ArrayList
of bare term Strings
|
static java.lang.String |
convertWordNetPointer(java.lang.String ptr) |
static void |
deduceMissingLinks()
Use the WordNet hyper-/hypo-nym links to deduce a likely link
for a SUMO term that has not yet been manually linked.
|
static void |
extractMeronyms()
A utility to extract meronym relations as relations between
SUMO terms.
|
static java.util.HashSet<java.lang.String> |
findLeaves(java.lang.String rel)
Find all the leaf nodes for a particular relation in WordNet.
|
static java.util.HashSet<java.lang.String> |
findLeavesInTree(java.util.HashSet<java.lang.String> rels)
Find all the leaf nodes for a particular relation in WordNet.
|
static java.util.ArrayList<java.util.ArrayList<java.lang.String>> |
findPathsToRoot(java.util.ArrayList<java.lang.String> base,
java.lang.String synset)
Find the complete path from a given synset.
|
static java.lang.String |
formatWords(java.util.TreeMap<java.lang.String,java.lang.String> words,
java.lang.String kbName)
HTML format a TreeMap of word senses and their associated synset
|
static java.lang.String |
formatWordsList(java.util.TreeMap<java.lang.String,java.util.ArrayList<java.lang.String>> words,
java.lang.String kbName)
HTML format a TreeMap of ArrayLists of word senses
|
static void |
generateHyponymSets(java.lang.String filename)
Generate sets of all hyponymous words for each synset in a file
|
static void |
generateSUMOfromWN()
Generate notional SUMO terms from WordNet.
|
static void |
generateSUMOfromWN(java.lang.String synset,
java.lang.String sumo)
Generate notional SUMO terms from WordNet
|
static void |
generateSUMOfromWNsubtree(java.lang.String synset,
java.lang.String sumo)
Generate notional SUMO terms from WordNet
|
static java.util.HashSet<java.lang.String> |
getAllHyponyms(java.lang.String s) |
static java.util.HashSet<java.lang.String> |
getAllHyponymsTransitive(java.lang.String s) |
static java.lang.String |
getBareSUMOTerm(java.lang.String term)
Get a SUMO term minus its &% prefix and one character mapping
suffix.
|
static java.lang.String |
getKeyFromSense(java.lang.String synset)
Get the word_POS_num sense key corresponding to a 9 digit synset.
|
static java.lang.String |
getPOSfromKey(java.lang.String senseKey)
Extract the POS from a word_POS_num sense key.
|
static java.lang.String |
getSenseFromKey(java.lang.String senseKey)
Extract the synset corresponding to a word_POS_num sense key.
|
static char |
getSUMOMappingSuffix(java.lang.String term)
Get a SUMO term mapping suffix.
|
static java.util.ArrayList<java.lang.String> |
getSynsetsFromSUMO(java.lang.String sumo)
get all synsets corresponding to a SUMO term
|
static java.util.HashSet<java.lang.String> |
getSynsetsFromSUMOList(java.util.Collection<java.lang.String> sumo)
get all synsets corresponding to a list of SUMO terms
|
static java.lang.String |
getWordFromKey(java.lang.String senseKey)
Extract the word from a word_POS_num sense key.
|
static java.util.HashSet<java.lang.String> |
getWordsFromSynsetList(java.util.Collection<java.lang.String> synsets)
get all words corresponding to a list of synsets
|
void |
imageNetLinks()
Import links from www.image-net.org that are linked to
WordNet and link them to SUMO terms when the synset has a
directly equivalent SUMO term
|
static boolean |
isHyponymousWord(java.lang.String word,
java.util.HashSet<java.lang.String> synsets) |
static boolean |
isValidKey(java.lang.String senseKey)
Check whether a sense key format is valid
|
static boolean |
isValidSynset8(java.lang.String synset)
Check whether a synset format is valid
|
static boolean |
isValidSynset9(java.lang.String synset)
Check whether a synset format is valid
|
static java.lang.String |
lowestCommonParent(java.lang.String s1,
java.lang.String s2) |
static void |
main(java.lang.String[] args)
A main method, used only for testing.
|
static java.lang.String |
mappingCharToName(char mappingType) |
static void |
mergeUpdates()
Read in a file with a nine-digit synset number followed by a space
and a SUMO term.
|
static int |
numSynsets(char pos) |
static java.lang.String |
posLettersToNumber(java.lang.String pos)
Convert a part of speech in the two letter format used by
the WordNet sense index code to its part of speech number.
|
static char |
posLetterToNumber(char POS) |
static char |
posNumberToLetter(char POS) |
static java.lang.String |
posNumberToLetters(java.lang.String pos)
Convert a part of speech number to the two letter format used by
the WordNet sense index code.
|
static char |
posPennToNumber(java.lang.String penn) |
static java.lang.String |
printStatistics() |
static void |
processMissingLinks(java.lang.String fileName,
java.lang.String pattern,
java.lang.String posNum)
This is a utility routine that should not be called during
normal Sigma operation.
|
static void |
readWNversionMap(java.lang.String fileName,
java.lang.String pattern,
java.lang.String posNum)
Read the version mapping files and store in the HashMap
called "mappings".
|
static java.lang.String |
removeTermPrefixes(java.lang.String formula) |
static java.lang.String |
rootFormOf(java.lang.String word) |
static void |
searchCoherence(java.lang.String fileWithPath)
Take a file of
|
static int |
sensePOS(java.lang.String sense)
Take a WordNet sense identifier, and return the integer
part-of-speech code.
|
static boolean |
sensoryOrMentalWord(java.lang.String word)
test if a word is sensory or mental and return true if so
|
static java.util.HashMap<java.lang.String,java.util.HashSet<java.lang.String>> |
sensoryWords()
Find all words associated with sensory, psychological and
emotional concepts.
|
static void |
showAllLeaves() |
static void |
showAllRoots() |
static java.lang.String |
subst(java.lang.String result,
java.lang.String match,
java.lang.String subst)
A utility function that mimics the functionality of the perl
substitution feature (s/match/replacement/).
|
static boolean |
substTest(java.lang.String result,
java.lang.String match,
java.lang.String subst,
java.util.HashMap<java.lang.String,java.util.HashSet<java.lang.String>> hash)
A utility function that mimics the functionality of the perl
substitution feature (s/match/replacement/) but rather than
returning the result of the substitution, just tests whether the
result is a key in a hashtable.
|
static void |
synesthesiaCompare(java.util.HashMap<java.lang.String,java.util.HashSet<java.lang.String>> words,
java.util.HashSet<java.lang.String> synwords)
Compare Lievers' list of synesthetic words with those derived from
SUMO-WordNet
|
static java.util.HashSet<java.lang.String> |
synestheticSynsets(java.util.HashMap<java.lang.String,java.util.HashSet<java.lang.String>> words)
Find all the words that exhibit links to multiple sensory modes
in SUMO
|
static java.lang.String |
synsetFromOntoNotes(java.lang.String onKey)
Extract the nine digit synset ID corresponding to a word-POS.num sense key.
|
static java.lang.String |
synsetToOneWord(java.lang.String s) |
static void |
testCommonParent()
A method used only for testing.
|
static void |
testIsValidKey()
A method used only for testing.
|
static void |
testSynesthesia() |
static void |
testWord() |
static void |
updateWNversion(java.lang.String path,
java.lang.String versionPair)
Port the mappings from one version of WordNet to another.
|
static void |
updateWNversionProcess(java.lang.String fileName,
java.lang.String pattern,
java.lang.String posNum)
This is a utility routine that should not be called during
normal Sigma operation.
|
static void |
updateWNversionReading(java.lang.String path,
java.lang.String versionPair)
Note that the "old" synset should be the second element of each line
|
static java.lang.String |
verbPlural(java.lang.String verb)
Return the plural form of the verb.
|
static java.util.HashSet<java.lang.String> |
wordsToSynsets(java.lang.String word) |
static void |
writeTPTPWordNet(java.io.PrintWriter pw)
Write TPTP format for WordNet
|
public static java.util.HashMap<java.lang.String,java.lang.String> mappings
public static int TPTPidCounter
public static int errorCount
protected static java.util.ArrayList<java.lang.String> WordNetRelations
public static boolean withThoughtEmotion
public static java.lang.String getBareSUMOTerm(java.lang.String term)
public static boolean isValidSynset8(java.lang.String synset)
public static boolean isValidSynset9(java.lang.String synset)
public static boolean isValidKey(java.lang.String senseKey)
public static java.lang.String getPOSfromKey(java.lang.String senseKey)
public static java.lang.String getWordFromKey(java.lang.String senseKey)
public static java.lang.String getSenseFromKey(java.lang.String senseKey)
public static java.lang.String getKeyFromSense(java.lang.String synset)
public static java.lang.String synsetFromOntoNotes(java.lang.String onKey)
public static java.lang.String removeTermPrefixes(java.lang.String formula)
public static java.util.ArrayList<java.lang.String> convertTermList(java.lang.String termList)
public static char getSUMOMappingSuffix(java.lang.String term)
public static java.lang.String convertWordNetPointer(java.lang.String ptr)
public static char posLetterToNumber(char POS)
public static char posNumberToLetter(char POS)
public static char posPennToNumber(java.lang.String penn)
public static java.lang.String posNumberToLetters(java.lang.String pos)
public static java.lang.String posLettersToNumber(java.lang.String pos)
public static int sensePOS(java.lang.String sense)
public static java.lang.String mappingCharToName(char mappingType)
public static java.lang.String subst(java.lang.String result, java.lang.String match, java.lang.String subst)
result - is the string on which the substitution is performed.
match - is the substring to be found and replaced.
subst - is the string replacement for match.
public static boolean substTest(java.lang.String result, java.lang.String match, java.lang.String subst, java.util.HashMap<java.lang.String,java.util.HashSet<java.lang.String>> hash)
result - is the string on which the substitution is performed.
match - is the substring to be found and replaced.
subst - is the string replacement for match.
hash - is a hashtable to be checked against the result.
public static java.lang.String verbPlural(java.lang.String verb)
public static java.lang.String formatWords(java.util.TreeMap<java.lang.String,java.lang.String> words, java.lang.String kbName)
public static java.lang.String formatWordsList(java.util.TreeMap<java.lang.String,java.util.ArrayList<java.lang.String>> words, java.lang.String kbName)
public static void mergeUpdates() throws java.io.IOException
java.io.IOException
public static void processMissingLinks(java.lang.String fileName, java.lang.String pattern, java.lang.String posNum) throws java.io.IOException
java.io.IOException
public static void deduceMissingLinks() throws java.io.IOException
java.io.IOException
public static void updateWNversionProcess(java.lang.String fileName, java.lang.String pattern, java.lang.String posNum) throws java.io.IOException
java.io.IOException
public static void readWNversionMap(java.lang.String fileName, java.lang.String pattern, java.lang.String posNum) throws java.io.IOException
java.io.IOException
public static void updateWNversionReading(java.lang.String path, java.lang.String versionPair) throws java.io.IOException
java.io.IOException
public static void updateWNversion(java.lang.String path, java.lang.String versionPair) throws java.io.IOException
java.io.IOException
public static int numSynsets(char pos)
public static java.lang.String printStatistics()
public void imageNetLinks() throws java.io.IOException
java.io.IOException
public static void extractMeronyms()
public static void searchCoherence(java.lang.String fileWithPath)
public static void commentSentiment(java.lang.String fileWithPath)
public static void writeTPTPWordNet(java.io.PrintWriter pw) throws java.io.IOException
java.io.IOException
public static java.util.HashSet<java.lang.String> findLeavesInTree(java.util.HashSet<java.lang.String> rels)
public static java.util.ArrayList<java.util.ArrayList<java.lang.String>> findPathsToRoot(java.util.ArrayList<java.lang.String> base, java.lang.String synset)
public static java.lang.String lowestCommonParent(java.lang.String s1, java.lang.String s2)
public static java.util.HashSet<java.lang.String> findLeaves(java.lang.String rel)
public static void showAllLeaves()
public static void showAllRoots()
public static java.util.HashSet<java.lang.String> wordsToSynsets(java.lang.String word)
public static java.lang.String synsetToOneWord(java.lang.String s)
public static java.util.HashMap<java.lang.String,java.util.HashSet<java.lang.String>> collapseSenses()
public static java.util.HashSet<java.lang.String> getAllHyponyms(java.lang.String s)
public static java.util.HashSet<java.lang.String> getAllHyponymsTransitive(java.lang.String s)
public static boolean isHyponymousWord(java.lang.String word, java.util.HashSet<java.lang.String> synsets)
public static void generateHyponymSets(java.lang.String filename)
public static void generateSUMOfromWNsubtree(java.lang.String synset, java.lang.String sumo)
public static void generateSUMOfromWN(java.lang.String synset, java.lang.String sumo)
public static void generateSUMOfromWN()
public static java.util.ArrayList<java.lang.String> getSynsetsFromSUMO(java.lang.String sumo)
public static java.util.HashSet<java.lang.String> getSynsetsFromSUMOList(java.util.Collection<java.lang.String> sumo)
public static java.util.HashSet<java.lang.String> getWordsFromSynsetList(java.util.Collection<java.lang.String> synsets)
public static java.lang.String rootFormOf(java.lang.String word)
public static java.util.HashMap<java.lang.String,java.util.HashSet<java.lang.String>> sensoryWords()
public static java.util.HashSet<java.lang.String> synestheticSynsets(java.util.HashMap<java.lang.String,java.util.HashSet<java.lang.String>> words)
public static void synesthesiaCompare(java.util.HashMap<java.lang.String,java.util.HashSet<java.lang.String>> words, java.util.HashSet<java.lang.String> synwords)
public static void testCommonParent()
public static void testIsValidKey()
public static boolean sensoryOrMentalWord(java.lang.String word)
public static void testWord()
public static void testSynesthesia()
public static void main(java.lang.String[] args)