'''
Classes and helper functions for all rating-related things.
'''
from pyanp.prioritizer import Prioritizer, PriorityType
import re
from enum import Enum
import pandas as pd
import numpy as np
from pyanp.general import islist
# Pre-compiled pattern that matches any run of one or more whitespace chars.
__SPACE_REGEXP = re.compile('\\s+')


def clean_word(word:str)->str:
    '''
    Normalizes a word so that it can be used in a ratings lookup.

    Surrounding whitespace is removed, the text is lower-cased, and every
    internal run of whitespace is collapsed into a single space.

    :param word: The raw word to normalize.

    :return: The normalized word.
    '''
    lowered = word.strip().lower()
    return __SPACE_REGEXP.sub(' ', lowered)
class WordEvalType(Enum):
    '''
    The strategies a WordEval can use to turn one of its key words into
    a number.
    '''
    # Score is derived from the key's position, spread evenly over the keys.
    LINEAR = 1
    # Score is a base raised to the power of the key's position.
    EXPONENTIAL = 2
    # Scores are to be supplied by hand rather than computed from position.
    MANUAL = 3
class WordEval:
    '''
    Information for a Word Evaluator, i.e. a function that inputs a word and
    outputs a numeric value.

    Attributes set by the constructor:

    * ``names_to_synonyms``: the dict handed to the constructor.
    * ``keys``: the key words, in the order they appear in that dict.  The
      position of a key in this list drives its computed score.
    * ``lookup_synonym``: maps every key and every synonym to its key.
    * ``values``: explicit scores for the keys that were given one.
    * ``type``: the WordEvalType used for keys without an explicit score.
    * ``base``: the base used by the EXPONENTIAL type.
    '''

    def __init__(self, vals):
        '''
        :param vals: A dict whose keys are the key words and whose values
            are iterables of synonyms for that key.  A number appearing among
            the synonyms is taken as the explicit score of the key rather
            than as a synonym.  A bare string or a bare number is accepted
            in place of a one-element iterable.
        '''
        self.names_to_synonyms = vals
        self.keys = list(vals.keys())
        self.lookup_synonym = {}
        self.base = 0.9
        self.type = WordEvalType.LINEAR
        self.values = {}
        for key, synonyms in vals.items():
            self.lookup_synonym[key] = key
            if isinstance(synonyms, (str, float, int)):
                # A bare string such as ('e') is NOT a tuple; iterating it
                # would register each character as a synonym.  Treat a bare
                # string/number as a single entry instead.
                synonyms = (synonyms,)
            for synonym in synonyms:
                if isinstance(synonym, (float, int)):
                    # You are telling us the value of this key, not a
                    # synonym
                    self.values[key] = synonym
                else:
                    # This is actually a synonym
                    self.lookup_synonym[synonym] = key

    def get_key(self, word):
        '''
        Find the key word for this word.  A WordEval has a list of words that
        represent different levels/numerical values.  Those words are called
        keys.  In addition, each key has a list of synonyms.  For instance
        the keyword "high" might have a synonym "hi" or "h".  In that case
        get_key("hi") would return "high".

        :param word: The word to look up a synonym for.

        :return: The key if this word is a key or a synonym.  If it is not a
            synonym or key, we return None.
        '''
        return self.lookup_synonym.get(word)

    def keys_match_score(self, word_list):
        '''
        This function tells us how well this WordEval interprets a list of
        words.  It is used for searching through the "standard list" of words
        to find the best match for a data set.

        Entries that are not non-empty strings are ignored.  Words are looked
        up exactly as given (they are not passed through clean_word first).

        :param word_list: The list-like of words to see how we can match.

        :return: A score <= 1.  A positive number means no missing words, i.e.
            every word in word_list has a value in this WordEval object.
            The larger number means our word_list uses more of the names in
            this WordEval object.  Every word we cannot interpret lowers the
            score by one.
        '''
        keys_used = set()
        none_count = 0
        for word in word_list:
            if isinstance(word, str) and word:
                key = self.get_key(word)
                if key is None:
                    none_count += 1
                else:
                    keys_used.add(key)
        # Guard against an evaluator with no keys at all, which would
        # otherwise raise ZeroDivisionError.
        percent = len(keys_used) / len(self.keys) if self.keys else 0.0
        return percent - none_count

    def eval(self, word):
        '''
        Evaluates a word, or a pandas.Series of words.

        :param word: The string word to evaluate to a number, or a
            pandas.Series of data.  Entries that are not strings (e.g. None
            or NaN for a missing vote) are treated as not evaluable.

        :return: The float value if we can evaluate, or None if a single value
            is passed in and it cannot be evaluated.  If the word was actually
            a pandas.Series, we return a float pandas.Series with the same
            index, holding NaN wherever an entry could not be evaluated.

        :raises ValueError: If the key has no explicit value and self.type
            is neither LINEAR nor EXPONENTIAL.
        '''
        if isinstance(word, pd.Series):
            data = [self.eval(w) for w in word]
            return pd.Series(data=data, index=word.index, dtype=float)
        if not isinstance(word, str):
            # Missing votes show up as None/NaN; they have no .strip() and
            # would crash clean_word, so report them as not evaluable.
            return None
        key = self.get_key(clean_word(word))
        if key is None:
            return None
        if key in self.values:
            # We have it manually set
            return self.values[key]
        # If we make it here, the score comes from the key's position
        if self.type not in (WordEvalType.LINEAR, WordEvalType.EXPONENTIAL):
            raise ValueError("Have not done manual case yet")
        nitems = len(self.names_to_synonyms)
        if nitems <= 1:
            # A lone key cannot be spread over a range; it scores nitems.
            return nitems
        index = self.keys.index(key)
        if self.type is WordEvalType.LINEAR:
            # First key scores 1, last key scores 0, evenly spaced between.
            return (nitems - 1 - index) / (nitems - 1)
        # EXPONENTIAL: first key scores 1, each later key is scaled by base.
        return self.base ** index
## Does this do anything feverish?
# The built-in evaluators, keyed by a short name.  Within each evaluator the
# keys are listed from best to worst: a WordEval scores keys by their position
# in this order, so the order is significant.
STD_WORD_EVALUATORS = {
    'hml': WordEval({
        'high': ('h', 'hi'),
        'medium': ('medi', 'med', 'me', 'm'),
        'low': ('lowe', 'lo', 'l'),
    }),
    'vhhmlvl': WordEval({
        'very high': (
            'ver high', 'vy high', 'v high', 'vhigh', 'very hi', 'very h',
            'v h', 'vh'),
        'high': ('hig', 'hi', 'h'),
        'medium': (
            'mediu', 'med', 'me', 'm', 'okay', 'ok', 'o', 'average', 'aver',
            'avg'),
        'low': ('lo', 'l', 'lw', 'bad', 'bd', 'not high', 'not hi', 'not h'),
        'very low': (
            'ver low', 'vy low', 'v low', 'vlow', 'vlo', 'vl', 'v lo'),
    }),
    'abcdf': WordEval({
        'a': (),
        'b': (),
        'c': (),
        'd': (),
        # Trailing comma makes this a real 1-tuple rather than the str 'e'.
        'f': ('e',),
    }),
    'egobvb': WordEval({
        # NOTE: 'vy good' and 'vy gd' need the comma between them; without it
        # Python joins the adjacent literals into the single synonym
        # 'vy goodvy gd' and neither intended synonym is registered.
        'excellent': (
            'excel', 'excl', 'exc', 'ex', 'e', '++', 'very good', 'vy good',
            'vy gd', 'vy g', 'v good', 'vgood', 'vg', 'great'),
        'good': ('g', 'gd'),
        'okay': ('ok', 'equal', '=', 'equals', 'eq'),
        'bad': ('b', 'bd', 'not good', 'notgood', 'not g', 'ngood', 'ng'),
        'very bad': (
            'horrible', 'horrid', 'v bad', 'vbad', 'veryb', 'verybad',
            'vb', 'v b'),
    }),
}
def best_std_word_evaluator(list_of_words, return_name=True):
    '''
    Picks the entry of STD_WORD_EVALUATORS whose keys_match_score for the
    given words is the largest.

    :param list_of_words: The list of words to look for best matches of

    :param return_name: If True we return the name the winning WordEval has
        in STD_WORD_EVALUATORS, otherwise we return the WordEval itself.

    :return: The name of the best match, or the best match WordEval
    '''
    def score_of(name):
        # How well the evaluator registered under this name fits the words
        return STD_WORD_EVALUATORS[name].keys_match_score(list_of_words)

    best_name = max(STD_WORD_EVALUATORS, key=score_of)
    if not return_name:
        return STD_WORD_EVALUATORS[best_name]
    return best_name
class Rating(Prioritizer):
    '''
    Represents rating a full group of alternatives for a group of users.
    The data is essentially a dataframe and a WordEval object to
    evaluate that to scores.

    ``self.df`` holds the raw votes with one row per user and one column per
    alternative.  ``self.word_eval`` is the WordEval used to turn votes into
    numbers; when it is None a standard evaluator is chosen per column.
    '''

    def __init__(self):
        self.df = pd.DataFrame()
        self.word_eval = None

    @staticmethod
    def _is_missing(vote) -> bool:
        '''
        Tells whether a single vote cell holds no vote, i.e. is None or NaN.
        '''
        return vote is None or (isinstance(vote, float) and np.isnan(vote))

    def is_alt(self, alt:str)->bool:
        '''
        Tells if the item is an alternative

        :param alt: The name of the alternative to check for.

        :return: True/False
        '''
        return alt in self.df.columns

    def nusers(self)->int:
        '''
        The number of users in this system.

        :return: The number of users
        '''
        return len(self.df.index)

    def nalts(self)->int:
        '''
        :return: The number of alternatives in this system.
        '''
        return len(self.df.columns)

    def add_alt(self, alt_name, ignore_existing=True):
        '''
        Adds an alternative/s, by name

        :param alt_name: A str name, or a list of names to add.

        :param ignore_existing: If True and we try to add an existing
            alternative we simply skip by, otherwise we throw an error.  This
            applies to every name when a list is given.

        :return: Nothing

        :raises ValueError: If the alternative already exists and
            ignore_existing is False.
        '''
        if islist(alt_name):
            for alt in alt_name:
                # Pass the flag along so list input honours it as well
                self.add_alt(alt, ignore_existing=ignore_existing)
            return
        if self.is_alt(alt_name):
            if ignore_existing:
                # We already have an alternative like this, we were told
                # to ignore this.
                return None
            raise ValueError("Already have an alt name "+str(alt_name))
        # New column with no vote yet for any existing user
        self.df[alt_name] = [None]*self.nusers()

    def add_user(self, uname):
        '''
        Adds one or more users to this system.

        :param uname: The str name of the user to add, or a list of str names
            of users to add.  Users that already exist are left untouched.

        :return: Nothing
        '''
        if islist(uname):
            for un in uname:
                self.add_user(un)
            return
        if uname in self.df.index:
            # Already existed
            return
        # New row with no vote yet for any existing alternative
        self.df.loc[uname,:] = [None] * self.nalts()

    def user_names(self):
        '''
        :return: A list of str names of users in this system.  Ordered as the
            data in the ratings votes are ordered (the rows).
        '''
        return list(self.df.index)

    def alt_names(self):
        '''
        :return: A list of str alternative names in this system.  Ordered as
            the data in the ratings votes are ordered (columns).
        '''
        return list(self.df.columns)

    def vote_column(self, alt_name, votes, createUnknownUsers=True):
        '''
        Specifies all votes (across all users) for a specific alternative.

        :param alt_name: The name of the alternative to set the data for

        :param votes: Should either be a list with self.nusers() items, or a
            pandas.Series or dict with usernames as index.

        :param createUnknownUsers: If True and unknown users appear in the
            index of votes, we will create those users before trying to do the
            assignment.

        :return: Nothing

        :raises ValueError: If alt_name is not an existing alternative.
        '''
        if not self.is_alt(alt_name):
            raise ValueError("No such alternative "+str(alt_name))
        if isinstance(votes, dict):
            # Turn the dict into a Series so that it is matched to rows by
            # user name and so that its users can be created below.
            votes = pd.Series(votes, dtype=object)
        if createUnknownUsers and isinstance(votes, pd.Series):
            for uname in votes.keys():
                if not self.is_user(uname):
                    self.add_user(uname)
        self.df[alt_name] = votes

    def priority(self, username=None, ptype:PriorityType=None):
        '''
        Calculates the alternative priority for the specified user/users and
        the given normalizer type.

        :param username: The name (or list of names) of the user (users) to
            get the overall priority of.  If None, then we return the total
            group average.

        :param ptype: How should we normalize?  If None the plain averages are
            returned.

        :return: A pandas.Series whose index is self.alt_names() and whose
            values are the priorities.  Alternatives without any usable vote
            get a priority of 0.
        '''
        values = self.vote_values(username=username)
        # Column-wise average; a column with no usable vote averages to NaN,
        # which we report as 0.
        rval = values.mean().fillna(0)
        if ptype is None:
            return rval
        return ptype.apply(rval)

    def vote_values(self, username=None, alt_name=None):
        '''
        Gets the numeric vote values for the given user/alternative (or whole
        column or dataframe).

        :param username: If None, we get the values for all users.  If a list
            get the values for each user in the list, or it could just be a
            single username.

        :param alt_name: Either None, meaning get it for all alternatives, or
            a single alternative name (to get one column).

        :return: If alt_name=None, returns a pandas.DataFrame of the numeric
            values, indexed by self.user_names() with one column per
            alternative.  Otherwise returns a pandas.Series of values for
            that alternative, or a single value (NaN when there is no vote)
            if username was a single user name.
        '''
        if username is None:
            df = self.df
        else:
            df = self.df.loc[username,:]
        if alt_name is None:
            rval = pd.DataFrame(index=self.user_names())
            for alt in self.alt_names():
                rval[alt] = self.vote_values(username=username, alt_name=alt)
            return rval
        votes = df[alt_name]
        # Selecting a single user leaves us with one cell, not a Series.
        # Wrap it so it is inspected as one vote and not character by
        # character.
        single_vote = not isinstance(votes, pd.Series)
        vote_list = [votes] if single_vote else votes
        if all(self._is_missing(vote) for vote in vote_list):
            # Nothing to evaluate
            if single_vote:
                return np.nan
            return pd.Series(index=self.user_names(), dtype=float)
        weval = self.word_eval
        if weval is None:
            # No evaluator was set, pick the standard one fitting these votes
            weval = best_std_word_evaluator(vote_list, return_name=False)
        return weval.eval(votes)

    def is_user(self, uname:str):
        '''
        :param uname: The name of the user to check for

        :return: True/False if the given user exists in the system.
        '''
        return uname in self.df.index

    def set_word_eval(self, param):
        '''
        Sets the WordEval object

        :param param: This could either be a WordEval object, None (to go
            back to picking a standard evaluator), or something that
            WordEval(param) would work with

        :return: None
        '''
        if param is None or isinstance(param, WordEval):
            self.word_eval = param
        else:
            self.word_eval = WordEval(param)