Source code for pyanp.rating

'''
Classes and helper functions for all rating-related things.
'''

from pyanp.prioritizer import Prioritizer, PriorityType

import re
from enum import Enum
import pandas as pd
import numpy as np
from pyanp.general import islist


# Matches any run of one or more consecutive whitespace characters.
__SPACE_REGEXP = re.compile(r'\s+')


def clean_word(word:str)->str:
    '''
    Normalizes a word so that it can be looked up in a ratings scale.

    Leading and trailing whitespace is dropped, the text is lower-cased and
    every internal run of whitespace is collapsed to a single space.

    :param word: The word to clean.

    :return: The sanitized word
    '''
    trimmed = word.strip().lower()
    return __SPACE_REGEXP.sub(' ', trimmed)
class WordEvalType(Enum):
    '''
    The strategy a WordEval uses to turn one of its key words into a number.
    '''
    # Keys are spread evenly from 1 (first key) down to 0 (last key).
    LINEAR = 1
    # The key at position i is worth base**i.
    EXPONENTIAL = 2
    # Placeholder for a fully manual scale; WordEval.eval raises for it.
    MANUAL = 3
class WordEval:
    '''
    Information for a Word Evaluator, i.e. a function that inputs a word
    and outputs a numeric value.

    :param vals: A dict whose keys are the key words of the scale, ordered
        from best to worst, and whose values are the synonyms of each key.
        The synonyms are normally a tuple/list of strings. A number inside
        that collection is not a synonym: it is taken as the explicit value
        of the key. A bare string or a bare number is accepted as well and
        is treated as a one-element collection.
    '''

    def __init__(self, vals):
        self.names_to_synonyms = vals
        # Key order defines the ranking used by the LINEAR/EXPONENTIAL types.
        self.keys = list(vals.keys())
        # Maps every key and every synonym to its key.
        self.lookup_synonym = {}
        # Base of the EXPONENTIAL scale.
        self.base = 0.9
        self.type = WordEvalType.LINEAR
        # Explicit numeric values, for the keys that declare one.
        self.values = {}
        for key, synonyms in vals.items():
            self.lookup_synonym[key] = key
            if isinstance(synonyms, (str, float, int)):
                # A forgotten trailing comma, e.g. ('e'), hands us a bare
                # string. Iterating it would register each character as a
                # synonym, so wrap it (and bare numbers) in a tuple.
                synonyms = (synonyms,)
            for synonym in synonyms:
                if isinstance(synonym, (float, int)):
                    # You are telling us the value of this key, not a
                    # synonym
                    self.values[key] = synonym
                else:
                    # This is actually a synonym
                    self.lookup_synonym[synonym] = key

    def get_key(self, word):
        '''
        Find the key word for this word.  A WordEval has a list of words
        that represent different levels/numerical values.  Those words are
        called keys.  In addition, each key has a list of synonyms.  For
        instance the keyword "high" might have a synonym "hi" or "h".  In
        that case get_key("hi") would return "high".

        The lookup is exact: the word is not cleaned here.

        :param word: The word to look up a synonym for.

        :return: The key if this word is a key or a synonym.  If it is not
            a synonym or key, we return None.
        '''
        return self.lookup_synonym.get(word)

    def keys_match_score(self, word_list):
        '''
        This function tells us how well this WordEval interprets a list of
        words.  It is used for searching through the "standard list" of
        words to find the best match for a data set.

        Words are cleaned with clean_word() first, exactly as eval() does,
        so that the score reflects what eval() can actually evaluate.
        Entries that are not strings, or that are blank after cleaning,
        are treated as missing data and ignored.

        :param word_list: The list-like of words to see how we can match.

        :return: A score <= 1.  A positive number means no missing words,
            i.e. every word in word_list has a value in this WordEval
            object.  The larger number means our word_list uses more of
            the names in this WordEval object.
        '''
        keys_used = set()
        none_count = 0
        for word in word_list:
            if not isinstance(word, str):
                continue
            word = clean_word(word)
            if not word:
                continue
            key = self.get_key(word)
            if key is None:
                none_count += 1
            else:
                keys_used.add(key)
        # A WordEval without keys cannot match anything; avoid dividing by 0.
        percent = len(keys_used) / len(self.keys) if self.keys else 0.0
        return percent - none_count

    def eval(self, word):
        '''
        Evaluates a word, or a pandas.Series of words.

        :param word: The string word to evaluate to a number, or a
            pandas.Series of data.

        :return: The float value if we can evaluate, or None if a single
            value is passed in that we cannot evaluate (an unknown word, or
            something that is not a string such as None/NaN).  If the word
            was actually a pandas.Series, we return a float pandas.Series
            with the same index, with NaN where we could not evaluate.

        :raises ValueError: If the type of this WordEval is neither LINEAR
            nor EXPONENTIAL and the key has no explicit value.
        '''
        if isinstance(word, pd.Series):
            data = [self.eval(w) for w in word]
            return pd.Series(data=data, index=word.index, dtype=float)
        if not isinstance(word, str):
            # Missing cells show up as None/NaN; they have no value.
            return None
        key = self.get_key(clean_word(word))
        if key is None:
            return None
        if key in self.values:
            # We have it manually set
            return self.values[key]
        # If we make it here, we have to derive the value from the position
        # of the key in the scale.
        nitems = len(self.names_to_synonyms)
        if self.type is WordEvalType.LINEAR:
            if nitems <= 1:
                return nitems
            index = self.keys.index(key)
            return (nitems - 1 - index) / (nitems - 1)
        elif self.type is WordEvalType.EXPONENTIAL:
            if nitems <= 1:
                return nitems
            return self.base ** self.keys.index(key)
        else:
            raise ValueError("Have not done manual case yet")
# The standard rating scales, by name.  Each scale lists its key words from
# best to worst, together with the synonyms accepted for every key.
# best_std_word_evaluator() searches this dict for the scale that fits a
# data set best.
STD_WORD_EVALUATORS = {
    'hml': WordEval({
        'high': ('h', 'hi'),
        'medium': ('medi', 'med', 'me', 'm'),
        'low': ('lowe', 'lo', 'l'),
    }),
    'vhhmlvl': WordEval({
        'very high': (
            'ver high', 'vy high', 'v high', 'vhigh', 'very hi', 'very h',
            'v h', 'vh'),
        'high': ('hig', 'hi', 'h'),
        'medium': (
            'mediu', 'med', 'me', 'm', 'okay', 'ok', 'o', 'average', 'aver',
            'avg'),
        'low': (
            'lo', 'l', 'lw', 'bad', 'bd', 'not high', 'not hi', 'not h'),
        'very low': (
            'ver low', 'vy low', 'v low', 'vlow', 'vlo', 'vl', 'v lo'),
    }),
    'abcdf': WordEval({
        'a': (),
        'b': (),
        'c': (),
        'd': (),
        # One-element tuple: without the comma this is just the string 'e'.
        'f': ('e',),
    }),
    'egobvb': WordEval({
        # 'vy good' and 'vy gd' are two separate synonyms; without the comma
        # between them Python concatenates the literals into 'vy goodvy gd'.
        'excellent': (
            'excel', 'excl', 'exc', 'ex', 'e', '++', 'very good', 'vy good',
            'vy gd', 'vy g', 'v good', 'vgood', 'vg', 'great'),
        'good': ('g', 'gd'),
        'okay': ('ok', 'equal', '=', 'equals', 'eq'),
        'bad': ('b', 'bd', 'not good', 'notgood', 'not g', 'ngood', 'ng'),
        'very bad': (
            'horrible', 'horrid', 'v bad', 'vbad', 'veryb', 'verybad', 'vb',
            'v b'),
    }),
}
def best_std_word_evaluator(list_of_words, return_name=True):
    '''
    Picks the entry of STD_WORD_EVALUATORS whose keys_match_score() for the
    given words is the highest.  On a tie the entry that comes first in
    STD_WORD_EVALUATORS wins.

    :param list_of_words: The list of words to look for best matches of

    :param return_name: If True we return the name of the best WordEval in
        STD_WORD_EVALUATORS, otherwise the WordEval object itself.

    :return: The name of the best match, or the best match WordEval
    '''
    def match_score(name):
        return STD_WORD_EVALUATORS[name].keys_match_score(list_of_words)

    best_name = max(STD_WORD_EVALUATORS, key=match_score)
    return best_name if return_name else STD_WORD_EVALUATORS[best_name]
class Rating(Prioritizer):
    '''
    Represents rating a full group of alternatives for a group of users.
    The data is essentially a dataframe (one row per user, one column per
    alternative, holding the rating words) and a WordEval object to
    evaluate that to scores.
    '''

    def __init__(self):
        # Rows are users, columns are alternatives, cells are rating words.
        self.df = pd.DataFrame()
        # None means: pick the best standard WordEval for each column.
        self.word_eval = None

    def is_alt(self, alt:str)->bool:
        '''
        Tells if the item is an alternative

        :param alt: The name of the alternative to check for.

        :return: True/False
        '''
        return alt in self.df.columns

    def nusers(self)->int:
        '''
        The number of users in this system.

        :return: The number of users
        '''
        return len(self.df.index)

    def nalts(self)->int:
        '''
        :return: The number of alternatives in this system.
        '''
        return len(self.df.columns)

    def add_alt(self, alt_name, ignore_existing=True):
        '''
        Adds an alternative/s, by name

        :param alt_name: A str name, or a list of names to add.

        :param ignore_existing: If True and we try to add an existing
            alternative we simply skip by, otherwise we throw an error.
            This applies to every name when a list is given.

        :return: Nothing

        :raises ValueError: If the alternative already exists and
            ignore_existing is False.
        '''
        if islist(alt_name):
            for alt in alt_name:
                # Forward the flag, otherwise duplicates inside a list would
                # always be silently ignored.
                self.add_alt(alt, ignore_existing=ignore_existing)
            return
        if self.is_alt(alt_name):
            if ignore_existing:
                # We already have an alternative like this, we were told
                # to ignore this.
                return None
            raise ValueError("Already have an alt name "+str(alt_name))
        self.df[alt_name] = [None]*self.nusers()

    def add_user(self, uname):
        '''
        Adds one or more users to this system.

        :param uname: The str name of the user to add, or a list of str
            names of users to add.

        :return: Nothing
        '''
        if islist(uname):
            for un in uname:
                self.add_user(un)
            return
        if uname in self.df.index:
            # Already existed
            return
        self.df.loc[uname,:] = [None] * self.nalts()

    def user_names(self):
        '''
        :return: A list of str names of users in this system.  Ordered as
            the data in the ratings votes are ordered (the rows).
        '''
        return list(self.df.index)

    def alt_names(self):
        '''
        :return: A list of str alternative names in this system.  Ordered
            as the data in the ratings votes are ordered (columns).
        '''
        return list(self.df.columns)

    def vote_column(self, alt_name, votes, createUnknownUsers=True):
        '''
        Specifies all votes (across all users) for a specific alternative.

        :param alt_name: The name of the alternative to set the data for

        :param votes: Should either be a list with self.nusers() items, or
            a pandas.Series or dict with usernames as index.

        :param createUnknownUsers: If True and unknown users appear in the
            index of votes, we will create those users before trying to do
            the assignment.

        :return: Nothing

        :raises ValueError: If alt_name is not an existing alternative.
        '''
        if not self.is_alt(alt_name):
            raise ValueError("No such alternative "+alt_name)
        if isinstance(votes, dict):
            # Turn the dict into a Series so that it is matched to the users
            # by name, just like a Series would be.
            votes = pd.Series(votes, dtype=object)
        if createUnknownUsers and isinstance(votes, pd.Series):
            for uname in votes.keys():
                if not self.is_user(uname):
                    self.add_user(uname)
        self.df[alt_name] = votes

    def priority(self, username=None, ptype:PriorityType=None):
        '''
        Calculates the alternative priority for the specified user/users
        and the given normalizer type.

        :param username: The name (or list of names) of the user (users) to
            get the overall priority of.  If None, then we return the total
            group average.

        :param ptype: How should we normalize?  If None the raw averages
            are returned.

        :return: A pandas.Series whose index is self.alt_names() and whose
            values are the priorities.  Alternatives without any usable
            vote get 0.
        '''
        values = self.vote_values(username=username)
        # Series.iteritems() no longer exists in pandas 2; fillna() does the
        # same NaN -> 0 replacement on every pandas version.
        rval = values.mean().fillna(0)
        if ptype is None:
            return rval
        return ptype.apply(rval)

    def vote_values(self, username=None, alt_name=None):
        '''
        Gets the numeric vote values for the given user/alternative (or
        whole column or dataframe).

        :param username: If None, we get the values for all users.  If a
            list get the values for each user in the list, or it could
            just be a single username.

        :param alt_name: Either None, meaning get it for all alternatives,
            or a single alternative name (to get one column).

        :return: If alt_name=None, returns a pandas.DataFrame of the
            numeric values, with one row per user in the system and one
            column per alternative.  Otherwise returns a pandas.Series of
            values, or a single value when username is a single name.
        '''
        df = self.df if username is None else self.df.loc[username,:]
        if alt_name is None:
            rval = pd.DataFrame(index=self.user_names())
            for alt in self.alt_names():
                rval[alt] = self.vote_values(username=username, alt_name=alt)
            return rval
        votes = df[alt_name]
        # With a single username we get one cell, not a column.  Wrap it so
        # that the code below never iterates over the characters of a word
        # (or fails on a NaN).
        is_single = not isinstance(votes, pd.Series)
        vote_list = [votes] if is_single else votes
        weval = self.word_eval
        if weval is None:
            weval = best_std_word_evaluator(vote_list, return_name=False)
        nothing_voted = all(
            vote is None or (isinstance(vote, float) and np.isnan(vote))
            for vote in vote_list
        )
        if nothing_voted:
            if is_single:
                return np.nan
            return pd.Series(index=self.user_names(), dtype=float)
        return weval.eval(votes)

    def is_user(self, uname:str):
        '''
        :param uname: The name of the user to check for

        :return: True/False if the given user exists in the system.
        '''
        return uname in self.df.index

    def set_word_eval(self, param):
        '''
        Sets the WordEval object

        :param param: This could either be a WordEval object, or
            something that WordEval(param) would work with.  None resets
            to picking the best standard WordEval automatically.

        :return: None
        '''
        if param is None or isinstance(param, WordEval):
            self.word_eval = param
        else:
            self.word_eval = WordEval(param)