Module topicnet.cooking_machine.models.base_score
Expand source code
import dill

from typing import (
    Any,
    Callable,
    Dict,
    Union,
)

from . import scores as tn_scores
class BaseScore:
    """
    Base class to construct custom score functions.
    """
    _PRECOMPUTED_DATA_PARAMETER_NAME = 'precomputed_data'

    # TODO: name should not be optional
    def __init__(
            self,
            name: str = None,
            should_compute: Union[Callable[[int], bool], bool] = None):
"""
Parameters
----------
name
Name of the score
should_compute
Function which decides whether the score should be computed
on the current fit iteration or not.
If `should_compute` is `None`, then score is going to be computed on every iteration.
At the same time, whatever function one defines,
score is always computed on the last fit iteration.
This is done for two reasons.
Firstly, so that the score is always computed at least once during `model._fit()`.
Secondly, so that `experiment.select()` works correctly.
The parameter `should_compute` might be helpful
if the score is slow but one still needs
to get the dependence of the score on iteration
(for the described case, one may compute the score
on every even iteration or somehow else).
However, be aware that if `should_compute` is used for some model's scores,
then the scores may have different number of values in `model.scores`!
Number of score values is the number of times the scores was calculated;
first value corresponds to the first fit iteration
which passed `should_compute` etc.
There are a couple of things also worth noting.
Fit iteration numbering starts from zero.
And every new `model._fit()` call is a new range of fit iterations.
Examples
--------
Scores created below are unworkable (as BaseScore has no `call` method inplemented).
These are just the examples of how one can create a score and set some of its parameters.
Scores to be computed on every iteration:
>>> score = BaseScore()
>>> score = BaseScore(should_compute=BaseScore.compute_always)
>>> score = BaseScore(should_compute=lambda i: True)
>>> score = BaseScore(should_compute=True)
Scores to be computed only on the last iteration:
>>> score = BaseScore(should_compute=BaseScore.compute_on_last)
>>> score = BaseScore(should_compute=lambda i: False)
>>> score = BaseScore(should_compute=False)
Score to be computed only on even iterations:
>>> score = BaseScore(should_compute=lambda i: i % 2 == 0)
"""
        self._name = name

        if should_compute is None or should_compute is True:
            should_compute = self.compute_always
        elif should_compute is False:
            should_compute = self.compute_on_last
        elif not callable(should_compute):
            raise TypeError(f'Unknown type of `should_compute`: {type(should_compute)}!')

        self._should_compute = should_compute
        self.value = []

        # Make the custom score class visible in the `scores` module
        if not hasattr(tn_scores, self.__class__.__name__):
            setattr(tn_scores, self.__class__.__name__, self.__class__)
    @staticmethod
    def compute_always(fit_iteration: int) -> bool:
        return True

    @staticmethod
    def compute_on_last(fit_iteration: int) -> bool:
        # The fitting process itself guarantees that every score
        # is computed on the last fit iteration,
        # so this predicate may safely always return False
        return False

    def __repr__(self):
        return f'{self.__class__.__name__}'

    def save(self, path):
        with open(path, "wb") as f:
            dill.dump(self, f)

    @classmethod
    def load(cls, path):
        with open(path, "rb") as f:
            score = dill.load(f)

        return score
    def update(self, score):
        """
        Parameters
        ----------
        score : float
            Score value
        """
        known_errors = (ValueError, TypeError)

        try:
            score = float(score)
        except known_errors as error:
            raise ValueError(f'Score call should return float, not {score!r}') from error

        self.value.append(score)
    def call(self, model, precomputed_data: Dict[str, Any] = None):
        """
        Call to custom score function.

        Parameters
        ----------
        model : TopicModel
            A TopicNet model inherited from BaseModel
        precomputed_data
            Data which scores may share with each other during *one fit iteration*.
            For example, if the model has several scores of the same score class,
            and there is a heavy, time consuming computation inside this score class,
            it may be useful to perform the calculation *only once*, for one score instance,
            and then make the result visible to all other scores that might need it.

        Returns
        -------
        float
            score value

        Notes
        -----
        A higher score value does not necessarily correspond to a better model.
        It is up to the user to decide what the score means,
        and then to use this logic in the query for Experiment's `select()` method.

        If one needs the ARTM model for the score (not the TopicNet one),
        it is available as `model._model`.

        When creating a custom score class,
        it is recommended to use `**kwargs` in the score's `call` method,
        so that all `BaseScore` optional parameters are also available
        in its successor score classes.

        Examples
        --------
        Score which uses `precomputed_data`:

        >>> import time
        ...
        >>> class NewScore(BaseScore):
        ...     def __init__(self, name: str, multiplier: float):
        ...         super().__init__(name=name)
        ...
        ...         self._multiplier = multiplier
        ...         self._heavy_value_name = 'time_consuming_value_name'
        ...
        ...     def call(self, model, precomputed_data=None):
        ...         if precomputed_data is None:
        ...             # Parameter `precomputed_data` is optional in BaseScore,
        ...             # so this case also should be supported
        ...             heavy_value = self._compute_heavy(model)
        ...         elif self._heavy_value_name in precomputed_data:
        ...             # This is going to be fast
        ...             heavy_value = precomputed_data[self._heavy_value_name]
        ...         else:
        ...             # This is slow (but only one such call!)
        ...             heavy_value = self._compute_heavy(model)
        ...             precomputed_data[self._heavy_value_name] = heavy_value
        ...
        ...         return heavy_value * self._multiplier
        ...
        ...     def _compute_heavy(self, model):
        ...         time.sleep(100)  # just for demonstration
        ...
        ...         return 0
        """
        raise NotImplementedError('Define your score here')
Classes
class BaseScore (name: str = None, should_compute: Union[Callable[[int], bool], bool] = None)
-
Base class to construct custom score functions.
Parameters
name
- Name of the score
should_compute
- Function which decides whether the score should be computed on the current fit iteration or not. If `should_compute` is `None`, the score is computed on every iteration. At the same time, whatever function one defines, the score is always computed on the last fit iteration. This is done for two reasons. Firstly, so that the score is always computed at least once during `model._fit()`. Secondly, so that `experiment.select()` works correctly.
The parameter `should_compute` might be helpful if the score is slow but one still needs to track how the score depends on the iteration (in such a case, one may compute the score on every even iteration, for example). However, be aware that if `should_compute` is used for some of a model's scores, the scores may have different numbers of values in `model.scores`! The number of score values is the number of times the score was calculated; the first value corresponds to the first fit iteration which passed `should_compute`, and so on.
There are a couple of things also worth noting: fit iteration numbering starts from zero, and every new `model._fit()` call starts a new range of fit iterations.
Examples
The scores created below are unworkable (as BaseScore has no `call` method implemented). These are just examples of how one can create a score and set some of its parameters.
Scores to be computed on every iteration:
>>> score = BaseScore()
>>> score = BaseScore(should_compute=BaseScore.compute_always)
>>> score = BaseScore(should_compute=lambda i: True)
>>> score = BaseScore(should_compute=True)
Scores to be computed only on the last iteration:
>>> score = BaseScore(should_compute=BaseScore.compute_on_last)
>>> score = BaseScore(should_compute=lambda i: False)
>>> score = BaseScore(should_compute=False)
Score to be computed only on even iterations:
>>> score = BaseScore(should_compute=lambda i: i % 2 == 0)
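To make the `should_compute` semantics concrete, here is a minimal sketch of how a fitting loop might consult the predicate. This is illustrative only, not the actual `model._fit()` implementation; `fit_with_scores`, `model`, `score`, and `num_iterations` are assumed names:
>>> def fit_with_scores(model, score, num_iterations: int):
...     for fit_iteration in range(num_iterations):  # numbering starts from zero
...         # ...one fit iteration over the collection would happen here...
...         is_last_iteration = fit_iteration == num_iterations - 1
...
...         # The predicate is consulted on every iteration,
...         # but the last iteration is computed unconditionally
...         if score._should_compute(fit_iteration) or is_last_iteration:
...             score.update(score.call(model))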
Static methods
def compute_always(fit_iteration: int) ‑> bool
-
def compute_on_last(fit_iteration: int) ‑> bool
-
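Note that `compute_on_last` always returns `False`: with this predicate the score is skipped on every ordinary iteration, and it is the fitting process itself that forces the computation on the last one. Both predicates can be checked directly:
>>> BaseScore.compute_always(0)
True
>>> BaseScore.compute_always(7)
True
>>> BaseScore.compute_on_last(7)
False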
def load(path)
-
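A minimal save/load round trip using dill serialization (the file name `score.dill` here is arbitrary):
>>> score = BaseScore(name='my_score')
>>> score.save('score.dill')
>>> restored = BaseScore.load('score.dill')
>>> restored._name
'my_score'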
Methods
def call(self, model, precomputed_data: Dict[str, Any] = None)
-
Call to custom score function.
Parameters
model : TopicModel
- A TopicNet model inherited from BaseModel
precomputed_data
- Data which scores may share with each other during one fit iteration. For example, if the model has several scores of the same score class, and there is a heavy, time consuming computation inside this score class, it may be useful to perform the calculation only once, for one score instance, and then make the result visible to all other scores that might need it.
Returns
float
- score value
Notes
A higher score value does not necessarily correspond to a better model. It is up to the user to decide what the score means, and then to use this logic in the query for Experiment's `select()` method.
If one needs the ARTM model for the score (not the TopicNet one), it is available as `model._model`.
When creating a custom score class, it is recommended to use `**kwargs` in the score's `call` method, so that all `BaseScore` optional parameters are also available in its successor score classes.
Examples
Score which uses `precomputed_data`:
>>> import time
...
>>> class NewScore(BaseScore):
...     def __init__(self, name: str, multiplier: float):
...         super().__init__(name=name)
...
...         self._multiplier = multiplier
...         self._heavy_value_name = 'time_consuming_value_name'
...
...     def call(self, model, precomputed_data=None):
...         if precomputed_data is None:
...             # Parameter `precomputed_data` is optional in BaseScore,
...             # so this case also should be supported
...             heavy_value = self._compute_heavy(model)
...         elif self._heavy_value_name in precomputed_data:
...             # This is going to be fast
...             heavy_value = precomputed_data[self._heavy_value_name]
...         else:
...             # This is slow (but only one such call!)
...             heavy_value = self._compute_heavy(model)
...             precomputed_data[self._heavy_value_name] = heavy_value
...
...         return heavy_value * self._multiplier
...
...     def _compute_heavy(self, model):
...         time.sleep(100)  # just for demonstration
...
...         return 0
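Below is a sketch of how several instances of the `NewScore` class from the example above could share one `precomputed_data` dictionary within a single fit iteration; `model` is assumed to be an already built TopicModel:
>>> scores = [NewScore(name='x2', multiplier=2), NewScore(name='x3', multiplier=3)]
>>> precomputed_data = dict()
>>> for score in scores:  # the heavy value is computed once, then reused from the dictionary
...     score.update(score.call(model, precomputed_data))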
def save(self, path)
-
def update(self, score)
-
Parameters
score : float
- Score value
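For example, `update` accepts anything convertible to `float` and appends the converted value to `score.value`; other values raise a `ValueError`:
>>> score = BaseScore()
>>> score.update(0.5)
>>> score.update('0.25')  # strings convertible to float are accepted too
>>> score.value
[0.5, 0.25]
>>> score.update('not a number')
Traceback (most recent call last):
    ...
ValueError: Score call should return float, not 'not a number'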