Module topicnet.cooking_machine.cubes.regularizer_cube
Expand source code
from .base_cube import BaseCube
from ..routine import transform_complex_entity_to_dict
from ..rel_toolbox_lite import count_vocab_size, handle_regularizer
from ..models.base_regularizer import BaseRegularizer
from copy import deepcopy
class RegularizersModifierCube(BaseCube):
    """
    Allows to create cubes of training and apply them to a topic model.
    """
    def __init__(self, num_iter: int, regularizer_parameters,
                 reg_search='grid', use_relative_coefficients: bool = True, strategy=None,
                 tracked_score_function=None,
                 verbose: bool = False, separate_thread: bool = True):
        """
        Initialize stage. Checks params and updates internal attributes.

        Parameters
        ----------
        num_iter : int
            number of iterations or method
        regularizer_parameters : list[dict] or dict
            regularizers params
        reg_search : str
            "grid", "pair", "add" or "mul".
            "pair" for elementwise grid search in the case of several regularizers
            "grid" for the full grid search in the case of several regularizers
            "add" and "mul" for the arithmetic and geometric progression
            respectively for PerplexityStrategy
            (Default value = "grid")
        use_relative_coefficients : bool
            forces the regularizer coefficient to be in relative form
            i.e. normalized over collection properties
        strategy : BaseStrategy
            optimization approach (Default value = None)
        tracked_score_function : retrieve_score_for_strategy
            optimizable function for strategy (Default value = None)
        verbose : bool
            visualization flag (Default value = False)
        separate_thread : bool
            will train models inside a separate thread if True
        """
        super().__init__(num_iter=num_iter, action='reg_modifier',
                         reg_search=reg_search, strategy=strategy,
                         tracked_score_function=tracked_score_function, verbose=verbose,
                         separate_thread=separate_thread)
        self._relative = use_relative_coefficients
        # A single parameter dict is normalized to a one-element list
        if isinstance(regularizer_parameters, dict):
            regularizer_parameters = [regularizer_parameters]
        self._add_regularizers(regularizer_parameters)

    def _check_all_regularizer_parameters(self, regularizer_parameters):
        """
        Checks that the regularizer parameters are well-formed.

        Parameters
        ----------
        regularizer_parameters : list of dict

        Raises
        ------
        ValueError
            if the list is empty, an entry is not a dict, or (for "pair"
            search) the tau grids differ in length
        """
        if len(regularizer_parameters) <= 0:
            raise ValueError("There are no parameters.")
        for one_regularizer_parameters in regularizer_parameters:
            if not isinstance(one_regularizer_parameters, dict):
                wrong_type = type(one_regularizer_parameters)
                raise ValueError(f"One regularizer should be dict, not {wrong_type}")
        if self.reg_search == "pair":
            # TODO: infinite length support
            # "pair" search zips the grids elementwise, so all grids must be equal in size
            grid_size = len(regularizer_parameters[0]["tau_grid"])
            for one_regularizer_parameters in regularizer_parameters:
                if len(one_regularizer_parameters["tau_grid"]) != grid_size:
                    raise ValueError("Grid size is not the same.")

    def _add_regularizers(self, all_regularizer_parameters):
        """
        Validates parameters and builds the internal search space description.

        Parameters
        ----------
        all_regularizer_parameters : list of dict
        """
        self._check_all_regularizer_parameters(all_regularizer_parameters)
        self.raw_parameters = all_regularizer_parameters

        def _retrieve_object(params):
            """
            Returns the regularizer object itself when provided, otherwise
            a dict referencing an already-attached regularizer by name.

            Parameters
            ----------
            params : dict

            Returns
            -------
            regularizer object or dict
            """
            if "regularizer" in params:
                return params["regularizer"]
            return {"name": params["name"]}

        self.parameters = [
            {
                "object": _retrieve_object(params),
                "field": "tau",
                "values": params.get('tau_grid', [])
            }
            for params in all_regularizer_parameters
        ]

    def apply(self, topic_model, one_model_parameter, dictionary=None, model_id=None):
        """
        Applies regularizers and parameters to model

        Parameters
        ----------
        topic_model : TopicModel
        one_model_parameter : list or tuple
        dictionary : Dictionary
            (Default value = None)
        model_id : str
            (Default value = None)

        Returns
        -------
        TopicModel
        """
        new_model = topic_model.clone(model_id)
        new_model.parent_model_id = topic_model.model_id
        # data_stats stays None when absolute coefficients are used;
        # handle_regularizer receives it together with self._relative
        self.data_stats = None
        if self._relative:
            modalities = new_model.class_ids
            self.data_stats = count_vocab_size(dictionary, modalities)
        for regularizer_data in one_model_parameter:
            # the field is always "tau" (see self.parameters), so it is unused here
            regularizer, _field_name, params = regularizer_data
            if isinstance(regularizer, dict):
                name = regularizer['name']
                if name not in new_model.all_regularizers:
                    error_msg = (f"Regularizer {name} does not exist. "
                                 f"Cannot be modified.")
                    raise ValueError(error_msg)
                if name in new_model.custom_regularizers:
                    # custom regularizers are modified in place, without relativization
                    new_model.custom_regularizers[name].tau = params
                else:
                    # classic (artm) regularizer: copy it and attempt to
                    # relativize its coefficient
                    new_regularizer = deepcopy(new_model.all_regularizers[name])
                    new_regularizer._tau = params
                    handle_regularizer(
                        self._relative,
                        new_model,
                        new_regularizer,
                        self.data_stats,
                    )
            elif isinstance(regularizer, BaseRegularizer):
                # TODO: do we actually need to deepcopy here?
                new_regularizer = deepcopy(regularizer)
                new_regularizer.tau = params
                new_model.custom_regularizers[regularizer.name] = new_regularizer
            elif 'Regularizer' in str(type(regularizer)):
                # duck-typed (artm library) regularizer instance
                new_regularizer = deepcopy(regularizer)
                new_regularizer._tau = params
                handle_regularizer(
                    self._relative,
                    new_model,
                    new_regularizer,
                    self.data_stats,
                )
            else:
                error_msg = f"Regularizer instance or name must be specified for {regularizer}."
                raise ValueError(error_msg)
        return new_model

    def get_jsonable_from_parameters(self):
        """
        Returns a JSON-serializable representation of the cube parameters.

        Returns
        -------
        list of dict
        """
        jsonable_parameters = []
        for one_model_parameters in self.raw_parameters:
            one_jsonable = {"tau_grid": one_model_parameters.get("tau_grid", [])}
            if "regularizer" in one_model_parameters:
                one_regularizer = one_model_parameters['regularizer']
                if not isinstance(one_regularizer, dict):
                    one_regularizer = transform_complex_entity_to_dict(one_regularizer)
                one_jsonable["regularizer"] = one_regularizer
            else:
                one_jsonable["name"] = one_model_parameters["name"]
            jsonable_parameters.append(one_jsonable)
        return jsonable_parameters
Classes
class RegularizersModifierCube (num_iter: int, regularizer_parameters, reg_search='grid', use_relative_coefficients: bool = True, strategy=None, tracked_score_function=None, verbose: bool = False, separate_thread: bool = True)
-
Allows to create cubes of training and apply them to a topic model.
Initialize stage. Checks params and updates internal attributes.
Parameters
num_iter
:int
- number of iterations or method
regularizer_parameters
:list[dict]
ordict
- regularizers params
reg_search
:str
- "grid", "pair", "add" or "mul". "pair" for elementwise grid search in the case of several regularizers "grid" for the full grid search in the case of several regularizers "add" and "mul" for the arithmetic and geometric progression respectively for PerplexityStrategy (Default value = "grid")
use_relative_coefficients
:bool
- forces the regularizer coefficient to be in relative form i.e. normalized over collection properties
strategy
:BaseStrategy
- optimization approach (Default value = None)
tracked_score_function
:retrieve_score_for_strategy
- optimizable function for strategy (Default value = None)
verbose
:bool
- visualization flag (Default value = False)
separate_thread
:bool
- will train models inside a separate thread if True
Expand source code
class RegularizersModifierCube(BaseCube): """ Allows to create cubes of training and apply them to a topic model. """ def __init__(self, num_iter: int, regularizer_parameters, reg_search='grid', use_relative_coefficients: bool = True, strategy=None, tracked_score_function=None, verbose: bool = False, separate_thread: bool = True): """ Initialize stage. Checks params and update internal attributes. Parameters ---------- num_iter : int number of iterations or method regularizer_parameters : list[dict] or dict regularizers params reg_search : str "grid", "pair", "add" or "mul". "pair" for elementwise grid search in the case of several regularizers "grid" for the fullgrid search in the case of several regularizers "add" and "mul" for the ariphmetic and geometric progression respectively for PerplexityStrategy (Default value = "grid") use_relative_coefficients : bool forces the regularizer coefficient to be in relative form i.e. normalized over collection properties strategy : BaseStrategy optimization approach (Default value = None) tracked_score_function : retrieve_score_for_strategy optimizable function for strategy (Default value = None) verbose : bool visualization flag (Default value = False) separate_thread : bool will train models inside a separate thread if True """ # noqa: W291 super().__init__(num_iter=num_iter, action='reg_modifier', reg_search=reg_search, strategy=strategy, tracked_score_function=tracked_score_function, verbose=verbose, separate_thread=separate_thread) self._relative = use_relative_coefficients if isinstance(regularizer_parameters, dict): regularizer_parameters = [regularizer_parameters] self._add_regularizers(regularizer_parameters) def _check_all_regularizer_parameters(self, regularizer_parameters): """ Checks and updates params of all regularizers. Inplace. 
Parameters ---------- regularizer_parameters : list of dict """ if len(regularizer_parameters) <= 0: raise ValueError("There is no parameters.") for i, one_regularizer_parameters in enumerate(regularizer_parameters): if not isinstance(one_regularizer_parameters, dict): wrong_type = type(one_regularizer_parameters) raise ValueError(f"One regularizer should be dict, not {wrong_type}") if self.reg_search == "pair": # TODO: infinite length support grid_size = len(regularizer_parameters[0]["tau_grid"]) for one_regularizer_parameters in regularizer_parameters: if len(one_regularizer_parameters["tau_grid"]) != grid_size: raise ValueError("Grid size is not the same.") def _add_regularizers(self, all_regularizer_parameters): """ Parameters ---------- all_regularizer_parameters : list of dict """ self._check_all_regularizer_parameters(all_regularizer_parameters) self.raw_parameters = all_regularizer_parameters def _retrieve_object(params): """ Parameters ---------- params : dict Returns ------- """ if "regularizer" in params: return params["regularizer"] else: return {"name": params["name"]} self.parameters = [ { "object": _retrieve_object(params), "field": "tau", "values": params.get('tau_grid', []) } for params in all_regularizer_parameters ] def apply(self, topic_model, one_model_parameter, dictionary=None, model_id=None): """ Applies regularizers and parameters to model Parameters ---------- topic_model : TopicModel one_model_parameter : list or tuple dictionary : Dictionary (Default value = None) model_id : str (Default value = None) Returns ------- TopicModel """ new_model = topic_model.clone(model_id) new_model.parent_model_id = topic_model.model_id modalities = dict() self.data_stats = None if self._relative: modalities = new_model.class_ids if not getattr(self, 'data_stats', None): self.data_stats = count_vocab_size(dictionary, modalities) for regularizer_data in one_model_parameter: regularizer, field_name, params = regularizer_data regularizer_type = 
str(type(regularizer)) if isinstance(regularizer, dict): if regularizer['name'] in new_model.all_regularizers.keys(): # TODO: do we actually need to deepcopy custom regularizers? new_regularizer = deepcopy(new_model.all_regularizers[regularizer['name']]) if regularizer['name'] in new_model.custom_regularizers: new_model.custom_regularizers[regularizer['name']].tau = params else: # if this is classic regularizer, we attempt to relativize it's coefficients new_regularizer._tau = params handle_regularizer( self._relative, new_model, new_regularizer, self.data_stats, ) else: error_msg = (f"Regularizer {regularizer['name']} does not exist. " f"Cannot be modified.") raise ValueError(error_msg) elif isinstance(regularizer, BaseRegularizer): # TODO: do we actually need to deepcopy here? new_regularizer = deepcopy(regularizer) new_regularizer.tau = params new_model.custom_regularizers[regularizer.name] = new_regularizer elif 'Regularizer' in regularizer_type: new_regularizer = deepcopy(regularizer) new_regularizer._tau = params handle_regularizer( self._relative, new_model, new_regularizer, self.data_stats, ) else: error_msg = f"Regularizer instance or name must be specified for {regularizer}." raise ValueError(error_msg) return new_model def get_jsonable_from_parameters(self): """ """ jsonable_parameters = [] for one_model_parameters in self.raw_parameters: one_jsonable = {"tau_grid": one_model_parameters.get("tau_grid", [])} if "regularizer" in one_model_parameters: one_regularizer = one_model_parameters['regularizer'] if not isinstance(one_regularizer, dict): one_regularizer = transform_complex_entity_to_dict(one_regularizer) one_jsonable["regularizer"] = one_regularizer else: one_jsonable["name"] = one_model_parameters["name"] jsonable_parameters.append(one_jsonable) return jsonable_parameters
Ancestors
Methods
def apply(self, topic_model, one_model_parameter, dictionary=None, model_id=None)
-
Applies regularizers and parameters to model
Parameters
topic_model
:TopicModel
one_model_parameter
:list
ortuple
dictionary
:Dictionary
- (Default value = None)
model_id
:str
- (Default value = None)
Returns
TopicModel
Expand source code
def apply(self, topic_model, one_model_parameter, dictionary=None, model_id=None): """ Applies regularizers and parameters to model Parameters ---------- topic_model : TopicModel one_model_parameter : list or tuple dictionary : Dictionary (Default value = None) model_id : str (Default value = None) Returns ------- TopicModel """ new_model = topic_model.clone(model_id) new_model.parent_model_id = topic_model.model_id modalities = dict() self.data_stats = None if self._relative: modalities = new_model.class_ids if not getattr(self, 'data_stats', None): self.data_stats = count_vocab_size(dictionary, modalities) for regularizer_data in one_model_parameter: regularizer, field_name, params = regularizer_data regularizer_type = str(type(regularizer)) if isinstance(regularizer, dict): if regularizer['name'] in new_model.all_regularizers.keys(): # TODO: do we actually need to deepcopy custom regularizers? new_regularizer = deepcopy(new_model.all_regularizers[regularizer['name']]) if regularizer['name'] in new_model.custom_regularizers: new_model.custom_regularizers[regularizer['name']].tau = params else: # if this is classic regularizer, we attempt to relativize it's coefficients new_regularizer._tau = params handle_regularizer( self._relative, new_model, new_regularizer, self.data_stats, ) else: error_msg = (f"Regularizer {regularizer['name']} does not exist. " f"Cannot be modified.") raise ValueError(error_msg) elif isinstance(regularizer, BaseRegularizer): # TODO: do we actually need to deepcopy here? new_regularizer = deepcopy(regularizer) new_regularizer.tau = params new_model.custom_regularizers[regularizer.name] = new_regularizer elif 'Regularizer' in regularizer_type: new_regularizer = deepcopy(regularizer) new_regularizer._tau = params handle_regularizer( self._relative, new_model, new_regularizer, self.data_stats, ) else: error_msg = f"Regularizer instance or name must be specified for {regularizer}." raise ValueError(error_msg) return new_model
Inherited members