Module topicnet.cooking_machine.models.base_model

Expand source code
import json
import os
from copy import deepcopy
from numbers import Number

from ..routine import get_timestamp_in_str_format
from ..routine import transform_topic_model_description_to_jsonable

MODEL_NAME_LENGTH = 26


def padd_model_name(model_id):
    """
    Brings a model id to exactly ``MODEL_NAME_LENGTH`` characters.

    Parameters
    ----------
    model_id : str
        raw model id

    Returns
    -------
    str
        the id centred between dashes when it is shorter than the target
        length (the extra dash, if any, goes to the right side);
        otherwise the last ``MODEL_NAME_LENGTH`` characters of the id

    """
    missing = MODEL_NAME_LENGTH - len(model_id)

    if missing <= 0:
        # keep the tail of the id so that an index suffix like "___n" survives
        return model_id[-MODEL_NAME_LENGTH:]

    left, extra = divmod(missing, 2)
    return ''.join(['-' * left, model_id, '-' * (left + extra)])


class BaseModel(object):
    def __init__(self, model_id=None, parent_model_id=None, experiment=None, *args, **kwargs):
        """
        Initialize stage, also used for loading previously saved experiments.

        Without an experiment the model id is the padded `model_id`
        (or a padded timestamp if `model_id` is None).
        With an experiment the id is additionally made unique among the
        entries that already exist in the experiment save folder:
        on collision the tail of the candidate name is replaced
        with an underscore-filled index suffix.

        Parameters
        ----------
        model_id : str
            model id (Default value = None)
        parent_model_id : str
            model id from which current model was created (Default value = None)
        experiment : Experiment
            the experiment to which the model is bound (Default value = None)

        Raises
        ------
        ValueError
            if the resulting model id is already registered in the experiment

        """
        # stored directly: the parent_model_id setter check is not applied here
        self._parent_model_id = parent_model_id
        self.experiment = experiment

        # set unique model_id in the experiment
        if self.experiment is None:
            if model_id is None:
                self.set_model_id_as_timestamp()
            else:
                self.model_id = padd_model_name(model_id)
        else:
            experiment_save_path = getattr(experiment, 'save_path', None)
            experiment_id = getattr(experiment, 'experiment_id', None)
            save_folder = os.path.join(experiment_save_path, experiment_id)
            if model_id is None:
                candidate_name = get_timestamp_in_str_format()
            else:
                candidate_name = model_id

            model_index = 0
            index_suffix_length = 5
            new_model_id = padd_model_name(candidate_name)
            new_model_save_path = os.path.join(save_folder, new_model_id)

            # try "<name>____1", "<name>____2", ... until the folder name is free
            while os.path.exists(new_model_save_path):
                model_index += 1
                new_model_id = padd_model_name(
                    self._indexed_candidate_name(
                        candidate_name, model_index, index_suffix_length
                    )
                )
                new_model_save_path = os.path.join(save_folder, new_model_id)

            self.model_id = new_model_id

        self._description = []
        self._scores = dict()
        self._score_functions = dict()
        self._custom_scores = []

    @staticmethod
    def _indexed_candidate_name(candidate_name, model_index, index_suffix_length):
        """
        Builds a candidate name carrying a collision index.

        The last `index_suffix_length` characters of `candidate_name`
        are dropped and replaced with `model_index`, right-aligned
        and left-filled with underscores up to `index_suffix_length`.

        Parameters
        ----------
        candidate_name : str
        model_index : int
        index_suffix_length : int

        Returns
        -------
        str

        """
        # Must be a plain str.format template: as an f-string the fields
        # would be evaluated as the literals 0, 1, 2 and yield "0_1" always.
        return "{0}{1:_>{2}}".format(
            candidate_name[:-index_suffix_length], model_index, index_suffix_length
        )

    def __repr__(self):
        if self.experiment is not None:
            experiment_id = self.experiment.experiment_id
        else:
            experiment_id = None

        return f'{self.__class__.__name__}(id={self.model_id}, ' \
               f'parent_id={self.parent_model_id}, ' \
               f'experiment_id={experiment_id}' \
               f')'

    def _fit(self, dataset_trainable, num_iterations):
        """
        Fitting stage. Not implemented in the base class.

        Parameters
        ----------
        dataset_trainable : optional
            TODO: describe after dataset implementation
        num_iterations : int
            number of iterations for fitting.

        """
        raise NotImplementedError

    def get_phi(self, *args, **kwargs):
        """
        Not implemented in the base class.

        """
        raise NotImplementedError

    def get_theta(self, dataset=None, *args, **kwargs):
        """
        Not implemented in the base class.

        Parameters
        ----------
        dataset : Dataset
             (Default value = None)

        """
        raise NotImplementedError

    def save(self, path, *args, **kwargs):
        """
        Not implemented in the base class.

        Parameters
        ----------
        path : str

        """
        raise NotImplementedError

    @staticmethod
    def load(path, *args, **kwargs):
        """
        Not implemented in the base class.

        Parameters
        ----------
        path : str

        """
        raise NotImplementedError

    def clone(self):
        """
        Returns a deep copy of the model.

        """
        return deepcopy(self)

    def get_jsonable_from_parameters(self):
        """
        Not implemented in the base class.

        """
        raise NotImplementedError

    @property
    def score_functions(self):
        """
        Dictionary stored in `_score_functions` (empty after initialization).

        """
        return self._score_functions

    @property
    def scores(self):
        """
        Dictionary stored in `_scores` (empty after initialization).

        """
        return self._scores

    def add_cube(self, cube):
        """
        Adds cube to the model description and saves the model parameters.

        Parameters
        ----------
        cube : dict
            training cube params.

        """
        self.description.append(cube)
        self.save_parameters()

    @property
    def depth(self):
        """
        Returns depth of the model: the number of entries in its description.

        """
        return len(self.description)

    @property
    def description(self):
        """
        List of cubes added to the model via `add_cube`.

        """
        return self._description

    @property
    def parent_model_id(self):
        """
        Id of the model from which the current one was created.

        """
        return self._parent_model_id

    @parent_model_id.setter
    def parent_model_id(self, new_id):
        """
        Sets parent model id.

        Parameters
        ----------
        new_id : str

        Raises
        ------
        ValueError
            if `new_id` is not found among the models of the experiment
            (this is always the case when the model has no experiment)

        """
        if self._check_is_model_id_in_experiment(new_id):
            self._parent_model_id = new_id
        else:
            raise ValueError(f'Model with id: {new_id} does not exist.')

    def save_parameters(self, model_save_path=None):
        """
        Saves params of the model as `params.json` in the model folder.

        Parameters
        ----------
        model_save_path : str
            folder to write to; `model_default_save_path` is used if None
            (Default value = None)

        """
        if model_save_path is None:
            model_save_path = self.model_default_save_path
        os.makedirs(model_save_path, exist_ok=True)
        parameters = self.get_parameters()
        # context manager guarantees the file is flushed and closed
        with open(f"{model_save_path}/params.json", "w") as params_file:
            json.dump(parameters, params_file,
                      default=transform_topic_model_description_to_jsonable)

    def get_parameters(self):
        """
        Gets all params of the model.

        Relies on `get_init_parameters()` and `data_path`, which are not
        defined in this class (presumably provided by subclasses).

        Returns
        -------
        dict
            parameters of the model

        """
        parameters = {
            "model_id": self.model_id,
            "init_parameters": self.get_init_parameters(),
            "parent_model_id": self.parent_model_id,
            "data_path": self.data_path,
            "description": self.description,
            "depth": self.depth,
            "scores": self._get_short_scores()
        }
        if self.experiment is None:
            parameters["experiment_id"] = None
        else:
            parameters["experiment_id"] = self.experiment.experiment_id

        return parameters

    def _get_short_scores(self):
        """
        Builds a copy of the scores where every value which is not a number
        is replaced with a string of the form "NaN (<type of the value>)".

        Returns
        -------
        dict
            score name -> list of values

        """
        short_scores = {}

        # sometimes self.scores could be None
        for score_name in self.scores or {}:
            values = self.scores[score_name]

            if len(values) == 0:
                short_scores[score_name] = []
                continue

            short_scores[score_name] = [
                v if isinstance(v, Number) else f"NaN ({type(v)})"
                for v in values
            ]

        return short_scores

    @property
    def model_default_save_path(self):
        """
        Default folder of the model.

        Returns
        -------
        str
            experiment save path, experiment id and model id joined into
            one path if all three are truthy, otherwise the model id alone

        """
        # getattr with a default: if the experiment is None (or has no such
        # attribute) the component is just None and the short path is used
        experiment_save_path = getattr(self.experiment, 'save_path', None)
        experiment_id = getattr(self.experiment, 'experiment_id', None)

        assert self.model_id is not None

        path_components = [
            experiment_save_path,
            experiment_id,
            self.model_id
        ]

        path_possible = all(path_components)

        if path_possible:
            path_to_save = os.path.join(*path_components)
        else:
            path_to_save = self.model_id

        return path_to_save

    @property
    def model_id(self):
        """
        Id of the model.

        """
        return self._model_id

    @model_id.setter
    def model_id(self, new_id):
        """
        Sets model id.

        Parameters
        ----------
        new_id : str

        Raises
        ------
        ValueError
            if a model with such id is already present in the experiment

        """
        if self._check_is_model_id_in_experiment(new_id):
            raise ValueError(f'Model with id: {new_id} already exists.')
        else:
            self._model_id = new_id

    def set_model_id_as_timestamp(self):
        """
        Sets the padded current timestamp as model id
        (without checking the experiment for an id collision).

        """
        self._model_id = padd_model_name(get_timestamp_in_str_format())

    def _check_is_model_id_in_experiment(self, model_id):
        """
        Checks whether `model_id` is among the keys of `experiment.models_info`.

        Parameters
        ----------
        model_id : str

        Returns
        -------
        bool
            False if the model has no experiment

        """
        if self.experiment is None:
            return False
        if model_id in self.experiment.models_info.keys():
            return True
        return False

Functions

def padd_model_name(model_id)
Expand source code
def padd_model_name(model_id):
    """
    Brings a model id to exactly ``MODEL_NAME_LENGTH`` characters.

    Parameters
    ----------
    model_id : str
        raw model id

    Returns
    -------
    str
        the id centred between dashes when it is shorter than the target
        length (the extra dash, if any, goes to the right side);
        otherwise the last ``MODEL_NAME_LENGTH`` characters of the id

    """
    missing = MODEL_NAME_LENGTH - len(model_id)

    if missing <= 0:
        # keep the tail of the id so that an index suffix like "___n" survives
        return model_id[-MODEL_NAME_LENGTH:]

    left, extra = divmod(missing, 2)
    return ''.join(['-' * left, model_id, '-' * (left + extra)])

Classes

class BaseModel (model_id=None, parent_model_id=None, experiment=None, *args, **kwargs)

Initialize stage, also used for loading previously saved experiments.

Parameters

model_id : str
model id (Default value = None)
parent_model_id : str
model id from which current model was created (Default value = None)
experiment : Experiment
the experiment to which the model is bound (Default value = None)
Expand source code
class BaseModel(object):
    def __init__(self, model_id=None, parent_model_id=None, experiment=None, *args, **kwargs):
        """
        Initialize stage, also used for loading previously saved experiments.

        Without an experiment the model id is the padded `model_id`
        (or a padded timestamp if `model_id` is None).
        With an experiment the id is additionally made unique among the
        entries that already exist in the experiment save folder:
        on collision the tail of the candidate name is replaced
        with an underscore-filled index suffix.

        Parameters
        ----------
        model_id : str
            model id (Default value = None)
        parent_model_id : str
            model id from which current model was created (Default value = None)
        experiment : Experiment
            the experiment to which the model is bound (Default value = None)

        Raises
        ------
        ValueError
            if the resulting model id is already registered in the experiment

        """
        # stored directly: the parent_model_id setter check is not applied here
        self._parent_model_id = parent_model_id
        self.experiment = experiment

        # set unique model_id in the experiment
        if self.experiment is None:
            if model_id is None:
                self.set_model_id_as_timestamp()
            else:
                self.model_id = padd_model_name(model_id)
        else:
            experiment_save_path = getattr(experiment, 'save_path', None)
            experiment_id = getattr(experiment, 'experiment_id', None)
            save_folder = os.path.join(experiment_save_path, experiment_id)
            if model_id is None:
                candidate_name = get_timestamp_in_str_format()
            else:
                candidate_name = model_id

            model_index = 0
            index_suffix_length = 5
            new_model_id = padd_model_name(candidate_name)
            new_model_save_path = os.path.join(save_folder, new_model_id)

            # try "<name>____1", "<name>____2", ... until the folder name is free
            while os.path.exists(new_model_save_path):
                model_index += 1
                new_model_id = padd_model_name(
                    self._indexed_candidate_name(
                        candidate_name, model_index, index_suffix_length
                    )
                )
                new_model_save_path = os.path.join(save_folder, new_model_id)

            self.model_id = new_model_id

        self._description = []
        self._scores = dict()
        self._score_functions = dict()
        self._custom_scores = []

    @staticmethod
    def _indexed_candidate_name(candidate_name, model_index, index_suffix_length):
        """
        Builds a candidate name carrying a collision index.

        The last `index_suffix_length` characters of `candidate_name`
        are dropped and replaced with `model_index`, right-aligned
        and left-filled with underscores up to `index_suffix_length`.

        Parameters
        ----------
        candidate_name : str
        model_index : int
        index_suffix_length : int

        Returns
        -------
        str

        """
        # Must be a plain str.format template: as an f-string the fields
        # would be evaluated as the literals 0, 1, 2 and yield "0_1" always.
        return "{0}{1:_>{2}}".format(
            candidate_name[:-index_suffix_length], model_index, index_suffix_length
        )

    def __repr__(self):
        if self.experiment is not None:
            experiment_id = self.experiment.experiment_id
        else:
            experiment_id = None

        return f'{self.__class__.__name__}(id={self.model_id}, ' \
               f'parent_id={self.parent_model_id}, ' \
               f'experiment_id={experiment_id}' \
               f')'

    def _fit(self, dataset_trainable, num_iterations):
        """
        Fitting stage. Not implemented in the base class.

        Parameters
        ----------
        dataset_trainable : optional
            TODO: describe after dataset implementation
        num_iterations : int
            number of iterations for fitting.

        """
        raise NotImplementedError

    def get_phi(self, *args, **kwargs):
        """
        Not implemented in the base class.

        """
        raise NotImplementedError

    def get_theta(self, dataset=None, *args, **kwargs):
        """
        Not implemented in the base class.

        Parameters
        ----------
        dataset : Dataset
             (Default value = None)

        """
        raise NotImplementedError

    def save(self, path, *args, **kwargs):
        """
        Not implemented in the base class.

        Parameters
        ----------
        path : str

        """
        raise NotImplementedError

    @staticmethod
    def load(path, *args, **kwargs):
        """
        Not implemented in the base class.

        Parameters
        ----------
        path : str

        """
        raise NotImplementedError

    def clone(self):
        """
        Returns a deep copy of the model.

        """
        return deepcopy(self)

    def get_jsonable_from_parameters(self):
        """
        Not implemented in the base class.

        """
        raise NotImplementedError

    @property
    def score_functions(self):
        """
        Dictionary stored in `_score_functions` (empty after initialization).

        """
        return self._score_functions

    @property
    def scores(self):
        """
        Dictionary stored in `_scores` (empty after initialization).

        """
        return self._scores

    def add_cube(self, cube):
        """
        Adds cube to the model description and saves the model parameters.

        Parameters
        ----------
        cube : dict
            training cube params.

        """
        self.description.append(cube)
        self.save_parameters()

    @property
    def depth(self):
        """
        Returns depth of the model: the number of entries in its description.

        """
        return len(self.description)

    @property
    def description(self):
        """
        List of cubes added to the model via `add_cube`.

        """
        return self._description

    @property
    def parent_model_id(self):
        """
        Id of the model from which the current one was created.

        """
        return self._parent_model_id

    @parent_model_id.setter
    def parent_model_id(self, new_id):
        """
        Sets parent model id.

        Parameters
        ----------
        new_id : str

        Raises
        ------
        ValueError
            if `new_id` is not found among the models of the experiment
            (this is always the case when the model has no experiment)

        """
        if self._check_is_model_id_in_experiment(new_id):
            self._parent_model_id = new_id
        else:
            raise ValueError(f'Model with id: {new_id} does not exist.')

    def save_parameters(self, model_save_path=None):
        """
        Saves params of the model as `params.json` in the model folder.

        Parameters
        ----------
        model_save_path : str
            folder to write to; `model_default_save_path` is used if None
            (Default value = None)

        """
        if model_save_path is None:
            model_save_path = self.model_default_save_path
        os.makedirs(model_save_path, exist_ok=True)
        parameters = self.get_parameters()
        # context manager guarantees the file is flushed and closed
        with open(f"{model_save_path}/params.json", "w") as params_file:
            json.dump(parameters, params_file,
                      default=transform_topic_model_description_to_jsonable)

    def get_parameters(self):
        """
        Gets all params of the model.

        Relies on `get_init_parameters()` and `data_path`, which are not
        defined in this class (presumably provided by subclasses).

        Returns
        -------
        dict
            parameters of the model

        """
        parameters = {
            "model_id": self.model_id,
            "init_parameters": self.get_init_parameters(),
            "parent_model_id": self.parent_model_id,
            "data_path": self.data_path,
            "description": self.description,
            "depth": self.depth,
            "scores": self._get_short_scores()
        }
        if self.experiment is None:
            parameters["experiment_id"] = None
        else:
            parameters["experiment_id"] = self.experiment.experiment_id

        return parameters

    def _get_short_scores(self):
        """
        Builds a copy of the scores where every value which is not a number
        is replaced with a string of the form "NaN (<type of the value>)".

        Returns
        -------
        dict
            score name -> list of values

        """
        short_scores = {}

        # sometimes self.scores could be None
        for score_name in self.scores or {}:
            values = self.scores[score_name]

            if len(values) == 0:
                short_scores[score_name] = []
                continue

            short_scores[score_name] = [
                v if isinstance(v, Number) else f"NaN ({type(v)})"
                for v in values
            ]

        return short_scores

    @property
    def model_default_save_path(self):
        """
        Default folder of the model.

        Returns
        -------
        str
            experiment save path, experiment id and model id joined into
            one path if all three are truthy, otherwise the model id alone

        """
        # getattr with a default: if the experiment is None (or has no such
        # attribute) the component is just None and the short path is used
        experiment_save_path = getattr(self.experiment, 'save_path', None)
        experiment_id = getattr(self.experiment, 'experiment_id', None)

        assert self.model_id is not None

        path_components = [
            experiment_save_path,
            experiment_id,
            self.model_id
        ]

        path_possible = all(path_components)

        if path_possible:
            path_to_save = os.path.join(*path_components)
        else:
            path_to_save = self.model_id

        return path_to_save

    @property
    def model_id(self):
        """
        Id of the model.

        """
        return self._model_id

    @model_id.setter
    def model_id(self, new_id):
        """
        Sets model id.

        Parameters
        ----------
        new_id : str

        Raises
        ------
        ValueError
            if a model with such id is already present in the experiment

        """
        if self._check_is_model_id_in_experiment(new_id):
            raise ValueError(f'Model with id: {new_id} already exists.')
        else:
            self._model_id = new_id

    def set_model_id_as_timestamp(self):
        """
        Sets the padded current timestamp as model id
        (without checking the experiment for an id collision).

        """
        self._model_id = padd_model_name(get_timestamp_in_str_format())

    def _check_is_model_id_in_experiment(self, model_id):
        """
        Checks whether `model_id` is among the keys of `experiment.models_info`.

        Parameters
        ----------
        model_id : str

        Returns
        -------
        bool
            False if the model has no experiment

        """
        if self.experiment is None:
            return False
        if model_id in self.experiment.models_info.keys():
            return True
        return False

Subclasses

Static methods

def load(path, *args, **kwargs)

Parameters

path : str
 
Expand source code
@staticmethod
def load(path, *args, **kwargs):
    """
    Placeholder for loading a model: always raises.

    Parameters
    ----------
    path : str

    Raises
    ------
    NotImplementedError

    """
    raise NotImplementedError()

Instance variables

var depth

Returns depth of the model.

Expand source code
@property
def depth(self):
    """
    Depth of the model: the number of entries in its description.

    Returns
    -------
    int

    """
    entries = self.description
    return len(entries)
var description
Expand source code
@property
def description(self):
    """
    Read-only access to the object kept in `_description`.

    """
    stored = self._description
    return stored
var model_default_save_path
Expand source code
@property
def model_default_save_path(self):
    """
    Default folder of the model.

    Returns
    -------
    str
        experiment save path, experiment id and model id joined into
        one path if all three are truthy, otherwise the model id alone

    """
    # getattr with a default: if the experiment is None (or has no such
    # attribute) the component is just None and the short path is used
    bound_experiment = self.experiment
    root_folder = getattr(bound_experiment, 'save_path', None)
    folder_of_experiment = getattr(bound_experiment, 'experiment_id', None)

    assert self.model_id is not None

    if root_folder and folder_of_experiment and self.model_id:
        return os.path.join(root_folder, folder_of_experiment, self.model_id)

    return self.model_id
var model_id
Expand source code
@property
def model_id(self):
    """
    Id of the model, as kept in `_model_id`.

    """
    current_id = self._model_id
    return current_id
var parent_model_id
Expand source code
@property
def parent_model_id(self):
    """
    Id of the parent model, as kept in `_parent_model_id`.

    """
    parent_id = self._parent_model_id
    return parent_id
var score_functions
Expand source code
@property
def score_functions(self):
    """
    Read-only access to the object kept in `_score_functions`.

    """
    registered = self._score_functions
    return registered
var scores
Expand source code
@property
def scores(self):
    """
    Read-only access to the object kept in `_scores`.

    """
    collected = self._scores
    return collected

Methods

def add_cube(self, cube)

Adds cube to the model.

Parameters

cube : dict
training cube params.
Expand source code
def add_cube(self, cube):
    """
    Appends the cube to the model description,
    then saves the model parameters.

    Parameters
    ----------
    cube : dict
        training cube params.

    """
    applied_cubes = self.description
    applied_cubes.append(cube)

    self.save_parameters()
def clone(self)
Expand source code
def clone(self):
    """
    Returns a deep copy of the model.

    """
    duplicate = deepcopy(self)
    return duplicate
def get_jsonable_from_parameters(self)
Expand source code
def get_jsonable_from_parameters(self):
    """
    Placeholder: always raises.

    Raises
    ------
    NotImplementedError

    """
    raise NotImplementedError()
def get_parameters(self)

Gets all params of the model.

Returns

dict
parameters of the model
Expand source code
def get_parameters(self):
    """
    Collects the parameters of the model into one dictionary.

    Returns
    -------
    dict
        model id, init parameters, parent model id, data path,
        description, depth, short scores and the id of the experiment
        (None if the model has no experiment)

    """
    collected = dict(
        model_id=self.model_id,
        init_parameters=self.get_init_parameters(),
        parent_model_id=self.parent_model_id,
        data_path=self.data_path,
        description=self.description,
        depth=self.depth,
        scores=self._get_short_scores(),
    )

    bound_experiment = self.experiment
    collected["experiment_id"] = (
        None if bound_experiment is None else bound_experiment.experiment_id
    )

    return collected
def get_phi(self, *args, **kwargs)
Expand source code
def get_phi(self, *args, **kwargs):
    """
    Placeholder: always raises.

    Raises
    ------
    NotImplementedError

    """
    raise NotImplementedError()
def get_theta(self, dataset=None, *args, **kwargs)

Parameters

dataset : Dataset
(Default value = None)
Expand source code
def get_theta(self, dataset=None, *args, **kwargs):
    """
    Placeholder: always raises.

    Parameters
    ----------
    dataset : Dataset
         (Default value = None)

    Raises
    ------
    NotImplementedError

    """
    raise NotImplementedError()
def save(self, path, *args, **kwargs)

Parameters

path : str
 
Expand source code
def save(self, path, *args, **kwargs):
    """
    Placeholder for saving a model: always raises.

    Parameters
    ----------
    path : str

    Raises
    ------
    NotImplementedError

    """
    raise NotImplementedError()
def save_parameters(self, model_save_path=None)

Saves params of the model.

Expand source code
def save_parameters(self, model_save_path=None):
    """
    Saves params of the model as `params.json` in the model folder.

    Parameters
    ----------
    model_save_path : str
        folder to write to; `model_default_save_path` is used if None
        (Default value = None)

    """
    if model_save_path is None:
        model_save_path = self.model_default_save_path
    os.makedirs(model_save_path, exist_ok=True)
    parameters = self.get_parameters()
    # context manager guarantees the file is flushed and closed
    with open(f"{model_save_path}/params.json", "w") as params_file:
        json.dump(parameters, params_file,
                  default=transform_topic_model_description_to_jsonable)
def set_model_id_as_timestamp(self)
Expand source code
def set_model_id_as_timestamp(self):
    """
    Stores the padded current timestamp in `_model_id`.

    """
    timestamp = get_timestamp_in_str_format()
    self._model_id = padd_model_name(timestamp)