Module topicnet.cooking_machine.recipes.artm_baseline_pipeline

Source code
from typing import List

from .recipe_wrapper import BaseRecipe
from .. import Dataset


ARTM_baseline_template = '''
# This config follows a strategy described by Murat Apishev,
# one of the core developers of the BigARTM library, in personal correspondence.
# According to his letter, a 'decent' topic model can be obtained by
# decorrelating the model topics while simultaneously inspecting the retrieved TopTokens.


# Use .format(modality_list=modality_list, main_modality=main_modality, dataset_path=dataset_path,
# dictionary_filter_parameters=dictionary_filter_parameters, specific_topics=specific_topics,
# background_topics=background_topics, num_iter=num_iter)
# when loading the recipe, to adjust it for your dataset

topics:
# Describes the number of model topics; the optimal number of topics is best defined by the user
    specific_topics: {specific_topics}
    background_topics: {background_topics}

# Here is an example of a model with one modality
regularizers:
    - DecorrelatorPhiRegularizer:
        name: decorrelation_phi
        topic_names: specific_topics
        class_ids: {modality_list}
    - SmoothSparsePhiRegularizer:
        name: smooth_phi_bcg
        topic_names: background_topics
        class_ids: {modality_list}
        tau: 0.1
        relative: true
    - SmoothSparseThetaRegularizer:
        name: smooth_theta_bcg
        topic_names: background_topics
        tau: 0.1
        relative: true
scores:
    - BleiLaffertyScore:
        num_top_tokens: 30
model:
    dataset_path: {dataset_path}
    {dictionary_filter_parameters}
    modalities_to_use: {modality_list}
    main_modality: '{main_modality}'

stages:
- RegularizersModifierCube:
    num_iter: {num_iter}
    reg_search: add
    regularizer_parameters:
        name: decorrelation_phi
    selection:
        - PerplexityScore@all < 1.05 * MINIMUM(PerplexityScore@all) and BleiLaffertyScore -> max
    strategy: PerplexityStrategy
    # the parameters of this strategy are intended to be tuned by the user
    strategy_params:
        start_point: 0
        step: 0.01
        max_len: 50
    tracked_score_function: PerplexityScore@all
    verbose: false
    use_relative_coefficients: true
'''

ONE_CONFIG_INDENT = 4 * ' '


class BaselineRecipe(BaseRecipe):
    """
    Class for creating the baseline recipe and
    unifying the recipe interface
    """
    def __init__(self):
        super().__init__(recipe_template=ARTM_baseline_template)

    def format_recipe(
        self,
        dataset_path: str,
        dictionary_filter_parameters: dict = None,
        modality_list: List[str] = None,
        topic_number: int = 20,
        background_topic_number: int = 1,
        num_iter: int = 20,
    ):
        if modality_list is None:
            modality_list = list(Dataset(dataset_path).get_possible_modalities())

        specific_topics = [f'topic_{i}' for i in range(topic_number)]
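        # Background topic numbering continues after the specific topic indices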
        background_topics = [f'bcg_{i}' for i in range(
            len(specific_topics), len(specific_topics) + background_topic_number)]

        if dictionary_filter_parameters is None:
            dictionary_filter_parameters = dict()

        dictionary_filter_parameters_as_yml = self._format_dictionary_filter_parameters(
            dictionary_filter_parameters,
            indent=2 * ONE_CONFIG_INDENT,
        )

        self._recipe = self.recipe_template.format(
            dataset_path=dataset_path,
            dictionary_filter_parameters=dictionary_filter_parameters_as_yml,
            modality_list=modality_list,
            main_modality=modality_list[0],
            specific_topics=specific_topics,
            background_topics=background_topics,
            num_iter=num_iter,
        )

        return self._recipe
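
For orientation, here is a minimal usage sketch of this recipe. The dataset path is a placeholder, not part of the library; with the default modality_list=None the dataset is actually opened to detect its modalities, so any file accepted by Dataset works.

from topicnet.cooking_machine.recipes.artm_baseline_pipeline import BaselineRecipe

recipe = BaselineRecipe()
config_yaml = recipe.format_recipe(
    dataset_path='/data/my_corpus.csv',  # placeholder path
    topic_number=20,
    background_topic_number=1,
)
print(config_yaml)  # the YAML template above with all placeholders filled in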

Classes

class BaselineRecipe

Class for creating the baseline recipe and unifying the recipe interface

Source code
class BaselineRecipe(BaseRecipe):
    """
    Class for creating the baseline recipe and
    unifying the recipe interface
    """
    def __init__(self):
        super().__init__(recipe_template=ARTM_baseline_template)

    def format_recipe(
        self,
        dataset_path: str,
        dictionary_filter_parameters: dict = None,
        modality_list: List[str] = None,
        topic_number: int = 20,
        background_topic_number: int = 1,
        num_iter: int = 20,
    ):
        if modality_list is None:
            modality_list = list(Dataset(dataset_path).get_possible_modalities())

        specific_topics = [f'topic_{i}' for i in range(topic_number)]
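        # Background topic numbering continues after the specific topic indices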
        background_topics = [f'bcg_{i}' for i in range(
            len(specific_topics), len(specific_topics) + background_topic_number)]

        if dictionary_filter_parameters is None:
            dictionary_filter_parameters = dict()

        dictionary_filter_parameters_as_yml = self._format_dictionary_filter_parameters(
            dictionary_filter_parameters,
            indent=2 * ONE_CONFIG_INDENT,
        )

        self._recipe = self.recipe_template.format(
            dataset_path=dataset_path,
            dictionary_filter_parameters=dictionary_filter_parameters_as_yml,
            modality_list=modality_list,
            main_modality=modality_list[0],
            specific_topics=specific_topics,
            background_topics=background_topics,
            num_iter=num_iter,
        )

        return self._recipe
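
A sketch of a more customized call. The modality names and the dictionary filter keys (min_df, max_df_rate) are assumptions following BigARTM's Dictionary.filter keywords rather than values prescribed by this module:

from topicnet.cooking_machine.recipes.artm_baseline_pipeline import BaselineRecipe

recipe = BaselineRecipe()
config_yaml = recipe.format_recipe(
    dataset_path='/data/my_corpus.csv',            # placeholder path
    modality_list=['@lemmatized', '@bigram'],      # example modalities; the first one becomes main_modality
    dictionary_filter_parameters={'min_df': 5, 'max_df_rate': 0.5},  # assumed BigARTM-style filter keys
    topic_number=30,
    background_topic_number=2,
)

Passing modality_list explicitly means the Dataset is not loaded just to detect modalities, and the filter parameters are rendered as YAML under the model section of the config.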

Ancestors

topicnet.cooking_machine.recipes.recipe_wrapper.BaseRecipe

Inherited members