Module topicnet.cooking_machine.recipes.exploratory_search_pipeline
Expand source code
from .recipe_wrapper import BaseRecipe
from .. import Dataset
modality_selection_template = (
'PerplexityScore{modality}'
' < 1.01 * MINIMUM(PerplexityScore{modality}) and SparsityPhiScore{modality} -> max'
)
general_selection_template = (
'PerplexityScore@all'
' < 1.01 * MINIMUM(PerplexityScore@all) and SparsityPhiScore{modality} -> max'
)
exploratory_search_template = '''
# This config follows a strategy described in the article
# Multi-objective Topic Modeling for Exploratory Search in Tech News
# by Anastasya Yanina, Lev Golitsyn and Konstantin Vorontsov, Jan 2018
# Use .format(modality=modality, dataset_path=dataset_path,
# specific_topics=specific_topics, background_topics=background_topics)
# when loading the recipe to adjust for your dataset
# If you have more than one modaity you want to use, we recommend employing
# more advanced MultimodalSearchRecipe from multimodal_exploratory_search_pipeline instead
topics:
# Describes number of model topics, in the actuall article 200 topics were found to be optimal
specific_topics: {{specific_topics}}
background_topics: {{background_topics}}
regularizers:
- DecorrelatorPhiRegularizer:
name: decorrelation_phi_{{modality}}
topic_names: specific_topics
tau: 1
class_ids: ['{{modality}}']
- SmoothSparsePhiRegularizer:
name: smooth_phi_{{modality}}
topic_names: specific_topics
tau: 1
class_ids: ['{{modality}}']
- SmoothSparseThetaRegularizer:
name: sparse_theta
topic_names: specific_topics
tau: 1
model:
dataset_path: {{dataset_path}}
modalities_to_use: ['{{modality}}']
main_modality: '{{modality}}'
stages:
# repeat the following two cubes for every modality in the dataset
- RegularizersModifierCube:
num_iter: 8
reg_search: mul
regularizer_parameters:
name: decorrelation_phi_{{modality}}
selection:
- {0}
strategy: PerplexityStrategy
strategy_params:
start_point: 100000
step: 10
max_len: 6
tracked_score_function: PerplexityScore@all
verbose: false
use_relative_coefficients: false
- RegularizersModifierCube:
num_iter: 8
reg_search: add
regularizer_parameters:
name: smooth_phi_{{modality}}
selection:
- {0}
strategy: PerplexityStrategy
strategy_params:
start_point: 0.25
step: 0.25
max_len: 6
tracked_score_function: PerplexityScore{{modality}}
verbose: false
use_relative_coefficients: false
#last cube is independent of modalities and can be used only once
- RegularizersModifierCube:
num_iter: 8
reg_search: add
regularizer_parameters:
name: sparse_theta
selection:
- {1}
strategy: PerplexityStrategy
strategy_params:
start_point: -0.5
step: -0.5
max_len: 6
tracked_score_function: PerplexityScore@all
verbose: false
use_relative_coefficients: false
'''.format(modality_selection_template, general_selection_template)
class SearchRecipe(BaseRecipe):
"""
Class for baseline recipe creation and
unification of recipe interface
"""
def __init__(self):
super().__init__(recipe_template=exploratory_search_template)
def format_recipe(
self,
dataset_path: str,
modality: str = None,
topic_number: int = 20,
background_topic_number: int = 1,
):
if modality is None:
modality = list(Dataset(dataset_path).get_possible_modalities())[0]
specific_topics = [f'topic_{i}' for i in range(topic_number)]
background_topics = [f'bcg_{i}' for i in range(
len(specific_topics), len(specific_topics) + background_topic_number)]
self._recipe = self.recipe_template.format(
dataset_path=dataset_path,
modality=modality,
specific_topics=specific_topics,
background_topics=background_topics,
)
return self._recipe
Classes
class SearchRecipe
-
Class for baseline recipe creation and unification of recipe interface
Expand source code
class SearchRecipe(BaseRecipe): """ Class for baseline recipe creation and unification of recipe interface """ def __init__(self): super().__init__(recipe_template=exploratory_search_template) def format_recipe( self, dataset_path: str, modality: str = None, topic_number: int = 20, background_topic_number: int = 1, ): if modality is None: modality = list(Dataset(dataset_path).get_possible_modalities())[0] specific_topics = [f'topic_{i}' for i in range(topic_number)] background_topics = [f'bcg_{i}' for i in range( len(specific_topics), len(specific_topics) + background_topic_number)] self._recipe = self.recipe_template.format( dataset_path=dataset_path, modality=modality, specific_topics=specific_topics, background_topics=background_topics, ) return self._recipe
Ancestors
Inherited members