Source code for autogl.module.ensemble.voting

"""
Ensemble module.
"""

from collections import Counter
import numpy as np

from .base import BaseEnsembler
from . import register_ensembler
from ...utils import get_logger

VOTE_LOGGER = get_logger("voting")


@register_ensembler("voting")
class Voting(BaseEnsembler):
    """
    An ensembler using the voting method.

    Parameters
    ----------
    ensemble_size : int
        The number of base models selected by the voter. The same model may
        be selected more than once. Default: ``10``.
    """

    def __init__(self, ensemble_size=10, *args, **kwargs):
        super().__init__()
        self.ensemble_size = ensemble_size
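
    # NOTE: ``ensemble_size`` counts greedy selection rounds, not distinct
    # models; a strong base model may be picked in several rounds and thereby
    # receive a larger voting weight (see ``_specify_weights`` below).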

    def fit(self, predictions, label, identifiers, feval, *args, **kwargs):
        """
        Fit the ensembler to the given data using Rich Caruana's ensemble
        selection method.

        Parameters
        ----------
        predictions : a list of np.ndarray
            Predictions of base learners (corresponding to the elements in
            identifiers).
        label : a list of int
            Class labels of instances.
        identifiers : a list of str
            The names of base models.
        feval : (a list of) instances in autogl.module.train.evaluate
            Performance evaluation metrics.

        Returns
        -------
        (a list of) ``float``
            The validation performance of the final voter.
        """
        self._re_initialize(identifiers, len(predictions))
        if not isinstance(feval, list):
            feval = [feval]
        weights = self._specify_weights(predictions, label, feval)
        self.model_to_weight = dict(zip(self.identifiers, weights))
        VOTE_LOGGER.debug("identifiers: %s, weights: %s", self.identifiers, weights)
        training_score = self._eval(predictions, label, feval)
        return training_score

    def ensemble(self, predictions, identifiers, *args, **kwargs):
        """
        Ensemble the predictions of base models.

        Parameters
        ----------
        predictions : a list of np.ndarray
            Predictions of base learners (corresponding to the elements in
            identifiers).
        identifiers : a list of str
            The names of base models.

        Returns
        -------
        np.ndarray
            The ensembled predictions.
        """
        weights = np.zeros([len(predictions)])
        for idx, model in enumerate(identifiers):
            weights[idx] = self.model_to_weight[model]
        weights = weights / np.sum(weights)
        return np.average(predictions, axis=0, weights=weights)
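
    # Usage sketch (illustrative only; ``val_preds``, ``val_labels`` and
    # ``Acc`` are hypothetical names, with ``Acc`` standing for any evaluator
    # from ``autogl.module.train.evaluate``):
    #
    #     voter = Voting(ensemble_size=10)
    #     voter.fit(val_preds, val_labels, ["gcn", "gat"], Acc())
    #     test_pred = voter.ensemble(test_preds, ["gcn", "gat"])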

    def _specify_weights(self, predictions, label, feval):
        # Rich Caruana's ensemble selection: for ``ensemble_size`` rounds,
        # greedily add (with replacement) the base model whose inclusion
        # maximizes the ensemble's score on the first metric.
        ensemble_prediction = []
        combinations = []
        history = []
        for i in range(self.ensemble_size):
            eval_score_full = []
            eval_score = np.zeros([self.n_models])
            for j, pred in enumerate(predictions):
                # Tentatively add model ``j`` and score the averaged ensemble.
                ensemble_prediction.append(pred)
                pred_mean = np.mean(ensemble_prediction, axis=0)
                eval_score_full.append(
                    [
                        fx.evaluate(pred_mean, label)
                        * (1 if fx.is_higher_better else -1)
                        for fx in feval
                    ]
                )
                # Only the first metric drives the greedy selection.
                eval_score[j] = eval_score_full[-1][0]
                ensemble_prediction.pop()
            best_model = np.argmax(eval_score)
            ensemble_prediction.append(predictions[best_model])
            history.append(eval_score_full[best_model])
            combinations.append(best_model)
        # A model's weight is its selection frequency, normalized to sum to 1.
        frequency = Counter(combinations).most_common()
        weights = np.zeros([self.n_models])
        for model, freq in frequency:
            weights[model] = float(freq)
        weights = weights / np.sum(weights)
        return weights

    def _re_initialize(self, identifiers, n_models):
        self.identifiers = identifiers
        self.n_models = n_models

    def _eval(self, predictions, label, feval):
        pred_ensemble = self.ensemble(predictions, self.identifiers)
        return [fx.evaluate(pred_ensemble, label) for fx in feval]
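

if __name__ == "__main__":
    # Minimal, self-contained smoke test. Everything below is illustrative:
    # ``_StubAccuracy`` is a hypothetical stand-in for an evaluator from
    # ``autogl.module.train.evaluate`` and only mimics the two members the
    # voter relies on (``evaluate`` and ``is_higher_better``).
    class _StubAccuracy:
        is_higher_better = True

        def evaluate(self, prediction, label):
            # Treat each row of ``prediction`` as class probabilities.
            return float(np.mean(np.argmax(prediction, axis=1) == label))

    rng = np.random.default_rng(0)
    labels = rng.integers(0, 3, size=100)
    # Three synthetic base models; smaller noise means a stronger model.
    names = ["model_a", "model_b", "model_c"]
    preds = []
    for noise in (0.1, 0.5, 2.0):
        raw = np.eye(3)[labels] + noise * rng.random((100, 3))
        preds.append(raw / raw.sum(axis=1, keepdims=True))

    voter = Voting(ensemble_size=10)
    val_score = voter.fit(preds, labels, names, _StubAccuracy())
    print("validation score:", val_score)
    print("ensembled shape:", voter.ensemble(preds, names).shape)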