Source code for fairdo.optimize.operators.selection

"""
This module implements various selection methods used in genetic algorithms.
In multi objective optimization, the selection methods are used to select the parents based on the Pareto front
and it is assumed that the fitness values are to be **minimized**.
In single objective optimization, the selection methods are used to select the parents based on the fitness values
and it is assumed that the fitness values are to be **maximized**.

These methods are used to select the individuals from the current generation that will be used to produce the offspring
for the next generation.
Each function in this module takes a population of individuals and their fitness values as input,
and returns a subset of the population to be used as parents for the next generation and the parents' fitness.
The number of parents to select can be specified by the user.

These selection methods are based on the works of the following references:

Goldberg, D. E. (1989). Genetic Algorithms in Search, Optimization, and Machine Learning. Addison-Wesley.

Baker, J. E. (1985). Adaptive selection methods for genetic algorithms.
Proceedings of an International Conference on Genetic Algorithms and their Applications.

Whitley, D. (1989). The GENITOR algorithm and selection pressure: Why rank-based allocation of
reproductive trials is best. Proceedings of the Third International Conference on Genetic Algorithms.
"""

import numpy as np

# from fairdo.optimize.multi import crowding_distance # circular import error


[docs]def elitist_selection_multi(population, fitness_values, fronts_lengths, num_parents=2, tournament_size=3):
    """
    Randomly selects from the first front.

    Parameters
    ----------
    population: ndarray, shape (n, d)
        Population of individuals.
    fitness_values: ndarray, shape (n, m)
        Fitness of each individual.
    fronts_lengths: list of int
        Lengths of each front.
    num_parents: int
        Number of parents to select.
    tournament_size: int
        Number of individuals participating in each tournament.

    Returns
    -------
    parents: ndarray, shape (num_parents, d)
        Selected parents.
    fitness: ndarray, shape (num_parents,)
        Fitness of the selected parents.
    """
    if population.shape[0] < tournament_size:
        raise ValueError("Tournament size cannot be larger than the population size.")
    if len(population.shape) != 2:
        population = population.reshape(-1, 1)

    if fronts_lengths[0] > num_parents:
        # Select the first front and calculate crowding distance
        crowding_dists = crowding_distance(fitness_values[:fronts_lengths[0]])
        # Select the individuals with the largest crowding distance
        # elitist_indices = np.argsort(crowding_dists)[-num_parents:]
        elitist_indices = np.argpartition(crowding_dists, -num_parents)[-num_parents:]
    else:
        # If the first front is smaller than the number of parents, select all first num_parents individuals
        elitist_indices = np.arange(num_parents)

    parents = population[elitist_indices]

    return parents


[docs]def tournament_selection_multi(population, fitness_values, fronts_lengths, num_parents=2, tournament_size=3):
    """
    Select parents using Tournament Selection.
    This method randomly selects a few individuals and chooses the best out of them to become a parent.
    The process is repeated until the desired number of parents are selected.

    Parameters
    ----------
    population: ndarray, shape (n, d)
        Population of individuals.
    fitness_values: ndarray, shape (n, m)
        Fitness of each individual.
    fronts_lengths: list of int
        Lengths of each front.
    num_parents: int
        Number of parents to select.
    tournament_size: int
        Number of individuals participating in each tournament.

    Returns
    -------
    parents: ndarray, shape (num_parents, d)
        Selected parents.
    fitness: ndarray, shape (num_parents,)
        Fitness of the selected parents.
    """
    if population.shape[0] < tournament_size:
        raise ValueError("Tournament size cannot be larger than the population size.")
    if len(population.shape) != 2:
        population = population.reshape(-1, 1)
    
    # Get the lengths of individual arrays in the original list
    cum_fronts_lengths = np.cumsum(fronts_lengths)

    # Initialize parents
    parents = np.empty((num_parents, population.shape[1]))

    for i in range(num_parents):
        # tournament_candidates = np.sort(np.random.choice(len(population), size=tournament_size, replace=False))
        tournament_candidates = np.random.choice(len(population), size=tournament_size, replace=False)

        # Lower front wins
        dominating_mask = tournament_candidates < np.broadcast_to(cum_fronts_lengths, (tournament_size, len(cum_fronts_lengths))).T
        # Each tournament candidate is counted how many fronts + 1 it dominates 
        dominating_counts = np.sum(dominating_mask, axis=0)
        # Select candidates with the most dominating counts
        best_candidates = np.where(np.max(dominating_counts) == dominating_counts)[0]
        
        # Select the winner
        if len(best_candidates) == 1:
            # If there is only one candidate in the front, select it
            winner_index = tournament_candidates[best_candidates[0]]
        else:
            # If there are multiple candidates in the same front, select one with the largest crowding distance
            current_front = len(fronts_lengths) - np.max(dominating_counts)
            if current_front == 0:
                crowding_dists = crowding_distance(fitness_values[:fronts_lengths[current_front]])
                # Select the candidate from the tournament with the largest crowding distance
                best_tournament_candidate_in_current_front = np.argmax(crowding_dists[tournament_candidates[best_candidates]])
                # Select the real index of the candidate in the population
                winner_index = tournament_candidates[best_candidates][best_tournament_candidate_in_current_front]
            else:
                # Calculate the crowding distance for the current front
                crowding_dists = crowding_distance(fitness_values[cum_fronts_lengths[current_front - 1]:cum_fronts_lengths[current_front]])
                # Select the best candidates from the tournament and adjust position for the current front
                tournament_candidates_adjusted = tournament_candidates[best_candidates] - cum_fronts_lengths[current_front - 1]
                # Select the candidate from the tournament with the largest crowding distance
                best_tournament_candidate_in_current_front = np.argmax(crowding_dists[tournament_candidates_adjusted])
                # Select the real index of the candidate in the population
                winner_index = tournament_candidates_adjusted[best_tournament_candidate_in_current_front] + cum_fronts_lengths[current_front - 1]

        parents[i, :] = population[winner_index, :]
    
    return parents


[docs]def elitist_selection(population, fitness, num_parents=2):
    """
    This function selects the fittest (max fitness) parents from the population.

    Parameters
    ----------
    population: ndarray, shape (n, d)
        population of individuals
    fitness: ndarray, shape (n,)
        fitness of each individual
    num_parents: int
        number of parents to select

    Returns
    -------
    parents: ndarray, shape (num_parents, d)
    fitness: ndarray, shape (num_parents,)
    """
    # select the best individuals from the population to be parents
    idx = np.argpartition(fitness, -num_parents)[-num_parents:]
    parents = population[idx[-num_parents:]]
    parents_fitness = fitness[idx[-num_parents:]]
    return parents, parents_fitness


[docs]def tournament_selection(population, fitness, num_parents=2, tournament_size=3):
    """
    Select parents using Tournament Selection.
    This method randomly selects a few individuals and chooses the best out of them to become a parent.
    The process is repeated until the desired number of parents are selected.

    Parameters
    ----------
    population: ndarray, shape (n, d)
        Population of individuals.
    fitness: ndarray, shape (n,)
        Fitness of each individual.
    num_parents: int
        Number of parents to select.
    tournament_size: int
        Number of individuals participating in each tournament.

    Returns
    -------
    parents: ndarray, shape (num_parents, d)
        Selected parents.
    fitness: ndarray, shape (num_parents,)
        Fitness of the selected parents.
    """
    if population.shape[0] < tournament_size:
        raise ValueError("Tournament size cannot be larger than the population size.")
    if len(population.shape) != 2:
        population = population.reshape(-1, 1)

    parents = np.empty((num_parents, population.shape[1]))
    parents_fitness = np.empty(num_parents)
    for i in range(num_parents):
        tournament_indices = np.random.randint(len(population), size=tournament_size)
        tournament_fitnesses = fitness[tournament_indices]
        winner_index = tournament_indices[np.argmax(tournament_fitnesses)]
        parents[i, :] = population[winner_index, :]
        parents_fitness[i] = fitness[winner_index]
    return parents, parents_fitness


[docs]def roulette_wheel_selection(population, fitness, num_parents=2):
    """
    Select parents using Roulette Wheel Selection. The probability of selecting an individual is proportional to its
    fitness. The higher the fitness, the higher the chance of being selected.
    This function assumes that the fitness is non-negative.

    Parameters
    ----------
    population: ndarray, shape (n, d)
        Population of individuals.
    fitness: ndarray, shape (n,)
        Fitness of each individual.
    num_parents: int
        Number of parents to select.

    Returns
    -------
    parents: ndarray, shape (num_parents, d)
        Selected parents.
    fitness: ndarray, shape (num_parents,)
        Fitness of the selected parents.

    Notes
    -----
    This function normally assumes that the fitness is non-negative.
    However, if the fitness is negative, then the fitness values are shifted to be non-negative.

    References
    ----------
    This function is based on the work:
    Holland, J. H. (1975). Adaptation in natural and artificial systems: An introductory analysis with applications
    to biology, control, and artificial intelligence. The Michigan Press.
    """
    # Check if the fitness is non-negative
    if np.any(fitness < 0):
        # shift the fitness values to be non-negative
        # also prevent the fitness values from being zero to avoid division by zero
        fitness = fitness - 2*np.min(fitness)
    fitness_sum = np.sum(fitness)
    selection_probs = fitness / fitness_sum
    parents_idx = np.random.choice(len(population), size=num_parents, p=selection_probs)
    return population[parents_idx], fitness[parents_idx]


[docs]def stochastic_universal_sampling(population, fitness, num_parents=2):
    """
    This function selects parents from the population using the Stochastic Universal Sampling (SUS) method.

    Parameters
    ----------
    population: ndarray, shape (n, d)
        The population of individuals.
    fitness: ndarray, shape (n,)
        The fitness of each individual in the population.
    num_parents: int
        The number of parents to select.

    Returns
    -------
    parents: ndarray, shape (num_parents, d)
        The selected parents.
    fitness: ndarray, shape (num_parents,)
        The fitness of the selected parents.

    Notes
    -----
    This function normally assumes that the fitness is non-negative.
    However, if the fitness is negative, then the fitness values are shifted to be non-negative.

    References
    ----------
    This function is based on the work of Baker (1987) in the following paper:
    Baker, J. E. (1987). "Reducing Bias and Inefficiency in the Selection Algorithm".
    Proceedings of the Second International Conference on Genetic Algorithms and Their Application.
    """
    # Check if the population is a 2D array
    if len(population.shape) != 2:
        population = population.reshape(-1, 1)
    # Check if the fitness is non-negative
    if np.any(fitness < 0):
        # shift the fitness values to be non-negative
        # also prevent the fitness values from being zero to avoid division by zero
        fitness = fitness - 2*np.min(fitness)
    # Normalize the fitness values
    fitness_sum = np.sum(fitness)
    normalized_fitness = fitness / fitness_sum

    # Calculate the distance between the pointers
    distance = 1.0 / num_parents

    # Initialize the start of the pointers
    start = np.random.uniform(distance)

    # Initialize the parents
    parents = np.empty((num_parents, population.shape[1]))
    parents_fitness = np.empty(num_parents)
    # Perform the SUS
    for i in range(num_parents):
        pointer = start + i * distance
        sum_fitness = 0
        for j in range(len(normalized_fitness)):
            sum_fitness += normalized_fitness[j]
            if sum_fitness >= pointer:
                parents[i] = population[j]
                parents_fitness[i] = fitness[j]
                break

    return parents, parents_fitness


[docs]def rank_selection(population, fitness, num_parents=2):
    """
    This function selects parents from the population based on their rank.
    The rank is determined by the fitness of the individual.
    The higher the fitness, the higher the rank.
    The probability of selecting an individual is proportional to its rank.

    Parameters
    ----------
    population: ndarray, shape (n, d)
        The population of individuals.
    fitness: ndarray, shape (n,)
        The fitness of each individual.
    num_parents: int
        The number of parents to select.

    Returns
    -------
    parents: ndarray, shape (num_parents, d)
        The selected parents.
    fitness: ndarray, shape (num_parents,)
        The fitness of the selected parents.
    """
    # Rank individuals based on fitness
    ranks = np.argsort(np.argsort(fitness))

    # Calculate selection probabilities based on rank
    total_ranks = np.sum(ranks)
    selection_probabilities = ranks / total_ranks

    # Select parents based on selection probabilities
    parent_indices = np.random.choice(len(population), size=num_parents, p=selection_probabilities)

    parents = population[parent_indices]
    fitness = fitness[parent_indices]

    return parents, fitness


[docs]def crowding_distance(fitness_values):
    """
    Calculate crowding distance for each individual in the population.

    Parameters
    ----------
    fitness_values : ndarray, shape (N, num_fitness_functions)
        Fitness values of the population.

    Returns
    -------
    crowding_distances : ndarray, shape (N,)
        Crowding distances for each individual.
    """
    pop_size, num_objectives = fitness_values.shape
    crowding_distances = np.zeros(pop_size)

    for obj_index in range(num_objectives):
        # Sort the fitness values based on the current objective in ascending order. Best values first.
        sorted_indices = np.argsort(fitness_values[:, obj_index])
        crowding_distances[sorted_indices[0]] = np.inf
        crowding_distances[sorted_indices[-1]] = np.inf

        f_max = fitness_values[sorted_indices[-1], obj_index]
        f_min = fitness_values[sorted_indices[0], obj_index]

        if f_max == f_min:
            continue

        # Crowding distance is the sum of the distances to the previous and next individuals.
        # It geometrically describes the sum of the lengths of a cuboid.
        for i in range(1, pop_size - 1):
            crowding_distances[sorted_indices[i]] += (fitness_values[sorted_indices[i + 1], obj_index]
                                                      - fitness_values[sorted_indices[i - 1], obj_index])/ (f_max - f_min)
    return crowding_distances