"""
This module implements various selection methods used in genetic algorithms.
In multi objective optimization, the selection methods are used to select the parents based on the Pareto front
and it is assumed that the fitness values are to be **minimized**.
In single objective optimization, the selection methods are used to select the parents based on the fitness values
and it is assumed that the fitness values are to be **maximized**.
These methods are used to select the individuals from the current generation that will be used to produce the offspring
for the next generation.
Each function in this module takes a population of individuals and their fitness values as input,
and returns a subset of the population to be used as parents for the next generation and the parents' fitness.
The number of parents to select can be specified by the user.
These selection methods are based on the works of the following references:
Goldberg, D. E. (1989). Genetic Algorithms in Search, Optimization, and Machine Learning. Addison-Wesley.
Baker, J. E. (1985). Adaptive selection methods for genetic algorithms.
Proceedings of an International Conference on Genetic Algorithms and their Applications.
Whitley, D. (1989). The GENITOR algorithm and selection pressure: Why rank-based allocation of
reproductive trials is best. Proceedings of the Third International Conference on Genetic Algorithms.
"""
import numpy as np
# from fairdo.optimize.multi import crowding_distance # circular import error
[docs]def elitist_selection_multi(population, fitness_values, fronts_lengths, num_parents=2, tournament_size=3):
"""
Randomly selects from the first front.
Parameters
----------
population: ndarray, shape (n, d)
Population of individuals.
fitness_values: ndarray, shape (n, m)
Fitness of each individual.
fronts_lengths: list of int
Lengths of each front.
num_parents: int
Number of parents to select.
tournament_size: int
Number of individuals participating in each tournament.
Returns
-------
parents: ndarray, shape (num_parents, d)
Selected parents.
fitness: ndarray, shape (num_parents,)
Fitness of the selected parents.
"""
if population.shape[0] < tournament_size:
raise ValueError("Tournament size cannot be larger than the population size.")
if len(population.shape) != 2:
population = population.reshape(-1, 1)
if fronts_lengths[0] > num_parents:
# Select the first front and calculate crowding distance
crowding_dists = crowding_distance(fitness_values[:fronts_lengths[0]])
# Select the individuals with the largest crowding distance
# elitist_indices = np.argsort(crowding_dists)[-num_parents:]
elitist_indices = np.argpartition(crowding_dists, -num_parents)[-num_parents:]
else:
# If the first front is smaller than the number of parents, select all first num_parents individuals
elitist_indices = np.arange(num_parents)
parents = population[elitist_indices]
return parents
[docs]def tournament_selection_multi(population, fitness_values, fronts_lengths, num_parents=2, tournament_size=3):
"""
Select parents using Tournament Selection.
This method randomly selects a few individuals and chooses the best out of them to become a parent.
The process is repeated until the desired number of parents are selected.
Parameters
----------
population: ndarray, shape (n, d)
Population of individuals.
fitness_values: ndarray, shape (n, m)
Fitness of each individual.
fronts_lengths: list of int
Lengths of each front.
num_parents: int
Number of parents to select.
tournament_size: int
Number of individuals participating in each tournament.
Returns
-------
parents: ndarray, shape (num_parents, d)
Selected parents.
fitness: ndarray, shape (num_parents,)
Fitness of the selected parents.
"""
if population.shape[0] < tournament_size:
raise ValueError("Tournament size cannot be larger than the population size.")
if len(population.shape) != 2:
population = population.reshape(-1, 1)
# Get the lengths of individual arrays in the original list
cum_fronts_lengths = np.cumsum(fronts_lengths)
# Initialize parents
parents = np.empty((num_parents, population.shape[1]))
for i in range(num_parents):
# tournament_candidates = np.sort(np.random.choice(len(population), size=tournament_size, replace=False))
tournament_candidates = np.random.choice(len(population), size=tournament_size, replace=False)
# Lower front wins
dominating_mask = tournament_candidates < np.broadcast_to(cum_fronts_lengths, (tournament_size, len(cum_fronts_lengths))).T
# Each tournament candidate is counted how many fronts + 1 it dominates
dominating_counts = np.sum(dominating_mask, axis=0)
# Select candidates with the most dominating counts
best_candidates = np.where(np.max(dominating_counts) == dominating_counts)[0]
# Select the winner
if len(best_candidates) == 1:
# If there is only one candidate in the front, select it
winner_index = tournament_candidates[best_candidates[0]]
else:
# If there are multiple candidates in the same front, select one with the largest crowding distance
current_front = len(fronts_lengths) - np.max(dominating_counts)
if current_front == 0:
crowding_dists = crowding_distance(fitness_values[:fronts_lengths[current_front]])
# Select the candidate from the tournament with the largest crowding distance
best_tournament_candidate_in_current_front = np.argmax(crowding_dists[tournament_candidates[best_candidates]])
# Select the real index of the candidate in the population
winner_index = tournament_candidates[best_candidates][best_tournament_candidate_in_current_front]
else:
# Calculate the crowding distance for the current front
crowding_dists = crowding_distance(fitness_values[cum_fronts_lengths[current_front - 1]:cum_fronts_lengths[current_front]])
# Select the best candidates from the tournament and adjust position for the current front
tournament_candidates_adjusted = tournament_candidates[best_candidates] - cum_fronts_lengths[current_front - 1]
# Select the candidate from the tournament with the largest crowding distance
best_tournament_candidate_in_current_front = np.argmax(crowding_dists[tournament_candidates_adjusted])
# Select the real index of the candidate in the population
winner_index = tournament_candidates_adjusted[best_tournament_candidate_in_current_front] + cum_fronts_lengths[current_front - 1]
parents[i, :] = population[winner_index, :]
return parents
[docs]def elitist_selection(population, fitness, num_parents=2):
"""
This function selects the fittest (max fitness) parents from the population.
Parameters
----------
population: ndarray, shape (n, d)
population of individuals
fitness: ndarray, shape (n,)
fitness of each individual
num_parents: int
number of parents to select
Returns
-------
parents: ndarray, shape (num_parents, d)
fitness: ndarray, shape (num_parents,)
"""
# select the best individuals from the population to be parents
idx = np.argpartition(fitness, -num_parents)[-num_parents:]
parents = population[idx[-num_parents:]]
parents_fitness = fitness[idx[-num_parents:]]
return parents, parents_fitness
[docs]def tournament_selection(population, fitness, num_parents=2, tournament_size=3):
"""
Select parents using Tournament Selection.
This method randomly selects a few individuals and chooses the best out of them to become a parent.
The process is repeated until the desired number of parents are selected.
Parameters
----------
population: ndarray, shape (n, d)
Population of individuals.
fitness: ndarray, shape (n,)
Fitness of each individual.
num_parents: int
Number of parents to select.
tournament_size: int
Number of individuals participating in each tournament.
Returns
-------
parents: ndarray, shape (num_parents, d)
Selected parents.
fitness: ndarray, shape (num_parents,)
Fitness of the selected parents.
"""
if population.shape[0] < tournament_size:
raise ValueError("Tournament size cannot be larger than the population size.")
if len(population.shape) != 2:
population = population.reshape(-1, 1)
parents = np.empty((num_parents, population.shape[1]))
parents_fitness = np.empty(num_parents)
for i in range(num_parents):
tournament_indices = np.random.randint(len(population), size=tournament_size)
tournament_fitnesses = fitness[tournament_indices]
winner_index = tournament_indices[np.argmax(tournament_fitnesses)]
parents[i, :] = population[winner_index, :]
parents_fitness[i] = fitness[winner_index]
return parents, parents_fitness
[docs]def roulette_wheel_selection(population, fitness, num_parents=2):
"""
Select parents using Roulette Wheel Selection. The probability of selecting an individual is proportional to its
fitness. The higher the fitness, the higher the chance of being selected.
This function assumes that the fitness is non-negative.
Parameters
----------
population: ndarray, shape (n, d)
Population of individuals.
fitness: ndarray, shape (n,)
Fitness of each individual.
num_parents: int
Number of parents to select.
Returns
-------
parents: ndarray, shape (num_parents, d)
Selected parents.
fitness: ndarray, shape (num_parents,)
Fitness of the selected parents.
Notes
-----
This function normally assumes that the fitness is non-negative.
However, if the fitness is negative, then the fitness values are shifted to be non-negative.
References
----------
This function is based on the work:
Holland, J. H. (1975). Adaptation in natural and artificial systems: An introductory analysis with applications
to biology, control, and artificial intelligence. The Michigan Press.
"""
# Check if the fitness is non-negative
if np.any(fitness < 0):
# shift the fitness values to be non-negative
# also prevent the fitness values from being zero to avoid division by zero
fitness = fitness - 2*np.min(fitness)
fitness_sum = np.sum(fitness)
selection_probs = fitness / fitness_sum
parents_idx = np.random.choice(len(population), size=num_parents, p=selection_probs)
return population[parents_idx], fitness[parents_idx]
[docs]def stochastic_universal_sampling(population, fitness, num_parents=2):
"""
This function selects parents from the population using the Stochastic Universal Sampling (SUS) method.
Parameters
----------
population: ndarray, shape (n, d)
The population of individuals.
fitness: ndarray, shape (n,)
The fitness of each individual in the population.
num_parents: int
The number of parents to select.
Returns
-------
parents: ndarray, shape (num_parents, d)
The selected parents.
fitness: ndarray, shape (num_parents,)
The fitness of the selected parents.
Notes
-----
This function normally assumes that the fitness is non-negative.
However, if the fitness is negative, then the fitness values are shifted to be non-negative.
References
----------
This function is based on the work of Baker (1987) in the following paper:
Baker, J. E. (1987). "Reducing Bias and Inefficiency in the Selection Algorithm".
Proceedings of the Second International Conference on Genetic Algorithms and Their Application.
"""
# Check if the population is a 2D array
if len(population.shape) != 2:
population = population.reshape(-1, 1)
# Check if the fitness is non-negative
if np.any(fitness < 0):
# shift the fitness values to be non-negative
# also prevent the fitness values from being zero to avoid division by zero
fitness = fitness - 2*np.min(fitness)
# Normalize the fitness values
fitness_sum = np.sum(fitness)
normalized_fitness = fitness / fitness_sum
# Calculate the distance between the pointers
distance = 1.0 / num_parents
# Initialize the start of the pointers
start = np.random.uniform(distance)
# Initialize the parents
parents = np.empty((num_parents, population.shape[1]))
parents_fitness = np.empty(num_parents)
# Perform the SUS
for i in range(num_parents):
pointer = start + i * distance
sum_fitness = 0
for j in range(len(normalized_fitness)):
sum_fitness += normalized_fitness[j]
if sum_fitness >= pointer:
parents[i] = population[j]
parents_fitness[i] = fitness[j]
break
return parents, parents_fitness
[docs]def rank_selection(population, fitness, num_parents=2):
"""
This function selects parents from the population based on their rank.
The rank is determined by the fitness of the individual.
The higher the fitness, the higher the rank.
The probability of selecting an individual is proportional to its rank.
Parameters
----------
population: ndarray, shape (n, d)
The population of individuals.
fitness: ndarray, shape (n,)
The fitness of each individual.
num_parents: int
The number of parents to select.
Returns
-------
parents: ndarray, shape (num_parents, d)
The selected parents.
fitness: ndarray, shape (num_parents,)
The fitness of the selected parents.
"""
# Rank individuals based on fitness
ranks = np.argsort(np.argsort(fitness))
# Calculate selection probabilities based on rank
total_ranks = np.sum(ranks)
selection_probabilities = ranks / total_ranks
# Select parents based on selection probabilities
parent_indices = np.random.choice(len(population), size=num_parents, p=selection_probabilities)
parents = population[parent_indices]
fitness = fitness[parent_indices]
return parents, fitness
[docs]def crowding_distance(fitness_values):
"""
Calculate crowding distance for each individual in the population.
Parameters
----------
fitness_values : ndarray, shape (N, num_fitness_functions)
Fitness values of the population.
Returns
-------
crowding_distances : ndarray, shape (N,)
Crowding distances for each individual.
"""
pop_size, num_objectives = fitness_values.shape
crowding_distances = np.zeros(pop_size)
for obj_index in range(num_objectives):
# Sort the fitness values based on the current objective in ascending order. Best values first.
sorted_indices = np.argsort(fitness_values[:, obj_index])
crowding_distances[sorted_indices[0]] = np.inf
crowding_distances[sorted_indices[-1]] = np.inf
f_max = fitness_values[sorted_indices[-1], obj_index]
f_min = fitness_values[sorted_indices[0], obj_index]
if f_max == f_min:
continue
# Crowding distance is the sum of the distances to the previous and next individuals.
# It geometrically describes the sum of the lengths of a cuboid.
for i in range(1, pop_size - 1):
crowding_distances[sorted_indices[i]] += (fitness_values[sorted_indices[i + 1], obj_index]
- fitness_values[sorted_indices[i - 1], obj_index])/ (f_max - f_min)
return crowding_distances