I'm pretty sure a beginner with rust, used python/pytorch before mainly for some visualizations and simple tasks (DS/ML). Now I want to improve/optimize some code to write it in more idiomatic way.
use conv::prelude::*;
use derivative::Derivative;
use rand::{distributions::Uniform, Rng};
use serde::{Deserialize, Serialize};
use std::{
collections::HashMap,
ops::{self},
vec,
};
/// Class that implements genetic optimization.
/// Whole population is made of equal-length arrays consist of 0 and 1 — so-called `bitstrings`.
/// There are 3 main functions of algorithm: selection, mutation, crossover.
/// Decode is helper for translating values to real values (since initially bitstring
/// is integer)
///
/// # Parameters
///
/// * `n_pop` — size of population
/// * `n_dim` — width of one bitstring
/// * `n_bits_per_group` — bits in each dimension
/// * `bounds` — bounds for resticting our space of search
/// * `pop` — population array, 2-dimensional
/// * `select_k` — value for tournament in `selection` part
/// * `mut_prob` — mutability probability for `mutation` part
/// * `crossover_prob` — probability for crossover
/// * `n_epochs` — number of iterations
/// * `minimize`: `bool` — flag for minimization or maximization
/// * `decoded`: `Vec<Vec<f32>>` — array of decoded values with shape (n_pop, n_dim)
/// * `scores`: `Vec<f32>` — array of scores for every object in population
/// * `score_function`: `fn(&Vec<Vec<f32>>) -> Vec<f32>` — main function for score estimation
/// * `best_scores`: `Vec<f32>` — array with history of best scores via simulation
/// * `best_decodes`: `Vec<Vec<f32>>` — array with history of best decodes via simulation
///
#[derive(Derivative, Serialize, Deserialize)]
#[derivative(Debug, Default)]
#[serde(default)]
pub struct GeneticAlgorithm {
// `derivative` crate makes possible to just use default parameters like this:
// use row from above for necessary parameter
#[derivative(Default(value = "4"))]
pub n_pop: usize,
#[derivative(Default(value = "2"))]
pub n_dim: usize,
#[derivative(Default(value = "2"))]
pub n_bits_per_group: usize,
pub bounds: Vec<Vec<f32>>,
pub pop: Vec<Vec<usize>>,
#[derivative(Default(value = "3"))]
pub select_k: usize,
#[derivative(Default(value = "0.2"))]
pub mut_prob: f32,
#[derivative(Default(value = "0.9"))]
pub crossover_prob: f32,
#[derivative(Default(value = "5"))]
pub n_epochs: usize,
#[derivative(Default(value = "true"))]
pub minimize: bool,
pub decoded: Vec<Vec<f32>>,
pub best_scores: Vec<f32>,
pub best_decodes: Vec<Vec<f32>>,
pub scores: Vec<f32>,
pub best_score: f32,
}
/// Matrix representation
impl GeneticAlgorithm {
/// Taking a binary array of arrays (or bitstrings)
/// function returns array of decoded to decimal
/// numeric system array of arrays of numers.
///
/// putting values into an interval [a, b]
/// formulae is: a_i + k*(b_i - a_i)/2^n, where a, b — borders, n — number of bits
/// Initializes population using uniform distribution
fn _pop_init(&self) -> Vec<Vec<usize>> {
let range = Uniform::from(0..2);
let mut population: Vec<Vec<usize>> = Vec::new();
for _ in 0..self.n_pop {
let values: Vec<usize> = rand::thread_rng()
.sample_iter(&range)
.take(self.n_bits_per_group * self.n_dim)
.collect();
population.push(values);
}
population
}
/// f32 vec to a const
fn _mult_to_const<L>(&self, a: &Vec<L>, constant: f32) -> Vec<f32>
where
f32: conv::ValueFrom<L>,
L: Copy,
{
a.iter()
.map(|&x| x.value_as::<f32>().unwrap() * constant)
.collect()
}
fn _add_const<L>(&self, a: &Vec<L>, constant: L) -> Vec<L>
where
L: ops::Add + Copy,
Vec<L>: FromIterator<<L as ops::Add>::Output>,
{
a.iter().map(|&x| x + constant).collect()
}
/// helper for folding values for given cost function
/// # Parameters
///
/// * `cost_function`: `fn(&Vec<Vec<f32>>) -> Vec<f32>` — function for evaluation
fn _fold_2d_function(&self, cost_function: fn(f32, f32) -> f32) -> Vec<f32> {
self.decoded
.iter()
.map(|row| {
cost_function(
row[0], //.value_as::<f32>().unwrap(),
row[1], //.value_as::<f32>().unwrap(),
)
})
.collect::<Vec<f32>>()
}
/// Conversion of bitstring to decimals
fn decode(&self) -> Vec<Vec<f32>> {
let mut decoded = vec![vec![0.; self.n_dim]; self.n_pop];
for group_num in 0..self.n_dim {
let begin_ind = group_num * self.n_bits_per_group;
let end_ind = self.n_bits_per_group * (group_num + 1);
// slicing elements from current group
let mut vec_slice = Vec::new();
for row in &self.pop {
vec_slice.push(&row[begin_ind..end_ind])
}
let slice_of_slice: &[&[usize]] = vec_slice.as_slice(); // valid slice
// transforming to decimals
let decimal_vector = slice_of_slice
.iter()
.map(|row| row.iter().fold(0, |acc, digit| (acc << 1) + digit))
.collect::<Vec<usize>>();
let denom = u32::pow(2, (self.n_bits_per_group).try_into().unwrap());
let a = self.bounds[group_num][0]; // left
let b = self.bounds[group_num][1]; // right
// formulae is: a_i + k*(b_i - a_i)/2^n, where a, b — borders, n — number of bits
let inv_denom = 1. / denom.value_as::<f32>().unwrap();
let difference = b - a;
let biased_decimals = self._mult_to_const(&decimal_vector, difference);
let mult = self._mult_to_const(&biased_decimals, inv_denom);
let temp_result = self._add_const(&mult, a);
for ind in 0..self.n_pop {
decoded[ind][group_num] = temp_result[ind]
}
}
decoded // works just fine
}
/// Searching for the best candidate pursuing to continue the population.
fn selection(&self) -> Vec<Vec<usize>> {
// take general range just one time
let range_indices = Uniform::from(0..self.n_pop);
// making rand_ids 2d matrix shape: (n_pop, select_k-1)
// rand_ids is matrix for tournament: we have a lot of rows, which are objects
// competing with each other
let rand_ids: Vec<Vec<usize>> = (0..self.n_pop)
.into_iter()
.map(|_| {
let values: Vec<usize> = rand::thread_rng()
.sample_iter(&range_indices)
.take(self.select_k - 1)
.collect();
values
})
.collect();
// searching for max in rand_ids, making it shape (n_pop)
let mut mult_if_minimize = -1.;
let optimal_ids: Vec<usize>;
if self.minimize {
// iterating over the scores of the matrix, finding the best in a row
optimal_ids = rand_ids
.iter()
.map(|row| {
*row.iter()
.min_by(|&&a, &&b| self.scores[a].total_cmp(&self.scores[b]))
.unwrap()
})
.collect();
} else {
// iterating over the scores of the matrix, finding the best in a row
mult_if_minimize = 1.;
optimal_ids = rand_ids
.iter()
.map(|row| {
*row.iter()
.max_by(|&&a, &&b| self.scores[a].total_cmp(&self.scores[b]))
.unwrap()
})
.collect();
}
// taking yet another best
let best_ids: Vec<usize> = rand::thread_rng()
.sample_iter(&range_indices)
.take(self.n_pop)
.collect();
let mut best_candidates: Vec<Vec<usize>> = Vec::new();
for ind in 0..self.n_pop {
// possibly we could omit this cycle
// since it is one more "tournament"
// (one more comparison), but using another
// level of randomness
if mult_if_minimize * self.scores[optimal_ids[ind]]
> mult_if_minimize * self.scores[best_ids[ind]]
{
best_candidates.push(self.pop[optimal_ids[ind]].clone())
} else {
best_candidates.push(self.pop[best_ids[ind]].clone())
}
}
best_candidates
}
/// Takes n parents sequentially (e.g. 30 pairs if there were 30 parents) samples n numbers of random index of bit.
/// n is parents size.
/// Swaps tails of bitstrings for pair of parents from 'k' index with given probability.
/// # Returns
/// `children` — array of next generation
fn crossover(&self, parents: &[Vec<usize>]) -> Vec<Vec<usize>> {
let mut children: Vec<Vec<usize>> = Vec::new();
// attempt with for loops
let range_probs = Uniform::from(0. ..1.);
// [1, bits-2) to exclude cases with null vec
let range_over_indices = Uniform::from(1..self.n_bits_per_group * self.n_dim - 2);
let prob_vec: Vec<f32> = rand::thread_rng()
.sample_iter(&range_probs)
.take(self.n_pop / 2)
.collect();
// cutting of here in half since we're stepping by 2: [0, 1], [2, 3]...
for ind in (0..self.n_pop).step_by(2) {
let mut temp_child1 = parents[ind].clone();
let mut temp_child2 = parents[ind + 1].clone();
if prob_vec[ind / 2] < self.crossover_prob {
let change_ptr = rand::thread_rng().sample(&range_over_indices);
// first part remains unchanged
// swapping only tails todo("perform swap more efficiently")
for change_ind in change_ptr..self.n_bits_per_group * self.n_dim {
temp_child1[change_ind] = parents[ind + 1][change_ind];
temp_child2[change_ind] = parents[ind][change_ind];
}
}
children.push(temp_child1);
children.push(temp_child2);
}
children
}
///Performs mutation over children with given probability.
/// # Returns
/// `Vec<Vec<usize>>` of mutated elements
fn mutation(&self, children: &Vec<Vec<usize>>) -> Vec<Vec<usize>> {
let range = Uniform::from(0. ..1.);
children
.iter()
.map(|child_vec| {
let prob_vec: Vec<f32> = rand::thread_rng()
.sample_iter(&range)
.take(self.n_bits_per_group * self.n_dim)
.collect();
prob_vec
.into_iter()
.zip(child_vec)
.map(|(prob, &child)| {
if prob > self.mut_prob {
1 - child
} else {
child
}
})
.collect()
})
.collect()
}
/// Checks if there is an update of maximum(minimum) and updates `self.best_score`, `self.best_scores` and `self.best_decodes`.
/// If there was an update, prints current `epoch`, `best_ind` — index of best element in current population.
/// Prints element in binary and decimal representations.
/// If there was no updates, just copies last best element.
fn _if_update_best(&mut self, epoch: usize) {
let mut update_best = false;
let best_ind_temp;
if self.minimize {
// argmin function
best_ind_temp = self
.scores
.iter()
.enumerate()
.min_by(|(_, a), (_, b)| a.total_cmp(b))
.map(|(index, _)| index)
.unwrap();
if self.scores[best_ind_temp] < self.best_score {
update_best = true;
}
} else {
// argmax function
best_ind_temp = self
.scores
.iter()
.enumerate()
.max_by(|(_, a), (_, b)| a.total_cmp(b))
.map(|(index, _)| index)
.unwrap();
if self.scores[best_ind_temp] > self.best_score {
update_best = true;
}
}
if update_best {
self.best_score = self.scores[best_ind_temp];
self.best_scores.push(self.best_score);
self.best_decodes.push(self.decoded[best_ind_temp].clone());
println!(
"Epoch {epoch}. The best element now is: {best_ind} with value: {best_score}.",
epoch = epoch,
best_ind = best_ind_temp,
best_score = self.best_score
);
// println!("Population: {:?}", self.pop);
println!(
"Best element itself in binary code:\n{best_pop:?}\n and decode in decimal:\n{best_dec:?}",
best_pop = self.pop[best_ind_temp],
best_dec = self.decoded.as_slice().last().unwrap()
);
println!("------------------------------------------------------------------");
} else {
self.best_scores
.push(self.best_scores.as_slice().last().cloned().unwrap());
self.best_decodes
.push(self.best_decodes.as_slice().last().cloned().unwrap());
}
}
/// Start of evaluation. Connects all of the modules from struct.
/// Initializes population, decodes and scores, subsequently iterates over the `epochs` evaluating
/// `GeneticAlgorithm` using `selection`, `crossover` and `mutation` functions.
///
/// # Parameters
///
/// * `cost_function`: `fn(&Vec<Vec<f32>>) -> Vec<f32>` — function for evaluation
/// of team management
pub fn simulate(&mut self, cost_function: fn(f32, f32) -> f32) {
if self.pop.len() == 0 {
// initializing if we don't have initial population before
self.pop = self._pop_init(); // getting binary bitstrings
}
// initializing
self.decoded = self.decode(); // converting binary to decimals
self.scores = self._fold_2d_function(cost_function); // getting scores
self.best_score = self.scores[0];
self.best_scores.push(self.best_score);
self.best_decodes.push(self.decoded[0].clone());
for epoch in 0..self.n_epochs {
// main cycle
let parents = self.selection();
let children = self.crossover(&parents);
let mutated = self.mutation(&children);
// recalculation
self.pop = mutated;
self.decoded = self.decode();
self.scores = self._fold_2d_function(cost_function);
// update
self._if_update_best(epoch);
}
}
}
I'm using Deserialize
and Serialize
from serde
for possibility to change parameters using console (also using serde_json
).
Guesses:
- Probably not the best idea to use
derivative
crate to imitate default parameters in rust. - Think to get rid of generics — as using this functions for determined types.
- Not sure which way is more correct to use when iterating over something.
for
loop seems to be more expensive since it checks for borders, butmap
is hard to read and understand. - How to correctly pass the parameters — is it better to use refernce all of the time (
&
) to avoid making the copy (it may be a long vector so not effective)? - How better to pass
Vec<T>
as a parameter? Like&[T]
as it will be converted to the slice or just as a&Vec<T>
?