Source code for modopt.core.benchmarking
import warnings
import numpy as np
import time
try:
import matplotlib as mpl
import matplotlib.pyplot as plt
except ImportError:
warnings.warn("matplotlib not found, plotting disabled.")
plt = None
# import seaborn as sns
# import matplotlib.pyplot as plt
# sns.set_theme(rc={'text.usetex': True})
# sns.set_style("ticks")
[docs]def generate_performance_profiles(data):
'''
Compute performance profiles and return them along
with their corresponding performance ratio (`Tau`) values.
Depending on the input data, the function returns either two or four outputs:
- If `'nev'` exists in `list(data.values())[0]`:
- `Tau` (np.ndarray): Array of `Tau` values for the primary (time) performance profiles.
- `performance_profiles` (np.ndarray): Performance profiles corresponding to `Tau`.
- `Tau_n` (np.ndarray): Array of `Tau` values for secondary (nev) performance profiles.
- `performance_profiles_n` (np.ndarray): Performance profiles corresponding to `Tau_n`.
- Otherwise:
- `Tau` (np.ndarray): Array of `Tau` values for the primary (time) performance profiles.
- `performance_profiles` (np.ndarray): Performance profiles corresponding to `Tau`.
Parameters
----------
data : dict
Dictionary containing the performance data for each solver.
The keys are the (problem_name: str, solver_name: str) and the values are
dictionaries containing `'time'` and `'success'` as keys with corresponding
values denoting the time (`float`) taken for `solver_name` to solve `problem_name`
and the success (`bool`) of the solver.
Additionally, if the number of evaluations is available,
the dictionary can also contain `'nev'` as a key with
the corresponding `int` value denoting the number of evaluations.
Returns
-------
Tau : numpy.ndarray
Array of log-scaled performance ratios.
performance_profiles : dict
Dictionary containing the performance profiles for each solver.
The keys are the solver names and the values are the proportion of problems
solved under the performance ratio corresponding to entries in Tau.
Tau_n : numpy.ndarray
Array of log-scaled performance ratios for the number of evaluations.
Only returned if the number of evaluations `'nev'` is available in the data.
performance_profiles_n : dict
Dictionary containing the performance profiles for the number of evaluations.
The keys are the solver names and the values are the proportion of problems
solved under the performance ratio corresponding to entries in Tau_n.
Only returned if the number of evaluations `'nev'` is available in the data.
Examples
--------
>>> from modopt.benchmarking import generate_performance_profiles
>>> data = {('problem1', 'solver1'): {'time': 0.1, 'success': True},
... ('problem1', 'solver2'): {'time': 0.2, 'success': True},
... ('problem2', 'solver1'): {'time': 0.3, 'success': True},
... ('problem2', 'solver2'): {'time': 0.4, 'success': False}}
>>> Tau, performance_profiles = generate_performance_profiles(data)
Total number of problems: 2
<BLANKLINE>
Solver: solver1
--------------------------------------------------
Number of problems solved: 2
Percentage of problems solved: 100.0
--------------------------------------------------
<BLANKLINE>
Solver: solver2
--------------------------------------------------
Number of problems solved: 1
Percentage of problems solved: 50.0
--------------------------------------------------
<BLANKLINE>
>>> print(Tau) # doctest: +SKIP
'''
# Get the unique solvers and problems
solvers = np.unique([key[1] for key in data.keys()])
problems = np.unique([key[0] for key in data.keys()])
# Get the minimum time taken by any solver for a given problem
min_times = {}
for problem in problems:
successful_times = [data[(problem, solver)]['time'] for solver in solvers if data[(problem, solver)]['success']]
if successful_times == []:
min_times[problem] = 1.0 # Put any non-zero value since all solvers will be using a large time
else:
min_times[problem] = np.min(successful_times)
if min_times[problem] == 0:
raise ValueError('Time taken by a successful solver for problem {} is 0.'.format(problem))
# Compute the performance ratio - time
perf_ratio = {}
for solver in solvers:
for problem in problems:
if data[(problem, solver)]['success']:
perf_ratio[(problem, solver)] = data[(problem, solver)]['time'] / min_times[problem]
else:
perf_ratio[(problem, solver)] = np.inf
# Get the maximum performance ratio over all problems
successful_perf_ratios = [value for value in perf_ratio.values() if value != np.inf]
if successful_perf_ratios == []:
raise ValueError('All solvers failed on all problems.')
max_perf_ratio = np.max(successful_perf_ratios)
# Replace inf with 10 * max_perf_ratio
# perf_ratio = {key: 10 * max_perf_ratio if value == np.inf else value for key, value in perf_ratio.items()}
for key, value in perf_ratio.items():
if value == np.inf:
perf_ratio[key] = 10 * max_perf_ratio
# Compute the performance ratio - number of evaluations
if 'nev' in data[(problems[0], solvers[0])]:
min_nevs = {}
for problem in problems:
successful_nevs = [data[(problem, solver)]['nev'] for solver in solvers if data[(problem, solver)]['success']]
if successful_nevs == []:
min_nevs[problem] = 1 # Put any non-zero value since all solvers will be using a large nev
else:
min_nevs[problem] = np.min(successful_nevs)
if min_nevs[problem] == 0:
raise ValueError('Number of evaluations by a successful solver for problem {} is 0.'.format(problem))
perf_ratio_n = {}
for solver in solvers:
for problem in problems:
if data[(problem, solver)]['success']:
perf_ratio_n[(problem, solver)] = data[(problem, solver)]['nev'] / min_nevs[problem]
else:
perf_ratio_n[(problem, solver)] = np.inf
# The following block is redundant since this is already done for time
# successful_perf_ratios_n = [value for value in perf_ratio_n.values() if value != np.inf]
# if successful_perf_ratios_n == []:
# raise ValueError('All solvers failed on all problems.')
max_perf_ratio_n = np.max([value for value in perf_ratio_n.values() if value != np.inf])
# Replace inf with 10 * max_perf_ratio_n
for key, value in perf_ratio_n.items():
if value == np.inf:
perf_ratio_n[key] = 10 * max_perf_ratio_n
def performance_function(Tau, perf_ratio):
performance_profiles = {}
for solver in solvers:
performance_profiles[solver] = []
for t in Tau:
# Number of problems solved under tau performance ratio
n_solved = np.sum([1 if np.log2(value) <= t else 0 for key, value in perf_ratio.items() if key[1] == solver])
performance_profiles[solver].append(n_solved / len(problems))
return performance_profiles
Tau = np.linspace(0, np.log2(max_perf_ratio*10), 100)[:-1] # upper bound 10*max_perf_ratio needs to be omitted
performance_profiles = performance_function(Tau, perf_ratio)
print('Total number of problems:', len(problems), '\n')
for solver in solvers:
print('Solver:', solver)
print('-'*50)
print('Number of problems solved:', round(performance_profiles[solver][-2]*len(problems)))
print('Percentage of problems solved:', performance_profiles[solver][-2]*100)
print('-'*50, '\n')
if 'nev' in data[(problems[0], solvers[0])]:
Tau_n = np.linspace(0, np.log2(max_perf_ratio_n*10), 100)[:-1] # upper bound 10*max_perf_ratio_n needs to be omitted
performance_profiles_n = performance_function(Tau_n, perf_ratio_n)
return Tau, performance_profiles, Tau_n, performance_profiles_n
return Tau, performance_profiles
[docs]def plot_performance_profiles(data, save_figname='performance.pdf'):
'''
Plot the performance profiles for the given data.
Parameters
----------
data : dict
Dictionary containing the performance data for each solver.
The keys are the (problem_name: str, solver_name: str) and the values are
dictionaries containing `'time'` and `'success'` as keys with corresponding
values denoting the time (`float`) taken for `solver_name` to solve `problem_name`
and the success (`bool`) of the solver.
Additionally, if the number of evaluations is available,
the dictionary can also contain `'nev'` as a key with
the corresponding `int` value denoting the number of evaluations.
save_figname : str, default='performance.pdf'
Path to save the plot with the performance profiles.
If the number of evaluations is available in `data`,
the performance profiles for the number of evaluations is also plotted
and saved to the path with `save_figname` appended with `_nev` before the extension.
Examples
--------
>>> from modopt.benchmarking import plot_performance_profiles
>>> data = {('problem1', 'solver1'): {'time': 0.1, 'success': True},
... ('problem1', 'solver2'): {'time': 0.2, 'success': True},
... ('problem2', 'solver1'): {'time': 0.3, 'success': True},
... ('problem2', 'solver2'): {'time': 0.4, 'success': False}}
>>> plot_performance_profiles(data, save_figname='performance.pdf')
Total number of problems: 2
<BLANKLINE>
Solver: solver1
--------------------------------------------------
Number of problems solved: 2
Percentage of problems solved: 100.0
--------------------------------------------------
<BLANKLINE>
Solver: solver2
--------------------------------------------------
Number of problems solved: 1
Percentage of problems solved: 50.0
--------------------------------------------------
<BLANKLINE>
'''
if plt is None:
raise ImportError("matplotlib not found, cannot plot performance profile.")
plt.rcParams['xtick.labelsize']=20
plt.rcParams['ytick.labelsize']=20
fig, ax = plt.subplots()
ax.set_title('Performance Profile (time)', fontsize=24)
ax.set_xlabel('Logarithmic performance ratio, $log_2(\\tau)$', fontsize=24)
ax.set_ylabel('Proportion of problems solved', fontsize=24)
if 'nev' not in data[(list(data.keys())[0][0], list(data.keys())[0][1])]:
Tau, performance_profiles = generate_performance_profiles(data)
else:
Tau, performance_profiles, Tau_n, performance_profiles_n = generate_performance_profiles(data)
for solver, profile in performance_profiles.items():
ax.plot(Tau, profile, label=solver, linewidth = 2.0)
ax.legend(fontsize=18)
ax.set_xlim([0., Tau[-1]])
ax.set_ylim([0., 1.])
plt.minorticks_off()
fig.set_size_inches(8, 6)
plt.savefig(save_figname, bbox_inches='tight')
plt.show()
if 'nev' in data[(list(data.keys())[0][0], list(data.keys())[0][1])]:
fig, ax = plt.subplots()
ax.set_title('Data Profile (function evaluations)', fontsize=24)
ax.set_xlabel('Logarithmic performance ratio, $log_2(\\tau)$', fontsize=24)
ax.set_ylabel('Proportion of problems solved', fontsize=24)
for solver, profile in performance_profiles_n.items():
ax.plot(Tau_n, profile, label=solver, linewidth = 2.0)
ax.legend(fontsize=18)
ax.set_xlim([0., Tau_n[-1]])
ax.set_ylim([0., 1.])
plt.minorticks_off()
fig.set_size_inches(8, 6)
plt.savefig(save_figname.replace('.pdf', '_nev.pdf'), bbox_inches='tight')
plt.show()
[docs]def filter_cutest_problems(num_vars=[0,1], num_cons=[0,0], tags=[], return_metadata=False):
'''
Filter CUTEst problems based on the number of variables, number of constraints, and problem tags.
Parameters
----------
num_vars : list, default=[0,1]
List of two integers denoting the minimum and maximum number of variables.
num_cons : list, default=[0,0]
List of two integers denoting the minimum and maximum number of constraints.
tags : list, default=[]
List of tags indicating the desired optimization problem types.
For example, `['UC']` for unconstrained problems, `['BC']` for bound-constrained problems,
`['QP']` for quadratic programming problems, `['LP']` for linear programming problems,
`['FP']` for feasible point problems, and any valid combination of these tags
for problems that satisfy multiple criteria.
A problem is selected if it contains all the specified tags.
return_metadata : bool, default=False
If True, also return a dictionary containing the metadata of the filtered problems.
Returns
-------
filtered_problems : list
List of CUTEst problems that have the specified tags and
the number of variables and constraints within the specified range.
metadata : dict, optional
Dictionary containing the metadata of the filtered problems.
The keys are the filtered problem names and the values are dictionaries
containing the number of variables, number of constraints, tags,
and the cutest classification string for the corresponding filtered problem.
Only returned if `return_metadata` is True.
Examples
--------
>>> from modopt.benchmarking import filter_cutest_problems
>>> problem_names = filter_cutest_problems(num_vars=[1,5], num_cons=[1,1])
>>> len(problem_names)
45
>>> pnames, metadata = filter_cutest_problems(num_vars=[1,2], tags=['BC'], return_metadata=True)
>>> len(pnames)
24
>>> pnames[0]
'BQP1VAR'
>>> metadata[pnames[0]]
{'nx': 1, 'nc': 0, 'tags': ['BC', 'QP'], 'cutest_cls': 'QBR2-AN-1-0'}
'''
import os
here = os.path.abspath(os.path.dirname(__file__))
file_path = os.path.join(here, '../../docs/src/benchmarking/cutest_problem_table.md')
file_path = os.path.normpath(file_path)
# Load the dataset from the Markdown file
with open(file_path, 'r') as file:
lines = file.readlines()
# Extract the table part from the Markdown file
table_lines = [line.strip() for line in lines if line.strip() and "|" in line]
header_line = table_lines[0]
data_lines = table_lines[2:] # Skip header and separator lines
header = [col.strip() for col in header_line.split('|') if col]
data = [[col.strip() for col in line.split('|') if col] for line in data_lines]
# Filter problems based on the table data
filtered_problems = []
metadata = {}
for line in data:
name, nx, nc = line[1], int(line[2]), int(line[3])
ptags = [item.strip(" '") for item in line[4].split(',')]
ptags = [] if ptags==[''] else ptags
cutest_cls = line[5]
if nx >= num_vars[0] and nx <= num_vars[1] and \
nc >= num_cons[0] and nc <= num_cons[1] and \
set(tags).issubset(ptags):
filtered_problems.append(name)
metadata[name] = {'nx': nx, 'nc': nc, 'tags': ptags, 'cutest_cls': cutest_cls}
if return_metadata:
return filtered_problems, metadata
return filtered_problems
if __name__ == "__main__":
import doctest
doctest.testmod()