Reproducibility 4: figure 5

Comparison between different models
See the `models_comp` folder for the detailed calculations.

Import packages

[ ]:

import seaborn as sns
import matplotlib.pyplot as plt
import sys
sys.path.append('../')
from scipy.stats import pearsonr
from sklearn.metrics import r2_score
from numpy import mean, logspace, std
from numpy.random import choice, seed
import matplotlib.pyplot as plt
from multiprocessing import Pool
from dlim.utils import Data_model
import joblib
import numpy as np
import pandas as pd

[ ]:

# Violin plot comparing the stored scores of each model on the
# 'fitness' and 'epistasis' result sets, saved as S5b_model_comp.{svg,png}.
fig, ax = plt.subplots(1, figsize=(5, 2.5))

# NOTE(review): `df_data` / `data` are not used by the plot below; they are
# kept because Data_model is a project class whose side effects (and any
# later use of these names) cannot be confirmed from this cell.
df_data = pd.read_csv("../data/data_env_1.csv", sep=',', header=None)
data = Data_model(data=df_data, n_variables=2)

# Long-format accumulator: one row per stored score.
data_dict = {
    'model': [],
    'acc': [],
    'data_name': []
    }

for data_name in ['fitness', 'epistasis']:
    for model in ['D-LIM', 'LR', 'ALM', 'LANTERN', 'MAVE-NN']:
        # D-LIM results sit next to this notebook; every other model's
        # results are stored under ../models_comp.
        if model == 'D-LIM':
            results_root = './results'
        else:
            results_root = '../models_comp/results'
        result_path = results_root + '/harry_' + data_name + '/reg_' + model + '_frac_1.0.joblib'

        # NOTE(review): joblib.load unpickles the file; only load result
        # files produced by this project.
        result_nn_reg = joblib.load(result_path)

        # The stored object is indexed by 1.0 — presumably the training
        # fraction, matching the "_frac_1.0" file suffix (TODO confirm).
        res_w = result_nn_reg[1.0]
        data_dict['acc'] += res_w
        data_dict['model'] += [model] * len(res_w)
        data_dict['data_name'] += [data_name] * len(res_w)

df = pd.DataFrame.from_dict(data_dict)
axe = sns.violinplot(data=df, x="model", y="acc", hue="data_name", palette=["C0", "C1"], ax=ax, inner=None)

# Build the legend only after the violins exist. The previous fig.legend()
# call ran on an empty figure, produced an empty legend and emitted a
# "No artists with labels found" warning.
lgd = axe.legend(loc='lower right')

# Remove the top and right spines
axe.spines["top"].set_visible(False)
axe.spines["right"].set_visible(False)

# Restrict the y-axis to the range where the scores lie.
axe.set_ylim([0.85, 1.0])
axe.set_xlabel("", fontsize=10)
axe.set_ylabel(r"$\rho$", fontsize=10)

fig.tight_layout()
fig.savefig('S5b_model_comp.svg', dpi=300, transparent=True, bbox_extra_artists=(lgd,), bbox_inches='tight')
fig.savefig('S5b_model_comp.png', dpi=300, transparent=True, bbox_extra_artists=(lgd,), bbox_inches='tight')
fig.show()

No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
/home/swang/miniconda3/envs/drug/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if pd.api.types.is_categorical_dtype(vector):
/home/swang/miniconda3/envs/drug/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if pd.api.types.is_categorical_dtype(vector):
/home/swang/miniconda3/envs/drug/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if pd.api.types.is_categorical_dtype(vector):
/home/swang/miniconda3/envs/drug/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if pd.api.types.is_categorical_dtype(vector):

../_images/reproducibility_figure5_3_1.png

[24]:

# Per-(model, data_name) summary of the 'acc' column: mean, standard
# deviation and median, flattened back to ordinary columns.
summary_columns = ['mean', 'std', 'median']
acc_by_group = df.groupby(['model', 'data_name'])['acc']
df_stast = acc_by_group.agg(summary_columns).reset_index()

# Write the table without the row index (file name kept as in the original).
df_stast.to_csv('results_modle_comp.csv', index=False)

[25]:

# Show the per-model, per-dataset summary table (mean / std / median of 'acc').
df_stast

[25]:

	model	data_name	mean	std	median
0	ALM	epistasis	0.976804	0.002238	0.977143
1	ALM	fitness	0.989807	0.003854	0.990763
2	D-LIM	epistasis	0.979625	0.004014	0.980168
3	D-LIM	fitness	0.982860	0.004803	0.983347
4	LANTERN	epistasis	0.975455	0.002067	0.975712
5	LANTERN	fitness	0.984628	0.002625	0.984317
6	LR	epistasis	0.931366	0.009481	0.930844
7	LR	fitness	0.970674	0.004354	0.970719
8	MAVE-NN	epistasis	0.970627	0.005645	0.972682
9	MAVE-NN	fitness	0.979475	0.002903	0.979280

[ ]: