{
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5-final"
},
"orig_nbformat": 2,
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2,
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"X = pd.read_csv('xtraining_SC_GGP_AXA_FR.csv')\n",
"y = pd.read_csv('ytraining_SC_GGP_AXA_FR.csv')\n",
"Xscore = pd.read_csv('xscoring_SC_GGP_AXA_FR.csv')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" target\n",
"count 1000.000000\n",
"mean 0.170000\n",
"std 0.375821\n",
"min 0.000000\n",
"25% 0.000000\n",
"50% 0.000000\n",
"75% 0.000000\n",
"max 1.000000"
],
"text/html": "
\n\n
\n \n \n | \n target | \n
\n \n \n \n count | \n 1000.000000 | \n
\n \n mean | \n 0.170000 | \n
\n \n std | \n 0.375821 | \n
\n \n min | \n 0.000000 | \n
\n \n 25% | \n 0.000000 | \n
\n \n 50% | \n 0.000000 | \n
\n \n 75% | \n 0.000000 | \n
\n \n max | \n 1.000000 | \n
\n \n
\n
"
},
"metadata": {},
"execution_count": 3
}
],
"source": [
"y.describe()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" age surface montant_assure \\\n",
"count 1000.000000 1000.000000 1000.000000 \n",
"mean 42.198000 86.863400 22461.069000 \n",
"std 14.710853 28.498626 9076.273609 \n",
"min 15.000000 31.270000 4478.000000 \n",
"25% 31.000000 66.235000 15574.500000 \n",
"50% 42.000000 80.605000 21201.000000 \n",
"75% 52.000000 104.842500 27597.000000 \n",
"max 94.000000 179.780000 55852.000000 \n",
"\n",
" nb_sinistres_depuis_ouverture cotisation_n-1 cotisation_n \\\n",
"count 1000.000000 1000.000000 1000.000000 \n",
"mean 1.492000 272.762170 286.046570 \n",
"std 0.954383 76.378759 82.097481 \n",
"min 0.000000 175.380000 181.360000 \n",
"25% 1.000000 211.842500 220.230000 \n",
"50% 1.000000 227.525000 240.705000 \n",
"75% 2.000000 360.062500 372.647500 \n",
"max 4.000000 440.870000 510.490000 \n",
"\n",
" duree_contrat duree_dernier_sinistre duree_zero_sinistre \\\n",
"count 1000.000000 1000.000000 1000.000000 \n",
"mean 4883.704000 40.023000 3444.616000 \n",
"std 2716.954916 41.793913 2826.615494 \n",
"min 49.000000 0.000000 0.000000 \n",
"25% 2637.750000 1.000000 565.000000 \n",
"50% 4819.000000 29.000000 3111.500000 \n",
"75% 7353.250000 68.000000 6037.000000 \n",
"max 9526.000000 202.000000 9429.000000 \n",
"\n",
" depa_01 ... cate_Artisans, cate_Cadres cate_Employés \\\n",
"count 1000.000000 ... 1000.000000 1000.000000 1000.000000 \n",
"mean 0.009000 ... 0.174000 0.162000 0.341000 \n",
"std 0.094488 ... 0.379299 0.368635 0.474283 \n",
"min 0.000000 ... 0.000000 0.000000 0.000000 \n",
"25% 0.000000 ... 0.000000 0.000000 0.000000 \n",
"50% 0.000000 ... 0.000000 0.000000 0.000000 \n",
"75% 0.000000 ... 0.000000 0.000000 1.000000 \n",
"max 1.000000 ... 1.000000 1.000000 1.000000 \n",
"\n",
" cate_Inactifs cate_Ouvriers type_Appartement type_Maison \\\n",
"count 1000.000000 1000.000000 1000.00000 1000.00000 \n",
"mean 0.067000 0.125000 0.63400 0.36600 \n",
"std 0.250147 0.330884 0.48195 0.48195 \n",
"min 0.000000 0.000000 0.00000 0.00000 \n",
"25% 0.000000 0.000000 0.00000 0.00000 \n",
"50% 0.000000 0.000000 1.00000 0.00000 \n",
"75% 0.000000 0.000000 1.00000 1.00000 \n",
"max 1.000000 1.000000 1.00000 1.00000 \n",
"\n",
" stat_Locataire stat_Occupant stat_Propriétaire \n",
"count 1000.000000 1000.000000 1000.000000 \n",
"mean 0.586000 0.028000 0.386000 \n",
"std 0.492795 0.165055 0.487074 \n",
"min 0.000000 0.000000 0.000000 \n",
"25% 0.000000 0.000000 0.000000 \n",
"50% 1.000000 0.000000 0.000000 \n",
"75% 1.000000 0.000000 1.000000 \n",
"max 1.000000 1.000000 1.000000 \n",
"\n",
"[8 rows x 121 columns]"
],
"text/html": "\n\n
\n \n \n | \n age | \n surface | \n montant_assure | \n nb_sinistres_depuis_ouverture | \n cotisation_n-1 | \n cotisation_n | \n duree_contrat | \n duree_dernier_sinistre | \n duree_zero_sinistre | \n depa_01 | \n ... | \n cate_Artisans, | \n cate_Cadres | \n cate_Employés | \n cate_Inactifs | \n cate_Ouvriers | \n type_Appartement | \n type_Maison | \n stat_Locataire | \n stat_Occupant | \n stat_Propriétaire | \n
\n \n \n \n count | \n 1000.000000 | \n 1000.000000 | \n 1000.000000 | \n 1000.000000 | \n 1000.000000 | \n 1000.000000 | \n 1000.000000 | \n 1000.000000 | \n 1000.000000 | \n 1000.000000 | \n ... | \n 1000.000000 | \n 1000.000000 | \n 1000.000000 | \n 1000.000000 | \n 1000.000000 | \n 1000.00000 | \n 1000.00000 | \n 1000.000000 | \n 1000.000000 | \n 1000.000000 | \n
\n \n mean | \n 42.198000 | \n 86.863400 | \n 22461.069000 | \n 1.492000 | \n 272.762170 | \n 286.046570 | \n 4883.704000 | \n 40.023000 | \n 3444.616000 | \n 0.009000 | \n ... | \n 0.174000 | \n 0.162000 | \n 0.341000 | \n 0.067000 | \n 0.125000 | \n 0.63400 | \n 0.36600 | \n 0.586000 | \n 0.028000 | \n 0.386000 | \n
\n \n std | \n 14.710853 | \n 28.498626 | \n 9076.273609 | \n 0.954383 | \n 76.378759 | \n 82.097481 | \n 2716.954916 | \n 41.793913 | \n 2826.615494 | \n 0.094488 | \n ... | \n 0.379299 | \n 0.368635 | \n 0.474283 | \n 0.250147 | \n 0.330884 | \n 0.48195 | \n 0.48195 | \n 0.492795 | \n 0.165055 | \n 0.487074 | \n
\n \n min | \n 15.000000 | \n 31.270000 | \n 4478.000000 | \n 0.000000 | \n 175.380000 | \n 181.360000 | \n 49.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n ... | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.00000 | \n 0.00000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n
\n \n 25% | \n 31.000000 | \n 66.235000 | \n 15574.500000 | \n 1.000000 | \n 211.842500 | \n 220.230000 | \n 2637.750000 | \n 1.000000 | \n 565.000000 | \n 0.000000 | \n ... | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.00000 | \n 0.00000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n
\n \n 50% | \n 42.000000 | \n 80.605000 | \n 21201.000000 | \n 1.000000 | \n 227.525000 | \n 240.705000 | \n 4819.000000 | \n 29.000000 | \n 3111.500000 | \n 0.000000 | \n ... | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 1.00000 | \n 0.00000 | \n 1.000000 | \n 0.000000 | \n 0.000000 | \n
\n \n 75% | \n 52.000000 | \n 104.842500 | \n 27597.000000 | \n 2.000000 | \n 360.062500 | \n 372.647500 | \n 7353.250000 | \n 68.000000 | \n 6037.000000 | \n 0.000000 | \n ... | \n 0.000000 | \n 0.000000 | \n 1.000000 | \n 0.000000 | \n 0.000000 | \n 1.00000 | \n 1.00000 | \n 1.000000 | \n 0.000000 | \n 1.000000 | \n
\n \n max | \n 94.000000 | \n 179.780000 | \n 55852.000000 | \n 4.000000 | \n 440.870000 | \n 510.490000 | \n 9526.000000 | \n 202.000000 | \n 9429.000000 | \n 1.000000 | \n ... | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.00000 | \n 1.00000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n
\n \n
\n
8 rows × 121 columns
\n
"
},
"metadata": {},
"execution_count": 4
}
],
"source": [
"X.describe()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" age surface montant_assure nb_sinistres_depuis_ouverture \\\n",
"count 300.00000 300.000000 300.000000 300.000000 \n",
"mean 42.59000 86.704767 22894.856667 1.466667 \n",
"std 14.62949 30.453970 9374.614330 0.992727 \n",
"min 15.00000 29.360000 7217.000000 0.000000 \n",
"25% 31.00000 63.672500 15281.750000 1.000000 \n",
"50% 43.00000 78.530000 20810.500000 1.000000 \n",
"75% 54.00000 107.357500 28494.750000 2.000000 \n",
"max 86.00000 169.560000 50948.000000 5.000000 \n",
"\n",
" cotisation_n-1 cotisation_n duree_contrat duree_dernier_sinistre \\\n",
"count 300.000000 300.000000 300.000000 300.000000 \n",
"mean 274.989200 288.086200 4810.156667 34.820000 \n",
"std 80.104622 86.567491 2673.566074 39.671826 \n",
"min 172.470000 180.700000 88.000000 0.000000 \n",
"25% 212.580000 221.105000 2521.000000 1.000000 \n",
"50% 227.590000 240.395000 5101.000000 19.500000 \n",
"75% 364.185000 370.237500 7099.500000 62.000000 \n",
"max 450.560000 599.500000 9530.000000 189.000000 \n",
"\n",
" duree_zero_sinistre depa_01 ... cate_Artisans, cate_Cadres \\\n",
"count 300.000000 300.000000 ... 300.000000 300.000000 \n",
"mean 3174.016667 0.010000 ... 0.156667 0.186667 \n",
"std 2745.057079 0.099665 ... 0.364094 0.390295 \n",
"min 0.000000 0.000000 ... 0.000000 0.000000 \n",
"25% 472.500000 0.000000 ... 0.000000 0.000000 \n",
"50% 2566.500000 0.000000 ... 0.000000 0.000000 \n",
"75% 5547.250000 0.000000 ... 0.000000 0.000000 \n",
"max 9019.000000 1.000000 ... 1.000000 1.000000 \n",
"\n",
" cate_Employés cate_Inactifs cate_Ouvriers type_Appartement \\\n",
"count 300.000000 300.000000 300.000000 300.000000 \n",
"mean 0.370000 0.023333 0.116667 0.626667 \n",
"std 0.483611 0.151212 0.321559 0.484498 \n",
"min 0.000000 0.000000 0.000000 0.000000 \n",
"25% 0.000000 0.000000 0.000000 0.000000 \n",
"50% 0.000000 0.000000 0.000000 1.000000 \n",
"75% 1.000000 0.000000 0.000000 1.000000 \n",
"max 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" type_Maison stat_Locataire stat_Occupant stat_Propriétaire \n",
"count 300.000000 300.000000 300.000000 300.000000 \n",
"mean 0.373333 0.560000 0.016667 0.423333 \n",
"std 0.484498 0.497216 0.128233 0.494913 \n",
"min 0.000000 0.000000 0.000000 0.000000 \n",
"25% 0.000000 0.000000 0.000000 0.000000 \n",
"50% 0.000000 1.000000 0.000000 0.000000 \n",
"75% 1.000000 1.000000 0.000000 1.000000 \n",
"max 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
"[8 rows x 121 columns]"
],
"text/html": "\n\n
\n \n \n | \n age | \n surface | \n montant_assure | \n nb_sinistres_depuis_ouverture | \n cotisation_n-1 | \n cotisation_n | \n duree_contrat | \n duree_dernier_sinistre | \n duree_zero_sinistre | \n depa_01 | \n ... | \n cate_Artisans, | \n cate_Cadres | \n cate_Employés | \n cate_Inactifs | \n cate_Ouvriers | \n type_Appartement | \n type_Maison | \n stat_Locataire | \n stat_Occupant | \n stat_Propriétaire | \n
\n \n \n \n count | \n 300.00000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n ... | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n
\n \n mean | \n 42.59000 | \n 86.704767 | \n 22894.856667 | \n 1.466667 | \n 274.989200 | \n 288.086200 | \n 4810.156667 | \n 34.820000 | \n 3174.016667 | \n 0.010000 | \n ... | \n 0.156667 | \n 0.186667 | \n 0.370000 | \n 0.023333 | \n 0.116667 | \n 0.626667 | \n 0.373333 | \n 0.560000 | \n 0.016667 | \n 0.423333 | \n
\n \n std | \n 14.62949 | \n 30.453970 | \n 9374.614330 | \n 0.992727 | \n 80.104622 | \n 86.567491 | \n 2673.566074 | \n 39.671826 | \n 2745.057079 | \n 0.099665 | \n ... | \n 0.364094 | \n 0.390295 | \n 0.483611 | \n 0.151212 | \n 0.321559 | \n 0.484498 | \n 0.484498 | \n 0.497216 | \n 0.128233 | \n 0.494913 | \n
\n \n min | \n 15.00000 | \n 29.360000 | \n 7217.000000 | \n 0.000000 | \n 172.470000 | \n 180.700000 | \n 88.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n ... | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n
\n \n 25% | \n 31.00000 | \n 63.672500 | \n 15281.750000 | \n 1.000000 | \n 212.580000 | \n 221.105000 | \n 2521.000000 | \n 1.000000 | \n 472.500000 | \n 0.000000 | \n ... | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n
\n \n 50% | \n 43.00000 | \n 78.530000 | \n 20810.500000 | \n 1.000000 | \n 227.590000 | \n 240.395000 | \n 5101.000000 | \n 19.500000 | \n 2566.500000 | \n 0.000000 | \n ... | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 1.000000 | \n 0.000000 | \n 1.000000 | \n 0.000000 | \n 0.000000 | \n
\n \n 75% | \n 54.00000 | \n 107.357500 | \n 28494.750000 | \n 2.000000 | \n 364.185000 | \n 370.237500 | \n 7099.500000 | \n 62.000000 | \n 5547.250000 | \n 0.000000 | \n ... | \n 0.000000 | \n 0.000000 | \n 1.000000 | \n 0.000000 | \n 0.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 0.000000 | \n 1.000000 | \n
\n \n max | \n 86.00000 | \n 169.560000 | \n 50948.000000 | \n 5.000000 | \n 450.560000 | \n 599.500000 | \n 9530.000000 | \n 189.000000 | \n 9019.000000 | \n 1.000000 | \n ... | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n
\n \n
\n
8 rows × 121 columns
\n
"
},
"metadata": {},
"execution_count": 5
}
],
"source": [
"Xscore.describe()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.preprocessing import MinMaxScaler\n",
"\n",
"scaler = MinMaxScaler()\n",
"X = scaler.fit_transform(X)\n",
"Xscore = scaler.transform(Xscore)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" 0 1 2 3 4 \\\n",
"count 1000.000000 1000.000000 1000.000000 1000.000000 1000.000000 \n",
"mean 0.344278 0.374341 0.350042 0.373000 0.366802 \n",
"std 0.186213 0.191897 0.176671 0.238596 0.287690 \n",
"min 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"25% 0.202532 0.235439 0.215994 0.250000 0.137340 \n",
"50% 0.341772 0.332200 0.325515 0.250000 0.196410 \n",
"75% 0.468354 0.495404 0.450014 0.500000 0.695629 \n",
"max 1.000000 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" 5 6 7 8 9 ... \\\n",
"count 1000.000000 1000.000000 1000.000000 1000.000000 1000.000000 ... \n",
"mean 0.318071 0.510151 0.198134 0.365321 0.009000 ... \n",
"std 0.249438 0.286689 0.206901 0.299779 0.094488 ... \n",
"min 0.000000 0.000000 0.000000 0.000000 0.000000 ... \n",
"25% 0.118099 0.273161 0.004950 0.059922 0.000000 ... \n",
"50% 0.180309 0.503324 0.143564 0.329993 0.000000 ... \n",
"75% 0.581191 0.770734 0.336634 0.640259 0.000000 ... \n",
"max 1.000000 1.000000 1.000000 1.000000 1.000000 ... \n",
"\n",
" 111 112 113 114 115 \\\n",
"count 1000.000000 1000.000000 1000.000000 1000.000000 1000.000000 \n",
"mean 0.174000 0.162000 0.341000 0.067000 0.125000 \n",
"std 0.379299 0.368635 0.474283 0.250147 0.330884 \n",
"min 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"25% 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"50% 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"75% 0.000000 0.000000 1.000000 0.000000 0.000000 \n",
"max 1.000000 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" 116 117 118 119 120 \n",
"count 1000.00000 1000.00000 1000.000000 1000.000000 1000.000000 \n",
"mean 0.63400 0.36600 0.586000 0.028000 0.386000 \n",
"std 0.48195 0.48195 0.492795 0.165055 0.487074 \n",
"min 0.00000 0.00000 0.000000 0.000000 0.000000 \n",
"25% 0.00000 0.00000 0.000000 0.000000 0.000000 \n",
"50% 1.00000 0.00000 1.000000 0.000000 0.000000 \n",
"75% 1.00000 1.00000 1.000000 0.000000 1.000000 \n",
"max 1.00000 1.00000 1.000000 1.000000 1.000000 \n",
"\n",
"[8 rows x 121 columns]"
],
"text/html": "\n\n
\n \n \n | \n 0 | \n 1 | \n 2 | \n 3 | \n 4 | \n 5 | \n 6 | \n 7 | \n 8 | \n 9 | \n ... | \n 111 | \n 112 | \n 113 | \n 114 | \n 115 | \n 116 | \n 117 | \n 118 | \n 119 | \n 120 | \n
\n \n \n \n count | \n 1000.000000 | \n 1000.000000 | \n 1000.000000 | \n 1000.000000 | \n 1000.000000 | \n 1000.000000 | \n 1000.000000 | \n 1000.000000 | \n 1000.000000 | \n 1000.000000 | \n ... | \n 1000.000000 | \n 1000.000000 | \n 1000.000000 | \n 1000.000000 | \n 1000.000000 | \n 1000.00000 | \n 1000.00000 | \n 1000.000000 | \n 1000.000000 | \n 1000.000000 | \n
\n \n mean | \n 0.344278 | \n 0.374341 | \n 0.350042 | \n 0.373000 | \n 0.366802 | \n 0.318071 | \n 0.510151 | \n 0.198134 | \n 0.365321 | \n 0.009000 | \n ... | \n 0.174000 | \n 0.162000 | \n 0.341000 | \n 0.067000 | \n 0.125000 | \n 0.63400 | \n 0.36600 | \n 0.586000 | \n 0.028000 | \n 0.386000 | \n
\n \n std | \n 0.186213 | \n 0.191897 | \n 0.176671 | \n 0.238596 | \n 0.287690 | \n 0.249438 | \n 0.286689 | \n 0.206901 | \n 0.299779 | \n 0.094488 | \n ... | \n 0.379299 | \n 0.368635 | \n 0.474283 | \n 0.250147 | \n 0.330884 | \n 0.48195 | \n 0.48195 | \n 0.492795 | \n 0.165055 | \n 0.487074 | \n
\n \n min | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n ... | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.00000 | \n 0.00000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n
\n \n 25% | \n 0.202532 | \n 0.235439 | \n 0.215994 | \n 0.250000 | \n 0.137340 | \n 0.118099 | \n 0.273161 | \n 0.004950 | \n 0.059922 | \n 0.000000 | \n ... | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.00000 | \n 0.00000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n
\n \n 50% | \n 0.341772 | \n 0.332200 | \n 0.325515 | \n 0.250000 | \n 0.196410 | \n 0.180309 | \n 0.503324 | \n 0.143564 | \n 0.329993 | \n 0.000000 | \n ... | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 1.00000 | \n 0.00000 | \n 1.000000 | \n 0.000000 | \n 0.000000 | \n
\n \n 75% | \n 0.468354 | \n 0.495404 | \n 0.450014 | \n 0.500000 | \n 0.695629 | \n 0.581191 | \n 0.770734 | \n 0.336634 | \n 0.640259 | \n 0.000000 | \n ... | \n 0.000000 | \n 0.000000 | \n 1.000000 | \n 0.000000 | \n 0.000000 | \n 1.00000 | \n 1.00000 | \n 1.000000 | \n 0.000000 | \n 1.000000 | \n
\n \n max | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n ... | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.00000 | \n 1.00000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n
\n \n
\n
8 rows × 121 columns
\n
"
},
"metadata": {},
"execution_count": 7
}
],
"source": [
"pd.DataFrame(X).describe()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" 0 1 2 3 4 5 \\\n",
"count 300.000000 300.000000 300.000000 300.000000 300.000000 300.000000 \n",
"mean 0.349241 0.373273 0.358486 0.366667 0.375190 0.324268 \n",
"std 0.185183 0.205063 0.182478 0.248182 0.301724 0.263019 \n",
"min 0.000000 -0.012861 0.053315 0.000000 -0.010961 -0.002005 \n",
"25% 0.202532 0.218184 0.210296 0.250000 0.140118 0.120758 \n",
"50% 0.354430 0.318228 0.317914 0.250000 0.196655 0.179367 \n",
"75% 0.493671 0.512339 0.467488 0.500000 0.711157 0.573869 \n",
"max 0.898734 0.931183 0.904543 1.250000 1.036499 1.270440 \n",
"\n",
" 6 7 8 9 ... 111 \\\n",
"count 300.000000 300.000000 300.000000 300.000000 ... 300.000000 \n",
"mean 0.502391 0.172376 0.336623 0.010000 ... 0.156667 \n",
"std 0.282111 0.196395 0.291129 0.099665 ... 0.364094 \n",
"min 0.004115 0.000000 0.000000 0.000000 ... 0.000000 \n",
"25% 0.260842 0.004950 0.050111 0.000000 ... 0.000000 \n",
"50% 0.533080 0.096535 0.272192 0.000000 ... 0.000000 \n",
"75% 0.743959 0.306931 0.588318 0.000000 ... 0.000000 \n",
"max 1.000422 0.935644 0.956517 1.000000 ... 1.000000 \n",
"\n",
" 112 113 114 115 116 117 \\\n",
"count 300.000000 300.000000 300.000000 300.000000 300.000000 300.000000 \n",
"mean 0.186667 0.370000 0.023333 0.116667 0.626667 0.373333 \n",
"std 0.390295 0.483611 0.151212 0.321559 0.484498 0.484498 \n",
"min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"25% 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"50% 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 \n",
"75% 0.000000 1.000000 0.000000 0.000000 1.000000 1.000000 \n",
"max 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" 118 119 120 \n",
"count 300.000000 300.000000 300.000000 \n",
"mean 0.560000 0.016667 0.423333 \n",
"std 0.497216 0.128233 0.494913 \n",
"min 0.000000 0.000000 0.000000 \n",
"25% 0.000000 0.000000 0.000000 \n",
"50% 1.000000 0.000000 0.000000 \n",
"75% 1.000000 0.000000 1.000000 \n",
"max 1.000000 1.000000 1.000000 \n",
"\n",
"[8 rows x 121 columns]"
],
"text/html": "\n\n
\n \n \n | \n 0 | \n 1 | \n 2 | \n 3 | \n 4 | \n 5 | \n 6 | \n 7 | \n 8 | \n 9 | \n ... | \n 111 | \n 112 | \n 113 | \n 114 | \n 115 | \n 116 | \n 117 | \n 118 | \n 119 | \n 120 | \n
\n \n \n \n count | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n ... | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n 300.000000 | \n
\n \n mean | \n 0.349241 | \n 0.373273 | \n 0.358486 | \n 0.366667 | \n 0.375190 | \n 0.324268 | \n 0.502391 | \n 0.172376 | \n 0.336623 | \n 0.010000 | \n ... | \n 0.156667 | \n 0.186667 | \n 0.370000 | \n 0.023333 | \n 0.116667 | \n 0.626667 | \n 0.373333 | \n 0.560000 | \n 0.016667 | \n 0.423333 | \n
\n \n std | \n 0.185183 | \n 0.205063 | \n 0.182478 | \n 0.248182 | \n 0.301724 | \n 0.263019 | \n 0.282111 | \n 0.196395 | \n 0.291129 | \n 0.099665 | \n ... | \n 0.364094 | \n 0.390295 | \n 0.483611 | \n 0.151212 | \n 0.321559 | \n 0.484498 | \n 0.484498 | \n 0.497216 | \n 0.128233 | \n 0.494913 | \n
\n \n min | \n 0.000000 | \n -0.012861 | \n 0.053315 | \n 0.000000 | \n -0.010961 | \n -0.002005 | \n 0.004115 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n ... | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n
\n \n 25% | \n 0.202532 | \n 0.218184 | \n 0.210296 | \n 0.250000 | \n 0.140118 | \n 0.120758 | \n 0.260842 | \n 0.004950 | \n 0.050111 | \n 0.000000 | \n ... | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n
\n \n 50% | \n 0.354430 | \n 0.318228 | \n 0.317914 | \n 0.250000 | \n 0.196655 | \n 0.179367 | \n 0.533080 | \n 0.096535 | \n 0.272192 | \n 0.000000 | \n ... | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 1.000000 | \n 0.000000 | \n 1.000000 | \n 0.000000 | \n 0.000000 | \n
\n \n 75% | \n 0.493671 | \n 0.512339 | \n 0.467488 | \n 0.500000 | \n 0.711157 | \n 0.573869 | \n 0.743959 | \n 0.306931 | \n 0.588318 | \n 0.000000 | \n ... | \n 0.000000 | \n 0.000000 | \n 1.000000 | \n 0.000000 | \n 0.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 0.000000 | \n 1.000000 | \n
\n \n max | \n 0.898734 | \n 0.931183 | \n 0.904543 | \n 1.250000 | \n 1.036499 | \n 1.270440 | \n 1.000422 | \n 0.935644 | \n 0.956517 | \n 1.000000 | \n ... | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n
\n \n
\n
8 rows × 121 columns
\n
"
},
"metadata": {},
"execution_count": 8
}
],
"source": [
"pd.DataFrame(Xscore).describe()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.2"
]
},
"metadata": {},
"execution_count": 9
}
],
"source": [
"from sklearn.model_selection import train_test_split \n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
"len(X_test)/len(X)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" 0 1 2 3 4 5 \\\n",
"count 800.000000 800.000000 800.000000 800.000000 800.000000 800.000000 \n",
"mean 0.344747 0.373978 0.352949 0.368750 0.368131 0.318731 \n",
"std 0.185341 0.189740 0.180912 0.234917 0.286502 0.249066 \n",
"min 0.000000 0.010774 0.000000 0.000000 0.000000 0.000000 \n",
"25% 0.202532 0.235439 0.215002 0.250000 0.138178 0.119444 \n",
"50% 0.341772 0.335903 0.328045 0.250000 0.199066 0.180445 \n",
"75% 0.468354 0.488570 0.460938 0.500000 0.694037 0.581207 \n",
"max 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" 6 7 8 9 ... 111 \\\n",
"count 800.000000 800.000000 800.000000 800.000000 ... 800.000000 \n",
"mean 0.506860 0.197333 0.358045 0.006250 ... 0.172500 \n",
"std 0.288273 0.201852 0.298163 0.078859 ... 0.378051 \n",
"min 0.001688 0.000000 0.000000 0.000000 ... 0.000000 \n",
"25% 0.266092 0.004950 0.057376 0.000000 ... 0.000000 \n",
"50% 0.499156 0.153465 0.312971 0.000000 ... 0.000000 \n",
"75% 0.768756 0.336634 0.627559 0.000000 ... 0.000000 \n",
"max 1.000000 1.000000 1.000000 1.000000 ... 1.000000 \n",
"\n",
" 112 113 114 115 116 117 \\\n",
"count 800.000000 800.000000 800.000000 800.000000 800.00000 800.00000 \n",
"mean 0.165000 0.342500 0.073750 0.122500 0.63375 0.36625 \n",
"std 0.371413 0.474843 0.261527 0.328068 0.48208 0.48208 \n",
"min 0.000000 0.000000 0.000000 0.000000 0.00000 0.00000 \n",
"25% 0.000000 0.000000 0.000000 0.000000 0.00000 0.00000 \n",
"50% 0.000000 0.000000 0.000000 0.000000 1.00000 0.00000 \n",
"75% 0.000000 1.000000 0.000000 0.000000 1.00000 1.00000 \n",
"max 1.000000 1.000000 1.000000 1.000000 1.00000 1.00000 \n",
"\n",
" 118 119 120 \n",
"count 800.000000 800.000000 800.000000 \n",
"mean 0.580000 0.027500 0.392500 \n",
"std 0.493867 0.163637 0.488613 \n",
"min 0.000000 0.000000 0.000000 \n",
"25% 0.000000 0.000000 0.000000 \n",
"50% 1.000000 0.000000 0.000000 \n",
"75% 1.000000 0.000000 1.000000 \n",
"max 1.000000 1.000000 1.000000 \n",
"\n",
"[8 rows x 121 columns]"
],
"text/html": "\n\n
\n \n \n | \n 0 | \n 1 | \n 2 | \n 3 | \n 4 | \n 5 | \n 6 | \n 7 | \n 8 | \n 9 | \n ... | \n 111 | \n 112 | \n 113 | \n 114 | \n 115 | \n 116 | \n 117 | \n 118 | \n 119 | \n 120 | \n
\n \n \n \n count | \n 800.000000 | \n 800.000000 | \n 800.000000 | \n 800.000000 | \n 800.000000 | \n 800.000000 | \n 800.000000 | \n 800.000000 | \n 800.000000 | \n 800.000000 | \n ... | \n 800.000000 | \n 800.000000 | \n 800.000000 | \n 800.000000 | \n 800.000000 | \n 800.00000 | \n 800.00000 | \n 800.000000 | \n 800.000000 | \n 800.000000 | \n
\n \n mean | \n 0.344747 | \n 0.373978 | \n 0.352949 | \n 0.368750 | \n 0.368131 | \n 0.318731 | \n 0.506860 | \n 0.197333 | \n 0.358045 | \n 0.006250 | \n ... | \n 0.172500 | \n 0.165000 | \n 0.342500 | \n 0.073750 | \n 0.122500 | \n 0.63375 | \n 0.36625 | \n 0.580000 | \n 0.027500 | \n 0.392500 | \n
\n \n std | \n 0.185341 | \n 0.189740 | \n 0.180912 | \n 0.234917 | \n 0.286502 | \n 0.249066 | \n 0.288273 | \n 0.201852 | \n 0.298163 | \n 0.078859 | \n ... | \n 0.378051 | \n 0.371413 | \n 0.474843 | \n 0.261527 | \n 0.328068 | \n 0.48208 | \n 0.48208 | \n 0.493867 | \n 0.163637 | \n 0.488613 | \n
\n \n min | \n 0.000000 | \n 0.010774 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.001688 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n ... | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.00000 | \n 0.00000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n
\n \n 25% | \n 0.202532 | \n 0.235439 | \n 0.215002 | \n 0.250000 | \n 0.138178 | \n 0.119444 | \n 0.266092 | \n 0.004950 | \n 0.057376 | \n 0.000000 | \n ... | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.00000 | \n 0.00000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n
\n \n 50% | \n 0.341772 | \n 0.335903 | \n 0.328045 | \n 0.250000 | \n 0.199066 | \n 0.180445 | \n 0.499156 | \n 0.153465 | \n 0.312971 | \n 0.000000 | \n ... | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 1.00000 | \n 0.00000 | \n 1.000000 | \n 0.000000 | \n 0.000000 | \n
\n \n 75% | \n 0.468354 | \n 0.488570 | \n 0.460938 | \n 0.500000 | \n 0.694037 | \n 0.581207 | \n 0.768756 | \n 0.336634 | \n 0.627559 | \n 0.000000 | \n ... | \n 0.000000 | \n 0.000000 | \n 1.000000 | \n 0.000000 | \n 0.000000 | \n 1.00000 | \n 1.00000 | \n 1.000000 | \n 0.000000 | \n 1.000000 | \n
\n \n max | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n ... | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.00000 | \n 1.00000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n
\n \n
\n
8 rows × 121 columns
\n
"
},
"metadata": {},
"execution_count": 10
}
],
"source": [
"pd.DataFrame(X_train).describe()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" 0 1 2 3 4 5 \\\n",
"count 200.000000 200.000000 200.000000 200.000000 200.000000 200.000000 \n",
"mean 0.342405 0.375793 0.338414 0.390000 0.361484 0.315430 \n",
"std 0.190125 0.200777 0.158461 0.252674 0.293059 0.251531 \n",
"min 0.000000 0.000000 0.053295 0.000000 0.016498 0.028742 \n",
"25% 0.215190 0.235506 0.231868 0.250000 0.129421 0.111909 \n",
"50% 0.341772 0.319541 0.310498 0.500000 0.185242 0.179564 \n",
"75% 0.443038 0.518955 0.424106 0.500000 0.707051 0.580143 \n",
"max 0.949367 0.982291 0.834294 1.000000 0.890956 0.831921 \n",
"\n",
" 6 7 8 9 ... 111 \\\n",
"count 200.000000 200.000000 200.000000 200.000000 ... 200.000000 \n",
"mean 0.523317 0.201337 0.394428 0.020000 ... 0.180000 \n",
"std 0.280589 0.226485 0.305180 0.140351 ... 0.385152 \n",
"min 0.000000 0.000000 0.000000 0.000000 ... 0.000000 \n",
"25% 0.287802 0.004950 0.077871 0.000000 ... 0.000000 \n",
"50% 0.532605 0.123762 0.390550 0.000000 ... 0.000000 \n",
"75% 0.772475 0.329208 0.671148 0.000000 ... 0.000000 \n",
"max 0.996307 0.930693 0.968607 1.000000 ... 1.000000 \n",
"\n",
" 112 113 114 115 116 117 \\\n",
"count 200.000000 200.000000 200.000000 200.000000 200.000000 200.000000 \n",
"mean 0.150000 0.335000 0.040000 0.135000 0.635000 0.365000 \n",
"std 0.357967 0.473175 0.196451 0.342581 0.482638 0.482638 \n",
"min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"25% 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"50% 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 \n",
"75% 0.000000 1.000000 0.000000 0.000000 1.000000 1.000000 \n",
"max 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" 118 119 120 \n",
"count 200.000000 200.000000 200.000000 \n",
"mean 0.610000 0.030000 0.360000 \n",
"std 0.488974 0.171015 0.481205 \n",
"min 0.000000 0.000000 0.000000 \n",
"25% 0.000000 0.000000 0.000000 \n",
"50% 1.000000 0.000000 0.000000 \n",
"75% 1.000000 0.000000 1.000000 \n",
"max 1.000000 1.000000 1.000000 \n",
"\n",
"[8 rows x 121 columns]"
],
"text/html": "\n\n
\n \n \n | \n 0 | \n 1 | \n 2 | \n 3 | \n 4 | \n 5 | \n 6 | \n 7 | \n 8 | \n 9 | \n ... | \n 111 | \n 112 | \n 113 | \n 114 | \n 115 | \n 116 | \n 117 | \n 118 | \n 119 | \n 120 | \n
\n \n \n \n count | \n 200.000000 | \n 200.000000 | \n 200.000000 | \n 200.000000 | \n 200.000000 | \n 200.000000 | \n 200.000000 | \n 200.000000 | \n 200.000000 | \n 200.000000 | \n ... | \n 200.000000 | \n 200.000000 | \n 200.000000 | \n 200.000000 | \n 200.000000 | \n 200.000000 | \n 200.000000 | \n 200.000000 | \n 200.000000 | \n 200.000000 | \n
\n \n mean | \n 0.342405 | \n 0.375793 | \n 0.338414 | \n 0.390000 | \n 0.361484 | \n 0.315430 | \n 0.523317 | \n 0.201337 | \n 0.394428 | \n 0.020000 | \n ... | \n 0.180000 | \n 0.150000 | \n 0.335000 | \n 0.040000 | \n 0.135000 | \n 0.635000 | \n 0.365000 | \n 0.610000 | \n 0.030000 | \n 0.360000 | \n
\n \n std | \n 0.190125 | \n 0.200777 | \n 0.158461 | \n 0.252674 | \n 0.293059 | \n 0.251531 | \n 0.280589 | \n 0.226485 | \n 0.305180 | \n 0.140351 | \n ... | \n 0.385152 | \n 0.357967 | \n 0.473175 | \n 0.196451 | \n 0.342581 | \n 0.482638 | \n 0.482638 | \n 0.488974 | \n 0.171015 | \n 0.481205 | \n
\n \n min | \n 0.000000 | \n 0.000000 | \n 0.053295 | \n 0.000000 | \n 0.016498 | \n 0.028742 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n ... | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n
\n \n 25% | \n 0.215190 | \n 0.235506 | \n 0.231868 | \n 0.250000 | \n 0.129421 | \n 0.111909 | \n 0.287802 | \n 0.004950 | \n 0.077871 | \n 0.000000 | \n ... | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n
\n \n 50% | \n 0.341772 | \n 0.319541 | \n 0.310498 | \n 0.500000 | \n 0.185242 | \n 0.179564 | \n 0.532605 | \n 0.123762 | \n 0.390550 | \n 0.000000 | \n ... | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 1.000000 | \n 0.000000 | \n 1.000000 | \n 0.000000 | \n 0.000000 | \n
\n \n 75% | \n 0.443038 | \n 0.518955 | \n 0.424106 | \n 0.500000 | \n 0.707051 | \n 0.580143 | \n 0.772475 | \n 0.329208 | \n 0.671148 | \n 0.000000 | \n ... | \n 0.000000 | \n 0.000000 | \n 1.000000 | \n 0.000000 | \n 0.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 0.000000 | \n 1.000000 | \n
\n \n max | \n 0.949367 | \n 0.982291 | \n 0.834294 | \n 1.000000 | \n 0.890956 | \n 0.831921 | \n 0.996307 | \n 0.930693 | \n 0.968607 | \n 1.000000 | \n ... | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n
\n \n
\n
8 rows × 121 columns
\n
"
},
"metadata": {},
"execution_count": 11
}
],
"source": [
"pd.DataFrame(X_test).describe()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" target\n",
"count 800.000000\n",
"mean 0.170000\n",
"std 0.375868\n",
"min 0.000000\n",
"25% 0.000000\n",
"50% 0.000000\n",
"75% 0.000000\n",
"max 1.000000"
],
"text/html": "\n\n
\n \n \n | \n target | \n
\n \n \n \n count | \n 800.000000 | \n
\n \n mean | \n 0.170000 | \n
\n \n std | \n 0.375868 | \n
\n \n min | \n 0.000000 | \n
\n \n 25% | \n 0.000000 | \n
\n \n 50% | \n 0.000000 | \n
\n \n 75% | \n 0.000000 | \n
\n \n max | \n 1.000000 | \n
\n \n
\n
"
},
"metadata": {},
"execution_count": 12
}
],
"source": [
"pd.DataFrame(y_train).describe()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" target\n",
"count 200.000000\n",
"mean 0.170000\n",
"std 0.376575\n",
"min 0.000000\n",
"25% 0.000000\n",
"50% 0.000000\n",
"75% 0.000000\n",
"max 1.000000"
],
"text/html": "\n\n
\n \n \n | \n target | \n
\n \n \n \n count | \n 200.000000 | \n
\n \n mean | \n 0.170000 | \n
\n \n std | \n 0.376575 | \n
\n \n min | \n 0.000000 | \n
\n \n 25% | \n 0.000000 | \n
\n \n 50% | \n 0.000000 | \n
\n \n 75% | \n 0.000000 | \n
\n \n max | \n 1.000000 | \n
\n \n
\n
"
},
"metadata": {},
"execution_count": 13
}
],
"source": [
"pd.DataFrame(y_test).describe()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"target\n0.0 664\n1.0 136\ndtype: int64 target\n1.0 664\n0.0 664\ndtype: int64\n"
]
}
],
"source": [
"from imblearn.over_sampling import SMOTE\n",
"\n",
"sm = SMOTE(random_state=42)\n",
"x_train_res, y_train_res = sm.fit_sample(X_train, y_train)\n",
"\n",
"print(y_train.value_counts(), y_train_res.value_counts())"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" precision recall f1-score support\n\n non résilié 0.93 0.92 0.92 166\n resilié 0.62 0.68 0.65 34\n\n accuracy 0.88 200\n macro avg 0.78 0.80 0.79 200\nweighted avg 0.88 0.88 0.88 200\n\n"
]
}
],
"source": [
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.metrics import classification_report\n",
"\n",
"model_RF = RandomForestClassifier(n_estimators=1000)\n",
"model_RF.fit(x_train_res , y_train_res)\n",
"y_pred = model_RF.predict(X_test)\n",
"target_names = ['non résilié', 'resilié'] # 1 résilié, 0 pas résilié\n",
"print(classification_report(y_test, y_pred,target_names=target_names))"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" precision recall f1-score support\n\n non résilié 0.94 0.91 0.92 166\n resilié 0.62 0.71 0.66 34\n\n accuracy 0.88 200\n macro avg 0.78 0.81 0.79 200\nweighted avg 0.88 0.88 0.88 200\n\n"
]
}
],
"source": [
"from sklearn.ensemble import GradientBoostingClassifier\n",
"\n",
"model_GB = GradientBoostingClassifier(n_estimators=1000)\n",
"model_GB.fit(x_train_res , y_train_res)\n",
"y_pred = model_GB.predict(X_test)\n",
"target_names = ['non résilié', 'resilié'] \n",
"print(classification_report(y_test, y_pred,target_names=target_names))"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" precision recall f1-score support\n\n non résilié 0.93 0.88 0.90 166\n resilié 0.53 0.68 0.60 34\n\n accuracy 0.84 200\n macro avg 0.73 0.78 0.75 200\nweighted avg 0.86 0.84 0.85 200\n\n"
]
}
],
"source": [
"from sklearn.ensemble import AdaBoostClassifier\n",
"\n",
"model_AB = AdaBoostClassifier()\n",
"model_AB.fit(x_train_res , y_train_res)\n",
"y_pred = model_AB.predict(X_test)\n",
"target_names = ['non résilié', 'resilié'] \n",
"print(classification_report(y_test, y_pred,target_names=target_names))"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"tags": []
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"| iter | target | learni... | n_esti... |\n",
"-------------------------------------------------\n",
"| \u001b[0m 1 \u001b[0m | \u001b[0m 0.7089 \u001b[0m | \u001b[0m-0.008298\u001b[0m | \u001b[0m 24.41 \u001b[0m |\n",
"| \u001b[0m 2 \u001b[0m | \u001b[0m 0.6923 \u001b[0m | \u001b[0m-0.04999 \u001b[0m | \u001b[0m 16.05 \u001b[0m |\n",
"| \u001b[0m 3 \u001b[0m | \u001b[0m 0.7073 \u001b[0m | \u001b[0m-0.03532 \u001b[0m | \u001b[0m 11.85 \u001b[0m |\n",
"| \u001b[0m 4 \u001b[0m | \u001b[0m 0.6897 \u001b[0m | \u001b[0m-0.03137 \u001b[0m | \u001b[0m 16.91 \u001b[0m |\n",
"| \u001b[0m 5 \u001b[0m | \u001b[0m 0.6829 \u001b[0m | \u001b[0m-0.01032 \u001b[0m | \u001b[0m 20.78 \u001b[0m |\n",
"| \u001b[95m 6 \u001b[0m | \u001b[95m 0.72 \u001b[0m | \u001b[95m-0.008081\u001b[0m | \u001b[95m 23.7 \u001b[0m |\n",
"| \u001b[0m 7 \u001b[0m | \u001b[0m 0.6753 \u001b[0m | \u001b[0m-0.02955 \u001b[0m | \u001b[0m 27.56 \u001b[0m |\n",
"| \u001b[0m 8 \u001b[0m | \u001b[0m 0.7105 \u001b[0m | \u001b[0m-0.04726 \u001b[0m | \u001b[0m 23.41 \u001b[0m |\n",
"| \u001b[95m 9 \u001b[0m | \u001b[95m 0.7273 \u001b[0m | \u001b[95m-0.00827 \u001b[0m | \u001b[95m 21.17 \u001b[0m |\n",
"| \u001b[0m 10 \u001b[0m | \u001b[0m 0.7229 \u001b[0m | \u001b[0m-0.03596 \u001b[0m | \u001b[0m 13.96 \u001b[0m |\n",
"| \u001b[0m 11 \u001b[0m | \u001b[0m 0.7179 \u001b[0m | \u001b[0m-0.007011\u001b[0m | \u001b[0m 21.28 \u001b[0m |\n",
"| \u001b[0m 12 \u001b[0m | \u001b[0m 0.7073 \u001b[0m | \u001b[0m 0.01039 \u001b[0m | \u001b[0m 13.78 \u001b[0m |\n",
"| \u001b[95m 13 \u001b[0m | \u001b[95m 0.7317 \u001b[0m | \u001b[95m-0.03507 \u001b[0m | \u001b[95m 22.58 \u001b[0m |\n",
"| \u001b[0m 14 \u001b[0m | \u001b[0m 0.7013 \u001b[0m | \u001b[0m 0.01136 \u001b[0m | \u001b[0m 22.72 \u001b[0m |\n",
"| \u001b[0m 15 \u001b[0m | \u001b[0m 0.6977 \u001b[0m | \u001b[0m-0.01044 \u001b[0m | \u001b[0m 13.23 \u001b[0m |\n",
"| \u001b[0m 16 \u001b[0m | \u001b[0m 0.7105 \u001b[0m | \u001b[0m-0.05 \u001b[0m | \u001b[0m 22.49 \u001b[0m |\n",
"| \u001b[0m 17 \u001b[0m | \u001b[0m 0.6829 \u001b[0m | \u001b[0m-0.01468 \u001b[0m | \u001b[0m 21.17 \u001b[0m |\n",
"| \u001b[0m 18 \u001b[0m | \u001b[0m 0.72 \u001b[0m | \u001b[0m 0.01099 \u001b[0m | \u001b[0m 17.86 \u001b[0m |\n",
"| \u001b[0m 19 \u001b[0m | \u001b[0m 0.6923 \u001b[0m | \u001b[0m 0.00532 \u001b[0m | \u001b[0m 24.04 \u001b[0m |\n",
"| \u001b[0m 20 \u001b[0m | \u001b[0m 0.6966 \u001b[0m | \u001b[0m-0.03516 \u001b[0m | \u001b[0m 18.13 \u001b[0m |\n",
"| \u001b[0m 21 \u001b[0m | \u001b[0m 0.7073 \u001b[0m | \u001b[0m 0.01034 \u001b[0m | \u001b[0m 13.78 \u001b[0m |\n",
"| \u001b[0m 22 \u001b[0m | \u001b[0m 0.7273 \u001b[0m | \u001b[0m-0.03031 \u001b[0m | \u001b[0m 22.57 \u001b[0m |\n",
"| \u001b[0m 23 \u001b[0m | \u001b[0m 0.6829 \u001b[0m | \u001b[0m-0.01265 \u001b[0m | \u001b[0m 21.18 \u001b[0m |\n",
"| \u001b[0m 24 \u001b[0m | \u001b[0m 0.7317 \u001b[0m | \u001b[0m-0.03232 \u001b[0m | \u001b[0m 22.58 \u001b[0m |\n",
"| \u001b[0m 25 \u001b[0m | \u001b[0m 0.6835 \u001b[0m | \u001b[0m-0.03319 \u001b[0m | \u001b[0m 23.68 \u001b[0m |\n",
"| \u001b[0m 26 \u001b[0m | \u001b[0m 0.7229 \u001b[0m | \u001b[0m-0.03413 \u001b[0m | \u001b[0m 13.97 \u001b[0m |\n",
"| \u001b[0m 27 \u001b[0m | \u001b[0m 0.7297 \u001b[0m | \u001b[0m-0.02612 \u001b[0m | \u001b[0m 22.58 \u001b[0m |\n",
"| \u001b[0m 28 \u001b[0m | \u001b[0m 0.7013 \u001b[0m | \u001b[0m 0.000315\u001b[0m | \u001b[0m 21.18 \u001b[0m |\n",
"| \u001b[0m 29 \u001b[0m | \u001b[0m 0.7073 \u001b[0m | \u001b[0m-0.02756 \u001b[0m | \u001b[0m 22.59 \u001b[0m |\n",
"| \u001b[0m 30 \u001b[0m | \u001b[0m 0.72 \u001b[0m | \u001b[0m 0.000387\u001b[0m | \u001b[0m 23.7 \u001b[0m |\n",
"| \u001b[0m 31 \u001b[0m | \u001b[0m 0.6923 \u001b[0m | \u001b[0m 0.003145\u001b[0m | \u001b[0m 23.71 \u001b[0m |\n",
"| \u001b[0m 32 \u001b[0m | \u001b[0m 0.7229 \u001b[0m | \u001b[0m-0.02833 \u001b[0m | \u001b[0m 13.96 \u001b[0m |\n",
"| \u001b[0m 33 \u001b[0m | \u001b[0m 0.7105 \u001b[0m | \u001b[0m-0.04397 \u001b[0m | \u001b[0m 22.58 \u001b[0m |\n",
"| \u001b[0m 34 \u001b[0m | \u001b[0m 0.6842 \u001b[0m | \u001b[0m-0.003856\u001b[0m | \u001b[0m 19.76 \u001b[0m |\n",
"| \u001b[0m 35 \u001b[0m | \u001b[0m 0.7059 \u001b[0m | \u001b[0m-0.03591 \u001b[0m | \u001b[0m 22.57 \u001b[0m |\n",
"| \u001b[0m 36 \u001b[0m | \u001b[0m 0.7229 \u001b[0m | \u001b[0m-0.01857 \u001b[0m | \u001b[0m 14.29 \u001b[0m |\n",
"| \u001b[0m 37 \u001b[0m | \u001b[0m 0.72 \u001b[0m | \u001b[0m-0.00727 \u001b[0m | \u001b[0m 23.7 \u001b[0m |\n",
"| \u001b[95m 38 \u001b[0m | \u001b[95m 0.7368 \u001b[0m | \u001b[95m-0.02643 \u001b[0m | \u001b[95m 14.29 \u001b[0m |\n",
"| \u001b[0m 39 \u001b[0m | \u001b[0m 0.6923 \u001b[0m | \u001b[0m-0.01537 \u001b[0m | \u001b[0m 22.58 \u001b[0m |\n",
"| \u001b[0m 40 \u001b[0m | \u001b[0m 0.7229 \u001b[0m | \u001b[0m-0.02017 \u001b[0m | \u001b[0m 14.28 \u001b[0m |\n",
"| \u001b[0m 41 \u001b[0m | \u001b[0m 0.7143 \u001b[0m | \u001b[0m-0.02964 \u001b[0m | \u001b[0m 14.29 \u001b[0m |\n",
"| \u001b[0m 42 \u001b[0m | \u001b[0m 0.7143 \u001b[0m | \u001b[0m-0.03763 \u001b[0m | \u001b[0m 14.28 \u001b[0m |\n",
"| \u001b[0m 43 \u001b[0m | \u001b[0m 0.6977 \u001b[0m | \u001b[0m-0.02414 \u001b[0m | \u001b[0m 14.28 \u001b[0m |\n",
"| \u001b[0m 44 \u001b[0m | \u001b[0m 0.72 \u001b[0m | \u001b[0m-0.004892\u001b[0m | \u001b[0m 23.7 \u001b[0m |\n",
"| \u001b[0m 45 \u001b[0m | \u001b[0m 0.725 \u001b[0m | \u001b[0m-0.006783\u001b[0m | \u001b[0m 27.31 \u001b[0m |\n",
"| \u001b[0m 46 \u001b[0m | \u001b[0m 0.7143 \u001b[0m | \u001b[0m-0.0317 \u001b[0m | \u001b[0m 14.29 \u001b[0m |\n",
"| \u001b[0m 47 \u001b[0m | \u001b[0m 0.7059 \u001b[0m | \u001b[0m-0.03789 \u001b[0m | \u001b[0m 22.58 \u001b[0m |\n",
"| \u001b[0m 48 \u001b[0m | \u001b[0m 0.7229 \u001b[0m | \u001b[0m-0.02697 \u001b[0m | \u001b[0m 13.96 \u001b[0m |\n",
"| \u001b[0m 49 \u001b[0m | \u001b[0m 0.7273 \u001b[0m | \u001b[0m-0.02878 \u001b[0m | \u001b[0m 22.58 \u001b[0m |\n",
"| \u001b[0m 50 \u001b[0m | \u001b[0m 0.725 \u001b[0m | \u001b[0m-0.004436\u001b[0m | \u001b[0m 27.31 \u001b[0m |\n",
"| \u001b[0m 51 \u001b[0m | \u001b[0m 0.7179 \u001b[0m | \u001b[0m-0.006697\u001b[0m | \u001b[0m 21.28 \u001b[0m |\n",
"| \u001b[0m 52 \u001b[0m | \u001b[0m 0.7273 \u001b[0m | \u001b[0m-0.02933 \u001b[0m | \u001b[0m 22.59 \u001b[0m |\n",
"| \u001b[0m 53 \u001b[0m | \u001b[0m 0.7143 \u001b[0m | \u001b[0m-0.01339 \u001b[0m | \u001b[0m 14.28 \u001b[0m |\n",
"| \u001b[0m 54 \u001b[0m | \u001b[0m 0.6835 \u001b[0m | \u001b[0m-0.03413 \u001b[0m | \u001b[0m 23.39 \u001b[0m |\n",
"| \u001b[0m 55 \u001b[0m | \u001b[0m 0.6923 \u001b[0m | \u001b[0m-0.02074 \u001b[0m | \u001b[0m 29.96 \u001b[0m |\n",
"| \u001b[0m 56 \u001b[0m | \u001b[0m 0.7229 \u001b[0m | \u001b[0m-0.03278 \u001b[0m | \u001b[0m 13.96 \u001b[0m |\n",
"| \u001b[0m 57 \u001b[0m | \u001b[0m 0.6977 \u001b[0m | \u001b[0m-0.02405 \u001b[0m | \u001b[0m 14.29 \u001b[0m |\n",
"| \u001b[0m 58 \u001b[0m | \u001b[0m 0.7297 \u001b[0m | \u001b[0m-0.02543 \u001b[0m | \u001b[0m 22.58 \u001b[0m |\n",
"| \u001b[0m 59 \u001b[0m | \u001b[0m 0.7229 \u001b[0m | \u001b[0m-0.03681 \u001b[0m | \u001b[0m 13.97 \u001b[0m |\n",
"| \u001b[0m 60 \u001b[0m | \u001b[0m 0.6494 \u001b[0m | \u001b[0m-0.009551\u001b[0m | \u001b[0m 27.31 \u001b[0m |\n",
"| \u001b[0m 61 \u001b[0m | \u001b[0m 0.6829 \u001b[0m | \u001b[0m 0.006223\u001b[0m | \u001b[0m 16.56 \u001b[0m |\n",
"| \u001b[0m 62 \u001b[0m | \u001b[0m 0.6933 \u001b[0m | \u001b[0m 0.02765 \u001b[0m | \u001b[0m 13.92 \u001b[0m |\n",
"| \u001b[0m 63 \u001b[0m | \u001b[0m 0.7317 \u001b[0m | \u001b[0m-0.03269 \u001b[0m | \u001b[0m 22.59 \u001b[0m |\n",
"| \u001b[0m 64 \u001b[0m | \u001b[0m 0.6753 \u001b[0m | \u001b[0m-0.02335 \u001b[0m | \u001b[0m 22.58 \u001b[0m |\n",
"| \u001b[0m 65 \u001b[0m | \u001b[0m 0.7073 \u001b[0m | \u001b[0m-0.03869 \u001b[0m | \u001b[0m 13.97 \u001b[0m |\n",
"| \u001b[0m 66 \u001b[0m | \u001b[0m 0.7229 \u001b[0m | \u001b[0m-0.03417 \u001b[0m | \u001b[0m 13.96 \u001b[0m |\n",
"| \u001b[0m 67 \u001b[0m | \u001b[0m 0.7229 \u001b[0m | \u001b[0m-0.0304 \u001b[0m | \u001b[0m 13.96 \u001b[0m |\n",
"| \u001b[0m 68 \u001b[0m | \u001b[0m 0.7105 \u001b[0m | \u001b[0m 0.02672 \u001b[0m | \u001b[0m 23.46 \u001b[0m |\n",
"| \u001b[0m 69 \u001b[0m | \u001b[0m 0.7073 \u001b[0m | \u001b[0m-0.03992 \u001b[0m | \u001b[0m 13.97 \u001b[0m |\n",
"| \u001b[0m 70 \u001b[0m | \u001b[0m 0.7317 \u001b[0m | \u001b[0m-0.03472 \u001b[0m | \u001b[0m 22.58 \u001b[0m |\n",
"| \u001b[0m 71 \u001b[0m | \u001b[0m 0.6753 \u001b[0m | \u001b[0m 0.01585 \u001b[0m | \u001b[0m 19.81 \u001b[0m |\n",
"| \u001b[0m 72 \u001b[0m | \u001b[0m 0.7297 \u001b[0m | \u001b[0m-0.02563 \u001b[0m | \u001b[0m 22.57 \u001b[0m |\n",
"| \u001b[0m 73 \u001b[0m | \u001b[0m 0.7297 \u001b[0m | \u001b[0m-0.02604 \u001b[0m | \u001b[0m 22.58 \u001b[0m |\n",
"| \u001b[0m 74 \u001b[0m | \u001b[0m 0.7317 \u001b[0m | \u001b[0m-0.03239 \u001b[0m | \u001b[0m 22.57 \u001b[0m |\n",
"| \u001b[0m 75 \u001b[0m | \u001b[0m 0.7105 \u001b[0m | \u001b[0m-0.02205 \u001b[0m | \u001b[0m 22.57 \u001b[0m |\n",
"| \u001b[0m 76 \u001b[0m | \u001b[0m 0.7209 \u001b[0m | \u001b[0m-0.02868 \u001b[0m | \u001b[0m 19.62 \u001b[0m |\n",
"| \u001b[0m 77 \u001b[0m | \u001b[0m 0.7059 \u001b[0m | \u001b[0m-0.03574 \u001b[0m | \u001b[0m 22.57 \u001b[0m |\n",
"| \u001b[0m 78 \u001b[0m | \u001b[0m 0.725 \u001b[0m | \u001b[0m-0.002359\u001b[0m | \u001b[0m 27.31 \u001b[0m |\n",
"| \u001b[0m 79 \u001b[0m | \u001b[0m 0.7273 \u001b[0m | \u001b[0m-0.02864 \u001b[0m | \u001b[0m 22.57 \u001b[0m |\n",
"| \u001b[0m 80 \u001b[0m | \u001b[0m 0.6753 \u001b[0m | \u001b[0m 0.03513 \u001b[0m | \u001b[0m 20.67 \u001b[0m |\n",
"| \u001b[0m 81 \u001b[0m | \u001b[0m 0.7143 \u001b[0m | \u001b[0m-0.02885 \u001b[0m | \u001b[0m 14.29 \u001b[0m |\n",
"| \u001b[0m 82 \u001b[0m | \u001b[0m 0.72 \u001b[0m | \u001b[0m-0.000691\u001b[0m | \u001b[0m 23.7 \u001b[0m |\n",
"| \u001b[0m 83 \u001b[0m | \u001b[0m 0.7143 \u001b[0m | \u001b[0m-0.02838 \u001b[0m | \u001b[0m 14.28 \u001b[0m |\n",
"| \u001b[0m 84 \u001b[0m | \u001b[0m 0.7229 \u001b[0m | \u001b[0m-0.0215 \u001b[0m | \u001b[0m 14.27 \u001b[0m |\n",
"| \u001b[0m 85 \u001b[0m | \u001b[0m 0.725 \u001b[0m | \u001b[0m-0.001135\u001b[0m | \u001b[0m 27.31 \u001b[0m |\n",
"| \u001b[0m 86 \u001b[0m | \u001b[0m 0.7273 \u001b[0m | \u001b[0m-0.02949 \u001b[0m | \u001b[0m 22.58 \u001b[0m |\n",
"| \u001b[0m 87 \u001b[0m | \u001b[0m 0.7143 \u001b[0m | \u001b[0m-0.01783 \u001b[0m | \u001b[0m 14.27 \u001b[0m |\n",
"| \u001b[0m 88 \u001b[0m | \u001b[0m 0.7143 \u001b[0m | \u001b[0m-0.01541 \u001b[0m | \u001b[0m 14.28 \u001b[0m |\n",
"| \u001b[0m 89 \u001b[0m | \u001b[0m 0.7229 \u001b[0m | \u001b[0m-0.03371 \u001b[0m | \u001b[0m 13.97 \u001b[0m |\n",
"| \u001b[0m 90 \u001b[0m | \u001b[0m 0.6667 \u001b[0m | \u001b[0m 0.03289 \u001b[0m | \u001b[0m 29.7 \u001b[0m |\n",
"| \u001b[0m 91 \u001b[0m | \u001b[0m 0.7013 \u001b[0m | \u001b[0m-0.004525\u001b[0m | \u001b[0m 21.17 \u001b[0m |\n",
"| \u001b[0m 92 \u001b[0m | \u001b[0m 0.725 \u001b[0m | \u001b[0m-0.006537\u001b[0m | \u001b[0m 27.3 \u001b[0m |\n",
"| \u001b[0m 93 \u001b[0m | \u001b[0m 0.7317 \u001b[0m | \u001b[0m-0.03363 \u001b[0m | \u001b[0m 22.58 \u001b[0m |\n",
"| \u001b[0m 94 \u001b[0m | \u001b[0m 0.72 \u001b[0m | \u001b[0m 0.01181 \u001b[0m | \u001b[0m 17.85 \u001b[0m |\n",
"| \u001b[0m 95 \u001b[0m | \u001b[0m 0.7273 \u001b[0m | \u001b[0m-0.02811 \u001b[0m | \u001b[0m 22.56 \u001b[0m |\n",
"| \u001b[0m 96 \u001b[0m | \u001b[0m 0.7179 \u001b[0m | \u001b[0m 0.03583 \u001b[0m | \u001b[0m 11.03 \u001b[0m |\n",
"| \u001b[0m 97 \u001b[0m | \u001b[0m 0.6988 \u001b[0m | \u001b[0m-0.04844 \u001b[0m | \u001b[0m 11.03 \u001b[0m |\n",
"| \u001b[0m 98 \u001b[0m | \u001b[0m 0.72 \u001b[0m | \u001b[0m-0.005421\u001b[0m | \u001b[0m 23.69 \u001b[0m |\n",
"| \u001b[0m 99 \u001b[0m | \u001b[0m 0.725 \u001b[0m | \u001b[0m-0.003373\u001b[0m | \u001b[0m 27.31 \u001b[0m |\n",
"| \u001b[0m 100 \u001b[0m | \u001b[0m 0.7229 \u001b[0m | \u001b[0m-0.02875 \u001b[0m | \u001b[0m 13.97 \u001b[0m |\n",
"| \u001b[0m 101 \u001b[0m | \u001b[0m 0.7317 \u001b[0m | \u001b[0m-0.0317 \u001b[0m | \u001b[0m 22.57 \u001b[0m |\n",
"| \u001b[0m 102 \u001b[0m | \u001b[0m 0.725 \u001b[0m | \u001b[0m-0.003994\u001b[0m | \u001b[0m 27.3 \u001b[0m |\n",
"| \u001b[0m 103 \u001b[0m | \u001b[0m 0.7317 \u001b[0m | \u001b[0m-0.02606 \u001b[0m | \u001b[0m 13.97 \u001b[0m |\n",
"| \u001b[0m 104 \u001b[0m | \u001b[0m 0.7317 \u001b[0m | \u001b[0m-0.03203 \u001b[0m | \u001b[0m 22.56 \u001b[0m |\n",
"| \u001b[0m 105 \u001b[0m | \u001b[0m 0.6667 \u001b[0m | \u001b[0m-0.01108 \u001b[0m | \u001b[0m 23.7 \u001b[0m |\n",
"| \u001b[0m 106 \u001b[0m | \u001b[0m 0.7229 \u001b[0m | \u001b[0m-0.03034 \u001b[0m | \u001b[0m 13.97 \u001b[0m |\n",
"| \u001b[0m 107 \u001b[0m | \u001b[0m 0.7273 \u001b[0m | \u001b[0m-0.02879 \u001b[0m | \u001b[0m 22.56 \u001b[0m |\n",
"| \u001b[0m 108 \u001b[0m | \u001b[0m 0.725 \u001b[0m | \u001b[0m 0.001284\u001b[0m | \u001b[0m 27.3 \u001b[0m |\n",
"| \u001b[0m 109 \u001b[0m | \u001b[0m 0.6977 \u001b[0m | \u001b[0m-0.02414 \u001b[0m | \u001b[0m 13.96 \u001b[0m |\n",
"| \u001b[0m 110 \u001b[0m | \u001b[0m 0.7229 \u001b[0m | \u001b[0m-0.02835 \u001b[0m | \u001b[0m 13.96 \u001b[0m |\n",
"| \u001b[0m 111 \u001b[0m | \u001b[0m 0.7317 \u001b[0m | \u001b[0m-0.02569 \u001b[0m | \u001b[0m 13.97 \u001b[0m |\n",
"| \u001b[0m 112 \u001b[0m | \u001b[0m 0.7059 \u001b[0m | \u001b[0m-0.03629 \u001b[0m | \u001b[0m 22.56 \u001b[0m |\n",
"| \u001b[0m 113 \u001b[0m | \u001b[0m 0.7317 \u001b[0m | \u001b[0m-0.03362 \u001b[0m | \u001b[0m 22.55 \u001b[0m |\n",
"| \u001b[0m 114 \u001b[0m | \u001b[0m 0.7297 \u001b[0m | \u001b[0m-0.0264 \u001b[0m | \u001b[0m 22.55 \u001b[0m |\n",
"| \u001b[0m 115 \u001b[0m | \u001b[0m 0.7105 \u001b[0m | \u001b[0m-0.02175 \u001b[0m | \u001b[0m 22.55 \u001b[0m |\n",
"| \u001b[0m 116 \u001b[0m | \u001b[0m 0.6753 \u001b[0m | \u001b[0m 0.02889 \u001b[0m | \u001b[0m 20.23 \u001b[0m |\n",
"| \u001b[0m 117 \u001b[0m | \u001b[0m 0.7105 \u001b[0m | \u001b[0m-0.03839 \u001b[0m | \u001b[0m 22.55 \u001b[0m |\n",
"| \u001b[0m 118 \u001b[0m | \u001b[0m 0.6494 \u001b[0m | \u001b[0m-0.00918 \u001b[0m | \u001b[0m 27.3 \u001b[0m |\n",
"| \u001b[0m 119 \u001b[0m | \u001b[0m 0.7027 \u001b[0m | \u001b[0m 0.02998 \u001b[0m | \u001b[0m 27.67 \u001b[0m |\n",
"| \u001b[0m 120 \u001b[0m | \u001b[0m 0.6818 \u001b[0m | \u001b[0m-0.01368 \u001b[0m | \u001b[0m 10.26 \u001b[0m |\n",
"| \u001b[0m 121 \u001b[0m | \u001b[0m 0.7229 \u001b[0m | \u001b[0m-0.02186 \u001b[0m | \u001b[0m 13.98 \u001b[0m |\n",
"| \u001b[0m 122 \u001b[0m | \u001b[0m 0.7273 \u001b[0m | \u001b[0m-0.02985 \u001b[0m | \u001b[0m 22.55 \u001b[0m |\n",
"| \u001b[0m 123 \u001b[0m | \u001b[0m 0.72 \u001b[0m | \u001b[0m 0.01326 \u001b[0m | \u001b[0m 17.85 \u001b[0m |\n",
"| \u001b[0m 124 \u001b[0m | \u001b[0m 0.7073 \u001b[0m | \u001b[0m-0.03763 \u001b[0m | \u001b[0m 12.63 \u001b[0m |\n",
"| \u001b[0m 125 \u001b[0m | \u001b[0m 0.7317 \u001b[0m | \u001b[0m-0.02649 \u001b[0m | \u001b[0m 13.98 \u001b[0m |\n",
"| \u001b[0m 126 \u001b[0m | \u001b[0m 0.725 \u001b[0m | \u001b[0m 0.000708\u001b[0m | \u001b[0m 27.31 \u001b[0m |\n",
"| \u001b[0m 127 \u001b[0m | \u001b[0m 0.7073 \u001b[0m | \u001b[0m-0.04029 \u001b[0m | \u001b[0m 13.49 \u001b[0m |\n",
"| \u001b[0m 128 \u001b[0m | \u001b[0m 0.6818 \u001b[0m | \u001b[0m-0.0342 \u001b[0m | \u001b[0m 19.62 \u001b[0m |\n",
"| \u001b[0m 129 \u001b[0m | \u001b[0m 0.6977 \u001b[0m | \u001b[0m-0.02281 \u001b[0m | \u001b[0m 13.97 \u001b[0m |\n",
"| \u001b[0m 130 \u001b[0m | \u001b[0m 0.7342 \u001b[0m | \u001b[0m-0.04295 \u001b[0m | \u001b[0m 20.19 \u001b[0m |\n",
"| \u001b[0m 131 \u001b[0m | \u001b[0m 0.7342 \u001b[0m | \u001b[0m-0.04592 \u001b[0m | \u001b[0m 20.19 \u001b[0m |\n",
"| \u001b[0m 132 \u001b[0m | \u001b[0m 0.7342 \u001b[0m | \u001b[0m-0.0392 \u001b[0m | \u001b[0m 20.18 \u001b[0m |\n",
"| \u001b[0m 133 \u001b[0m | \u001b[0m 0.6842 \u001b[0m | \u001b[0m 0.002835\u001b[0m | \u001b[0m 11.47 \u001b[0m |\n",
"| \u001b[0m 134 \u001b[0m | \u001b[0m 0.7105 \u001b[0m | \u001b[0m-0.022 \u001b[0m | \u001b[0m 22.56 \u001b[0m |\n",
"| \u001b[0m 135 \u001b[0m | \u001b[0m 0.7317 \u001b[0m | \u001b[0m-0.03169 \u001b[0m | \u001b[0m 22.55 \u001b[0m |\n",
"| \u001b[0m 136 \u001b[0m | \u001b[0m 0.7342 \u001b[0m | \u001b[0m-0.04505 \u001b[0m | \u001b[0m 20.19 \u001b[0m |\n",
"| \u001b[0m 137 \u001b[0m | \u001b[0m 0.6923 \u001b[0m | \u001b[0m-0.01419 \u001b[0m | \u001b[0m 22.83 \u001b[0m |\n",
"| \u001b[0m 138 \u001b[0m | \u001b[0m 0.6966 \u001b[0m | \u001b[0m-0.03732 \u001b[0m | \u001b[0m 18.91 \u001b[0m |\n",
"| \u001b[0m 139 \u001b[0m | \u001b[0m 0.6977 \u001b[0m | \u001b[0m-0.03507 \u001b[0m | \u001b[0m 20.18 \u001b[0m |\n",
"| \u001b[0m 140 \u001b[0m | \u001b[0m 0.7229 \u001b[0m | \u001b[0m-0.02949 \u001b[0m | \u001b[0m 13.97 \u001b[0m |\n",
"| \u001b[0m 141 \u001b[0m | \u001b[0m 0.7143 \u001b[0m | \u001b[0m-0.01192 \u001b[0m | \u001b[0m 14.29 \u001b[0m |\n",
"| \u001b[95m 142 \u001b[0m | \u001b[95m 0.7436 \u001b[0m | \u001b[95m-0.04955 \u001b[0m | \u001b[95m 20.19 \u001b[0m |\n",
"| \u001b[0m 143 \u001b[0m | \u001b[0m 0.7317 \u001b[0m | \u001b[0m-0.02579 \u001b[0m | \u001b[0m 13.98 \u001b[0m |\n",
"| \u001b[0m 144 \u001b[0m | \u001b[0m 0.7059 \u001b[0m | \u001b[0m-0.03712 \u001b[0m | \u001b[0m 22.55 \u001b[0m |\n",
"| \u001b[0m 145 \u001b[0m | \u001b[0m 0.7436 \u001b[0m | \u001b[0m-0.04984 \u001b[0m | \u001b[0m 20.19 \u001b[0m |\n",
"| \u001b[0m 146 \u001b[0m | \u001b[0m 0.7342 \u001b[0m | \u001b[0m-0.04167 \u001b[0m | \u001b[0m 20.18 \u001b[0m |\n",
"| \u001b[0m 147 \u001b[0m | \u001b[0m 0.7105 \u001b[0m | \u001b[0m-0.03834 \u001b[0m | \u001b[0m 22.59 \u001b[0m |\n",
"| \u001b[0m 148 \u001b[0m | \u001b[0m 0.7342 \u001b[0m | \u001b[0m-0.04779 \u001b[0m | \u001b[0m 20.2 \u001b[0m |\n",
"| \u001b[0m 149 \u001b[0m | \u001b[0m 0.6494 \u001b[0m | \u001b[0m-0.0114 \u001b[0m | \u001b[0m 27.3 \u001b[0m |\n",
"| \u001b[0m 150 \u001b[0m | \u001b[0m 0.7342 \u001b[0m | \u001b[0m-0.04754 \u001b[0m | \u001b[0m 20.18 \u001b[0m |\n",
"| \u001b[0m 151 \u001b[0m | \u001b[0m 0.7229 \u001b[0m | \u001b[0m-0.02047 \u001b[0m | \u001b[0m 11.35 \u001b[0m |\n",
"| \u001b[0m 152 \u001b[0m | \u001b[0m 0.7229 \u001b[0m | \u001b[0m-0.02718 \u001b[0m | \u001b[0m 13.98 \u001b[0m |\n",
"| \u001b[0m 153 \u001b[0m | \u001b[0m 0.7342 \u001b[0m | \u001b[0m-0.04512 \u001b[0m | \u001b[0m 20.18 \u001b[0m |\n",
"| \u001b[0m 154 \u001b[0m | \u001b[0m 0.7294 \u001b[0m | \u001b[0m-0.0198 \u001b[0m | \u001b[0m 19.62 \u001b[0m |\n",
"| \u001b[0m 155 \u001b[0m | \u001b[0m 0.7294 \u001b[0m | \u001b[0m-0.02184 \u001b[0m | \u001b[0m 19.61 \u001b[0m |\n",
"| \u001b[95m 156 \u001b[0m | \u001b[95m 0.7654 \u001b[0m | \u001b[95m-0.02304 \u001b[0m | \u001b[95m 19.61 \u001b[0m |\n",
"| \u001b[0m 157 \u001b[0m | \u001b[0m 0.7294 \u001b[0m | \u001b[0m-0.01931 \u001b[0m | \u001b[0m 19.61 \u001b[0m |\n",
"| \u001b[0m 158 \u001b[0m | \u001b[0m 0.6966 \u001b[0m | \u001b[0m-0.03566 \u001b[0m | \u001b[0m 18.6 \u001b[0m |\n",
"| \u001b[0m 159 \u001b[0m | \u001b[0m 0.7436 \u001b[0m | \u001b[0m-0.04827 \u001b[0m | \u001b[0m 20.2 \u001b[0m |\n",
"| \u001b[0m 160 \u001b[0m | \u001b[0m 0.6923 \u001b[0m | \u001b[0m-0.02572 \u001b[0m | \u001b[0m 19.61 \u001b[0m |\n",
"| \u001b[0m 161 \u001b[0m | \u001b[0m 0.7342 \u001b[0m | \u001b[0m-0.04471 \u001b[0m | \u001b[0m 20.18 \u001b[0m |\n",
"| \u001b[0m 162 \u001b[0m | \u001b[0m 0.7342 \u001b[0m | \u001b[0m-0.04209 \u001b[0m | \u001b[0m 20.18 \u001b[0m |\n",
"| \u001b[0m 163 \u001b[0m | \u001b[0m 0.6316 \u001b[0m | \u001b[0m-0.01572 \u001b[0m | \u001b[0m 24.41 \u001b[0m |\n",
"| \u001b[0m 164 \u001b[0m | \u001b[0m 0.7342 \u001b[0m | \u001b[0m-0.04472 \u001b[0m | \u001b[0m 20.17 \u001b[0m |\n",
"| \u001b[0m 165 \u001b[0m | \u001b[0m 0.6966 \u001b[0m | \u001b[0m-0.02401 \u001b[0m | \u001b[0m 19.62 \u001b[0m |\n",
"| \u001b[0m 166 \u001b[0m | \u001b[0m 0.7229 \u001b[0m | \u001b[0m-0.01921 \u001b[0m | \u001b[0m 13.98 \u001b[0m |\n",
"| \u001b[0m 167 \u001b[0m | \u001b[0m 0.7229 \u001b[0m | \u001b[0m-0.03052 \u001b[0m | \u001b[0m 13.98 \u001b[0m |\n",
"| \u001b[0m 168 \u001b[0m | \u001b[0m 0.7342 \u001b[0m | \u001b[0m-0.04705 \u001b[0m | \u001b[0m 20.2 \u001b[0m |\n",
"| \u001b[0m 169 \u001b[0m | \u001b[0m 0.7 \u001b[0m | \u001b[0m 0.04646 \u001b[0m | \u001b[0m 12.87 \u001b[0m |\n",
"| \u001b[0m 170 \u001b[0m | \u001b[0m 0.6923 \u001b[0m | \u001b[0m-0.02636 \u001b[0m | \u001b[0m 19.62 \u001b[0m |\n",
"| \u001b[0m 171 \u001b[0m | \u001b[0m 0.7436 \u001b[0m | \u001b[0m-0.04914 \u001b[0m | \u001b[0m 20.17 \u001b[0m |\n",
"| \u001b[0m 172 \u001b[0m | \u001b[0m 0.7342 \u001b[0m | \u001b[0m-0.04669 \u001b[0m | \u001b[0m 20.17 \u001b[0m |\n",
"| \u001b[0m 173 \u001b[0m | \u001b[0m 0.7342 \u001b[0m | \u001b[0m-0.04572 \u001b[0m | \u001b[0m 20.2 \u001b[0m |\n",
"| \u001b[0m 174 \u001b[0m | \u001b[0m 0.7273 \u001b[0m | \u001b[0m-0.04561 \u001b[0m | \u001b[0m 18.47 \u001b[0m |\n",
"| \u001b[95m 175 \u001b[0m | \u001b[95m 0.7848 \u001b[0m | \u001b[95m 0.04772 \u001b[0m | \u001b[95m 17.78 \u001b[0m |\n",
"| \u001b[0m 176 \u001b[0m | \u001b[0m 0.7848 \u001b[0m | \u001b[0m 0.04406 \u001b[0m | \u001b[0m 17.78 \u001b[0m |\n",
"| \u001b[0m 177 \u001b[0m | \u001b[0m 0.7848 \u001b[0m | \u001b[0m 0.04471 \u001b[0m | \u001b[0m 17.78 \u001b[0m |\n",
"| \u001b[0m 178 \u001b[0m | \u001b[0m 0.7317 \u001b[0m | \u001b[0m 0.03386 \u001b[0m | \u001b[0m 18.93 \u001b[0m |\n",
"| \u001b[0m 179 \u001b[0m | \u001b[0m 0.7632 \u001b[0m | \u001b[0m 0.04011 \u001b[0m | \u001b[0m 18.93 \u001b[0m |\n",
"| \u001b[0m 180 \u001b[0m | \u001b[0m 0.7848 \u001b[0m | \u001b[0m 0.04769 \u001b[0m | \u001b[0m 17.78 \u001b[0m |\n",
"| \u001b[0m 181 \u001b[0m | \u001b[0m 0.7317 \u001b[0m | \u001b[0m 0.03206 \u001b[0m | \u001b[0m 18.94 \u001b[0m |\n",
"| \u001b[0m 182 \u001b[0m | \u001b[0m 0.7532 \u001b[0m | \u001b[0m 0.04485 \u001b[0m | \u001b[0m 18.92 \u001b[0m |\n",
"| \u001b[0m 183 \u001b[0m | \u001b[0m 0.7632 \u001b[0m | \u001b[0m 0.042 \u001b[0m | \u001b[0m 18.93 \u001b[0m |\n",
"| \u001b[0m 184 \u001b[0m | \u001b[0m 0.72 \u001b[0m | \u001b[0m 0.01095 \u001b[0m | \u001b[0m 17.86 \u001b[0m |\n",
"| \u001b[0m 185 \u001b[0m | \u001b[0m 0.7143 \u001b[0m | \u001b[0m-0.04837 \u001b[0m | \u001b[0m 15.47 \u001b[0m |\n",
"| \u001b[0m 186 \u001b[0m | \u001b[0m 0.7848 \u001b[0m | \u001b[0m 0.0473 \u001b[0m | \u001b[0m 17.78 \u001b[0m |\n",
"| \u001b[0m 187 \u001b[0m | \u001b[0m 0.6829 \u001b[0m | \u001b[0m-0.0407 \u001b[0m | \u001b[0m 23.65 \u001b[0m |\n",
"| \u001b[0m 188 \u001b[0m | \u001b[0m 0.6923 \u001b[0m | \u001b[0m-0.01221 \u001b[0m | \u001b[0m 26.04 \u001b[0m |\n",
"| \u001b[0m 189 \u001b[0m | \u001b[0m 0.7532 \u001b[0m | \u001b[0m 0.04529 \u001b[0m | \u001b[0m 18.94 \u001b[0m |\n",
"| \u001b[0m 190 \u001b[0m | \u001b[0m 0.7848 \u001b[0m | \u001b[0m 0.04348 \u001b[0m | \u001b[0m 17.79 \u001b[0m |\n",
"| \u001b[0m 191 \u001b[0m | \u001b[0m 0.7 \u001b[0m | \u001b[0m 0.04061 \u001b[0m | \u001b[0m 12.46 \u001b[0m |\n",
"| \u001b[0m 192 \u001b[0m | \u001b[0m 0.7848 \u001b[0m | \u001b[0m 0.04312 \u001b[0m | \u001b[0m 17.79 \u001b[0m |\n",
"| \u001b[0m 193 \u001b[0m | \u001b[0m 0.7532 \u001b[0m | \u001b[0m 0.04991 \u001b[0m | \u001b[0m 18.93 \u001b[0m |\n",
"| \u001b[0m 194 \u001b[0m | \u001b[0m 0.7848 \u001b[0m | \u001b[0m 0.04101 \u001b[0m | \u001b[0m 17.79 \u001b[0m |\n",
"| \u001b[0m 195 \u001b[0m | \u001b[0m 0.7073 \u001b[0m | \u001b[0m 0.0367 \u001b[0m | \u001b[0m 17.79 \u001b[0m |\n",
"| \u001b[0m 196 \u001b[0m | \u001b[0m 0.7848 \u001b[0m | \u001b[0m 0.0435 \u001b[0m | \u001b[0m 17.78 \u001b[0m |\n",
"| \u001b[0m 197 \u001b[0m | \u001b[0m 0.6923 \u001b[0m | \u001b[0m 0.02793 \u001b[0m | \u001b[0m 24.72 \u001b[0m |\n",
"| \u001b[0m 198 \u001b[0m | \u001b[0m 0.7013 \u001b[0m | \u001b[0m 0.01218 \u001b[0m | \u001b[0m 22.56 \u001b[0m |\n",
"| \u001b[0m 199 \u001b[0m | \u001b[0m 0.7073 \u001b[0m | \u001b[0m 0.007552\u001b[0m | \u001b[0m 13.13 \u001b[0m |\n",
"| \u001b[0m 200 \u001b[0m | \u001b[0m 0.6761 \u001b[0m | \u001b[0m 0.03671 \u001b[0m | \u001b[0m 28.28 \u001b[0m |\n",
"| \u001b[0m 201 \u001b[0m | \u001b[0m 0.7105 \u001b[0m | \u001b[0m 0.03921 \u001b[0m | \u001b[0m 18.93 \u001b[0m |\n",
"| \u001b[0m 202 \u001b[0m | \u001b[0m 0.7848 \u001b[0m | \u001b[0m 0.04867 \u001b[0m | \u001b[0m 17.78 \u001b[0m |\n",
"| \u001b[0m 203 \u001b[0m | \u001b[0m 0.7436 \u001b[0m | \u001b[0m 0.03745 \u001b[0m | \u001b[0m 18.93 \u001b[0m |\n",
"| \u001b[0m 204 \u001b[0m | \u001b[0m 0.7532 \u001b[0m | \u001b[0m 0.04703 \u001b[0m | \u001b[0m 18.93 \u001b[0m |\n",
"| \u001b[0m 205 \u001b[0m | \u001b[0m 0.7317 \u001b[0m | \u001b[0m 0.02761 \u001b[0m | \u001b[0m 18.92 \u001b[0m |\n",
"| \u001b[0m 206 \u001b[0m | \u001b[0m 0.7532 \u001b[0m | \u001b[0m 0.04951 \u001b[0m | \u001b[0m 18.93 \u001b[0m |\n",
"| \u001b[0m 207 \u001b[0m | \u001b[0m 0.7848 \u001b[0m | \u001b[0m 0.04445 \u001b[0m | \u001b[0m 17.77 \u001b[0m |\n",
"| \u001b[0m 208 \u001b[0m | \u001b[0m 0.7848 \u001b[0m | \u001b[0m 0.04138 \u001b[0m | \u001b[0m 17.79 \u001b[0m |\n",
"| \u001b[0m 209 \u001b[0m | \u001b[0m 0.6667 \u001b[0m | \u001b[0m-0.02362 \u001b[0m | \u001b[0m 23.7 \u001b[0m |\n",
"| \u001b[0m 210 \u001b[0m | \u001b[0m 0.7632 \u001b[0m | \u001b[0m 0.04247 \u001b[0m | \u001b[0m 18.93 \u001b[0m |\n",
"=================================================\n",
"max\n",
"{'target': 0.7848101265822784, 'params': {'learning_rate': 0.047715766463314324, 'n_estimators': 17.779424609189796}}\n"
]
}
],
"source": [
"from bayes_opt import BayesianOptimization\n",
"from sklearn.metrics import f1_score\n",
"\n",
"# Basic Example : https://github.com/fmfn/BayesianOptimization/blob/master/examples/basic-tour.ipynb\n",
"\n",
"def black_box_function(n_estimators, learning_rate=0):\n",
" model = AdaBoostClassifier(n_estimators = int(n_estimators),\n",
" learning_rate = 10**learning_rate,\n",
" random_state = 42)\n",
" model.fit(x_train_res , y_train_res)\n",
" y_pred = model.predict(X_test)\n",
" return f1_score(y_test, y_pred)\n",
"\n",
"pbounds = {'n_estimators':(10, 30), 'learning_rate':(-0.05, 0.05)}\n",
"\n",
"optimizer = BayesianOptimization(\n",
" f=black_box_function,\n",
" pbounds=pbounds,\n",
" verbose=2, # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent\n",
" random_state=1,\n",
")\n",
"\n",
"optimizer.maximize(\n",
" init_points=10,\n",
" n_iter=200,\n",
")\n",
"\n",
"print('max')\n",
"\n",
"print(optimizer.max)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"F1 0.7848101265822784\n precision recall f1-score support\n\n non résilié 0.98 0.92 0.95 166\n resilié 0.69 0.91 0.78 34\n\n accuracy 0.92 200\n macro avg 0.83 0.91 0.87 200\nweighted avg 0.93 0.92 0.92 200\n\n"
]
}
],
"source": [
"model = AdaBoostClassifier(n_estimators = int(optimizer.max['params']['n_estimators']),\n",
" learning_rate = 10**optimizer.max['params']['learning_rate'],\n",
" random_state = 42)\n",
"model.fit(x_train_res , y_train_res)\n",
"y_pred = model.predict(X_test)\n",
"print(\"F1\", f1_score(y_test, y_pred))\n",
"print(classification_report(y_test, y_pred,target_names=target_names))"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.2733333333333333"
]
},
"metadata": {},
"execution_count": 20
}
],
"source": [
"yscore = model.predict(Xscore)\n",
"np.mean(yscore)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"yDF = pd.DataFrame(data = yscore.astype(int), columns=['target'])\n",
"yDF.index+=1000\n",
"yDF.to_csv(\"result_schindler_hugo.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
]
}