update trailing char
parent dd581f7937
commit f346005515
@@ -1,20 +1,20 @@
*.code-workspace
quavenv/*
*.pdf

__pycache__/*
baselines/__pycache__/*
baselines/densratio/__pycache__/*
quacc/__pycache__/*
quacc/evaluation/__pycache__/*
quacc/method/__pycache__/*
tests/__pycache__/*

*.coverage
.coverage

scp_sync.py

out/*
output/*
!output/main/
@@ -1,25 +1,25 @@
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [

        {
            "name": "main",
            "type": "python",
            "request": "launch",
            "program": "C:\\Users\\Lorenzo Volpi\\source\\tesi\\quacc\\main.py",
            "console": "integratedTerminal",
            "justMyCode": true
        },
        {
            "name": "main_test",
            "type": "python",
            "request": "launch",
            "program": "C:\\Users\\Lorenzo Volpi\\source\\tesi\\quacc\\main_test.py",
            "console": "integratedTerminal",
            "justMyCode": false
        },
    ]
}
@@ -1,54 +1,54 @@
{
    "todo": [
        {
            "assignedTo": {
                "name": "Lorenzo Volpi"
            },
            "creation_time": "2023-10-28T14:33:36.069Z",
            "id": "2",
            "references": [],
            "title": "Create an avg plot with training prevalence on the x axis, averaged over test prevalence"
        },
        {
            "assignedTo": {
                "name": "Lorenzo Volpi"
            },
            "creation_time": "2023-10-28T14:32:37.610Z",
            "id": "1",
            "references": [],
            "title": "Test on imdb"
        }
    ],
    "in-progress": [
        {
            "assignedTo": {
                "name": "Lorenzo Volpi"
            },
            "creation_time": "2023-10-28T14:34:23.217Z",
            "id": "3",
            "references": [],
            "title": "Implement a task-specific grid search starting from GridSearchQ"
        },
        {
            "assignedTo": {
                "name": "Lorenzo Volpi"
            },
            "creation_time": "2023-10-28T14:34:46.226Z",
            "id": "4",
            "references": [],
            "title": "Add estimators based on PACC (quantifier)"
        }
    ],
    "testing": [],
    "done": [
        {
            "assignedTo": {
                "name": "Lorenzo Volpi"
            },
            "creation_time": "2023-10-28T14:35:12.683Z",
            "id": "5",
            "references": [],
            "title": "Rework the report data representation"
        }
    ]
}
284 TODO.html
@@ -1,143 +1,143 @@
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title></title>
    <style>
        /* From extension vscode.github */
        /*---------------------------------------------------------------------------------------------
         * Copyright (c) Microsoft Corporation. All rights reserved.
         * Licensed under the MIT License. See License.txt in the project root for license information.
         *--------------------------------------------------------------------------------------------*/

        .vscode-dark img[src$=\#gh-light-mode-only],
        .vscode-light img[src$=\#gh-dark-mode-only] {
            display: none;
        }

    </style>

    <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/Microsoft/vscode/extensions/markdown-language-features/media/markdown.css">
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/Microsoft/vscode/extensions/markdown-language-features/media/highlight.css">
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe WPC', 'Segoe UI', system-ui, 'Ubuntu', 'Droid Sans', sans-serif;
            font-size: 14px;
            line-height: 1.6;
        }
    </style>
    <style>
        .task-list-item {
            list-style-type: none;
        }

        .task-list-item-checkbox {
            margin-left: -20px;
            vertical-align: middle;
            pointer-events: none;
        }
    </style>

</head>
<body class="vscode-body vscode-light">
    <ul class="contains-task-list">
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> add table averages</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> plots; 3 types (notes + email + garg)</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> fix the kfcv baseline</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> add a method using CC besides SLD</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> take the most populous class of rcv1 and remove negatives until reaching 50/50; then subsample with 9 training prevalences (from 0.1-0.9 to 0.9-0.1)</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> vary the recalibration parameter in SLD</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> fix the diagonal plot</p>
            <ul>
                <li>seaborn example gallery</li>
            </ul>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> recalib variants: bcts, SLD (try exact_train_prev=False)</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> check what validation size garg uses</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> for model selection, test the classifier's C parameter, explored over np.logspace(-3,3,7) or np.logspace(-4,4,9), and the class_weight parameter, explored over None or "balanced"; use qp.model_selection.GridSearchQ with mae as the error and UPP as the protocol</p>
            <ul>
                <li>qp.train_test_split to get v_train and v_val</li>
                <li>GridSearchQ(
                    model: BaseQuantifier,
                    param_grid: {
                    'classifier__C': np.logspace(-3,3,7),
                    'classifier__class_weight': [None, 'balanced'],
                    'recalib': [None, 'bcts']
                    },
                    protocol: UPP(V_val, repeats=1000),
                    error = qp.error.mae,
                    refit=True,
                    timeout=-1,
                    n_jobs=-2,
                    verbose=True).fit(V_tr)</li>
            </ul>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> collective plot, with shift on the x axis, taking all training sets into account and averaging over the 9 cases (each line is a method), for both non-optimized and optimized results</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> save the best score obtained by each application of GridSearchQ</p>
            <ul>
                <li>in the binary case, average the two best scores</li>
            </ul>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> import baselines</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" type="checkbox"> import mandoline</p>
            <ul>
                <li>mandoline can be imported, but it requires an a priori slicing of the features that has to be implemented ad hoc</li>
            </ul>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" type="checkbox"> fix the old iw baselines</p>
            <ul>
                <li>they cannot be fixed because they depend on numpy</li>
            </ul>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> avg plot with train prevalence on the x axis, averaged over test prevalence</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> implement a task-specific grid search starting from GridSearchQ</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> try PACC as a quantifier</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> add labels to the shift plot</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> fix exact_train in quapy</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> also test on imdb</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" type="checkbox"> review the new baselines</p>
        </li>
    </ul>



</body>
</html>
86 TODO.md
@@ -1,44 +1,44 @@
- [x] add table averages
- [x] plots; 3 types (notes + email + garg)
- [x] fix the kfcv baseline
- [x] add a method using CC besides SLD
- [x] take the most populous class of rcv1 and remove negatives until reaching 50/50; then subsample with 9 training prevalences (from 0.1-0.9 to 0.9-0.1)
- [x] vary the recalibration parameter in SLD


- [x] fix the diagonal plot
  - seaborn example gallery
- [x] recalib variants: bcts, SLD (try exact_train_prev=False)
- [x] check what validation size garg uses
- [x] for model selection, test the classifier's C parameter, explored over np.logspace(-3,3,7) or np.logspace(-4,4,9), and the class_weight parameter, explored over None or "balanced"; use qp.model_selection.GridSearchQ with mae as the error and UPP as the protocol
  - qp.train_test_split to get v_train and v_val
  - GridSearchQ(
        model: BaseQuantifier,
        param_grid: {
            'classifier__C': np.logspace(-3,3,7),
            'classifier__class_weight': [None, 'balanced'],
            'recalib': [None, 'bcts']
        },
        protocol: UPP(V_val, repeats=1000),
        error = qp.error.mae,
        refit=True,
        timeout=-1,
        n_jobs=-2,
        verbose=True).fit(V_tr)
- [x] collective plot, with shift on the x axis, taking all training sets into account and averaging over the 9 cases (each line is a method), for both non-optimized and optimized results
- [x] save the best score obtained by each application of GridSearchQ
  - in the binary case, average the two best scores
- [x] import baselines

- [ ] import mandoline
  - mandoline can be imported, but it requires an a priori slicing of the features that has to be implemented ad hoc
- [ ] fix the old iw baselines
  - they cannot be fixed because they depend on numpy
- [x] avg plot with train prevalence on the x axis, averaged over test prevalence
- [x] implement a task-specific grid search starting from GridSearchQ
- [x] try PACC as a quantifier
- [x] add labels to the shift plot
- [x] fix exact_train in quapy
- [x] also test on imdb

- [ ] review the new baselines
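The model-selection item in the TODO above only sketches the GridSearchQ call. Below is a minimal, hedged Python sketch of that recipe using quapy; the SLD quantifier, the imdb dataset fetch, and the 0.6/0.4 validation split are illustrative assumptions, not taken from this repository.

# Hedged sketch of the model-selection protocol described in the TODO item (assumes a recent quapy).
import numpy as np
import quapy as qp
from quapy.method.aggregative import SLD
from quapy.model_selection import GridSearchQ
from quapy.protocol import UPP
from sklearn.linear_model import LogisticRegression

dataset = qp.datasets.fetch_reviews("imdb", tfidf=True, min_df=5)   # imdb is one of the TODO targets
V_tr, V_val = dataset.training.split_stratified(train_prop=0.6)     # illustrative split into v_train / v_val

quantifier = GridSearchQ(
    model=SLD(LogisticRegression()),
    param_grid={
        "classifier__C": np.logspace(-3, 3, 7),
        "classifier__class_weight": [None, "balanced"],
        "recalib": [None, "bcts"],
    },
    protocol=UPP(V_val, repeats=1000),   # uniform prevalence protocol on the validation split
    error=qp.error.mae,
    refit=True,
    timeout=-1,
    n_jobs=-2,
    verbose=True,
).fit(V_tr)

print(quantifier.best_params_, quantifier.best_score_)   # best score per the "save the best score" item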
@@ -1,44 +1,44 @@
import numpy as np
from sklearn.metrics import f1_score


def get_entropy(probs):
    return np.sum(np.multiply(probs, np.log(probs + 1e-20)), axis=1)


def get_max_conf(probs):
    return np.max(probs, axis=-1)


def find_ATC_threshold(scores, labels):
    sorted_idx = np.argsort(scores)

    sorted_scores = scores[sorted_idx]
    sorted_labels = labels[sorted_idx]

    fp = np.sum(labels == 0)
    fn = 0.0

    min_fp_fn = np.abs(fp - fn)
    thres = 0.0
    for i in range(len(labels)):
        if sorted_labels[i] == 0:
            fp -= 1
        else:
            fn += 1

        if np.abs(fp - fn) < min_fp_fn:
            min_fp_fn = np.abs(fp - fn)
            thres = sorted_scores[i]

    return min_fp_fn, thres


def get_ATC_acc(thres, scores):
    return np.mean(scores >= thres)


def get_ATC_f1(thres, scores, probs):
    preds = np.argmax(probs, axis=-1)
    estim_y = np.abs(1 - (scores >= thres) ^ preds)
    return f1_score(estim_y, preds)
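A hedged sketch of how the ATC helpers above are typically combined: fit the threshold on held-out source scores paired with 0/1 correctness labels, then estimate accuracy on an unlabelled target set as the fraction of target scores above that threshold. The toy arrays and the choice of maximum confidence as the score are assumptions for illustration, not data from the repository.

# Hedged usage sketch (not part of the diff); assumes the functions above are importable.
import numpy as np

probs_val = np.array([[0.9, 0.1], [0.4, 0.6], [0.8, 0.2], [0.55, 0.45]])  # toy source posteriors
correct_val = np.array([1, 1, 0, 1])          # 1 = the classifier was right on that validation example

probs_test = np.array([[0.7, 0.3], [0.52, 0.48], [0.95, 0.05]])           # toy target posteriors

val_scores = get_max_conf(probs_val)          # confidence scores on the source set
test_scores = get_max_conf(probs_test)        # confidence scores on the target set

_, thres = find_ATC_threshold(val_scores, correct_val)   # threshold balancing false positives/negatives
estimated_test_acc = get_ATC_acc(thres, test_scores)     # fraction of target scores above the threshold
print(thres, estimated_test_acc)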
@@ -1,277 +1,277 @@
"""
Relative Unconstrained Least-Squares Fitting (RuLSIF): A Python Implementation
References:
'Change-point detection in time-series data by relative density-ratio estimation'
Song Liu, Makoto Yamada, Nigel Collier and Masashi Sugiyama,
Neural Networks 43 (2013) 72-83.

'A Least-squares Approach to Direct Importance Estimation'
Takafumi Kanamori, Shohei Hido, and Masashi Sugiyama,
Journal of Machine Learning Research 10 (2009) 1391-1445.
"""

from warnings import warn

from numpy import (
    array,
    asarray,
    asmatrix,
    diag,
    diagflat,
    empty,
    exp,
    inf,
    log,
    matrix,
    multiply,
    ones,
    power,
    sum,
)
from numpy.linalg import solve
from numpy.random import randint

from .density_ratio import DensityRatio, KernelInfo
from .helpers import guvectorize_compute, np_float, to_ndarray


def RuLSIF(x, y, alpha, sigma_range, lambda_range, kernel_num=100, verbose=True):
    """
    Estimation of the alpha-Relative Density Ratio p(x)/p_alpha(x) by RuLSIF
    (Relative Unconstrained Least-Square Importance Fitting)

    p_alpha(x) = alpha * p(x) + (1 - alpha) * q(x)

    Arguments:
        x (numpy.matrix): Sample from p(x).
        y (numpy.matrix): Sample from q(x).
        alpha (float): Mixture parameter.
        sigma_range (list<float>): Search range of Gaussian kernel bandwidth.
        lambda_range (list<float>): Search range of regularization parameter.
        kernel_num (int): Number of kernels. (Default 100)
        verbose (bool): Indicator to print messages (Default True)

    Returns:
        densratio.DensityRatio object which has `compute_density_ratio()`.
    """

    # Number of samples.
    nx = x.shape[0]
    ny = y.shape[0]

    # Number of kernel functions.
    kernel_num = min(kernel_num, nx)

    # Randomly take a subset of x, to identify centers for the kernels.
    centers = x[randint(nx, size=kernel_num)]

    if verbose:
        print("RuLSIF starting...")

    if len(sigma_range) == 1 and len(lambda_range) == 1:
        sigma = sigma_range[0]
        lambda_ = lambda_range[0]
    else:
        if verbose:
            print("Searching for the optimal sigma and lambda...")

        # Grid-search cross-validation for optimal kernel and regularization parameters.
        opt_params = search_sigma_and_lambda(
            x, y, alpha, centers, sigma_range, lambda_range, verbose
        )
        sigma = opt_params["sigma"]
        lambda_ = opt_params["lambda"]

        if verbose:
            print(
                "Found optimal sigma = {:.3f}, lambda = {:.3f}.".format(sigma, lambda_)
            )

    if verbose:
        print("Optimizing theta...")

    phi_x = compute_kernel_Gaussian(x, centers, sigma)
    phi_y = compute_kernel_Gaussian(y, centers, sigma)
    H = alpha * (phi_x.T.dot(phi_x) / nx) + (1 - alpha) * (phi_y.T.dot(phi_y) / ny)
    h = phi_x.mean(axis=0).T
    theta = asarray(solve(H + diag(array(lambda_).repeat(kernel_num)), h)).ravel()

    # No negative coefficients.
    theta[theta < 0] = 0

    # Compute the alpha-relative density ratio, at the given coordinates.
    def alpha_density_ratio(coordinates):
        # Evaluate the kernel at these coordinates, and take the dot-product with the weights.
        coordinates = to_ndarray(coordinates)
        phi_x = compute_kernel_Gaussian(coordinates, centers, sigma)
        alpha_density_ratio = phi_x @ theta

        return alpha_density_ratio

    # Compute the approximate alpha-relative PE-divergence, given samples x and y from the respective distributions.
    def alpha_PE_divergence(x, y):
        # This is Y, in Reference 1.
        x = to_ndarray(x)

        # Obtain alpha-relative density ratio at these points.
        g_x = alpha_density_ratio(x)

        # This is Y', in Reference 1.
        y = to_ndarray(y)

        # Obtain alpha-relative density ratio at these points.
        g_y = alpha_density_ratio(y)

        # Compute the alpha-relative PE-divergence as given in Reference 1.
        n = x.shape[0]
        divergence = (
            -alpha * (g_x @ g_x) / 2 - (1 - alpha) * (g_y @ g_y) / 2 + g_x.sum(axis=0)
        ) / n - 1.0 / 2
        return divergence

    # Compute the approximate alpha-relative KL-divergence, given samples x and y from the respective distributions.
    def alpha_KL_divergence(x, y):
        # This is Y, in Reference 1.
        x = to_ndarray(x)

        # Obtain alpha-relative density ratio at these points.
        g_x = alpha_density_ratio(x)

        # Compute the alpha-relative KL-divergence.
        n = x.shape[0]
        divergence = log(g_x).sum(axis=0) / n
        return divergence

    alpha_PE = alpha_PE_divergence(x, y)
    alpha_KL = alpha_KL_divergence(x, y)

    if verbose:
        print("Approximate alpha-relative PE-divergence = {:03.2f}".format(alpha_PE))
        print("Approximate alpha-relative KL-divergence = {:03.2f}".format(alpha_KL))

    kernel_info = KernelInfo(
        kernel_type="Gaussian", kernel_num=kernel_num, sigma=sigma, centers=centers
    )
    result = DensityRatio(
        method="RuLSIF",
        alpha=alpha,
        theta=theta,
        lambda_=lambda_,
        alpha_PE=alpha_PE,
        alpha_KL=alpha_KL,
        kernel_info=kernel_info,
        compute_density_ratio=alpha_density_ratio,
    )

    if verbose:
        print("RuLSIF completed.")

    return result


# Grid-search cross-validation for the optimal parameters sigma and lambda by leave-one-out cross-validation. See Reference 2.
def search_sigma_and_lambda(x, y, alpha, centers, sigma_range, lambda_range, verbose):
    nx = x.shape[0]
    ny = y.shape[0]
    n_min = min(nx, ny)
    kernel_num = centers.shape[0]

    score_new = inf
    sigma_new = 0
    lambda_new = 0

    for sigma in sigma_range:
        phi_x = compute_kernel_Gaussian(x, centers, sigma)  # (nx, kernel_num)
        phi_y = compute_kernel_Gaussian(y, centers, sigma)  # (ny, kernel_num)
        H = alpha * (phi_x.T @ phi_x / nx) + (1 - alpha) * (
            phi_y.T @ phi_y / ny
        )  # (kernel_num, kernel_num)
        h = phi_x.mean(axis=0).reshape(-1, 1)  # (kernel_num, 1)
        phi_x = phi_x[:n_min].T  # (kernel_num, n_min)
        phi_y = phi_y[:n_min].T  # (kernel_num, n_min)

        for lambda_ in lambda_range:
            B = H + diag(
                array(lambda_ * (ny - 1) / ny).repeat(kernel_num)
            )  # (kernel_num, kernel_num)
            B_inv_X = solve(B, phi_y)  # (kernel_num, n_min)
            X_B_inv_X = multiply(phi_y, B_inv_X)  # (kernel_num, n_min)
            denom = ny * ones(n_min) - ones(kernel_num) @ X_B_inv_X  # (n_min, )
            B0 = solve(B, h @ ones((1, n_min))) + B_inv_X @ diagflat(
                h.T @ B_inv_X / denom
            )  # (kernel_num, n_min)
            B1 = solve(B, phi_x) + B_inv_X @ diagflat(
                ones(kernel_num) @ multiply(phi_x, B_inv_X)
            )  # (kernel_num, n_min)
            B2 = (ny - 1) * (nx * B0 - B1) / (ny * (nx - 1))  # (kernel_num, n_min)
            B2[B2 < 0] = 0
            r_y = multiply(phi_y, B2).sum(axis=0).T  # (n_min, )
            r_x = multiply(phi_x, B2).sum(axis=0).T  # (n_min, )

            # Squared loss of RuLSIF, without regularization term.
            # Directly related to the negative of the PE-divergence.
            score = (r_y @ r_y / 2 - r_x.sum(axis=0)) / n_min

            if verbose:
                print(
                    "sigma = %.5f, lambda = %.5f, score = %.5f"
                    % (sigma, lambda_, score)
                )

            if score < score_new:
                score_new = score
                sigma_new = sigma
                lambda_new = lambda_

    return {"sigma": sigma_new, "lambda": lambda_new}


def _compute_kernel_Gaussian(x_list, y_row, neg_gamma, res) -> None:
    sq_norm = sum(power(x_list - y_row, 2), 1)
    multiply(neg_gamma, sq_norm, res)
    exp(res, res)


def _target_numpy_wrapper(x_list, y_list, neg_gamma):
    res = empty((y_list.shape[0], x_list.shape[0]), np_float)
    if isinstance(x_list, matrix) or isinstance(y_list, matrix):
        res = asmatrix(res)

    for j, y_row in enumerate(y_list):
        # `.T` aligns shapes for matrices, does nothing for 1D ndarray.
        _compute_kernel_Gaussian(x_list, y_row, neg_gamma, res[j].T)

    return res


_compute_functions = {"numpy": _target_numpy_wrapper}
if guvectorize_compute:
    _compute_functions.update(
        {
            key: guvectorize_compute(key)(_compute_kernel_Gaussian)
            for key in ("cpu", "parallel")
        }
    )

_compute_function = _compute_functions[
    "cpu" if "cpu" in _compute_functions else "numpy"
]


# Returns a 2D numpy matrix of kernel evaluated at the gridpoints with coordinates from x_list and y_list.
def compute_kernel_Gaussian(x_list, y_list, sigma):
    return _compute_function(x_list, y_list, -0.5 * sigma**-2).T


def set_compute_kernel_target(target: str) -> None:
    global _compute_function
    if target not in ("numpy", "cpu", "parallel"):
        raise ValueError(
            "'target' must be one of the following: 'numpy', 'cpu', or 'parallel'."
        )

    if target not in _compute_functions:
        warn("'numba' not available; defaulting to 'numpy'.", ImportWarning)
        target = "numpy"

    _compute_function = _compute_functions[target]
@@ -1,7 +1,7 @@
from warnings import filterwarnings

from .core import densratio
from .RuLSIF import set_compute_kernel_target

filterwarnings("default", message="'numba'", category=ImportWarning, module="densratio")
__all__ = ["densratio", "set_compute_kernel_target"]
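The package init above re-exports densratio and set_compute_kernel_target. A minimal, hedged usage sketch follows; the import path baselines.densratio is an assumption inferred from the __pycache__ entries in the ignore list above, not something the diff states.

# Hedged sketch (not part of the diff): choosing the kernel backend before calling densratio().
# If numba is unavailable, set_compute_kernel_target warns and falls back to the numpy backend.
from baselines.densratio import densratio, set_compute_kernel_target  # assumed import path

set_compute_kernel_target("parallel")   # valid targets: "numpy", "cpu", "parallel"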
@@ -1,70 +1,70 @@
"""
densratio.core
~~~~~~~~~~~~~~

Estimate Density Ratio p(x)/q(y)
"""

from numpy import linspace

from .helpers import to_ndarray
from .RuLSIF import RuLSIF


def densratio(
    x, y, alpha=0, sigma_range="auto", lambda_range="auto", kernel_num=100, verbose=True
):
    """Estimate alpha-mixture Density Ratio p(x)/(alpha*p(x) + (1 - alpha)*q(x))

    Arguments:
        x: sample from p(x).
        y: sample from q(x).
        alpha: Default 0 - corresponds to ordinary density ratio.
        sigma_range: search range of Gaussian kernel bandwidth.
            Default "auto" means 10^-3, 10^-2, ..., 10^9.
        lambda_range: search range of regularization parameter for uLSIF.
            Default "auto" means 10^-3, 10^-2, ..., 10^9.
        kernel_num: number of kernels. Default 100.
        verbose: indicator to print messages. Default True.

    Returns:
        densratio.DensityRatio object which has `compute_density_ratio()`.

    Raises:
        ValueError: if dimension of x != dimension of y

    Usage::
        >>> from scipy.stats import norm
        >>> from densratio import densratio

        >>> x = norm.rvs(size=200, loc=1, scale=1./8)
        >>> y = norm.rvs(size=200, loc=1, scale=1./2)
        >>> result = densratio(x, y, alpha=0.7)
        >>> print(result)

        >>> density_ratio = result.compute_density_ratio(y)
        >>> print(density_ratio)
    """

    x = to_ndarray(x)
    y = to_ndarray(y)

    if x.shape[1] != y.shape[1]:
        raise ValueError("x and y must be same dimensions.")

    if isinstance(sigma_range, str) and sigma_range != "auto":
        raise TypeError("Invalid value for sigma_range.")

    if isinstance(lambda_range, str) and lambda_range != "auto":
        raise TypeError("Invalid value for lambda_range.")

    if sigma_range is None or (isinstance(sigma_range, str) and sigma_range == "auto"):
        sigma_range = 10 ** linspace(-3, 9, 13)

    if lambda_range is None or (
        isinstance(lambda_range, str) and lambda_range == "auto"
    ):
        lambda_range = 10 ** linspace(-3, 9, 13)

    result = RuLSIF(x, y, alpha, sigma_range, lambda_range, kernel_num, verbose)
    return result
@@ -1,88 +1,88 @@
from pprint import pformat
from re import sub


class DensityRatio:
    """Density Ratio."""

    def __init__(
        self,
        method,
        alpha,
        theta,
        lambda_,
        alpha_PE,
        alpha_KL,
        kernel_info,
        compute_density_ratio,
    ):
        self.method = method
        self.alpha = alpha
        self.theta = theta
        self.lambda_ = lambda_
        self.alpha_PE = alpha_PE
        self.alpha_KL = alpha_KL
        self.kernel_info = kernel_info
        self.compute_density_ratio = compute_density_ratio

    def __str__(self):
        return """
Method: %(method)s

Alpha: %(alpha)s

Kernel Information:
%(kernel_info)s

Kernel Weights (theta):
%(theta)s

Regularization Parameter (lambda): %(lambda_)s

Alpha-Relative PE-Divergence: %(alpha_PE)s

Alpha-Relative KL-Divergence: %(alpha_KL)s

Function to Estimate Density Ratio:
compute_density_ratio(x)

"""[
            1:-1
        ] % dict(
            method=self.method,
            kernel_info=self.kernel_info,
            alpha=self.alpha,
            theta=my_format(self.theta),
            lambda_=self.lambda_,
            alpha_PE=self.alpha_PE,
            alpha_KL=self.alpha_KL,
        )


class KernelInfo:
    """Kernel Information."""

    def __init__(self, kernel_type, kernel_num, sigma, centers):
        self.kernel_type = kernel_type
        self.kernel_num = kernel_num
        self.sigma = sigma
        self.centers = centers

    def __str__(self):
        return """
Kernel type: %(kernel_type)s
Number of kernels: %(kernel_num)s
Bandwidth(sigma): %(sigma)s
Centers: %(centers)s
"""[
            1:-1
        ] % dict(
            kernel_type=self.kernel_type,
            kernel_num=self.kernel_num,
            sigma=self.sigma,
            centers=my_format(self.centers),
        )


def my_format(str):
    return sub(r"\s+", " ", (pformat(str).split("\n")[0] + ".."))
@@ -1,36 +1,36 @@
from numpy import array, ndarray, result_type

np_float = result_type(float)
try:
    import numba as nb
except ModuleNotFoundError:
    guvectorize_compute = None
else:
    _nb_float = nb.from_dtype(np_float)

    def guvectorize_compute(target: str, *, cache: bool = True):
        return nb.guvectorize(
            [nb.void(_nb_float[:, :], _nb_float[:], _nb_float, _nb_float[:])],
            "(m, p),(p),()->(m)",
            nopython=True,
            target=target,
            cache=cache,
        )


def is_numeric(x):
    return isinstance(x, int) or isinstance(x, float)


def to_ndarray(x):
    if isinstance(x, ndarray):
        if len(x.shape) == 1:
            return x.reshape(-1, 1)
        else:
            return x
    elif str(type(x)) == "<class 'pandas.core.frame.DataFrame'>":
        return x.values
    elif not x:
        raise ValueError("Cannot transform to numpy.matrix.")
    else:
        return to_ndarray(array(x))
@@ -1,4 +1,4 @@
import numpy as np

def get_doc(probs1, probs2):
    return np.mean(probs2) - np.mean(probs1)
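get_doc above returns the difference of the two mean scores. A hedged toy illustration follows, assuming probs1 holds source-set confidences and probs2 target-set confidences (the diff does not state which order the repository uses), and assuming get_max_conf from the baseline module above is importable.

# Hedged illustration (not part of the diff): average confidence drop between source and target posteriors.
import numpy as np

probs_val = np.array([[0.9, 0.1], [0.2, 0.8], [0.7, 0.3]])     # toy posteriors on source/validation data
probs_test = np.array([[0.6, 0.4], [0.55, 0.45], [0.8, 0.2]])  # toy posteriors on shifted target data

conf_val = np.max(probs_val, axis=-1)    # equivalent to get_max_conf(probs_val)
conf_test = np.max(probs_test, axis=-1)

doc = get_doc(conf_val, conf_test)       # mean(target confidence) - mean(source confidence)
# DoC-style baselines then map this difference to an estimated accuracy change; in the simplest
# reading acc_test is approximated as acc_val + doc, but the exact mapping is not shown in this diff.
print(doc)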
@@ -1,66 +1,66 @@
import numpy as np
from scipy.sparse import issparse, vstack
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity

from baselines import densratio
from baselines.pykliep import DensityRatioEstimator


def kliep(Xtr, ytr, Xte):
    kliep = DensityRatioEstimator()
    kliep.fit(Xtr, Xte)
    return kliep.predict(Xtr)


def usilf(Xtr, ytr, Xte, alpha=0.0):
    dense_ratio_obj = densratio(Xtr, Xte, alpha=alpha, verbose=False)
    return dense_ratio_obj.compute_density_ratio(Xtr)


def logreg(Xtr, ytr, Xte):
    # check "Direct Density Ratio Estimation for
    # Large-scale Covariate Shift Adaptation", Eq.28

    if issparse(Xtr):
        X = vstack([Xtr, Xte])
    else:
        X = np.concatenate([Xtr, Xte])

    y = [0] * Xtr.shape[0] + [1] * Xte.shape[0]

    logreg = GridSearchCV(
        LogisticRegression(),
        param_grid={"C": np.logspace(-3, 3, 7), "class_weight": ["balanced", None]},
        n_jobs=-1,
    )
    logreg.fit(X, y)
    probs = logreg.predict_proba(Xtr)
    prob_train, prob_test = probs[:, 0], probs[:, 1]
    prior_train = Xtr.shape[0]
    prior_test = Xte.shape[0]
    w = (prior_train / prior_test) * (prob_test / prob_train)
    return w


kdex2_params = {"bandwidth": np.logspace(-1, 1, 20)}


def kdex2_lltr(Xtr):
    if issparse(Xtr):
        Xtr = Xtr.toarray()
    return GridSearchCV(KernelDensity(), kdex2_params).fit(Xtr).score_samples(Xtr)


def kdex2_weights(Xtr, Xte, log_likelihood_tr):
    log_likelihood_te = (
        GridSearchCV(KernelDensity(), kdex2_params).fit(Xte).score_samples(Xtr)
    )
    likelihood_tr = np.exp(log_likelihood_tr)
    likelihood_te = np.exp(log_likelihood_te)
    return likelihood_te / likelihood_tr


def get_acc(tr_preds, ytr, w):
    return np.sum((1.0 * (tr_preds == ytr)) * w) / np.sum(w)
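The functions above all estimate the density ratio w(x) = q(x)/p(x) at the labelled training/validation points; get_acc then turns a classifier's correctness on those points into an importance-weighted accuracy estimate for an unlabelled test sample. A minimal sketch under those assumptions (clf, Xva, yva, Xte are illustrative names, not from this module):

# clf: fitted classifier; (Xva, yva): labelled validation data drawn from p(x);
# Xte: unlabelled test sample drawn from q(x).
# w = logreg(Xva, yva, Xte)                    # or kliep(...), usilf(...), kdex2_weights(...)
# est_acc = get_acc(clf.predict(Xva), yva, w)  # approximates E_q[1{clf(x) = y}]
#                                              # via sum_i w_i * 1{pred_i = y_i} / sum_i w_i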
@@ -1,140 +1,140 @@
# import itertools
# from typing import Iterable

# import quapy as qp
# import quapy.functional as F
# from densratio import densratio
# from quapy.method.aggregative import *
# from quapy.protocol import (
#     AbstractStochasticSeededProtocol,
#     OnLabelledCollectionProtocol,
# )
# from scipy.sparse import issparse, vstack
# from scipy.spatial.distance import cdist
# from scipy.stats import multivariate_normal
# from sklearn.linear_model import LogisticRegression
# from sklearn.model_selection import GridSearchCV
# from sklearn.neighbors import KernelDensity

import time

import numpy as np
import sklearn.metrics as metrics
from pykliep import DensityRatioEstimator
from quapy.protocol import APP
from scipy.sparse import issparse, vstack
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity

import baselines.impweight as iw
from baselines.densratio import densratio
from quacc.dataset import Dataset


# ---------------------------------------------------------------------------------------
# Methods of "importance weight", e.g., by ratio density estimation (KLIEP, SILF, LogReg)
# ---------------------------------------------------------------------------------------
class ImportanceWeight:
    def weights(self, Xtr, ytr, Xte):
        ...


class KLIEP(ImportanceWeight):
    def __init__(self):
        pass

    def weights(self, Xtr, ytr, Xte):
        kliep = DensityRatioEstimator()
        kliep.fit(Xtr, Xte)
        return kliep.predict(Xtr)


class USILF(ImportanceWeight):
    def __init__(self, alpha=0.0):
        self.alpha = alpha

    def weights(self, Xtr, ytr, Xte):
        dense_ratio_obj = densratio(Xtr, Xte, alpha=self.alpha, verbose=False)
        return dense_ratio_obj.compute_density_ratio(Xtr)


class LogReg(ImportanceWeight):
    def __init__(self):
        pass

    def weights(self, Xtr, ytr, Xte):
        # check "Direct Density Ratio Estimation for
        # Large-scale Covariate Shift Adaptation", Eq.28

        if issparse(Xtr):
            X = vstack([Xtr, Xte])
        else:
            X = np.concatenate([Xtr, Xte])

        y = [0] * Xtr.shape[0] + [1] * Xte.shape[0]

        logreg = GridSearchCV(
            LogisticRegression(),
            param_grid={"C": np.logspace(-3, 3, 7), "class_weight": ["balanced", None]},
            n_jobs=-1,
        )
        logreg.fit(X, y)
        probs = logreg.predict_proba(Xtr)
        prob_train, prob_test = probs[:, 0], probs[:, 1]
        prior_train = Xtr.shape[0]
        prior_test = Xte.shape[0]
        w = (prior_train / prior_test) * (prob_test / prob_train)
        return w


class KDEx2(ImportanceWeight):
    def __init__(self):
        pass

    def weights(self, Xtr, ytr, Xte):
        params = {"bandwidth": np.logspace(-1, 1, 20)}
        log_likelihood_tr = (
            GridSearchCV(KernelDensity(), params).fit(Xtr).score_samples(Xtr)
        )
        log_likelihood_te = (
            GridSearchCV(KernelDensity(), params).fit(Xte).score_samples(Xtr)
        )
        likelihood_tr = np.exp(log_likelihood_tr)
        likelihood_te = np.exp(log_likelihood_te)
        return likelihood_te / likelihood_tr


if __name__ == "__main__":
    # d = Dataset("rcv1", target="CCAT").get_raw()
    d = Dataset("imdb", n_prevalences=1).get()[0]

    tstart = time.time()
    lr = LogisticRegression()
    lr.fit(*d.train.Xy)
    val_preds = lr.predict(d.validation.X)
    protocol = APP(
        d.test,
        n_prevalences=21,
        repeats=1,
        sample_size=100,
        return_type="labelled_collection",
    )

    results = []
    for sample in protocol():
        wx = iw.kliep(d.validation.X, d.validation.y, sample.X)
        test_preds = lr.predict(sample.X)
        estim_acc = np.sum((1.0 * (val_preds == d.validation.y)) * wx) / np.sum(wx)
        true_acc = metrics.accuracy_score(sample.y, test_preds)
        results.append((sample.prevalence(), estim_acc, true_acc))

    tend = time.time()

    for r in results:
        print(*r)

    print(f"logreg finished [took {tend-tstart:.3f}s]")
    import win11toast

    win11toast.notify("models.py", "Completed")
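The class wrappers above expose the same estimators behind a common weights(Xtr, ytr, Xte) interface; a one-line sketch of swapping them in for the functional call used in the __main__ block (illustrative, not part of the file):

# wx = KLIEP().weights(d.validation.X, d.validation.y, sample.X)
# wx = USILF(alpha=0.0).weights(d.validation.X, d.validation.y, sample.X)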
@@ -1,221 +1,221 @@
import warnings

import numpy as np
from scipy.sparse import csr_matrix


class DensityRatioEstimator:
    """
    Class to accomplish direct density estimation implementing the original KLIEP
    algorithm from Direct Importance Estimation with Model Selection
    and Its Application to Covariate Shift Adaptation by Sugiyama et al.

    The training set is distributed via
    train ~ p(x)
    and the test set is distributed via
    test ~ q(x).

    The KLIEP algorithm and its variants approximate w(x) = q(x) / p(x) directly. The predict function returns the
    estimate of w(x). The function w(x) can serve as sample weights for the training set during
    training to modify the expectation function that the model's loss function is optimized via,
    i.e.

    E_{x ~ w(x)p(x)} loss(x) = E_{x ~ q(x)} loss(x).

    Usage :
    The fit method is used to run the KLIEP algorithm using LCV and returns value of J
    trained on the entire training/test set with the best sigma found.
    Use the predict method on the training set to determine the sample weights from the KLIEP algorithm.
    """

    def __init__(
        self,
        max_iter=5000,
        num_params=[0.1, 0.2],
        epsilon=1e-4,
        cv=3,
        sigmas=[0.01, 0.1, 0.25, 0.5, 0.75, 1],
        random_state=None,
        verbose=0,
    ):
        """
        Direct density estimation using an inner LCV loop to estimate the proper model. Can be used with sklearn
        cross validation methods with or without storing the inner CV. To use a standard grid search.


        max_iter : Number of iterations to perform
        num_params : List of number of test set vectors used to construct the approximation for inner LCV.
                     Must be a float. Original paper used 10%, i.e. =.1
        sigmas : List of sigmas to be used in inner LCV loop.
        epsilon : Additive factor in the iterative algorithm for numerical stability.
        """
        self.max_iter = max_iter
        self.num_params = num_params
        self.epsilon = epsilon
        self.verbose = verbose
        self.sigmas = sigmas
        self.cv = cv
        self.random_state = 0

    def fit(self, X_train, X_test, alpha_0=None):
        """Uses cross validation to select sigma as in the original paper (LCV).
        In a break from sklearn convention, y=X_test.
        The parameter cv corresponds to R in the original paper.
        Once found, the best sigma is used to train on the full set."""

        # LCV loop, shuffle a copy in place for performance.
        cv = self.cv
        chunk = int(X_test.shape[0] / float(cv))
        if self.random_state is not None:
            np.random.seed(self.random_state)
        # if isinstance(X_test, csr_matrix):
        #     X_test_shuffled = X_test.toarray()
        # else:
        #     X_test_shuffled = X_test.copy()
        X_test_shuffled = X_test.copy()

        X_test_index = np.arange(X_test_shuffled.shape[0])
        np.random.shuffle(X_test_index)
        X_test_shuffled = X_test_shuffled[X_test_index, :]

        j_scores = {}

        if type(self.sigmas) != list:
            self.sigmas = [self.sigmas]

        if type(self.num_params) != list:
            self.num_params = [self.num_params]

        if len(self.sigmas) * len(self.num_params) > 1:
            # Inner LCV loop
            for num_param in self.num_params:
                for sigma in self.sigmas:
                    j_scores[(num_param, sigma)] = np.zeros(cv)
                    for k in range(1, cv + 1):
                        if self.verbose > 0:
                            print("Training: sigma: %s R: %s" % (sigma, k))
                        X_test_fold = X_test_shuffled[(k - 1) * chunk : k * chunk, :]
                        j_scores[(num_param, sigma)][k - 1] = self._fit(
                            X_train=X_train,
                            X_test=X_test_fold,
                            num_parameters=num_param,
                            sigma=sigma,
                        )
                    j_scores[(num_param, sigma)] = np.mean(j_scores[(num_param, sigma)])

            sorted_scores = sorted(
                [x for x in j_scores.items() if np.isfinite(x[1])],
                key=lambda x: x[1],
                reverse=True,
            )
            if len(sorted_scores) == 0:
                warnings.warn("LCV failed to converge for all values of sigma.")
                return self
            self._sigma = sorted_scores[0][0][1]
            self._num_parameters = sorted_scores[0][0][0]
            self._j_scores = sorted_scores
        else:
            self._sigma = self.sigmas[0]
            self._num_parameters = self.num_params[0]
        # best sigma
        self._j = self._fit(
            X_train=X_train,
            X_test=X_test_shuffled,
            num_parameters=self._num_parameters,
            sigma=self._sigma,
        )

        return self  # Compatibility with sklearn

    def _fit(self, X_train, X_test, num_parameters, sigma, alpha_0=None):
        """Fits the estimator with the given parameters w-hat and returns J"""

        num_parameters = num_parameters

        if type(num_parameters) == float:
            num_parameters = int(X_test.shape[0] * num_parameters)

        self._select_param_vectors(
            X_test=X_test, sigma=sigma, num_parameters=num_parameters
        )

        # if isinstance(X_train, csr_matrix):
        #     X_train = X_train.toarray()
        X_train = self._reshape_X(X_train)
        X_test = self._reshape_X(X_test)

        if alpha_0 is None:
            alpha_0 = np.ones(shape=(num_parameters, 1)) / float(num_parameters)

        self._find_alpha(
            X_train=X_train,
            X_test=X_test,
            num_parameters=num_parameters,
            epsilon=self.epsilon,
            alpha_0=alpha_0,
            sigma=sigma,
        )

        return self._calculate_j(X_test, sigma=sigma)

    def _calculate_j(self, X_test, sigma):
        pred = self.predict(X_test, sigma=sigma) + 0.0000001
        log = np.log(pred).sum()
        return log / (X_test.shape[0])

    def score(self, X_test):
        """Return the J score, similar to sklearn's API"""
        return self._calculate_j(X_test=X_test, sigma=self._sigma)

    @staticmethod
    def _reshape_X(X):
        """Reshape input from mxn to mx1xn to take advantage of numpy broadcasting."""
        if len(X.shape) != 3:
            return X.reshape((X.shape[0], 1, X.shape[1]))
        return X

    def _select_param_vectors(self, X_test, sigma, num_parameters):
        """X_test is the test set. b is the number of parameters."""
        indices = np.random.choice(X_test.shape[0], size=num_parameters, replace=False)
        self._test_vectors = X_test[indices, :].copy()
        self._phi_fitted = True

    def _phi(self, X, sigma=None):
        if sigma is None:
            sigma = self._sigma

        if self._phi_fitted:
            return np.exp(
                -np.sum((X - self._test_vectors) ** 2, axis=-1) / (2 * sigma**2)
            )
        raise Exception("Phi not fitted.")

    def _find_alpha(self, alpha_0, X_train, X_test, num_parameters, sigma, epsilon):
        A = np.zeros(shape=(X_test.shape[0], num_parameters))
        b = np.zeros(shape=(num_parameters, 1))

        A = self._phi(X_test, sigma)
        b = self._phi(X_train, sigma).sum(axis=0) / X_train.shape[0]
        b = b.reshape((num_parameters, 1))

        out = alpha_0.copy()
        for k in range(self.max_iter):
            mat = np.dot(A, out)
            mat += 0.000000001
            out += epsilon * np.dot(np.transpose(A), 1.0 / mat)
            out += b * (
                (1 - np.dot(np.transpose(b), out)) / np.dot(np.transpose(b), b)
            )
            out = np.maximum(0, out)
            out /= np.dot(np.transpose(b), out)

        self._alpha = out
        self._fitted = True

    def predict(self, X, sigma=None):
        """Equivalent of w(X) from the original paper."""

        X = self._reshape_X(X)
        if not self._fitted:
            raise Exception("Not fitted!")
        return np.dot(self._phi(X, sigma=sigma), self._alpha).reshape((X.shape[0],))
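A minimal usage sketch of the estimator above, on synthetic data (not part of the repository):

import numpy as np

Xtr = np.random.normal(0.0, 1.0, size=(500, 2))   # training sample ~ p(x)
Xte = np.random.normal(0.5, 1.0, size=(500, 2))   # test sample     ~ q(x)

kliep = DensityRatioEstimator(sigmas=[0.1, 0.5, 1.0])
kliep.fit(Xtr, Xte)          # LCV over (num_params, sigma), then a final fit on the full sets
w = kliep.predict(Xtr)       # estimated w(x) = q(x) / p(x) at the training points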
@@ -1,14 +1,14 @@
import numpy as np
from sklearn import clone
from sklearn.base import BaseEstimator


def clone_fit(c_model: BaseEstimator, data, labels):
    c_model2 = clone(c_model)
    c_model2.fit(data, labels)
    return c_model2


def get_score(pred1, pred2, labels):
    return np.mean((pred1 == labels).astype(int) - (pred2 == labels).astype(int))
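A brief illustrative sketch of how these two helpers compose (synthetic data, not from the repository): clone_fit refits a copy of a classifier on alternative data, and get_score is the mean per-sample difference in correctness between two prediction vectors.

import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
X1, y1 = rng.normal(size=(200, 5)), rng.integers(0, 2, 200)
X2, y2 = rng.normal(size=(200, 5)), rng.integers(0, 2, 200)
Xv, yv = rng.normal(size=(100, 5)), rng.integers(0, 2, 100)

base = LogisticRegression().fit(X1, y1)
alt = clone_fit(base, X2, y2)      # fresh clone of `base`, refitted on (X2, y2)
delta = get_score(base.predict(Xv), alt.predict(Xv), yv)
# delta > 0 when `base` is correct more often than `alt` on the validation points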
conf.yaml
@@ -1,233 +1,233 @@
debug_conf: &debug_conf
  global:
    METRICS:
      - acc
    DATASET_N_PREVS: 5
    DATASET_PREVS:
      # - 0.2
      - 0.5
      # - 0.8

  confs:
    - DATASET_NAME: rcv1
      DATASET_TARGET: CCAT

  plot_confs:
    debug:
      PLOT_ESTIMATORS:
        - mulmc_sld
        - atc_mc
      PLOT_STDEV: true

mc_conf: &mc_conf
  global:
    METRICS:
      - acc
    DATASET_N_PREVS: 9
    DATASET_DIR_UPDATE: true

  confs:
    - DATASET_NAME: rcv1
      DATASET_TARGET: CCAT
    # - DATASET_NAME: imdb

  plot_confs:
    debug3:
      PLOT_ESTIMATORS:
        - binmc_sld
        - mulmc_sld
        - binne_sld
        - mulne_sld
        - bin_sld_gs
        - mul_sld_gs
        - atc_mc
      PLOT_STDEV: true

test_conf: &test_conf
  global:
    METRICS:
      - acc
      - f1
    DATASET_N_PREVS: 9

  confs:
    - DATASET_NAME: rcv1
      DATASET_TARGET: CCAT
    # - DATASET_NAME: imdb

  plot_confs:
    gs_vs_gsq:
      PLOT_ESTIMATORS:
        - bin_sld
        - bin_sld_gs
        - bin_sld_gsq
        - mul_sld
        - mul_sld_gs
        - mul_sld_gsq
    gs_vs_atc:
      PLOT_ESTIMATORS:
        - bin_sld
        - bin_sld_gs
        - mul_sld
        - mul_sld_gs
        - atc_mc
        - atc_ne
    sld_vs_pacc:
      PLOT_ESTIMATORS:
        - bin_sld
        - bin_sld_gs
        - mul_sld
        - mul_sld_gs
        - bin_pacc
        - bin_pacc_gs
        - mul_pacc
        - mul_pacc_gs
        - atc_mc
        - atc_ne
    pacc_vs_atc:
      PLOT_ESTIMATORS:
        - bin_pacc
        - bin_pacc_gs
        - mul_pacc
        - mul_pacc_gs
        - atc_mc
        - atc_ne

main_conf: &main_conf

  global:
    METRICS:
      - acc
      - f1
    DATASET_N_PREVS: 9
    DATASET_DIR_UPDATE: true

  confs:
    - DATASET_NAME: rcv1
      DATASET_TARGET: CCAT
    - DATASET_NAME: imdb
  confs_next:
    - DATASET_NAME: rcv1
      DATASET_TARGET: GCAT
    - DATASET_NAME: rcv1
      DATASET_TARGET: MCAT

  plot_confs:
    gs_vs_qgs:
      PLOT_ESTIMATORS:
        - mul_sld_gs
        - bin_sld_gs
        - mul_sld_gsq
        - bin_sld_gsq
        - atc_mc
        - atc_ne
      PLOT_STDEV: true
  plot_confs_completed:
    max_conf_vs_atc_pacc:
      PLOT_ESTIMATORS:
        - bin_pacc
        - binmc_pacc
        - mul_pacc
        - mulmc_pacc
        - atc_mc
      PLOT_STDEV: true
    max_conf_vs_entropy_pacc:
      PLOT_ESTIMATORS:
        - binmc_pacc
        - binne_pacc
        - mulmc_pacc
        - mulne_pacc
        - atc_mc
      PLOT_STDEV: true
    gs_vs_atc:
      PLOT_ESTIMATORS:
        - mul_sld_gs
        - bin_sld_gs
        - mul_pacc_gs
        - bin_pacc_gs
        - atc_mc
        - atc_ne
      PLOT_STDEV: true
    gs_vs_all:
      PLOT_ESTIMATORS:
        - mul_sld_gs
        - bin_sld_gs
        - mul_pacc_gs
        - bin_pacc_gs
        - atc_mc
        - doc_feat
        - kfcv
      PLOT_STDEV: true
    gs_vs_qgs:
      PLOT_ESTIMATORS:
        - mul_sld_gs
        - bin_sld_gs
        - mul_sld_gsq
        - bin_sld_gsq
        - atc_mc
        - atc_ne
      PLOT_STDEV: true
    cc_vs_other:
      PLOT_ESTIMATORS:
        - mul_cc
        - bin_cc
        - mul_sld
        - bin_sld
        - mul_pacc
        - bin_pacc
      PLOT_STDEV: true
    max_conf_vs_atc:
      PLOT_ESTIMATORS:
        - bin_sld
        - binmc_sld
        - mul_sld
        - mulmc_sld
        - atc_mc
      PLOT_STDEV: true
    max_conf_vs_entropy:
      PLOT_ESTIMATORS:
        - binmc_sld
        - binne_sld
        - mulmc_sld
        - mulne_sld
        - atc_mc
      PLOT_STDEV: true
    sld_vs_pacc:
      PLOT_ESTIMATORS:
        - bin_sld
        - mul_sld
        - bin_pacc
        - mul_pacc
        - atc_mc
      PLOT_STDEV: true
  plot_confs_other:
    best_vs_atc:
      PLOT_ESTIMATORS:
        - mul_sld_bcts
        - mul_sld_gs
        - bin_sld_bcts
        - bin_sld_gs
        - atc_mc
        - atc_ne
    all_vs_atc:
      PLOT_ESTIMATORS:
        - bin_sld
        - bin_sld_bcts
        - bin_sld_gs
        - mul_sld
        - mul_sld_bcts
        - mul_sld_gs
        - atc_mc
        - atc_ne
    best_vs_all:
      PLOT_ESTIMATORS:
        - bin_sld_bcts
        - bin_sld_gs
        - mul_sld_bcts
        - mul_sld_gs
        - kfcv
        - atc_mc
        - atc_ne
        - doc_feat

exec: *main_conf
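A minimal sketch of how a config like this resolves when loaded (the loader code is illustrative; the repository's own loader may differ). The &main_conf anchor is expanded into exec by the YAML parser, so the selected configuration can be read directly:

import yaml  # pyyaml

with open("conf.yaml") as f:
    conf = yaml.safe_load(f)

exec_conf = conf["exec"]                                 # the aliased main_conf mapping
print(exec_conf["global"]["METRICS"])                    # ['acc', 'f1']
print([c["DATASET_NAME"] for c in exec_conf["confs"]])   # ['rcv1', 'imdb']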
out_imdb.md
@@ -1,445 +1,445 @@
<div>target: default</div>
<div>train: [0.5 0.5]</div>
<div>validation: [0.5 0.5]</div>
<div>evaluate_binary: 277.300s</div>
<div>evaluate_multiclass: 139.986s</div>
<div>kfcv: 98.625s</div>
<div>atc_mc: 93.304s</div>
<div>atc_ne: 91.201s</div>
<div>doc_feat: 29.930s</div>
<div>rca_score: 1018.341s</div>
<div>rca_star_score: 1013.733s</div>
<div>tot: 1054.413s</div>

|              | bin    | mul    | kfcv   | atc_mc | atc_ne | doc_feat | rca    | rca_star |
|--------------|--------|--------|--------|--------|--------|----------|--------|----------|
| (0.0, 1.0)   | 0.0154 | 0.0177 | 0.0249 | 0.0291 | 0.0291 | 0.0248   | 0.2705 | 0.2413   |
| (0.05, 0.95) | 0.0309 | 0.0284 | 0.0252 | 0.0300 | 0.0300 | 0.0247   | 0.2796 | 0.2504   |
| (0.1, 0.9)   | 0.0309 | 0.0302 | 0.0251 | 0.0279 | 0.0279 | 0.0250   | 0.2722 | 0.2430   |
| (0.15, 0.85) | 0.0310 | 0.0339 | 0.0245 | 0.0269 | 0.0269 | 0.0244   | 0.2684 | 0.2392   |
| (0.2, 0.8)   | 0.0411 | 0.0407 | 0.0259 | 0.0292 | 0.0292 | 0.0257   | 0.2724 | 0.2432   |
| (0.25, 0.75) | 0.0381 | 0.0376 | 0.0262 | 0.0319 | 0.0319 | 0.0259   | 0.2701 | 0.2409   |
| (0.3, 0.7)   | 0.0442 | 0.0452 | 0.0254 | 0.0273 | 0.0273 | 0.0256   | 0.2650 | 0.2358   |
| (0.35, 0.65) | 0.0480 | 0.0498 | 0.0236 | 0.0257 | 0.0257 | 0.0235   | 0.2640 | 0.2347   |
| (0.4, 0.6)   | 0.0401 | 0.0431 | 0.0222 | 0.0296 | 0.0296 | 0.0220   | 0.2654 | 0.2361   |
| (0.45, 0.55) | 0.0551 | 0.0558 | 0.0243 | 0.0295 | 0.0295 | 0.0246   | 0.1838 | 0.1551   |
| (0.5, 0.5)   | 0.0499 | 0.0513 | 0.0308 | 0.0319 | 0.0319 | 0.0309   | 0.1472 | 0.1202   |
| (0.55, 0.45) | 0.0538 | 0.0542 | 0.0278 | 0.0329 | 0.0329 | 0.0280   | 0.1717 | 0.1459   |
| (0.6, 0.4)   | 0.0476 | 0.0484 | 0.0258 | 0.0298 | 0.0298 | 0.0259   | 0.2434 | 0.2147   |
| (0.65, 0.35) | 0.0447 | 0.0474 | 0.0287 | 0.0332 | 0.0332 | 0.0288   | 0.2632 | 0.2340   |
| (0.7, 0.3)   | 0.0388 | 0.0397 | 0.0295 | 0.0328 | 0.0328 | 0.0296   | 0.2659 | 0.2367   |
| (0.75, 0.25) | 0.0336 | 0.0399 | 0.0241 | 0.0293 | 0.0293 | 0.0244   | 0.2612 | 0.2320   |
| (0.8, 0.2)   | 0.0407 | 0.0447 | 0.0266 | 0.0303 | 0.0303 | 0.0271   | 0.2601 | 0.2309   |
| (0.85, 0.15) | 0.0383 | 0.0423 | 0.0219 | 0.0278 | 0.0278 | 0.0220   | 0.2670 | 0.2378   |
| (0.9, 0.1)   | 0.0351 | 0.0387 | 0.0244 | 0.0275 | 0.0275 | 0.0245   | 0.2618 | 0.2326   |
| (0.95, 0.05) | 0.0238 | 0.0263 | 0.0269 | 0.0296 | 0.0296 | 0.0272   | 0.2602 | 0.2310   |
| (1.0, 0.0)   | 0.0118 | 0.0202 | 0.0241 | 0.0279 | 0.0279 | 0.0244   | 0.2571 | 0.2279   |

|              | bin    | mul    | kfcv   | atc_mc | atc_ne |
|--------------|--------|--------|--------|--------|--------|
| (0.0, 1.0)   | 0.0088 | 0.0100 | 0.0580 | 0.0183 | 0.0183 |
| (0.05, 0.95) | 0.0175 | 0.0159 | 0.0605 | 0.0193 | 0.0193 |
| (0.1, 0.9)   | 0.0184 | 0.0176 | 0.0532 | 0.0189 | 0.0189 |
| (0.15, 0.85) | 0.0188 | 0.0204 | 0.0475 | 0.0180 | 0.0180 |
| (0.2, 0.8)   | 0.0269 | 0.0266 | 0.0455 | 0.0206 | 0.0206 |
| (0.25, 0.75) | 0.0265 | 0.0261 | 0.0401 | 0.0242 | 0.0242 |
| (0.3, 0.7)   | 0.0328 | 0.0336 | 0.0331 | 0.0208 | 0.0208 |
| (0.35, 0.65) | 0.0386 | 0.0394 | 0.0307 | 0.0211 | 0.0211 |
| (0.4, 0.6)   | 0.0343 | 0.0371 | 0.0273 | 0.0265 | 0.0265 |
| (0.45, 0.55) | 0.0511 | 0.0512 | 0.0231 | 0.0275 | 0.0275 |
| (0.5, 0.5)   | 0.0517 | 0.0529 | 0.0306 | 0.0319 | 0.0319 |
| (0.55, 0.45) | 0.0584 | 0.0583 | 0.0308 | 0.0354 | 0.0354 |
| (0.6, 0.4)   | 0.0590 | 0.0599 | 0.0363 | 0.0357 | 0.0357 |
| (0.65, 0.35) | 0.0635 | 0.0662 | 0.0506 | 0.0440 | 0.0440 |
| (0.7, 0.3)   | 0.0596 | 0.0638 | 0.0654 | 0.0457 | 0.0457 |
| (0.75, 0.25) | 0.0627 | 0.0744 | 0.0964 | 0.0461 | 0.0461 |
| (0.8, 0.2)   | 0.0909 | 0.0999 | 0.1400 | 0.0629 | 0.0629 |
| (0.85, 0.15) | 0.1052 | 0.1126 | 0.1829 | 0.0727 | 0.0727 |
| (0.9, 0.1)   | 0.1377 | 0.1481 | 0.2839 | 0.1215 | 0.1215 |
| (0.95, 0.05) | 0.1305 | 0.1450 | 0.4592 | 0.2037 | 0.2037 |
| (1.0, 0.0)   | 0.1092 | 0.1387 | 0.8818 | 0.5267 | 0.5267 |
out_rcv1.md
File diff suppressed because it is too large

out_spambase.md
@ -1,445 +1,445 @@
|
||||||
|
|
||||||
<div>target: default</div>
|
<div>target: default</div>
|
||||||
<div>train: [0.60621118 0.39378882]</div>
|
<div>train: [0.60621118 0.39378882]</div>
|
||||||
<div>validation: [0.60559006 0.39440994]</div>
|
<div>validation: [0.60559006 0.39440994]</div>
|
||||||
<div>evaluate_binary: 31.883s</div>
|
<div>evaluate_binary: 31.883s</div>
|
||||||
<div>evaluate_multiclass: 24.748s</div>
|
<div>evaluate_multiclass: 24.748s</div>
|
||||||
<div>kfcv: 23.957s</div>
|
<div>kfcv: 23.957s</div>
|
||||||
<div>atc_mc: 36.062s</div>
|
<div>atc_mc: 36.062s</div>
|
||||||
<div>atc_ne: 37.123s</div>
|
<div>atc_ne: 37.123s</div>
|
||||||
<div>doc_feat: 7.063s</div>
|
<div>doc_feat: 7.063s</div>
|
||||||
<div>rca_score: 148.420s</div>
|
<div>rca_score: 148.420s</div>
|
||||||
<div>rca_star_score: 145.690s</div>
|
<div>rca_star_score: 145.690s</div>
|
||||||
<div>tot: 149.118s</div>
|
<div>tot: 149.118s</div>
|
||||||
|
|
||||||
|              | bin    | mul    | kfcv   | atc_mc | atc_ne | doc_feat | rca    | rca_star |
|--------------|--------|--------|--------|--------|--------|----------|--------|----------|
| (0.0, 1.0)   | 0.0411 | 0.0907 | 0.0208 | 0.0267 | 0.0267 | 0.0204   | 0.1106 | 0.1059   |
| (0.05, 0.95) | 0.0392 | 0.0897 | 0.0216 | 0.0266 | 0.0266 | 0.0211   | 0.0523 | 0.0510   |
| (0.1, 0.9)   | 0.0371 | 0.0891 | 0.0232 | 0.0267 | 0.0267 | 0.0227   | 0.0347 | 0.0354   |
| (0.15, 0.85) | 0.0464 | 0.0853 | 0.0226 | 0.0257 | 0.0257 | 0.0222   | 0.0315 | 0.0341   |
| (0.2, 0.8)   | 0.0414 | 0.0757 | 0.0202 | 0.0249 | 0.0249 | 0.0200   | 0.0280 | 0.0302   |
| (0.25, 0.75) | 0.0468 | 0.0768 | 0.0204 | 0.0250 | 0.0250 | 0.0201   | 0.0335 | 0.0376   |
| (0.3, 0.7)   | 0.0384 | 0.0739 | 0.0201 | 0.0252 | 0.0252 | 0.0200   | 0.0349 | 0.0410   |
| (0.35, 0.65) | 0.0386 | 0.0715 | 0.0198 | 0.0239 | 0.0239 | 0.0196   | 0.0376 | 0.0448   |
| (0.4, 0.6)   | 0.0392 | 0.0657 | 0.0199 | 0.0249 | 0.0249 | 0.0197   | 0.0315 | 0.0391   |
| (0.45, 0.55) | 0.0380 | 0.0679 | 0.0213 | 0.0258 | 0.0258 | 0.0212   | 0.0358 | 0.0450   |
| (0.5, 0.5)   | 0.0400 | 0.0670 | 0.0218 | 0.0228 | 0.0228 | 0.0217   | 0.0441 | 0.0550   |
| (0.55, 0.45) | 0.0403 | 0.0686 | 0.0203 | 0.0237 | 0.0237 | 0.0200   | 0.0398 | 0.0507   |
| (0.6, 0.4)   | 0.0432 | 0.0625 | 0.0201 | 0.0245 | 0.0245 | 0.0200   | 0.0370 | 0.0487   |
| (0.65, 0.35) | 0.0384 | 0.0620 | 0.0195 | 0.0236 | 0.0236 | 0.0195   | 0.0356 | 0.0460   |
| (0.7, 0.3)   | 0.0304 | 0.0570 | 0.0236 | 0.0227 | 0.0227 | 0.0236   | 0.0302 | 0.0396   |
| (0.75, 0.25) | 0.0321 | 0.0614 | 0.0187 | 0.0273 | 0.0273 | 0.0187   | 0.0332 | 0.0439   |
| (0.8, 0.2)   | 0.0300 | 0.0555 | 0.0221 | 0.0230 | 0.0230 | 0.0222   | 0.0287 | 0.0340   |
| (0.85, 0.15) | 0.0325 | 0.0540 | 0.0224 | 0.0229 | 0.0229 | 0.0225   | 0.0342 | 0.0360   |
| (0.9, 0.1)   | 0.0262 | 0.0518 | 0.0211 | 0.0238 | 0.0238 | 0.0211   | 0.0483 | 0.0469   |
| (0.95, 0.05) | 0.0243 | 0.0576 | 0.0197 | 0.0240 | 0.0240 | 0.0196   | 0.0806 | 0.0746   |
| (1.0, 0.0)   | 0.0146 | 0.0597 | 0.0231 | 0.0244 | 0.0244 | 0.0232   | 0.1600 | 0.1515   |

|              | bin    | mul    | kfcv   | atc_mc | atc_ne |
|--------------|--------|--------|--------|--------|--------|
| (0.0, 1.0)   | 0.0239 | 0.0477 | 0.0345 | 0.0162 | 0.0162 |
| (0.05, 0.95) | 0.0235 | 0.0496 | 0.0320 | 0.0169 | 0.0169 |
| (0.1, 0.9)   | 0.0230 | 0.0520 | 0.0289 | 0.0171 | 0.0171 |
| (0.15, 0.85) | 0.0308 | 0.0528 | 0.0274 | 0.0171 | 0.0171 |
| (0.2, 0.8)   | 0.0286 | 0.0490 | 0.0291 | 0.0186 | 0.0186 |
| (0.25, 0.75) | 0.0346 | 0.0534 | 0.0255 | 0.0186 | 0.0186 |
| (0.3, 0.7)   | 0.0299 | 0.0545 | 0.0232 | 0.0205 | 0.0205 |
| (0.35, 0.65) | 0.0335 | 0.0566 | 0.0217 | 0.0211 | 0.0211 |
| (0.4, 0.6)   | 0.0360 | 0.0562 | 0.0217 | 0.0226 | 0.0226 |
| (0.45, 0.55) | 0.0372 | 0.0626 | 0.0213 | 0.0246 | 0.0246 |
| (0.5, 0.5)   | 0.0437 | 0.0677 | 0.0223 | 0.0241 | 0.0241 |
| (0.55, 0.45) | 0.0486 | 0.0762 | 0.0241 | 0.0269 | 0.0269 |
| (0.6, 0.4)   | 0.0572 | 0.0779 | 0.0290 | 0.0312 | 0.0312 |
| (0.65, 0.35) | 0.0580 | 0.0866 | 0.0340 | 0.0341 | 0.0341 |
| (0.7, 0.3)   | 0.0546 | 0.0919 | 0.0420 | 0.0374 | 0.0374 |
| (0.75, 0.25) | 0.0636 | 0.1161 | 0.0689 | 0.0533 | 0.0533 |
| (0.8, 0.2)   | 0.0750 | 0.1192 | 0.0768 | 0.0560 | 0.0560 |
| (0.85, 0.15) | 0.1031 | 0.1580 | 0.1244 | 0.0728 | 0.0728 |
| (0.9, 0.1)   | 0.1175 | 0.2412 | 0.1885 | 0.1100 | 0.1100 |
| (0.95, 0.05) | 0.1877 | 0.3434 | 0.3579 | 0.2053 | 0.2053 |
| (1.0, 0.0)   | 0.2717 | 0.3136 | 0.9178 | 0.6264 | 0.6264 |

File diff suppressed because it is too large

@@ -1,40 +1,40 @@
[tool.poetry]
name = "quacc"
version = "0.1.0"
description = ""
authors = ["Lorenzo Volpi <lorenzo.volpi@outlook.com>"]
readme = "README.md"

[tool.poetry.dependencies]
python = "^3.11"
quapy = "^0.1.7"
pandas = "^2.0.3"
jinja2 = "^3.1.2"
pyyaml = "^6.0.1"
logging = "^0.4.9.6"

[tool.poetry.scripts]
main = "quacc.main:main"
comp = "quacc.main:estimate_comparison"
tohost = "scp_sync:scp_sync_to_host"


[tool.poetry.group.dev.dependencies]
pytest = "^7.4.0"
pylance = "^0.5.9"
pytest-mock = "^3.11.1"
pytest-cov = "^4.1.0"
win11toast = "^0.32"
tabulate = "^0.9.0"
paramiko = "^3.3.1"

[tool.pytest.ini_options]
addopts = "--cov=quacc --capture=tee-sys"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[virtualenvs]
in-project = true

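With this configuration in place, the declared entry points can presumably be run through Poetry (for example `poetry install` followed by `poetry run main` or `poetry run comp`); this is the standard Poetry workflow rather than something stated in the diff itself.
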
300 quacc/data.py
@@ -1,150 +1,150 @@
import math
from typing import List, Optional

import numpy as np
import scipy.sparse as sp
from quapy.data import LabelledCollection


# Extended classes
#
# 0 ~ True 0
# 1 ~ False 1
# 2 ~ False 0
# 3 ~ True 1
# _____________________
# |          |          |
# |  True 0  |  False 1 |
# |__________|__________|
# |          |          |
# |  False 0 |  True 1  |
# |__________|__________|
#
class ExClassManager:
    @staticmethod
    def get_ex(n_classes: int, true_class: int, pred_class: int) -> int:
        return true_class * n_classes + pred_class

    @staticmethod
    def get_pred(n_classes: int, ex_class: int) -> int:
        return ex_class % n_classes

    @staticmethod
    def get_true(n_classes: int, ex_class: int) -> int:
        return ex_class // n_classes


class ExtendedCollection(LabelledCollection):
    def __init__(
        self,
        instances: np.ndarray | sp.csr_matrix,
        labels: np.ndarray,
        classes: Optional[List] = None,
    ):
        super().__init__(instances, labels, classes=classes)

    def split_by_pred(self):
        _ncl = int(math.sqrt(self.n_classes))
        _indexes = ExtendedCollection._split_index_by_pred(_ncl, self.instances)
        if isinstance(self.instances, np.ndarray):
            _instances = [
                self.instances[ind] if ind.shape[0] > 0 else np.asarray([], dtype=int)
                for ind in _indexes
            ]
        elif isinstance(self.instances, sp.csr_matrix):
            _instances = [
                self.instances[ind]
                if ind.shape[0] > 0
                else sp.csr_matrix(np.empty((0, 0), dtype=int))
                for ind in _indexes
            ]
        _labels = [
            np.asarray(
                [
                    ExClassManager.get_true(_ncl, lbl)
                    for lbl in (self.labels[ind] if len(ind) > 0 else [])
                ],
                dtype=int,
            )
            for ind in _indexes
        ]
        return [
            ExtendedCollection(inst, lbl, classes=range(0, _ncl))
            for (inst, lbl) in zip(_instances, _labels)
        ]

    @classmethod
    def split_inst_by_pred(
        cls, n_classes: int, instances: np.ndarray | sp.csr_matrix
    ) -> (List[np.ndarray | sp.csr_matrix], List[float]):
        _indexes = cls._split_index_by_pred(n_classes, instances)
        if isinstance(instances, np.ndarray):
            _instances = [
                instances[ind] if ind.shape[0] > 0 else np.asarray([], dtype=int)
                for ind in _indexes
            ]
        elif isinstance(instances, sp.csr_matrix):
            _instances = [
                instances[ind]
                if ind.shape[0] > 0
                else sp.csr_matrix(np.empty((0, 0), dtype=int))
                for ind in _indexes
            ]
        norms = [inst.shape[0] / instances.shape[0] for inst in _instances]
        return _instances, norms

    @classmethod
    def _split_index_by_pred(
        cls, n_classes: int, instances: np.ndarray | sp.csr_matrix
    ) -> List[np.ndarray]:
        if isinstance(instances, np.ndarray):
            _pred_label = [np.argmax(inst[-n_classes:], axis=0) for inst in instances]
        elif isinstance(instances, sp.csr_matrix):
            _pred_label = [
                np.argmax(inst[:, -n_classes:].toarray().flatten(), axis=0)
                for inst in instances
            ]
        else:
            raise ValueError("Unsupported matrix format")

        return [
            np.asarray([j for (j, x) in enumerate(_pred_label) if x == i], dtype=int)
            for i in range(0, n_classes)
        ]

    @classmethod
    def extend_instances(
        cls, instances: np.ndarray | sp.csr_matrix, pred_proba: np.ndarray
    ) -> np.ndarray | sp.csr_matrix:
        if isinstance(instances, sp.csr_matrix):
            _pred_proba = sp.csr_matrix(pred_proba)
            n_x = sp.hstack([instances, _pred_proba])
        elif isinstance(instances, np.ndarray):
            n_x = np.concatenate((instances, pred_proba), axis=1)
        else:
            raise ValueError("Unsupported matrix format")

        return n_x

    @classmethod
    def extend_collection(
        cls,
        base: LabelledCollection,
        pred_proba: np.ndarray,
    ):
        n_classes = base.n_classes

        # n_X = [ X | predicted probs. ]
        n_x = cls.extend_instances(base.X, pred_proba)

        # n_y = (expected y, predicted y)
        pred_proba = pred_proba[:, -n_classes:]
        preds = np.argmax(pred_proba, axis=-1)
        n_y = np.asarray(
            [
                ExClassManager.get_ex(n_classes, true_class, pred_class)
                for (true_class, pred_class) in zip(base.y, preds)
            ]
        )

        return ExtendedCollection(n_x, n_y, classes=[*range(0, n_classes * n_classes)])

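A minimal sketch (not part of the diff) of how the extended-class encoding above is meant to be used; `clf` and `val` are assumptions standing for a fitted binary classifier and a quapy LabelledCollection.

from quacc.data import ExClassManager, ExtendedCollection

# the four extended classes of a binary problem, matching the comment scheme above
assert ExClassManager.get_ex(2, true_class=0, pred_class=0) == 0  # True 0
assert ExClassManager.get_ex(2, true_class=0, pred_class=1) == 1  # False 1
assert ExClassManager.get_ex(2, true_class=1, pred_class=0) == 2  # False 0
assert ExClassManager.get_ex(2, true_class=1, pred_class=1) == 3  # True 1

# extend a labelled collection with the classifier's posteriors (clf and val are assumed)
pred_proba = clf.predict_proba(val.X)                # shape (n_samples, 2)
ext_val = ExtendedCollection.extend_collection(val, pred_proba)
print(ext_val.prevalence())                          # prevalence over the 4 extended classes
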
342 quacc/dataset.py
@@ -1,171 +1,171 @@
import math
from typing import List

import numpy as np
import quapy as qp
from quapy.data.base import LabelledCollection
from sklearn.conftest import fetch_rcv1

TRAIN_VAL_PROP = 0.5


class DatasetSample:
    def __init__(
        self,
        train: LabelledCollection,
        validation: LabelledCollection,
        test: LabelledCollection,
    ):
        self.train = train
        self.validation = validation
        self.test = test

    @property
    def train_prev(self):
        return self.train.prevalence()

    @property
    def validation_prev(self):
        return self.validation.prevalence()

    @property
    def prevs(self):
        return {"train": self.train_prev, "validation": self.validation_prev}


class Dataset:
    def __init__(self, name, n_prevalences=9, prevs=None, target=None):
        self._name = name
        self._target = target

        self.prevs = None
        self.n_prevs = n_prevalences
        if prevs is not None:
            prevs = np.unique([p for p in prevs if p > 0.0 and p < 1.0])
            if prevs.shape[0] > 0:
                self.prevs = np.sort(prevs)
                self.n_prevs = self.prevs.shape[0]

    def __spambase(self):
        return qp.datasets.fetch_UCIDataset("spambase", verbose=False).train_test

    # try min_df=5
    def __imdb(self):
        return qp.datasets.fetch_reviews("imdb", tfidf=True, min_df=3).train_test

    def __rcv1(self):
        n_train = 23149
        available_targets = ["CCAT", "GCAT", "MCAT"]

        if self._target is None or self._target not in available_targets:
            raise ValueError(f"Invalid target {self._target}")

        dataset = fetch_rcv1()
        target_index = np.where(dataset.target_names == self._target)[0]
        all_train_d = dataset.data[:n_train, :]
        test_d = dataset.data[n_train:, :]
        labels = dataset.target[:, target_index].toarray().flatten()
        all_train_l, test_l = labels[:n_train], labels[n_train:]
        all_train = LabelledCollection(all_train_d, all_train_l, classes=[0, 1])
        test = LabelledCollection(test_d, test_l, classes=[0, 1])

        return all_train, test

    def get_raw(self) -> DatasetSample:
        all_train, test = {
            "spambase": self.__spambase,
            "imdb": self.__imdb,
            "rcv1": self.__rcv1,
        }[self._name]()

        train, val = all_train.split_stratified(
            train_prop=TRAIN_VAL_PROP, random_state=0
        )

        return DatasetSample(train, val, test)

    def get(self) -> List[DatasetSample]:
        (all_train, test) = {
            "spambase": self.__spambase,
            "imdb": self.__imdb,
            "rcv1": self.__rcv1,
        }[self._name]()

        # resample all_train set to have (0.5, 0.5) prevalence
        at_positives = np.sum(all_train.y)
        all_train = all_train.sampling(
            min(at_positives, len(all_train) - at_positives) * 2, 0.5, random_state=0
        )

        # sample prevalences
        if self.prevs is not None:
            prevs = self.prevs
        else:
            prevs = np.linspace(0.0, 1.0, num=self.n_prevs + 1, endpoint=False)[1:]

        at_size = min(math.floor(len(all_train) * 0.5 / p) for p in prevs)
        datasets = []
        for p in 1.0 - prevs:
            all_train_sampled = all_train.sampling(at_size, p, random_state=0)
            train, validation = all_train_sampled.split_stratified(
                train_prop=TRAIN_VAL_PROP, random_state=0
            )
            datasets.append(DatasetSample(train, validation, test))

        return datasets

    def __call__(self):
        return self.get()

    @property
    def name(self):
        return (
            f"{self._name}_{self._target}_{self.n_prevs}prevs"
            if self._name == "rcv1"
            else f"{self._name}_{self.n_prevs}prevs"
        )


# >>> fetch_rcv1().target_names
# array(['C11', 'C12', 'C13', 'C14', 'C15', 'C151', 'C1511', 'C152', 'C16',
#        'C17', 'C171', 'C172', 'C173', 'C174', 'C18', 'C181', 'C182',
#        'C183', 'C21', 'C22', 'C23', 'C24', 'C31', 'C311', 'C312', 'C313',
#        'C32', 'C33', 'C331', 'C34', 'C41', 'C411', 'C42', 'CCAT', 'E11',
#        'E12', 'E121', 'E13', 'E131', 'E132', 'E14', 'E141', 'E142',
#        'E143', 'E21', 'E211', 'E212', 'E31', 'E311', 'E312', 'E313',
#        'E41', 'E411', 'E51', 'E511', 'E512', 'E513', 'E61', 'E71', 'ECAT',
#        'G15', 'G151', 'G152', 'G153', 'G154', 'G155', 'G156', 'G157',
#        'G158', 'G159', 'GCAT', 'GCRIM', 'GDEF', 'GDIP', 'GDIS', 'GENT',
#        'GENV', 'GFAS', 'GHEA', 'GJOB', 'GMIL', 'GOBIT', 'GODD', 'GPOL',
#        'GPRO', 'GREL', 'GSCI', 'GSPO', 'GTOUR', 'GVIO', 'GVOTE', 'GWEA',
#        'GWELF', 'M11', 'M12', 'M13', 'M131', 'M132', 'M14', 'M141',
#        'M142', 'M143', 'MCAT'], dtype=object)


def rcv1_info():
    dataset = fetch_rcv1()
    n_train = 23149

    targets = []
    for target in range(103):
        train_t_prev = np.average(dataset.target[:n_train, target].toarray().flatten())
        test_t_prev = np.average(dataset.target[n_train:, target].toarray().flatten())
        targets.append(
            (
                dataset.target_names[target],
                {
                    "train": (1.0 - train_t_prev, train_t_prev),
                    "test": (1.0 - test_t_prev, test_t_prev),
                },
            )
        )

    targets.sort(key=lambda t: t[1]["train"][1])
    for n, d in targets:
        print(f"{n}:")
        for k, (fp, tp) in d.items():
            print(f"\t{k}: {fp:.4f}, {tp:.4f}")


if __name__ == "__main__":
    rcv1_info()

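A short usage sketch (not in the diff) of the Dataset wrapper above; the rcv1/CCAT choice mirrors one of the targets the class accepts.

from quacc.dataset import Dataset

# nine training prevalences for the binary rcv1 CCAT task
dataset = Dataset("rcv1", n_prevalences=9, target="CCAT")
for sample in dataset.get():
    print(sample.train_prev, sample.validation_prev, len(sample.test))
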
@@ -1,118 +1,118 @@
import collections as C
import copy
from typing import Any

import yaml


class environ:
    _instance = None
    _default_env = {
        "DATASET_NAME": None,
        "DATASET_TARGET": None,
        "METRICS": [],
        "COMP_ESTIMATORS": [],
        "DATASET_N_PREVS": 9,
        "DATASET_PREVS": None,
        "OUT_DIR_NAME": "output",
        "OUT_DIR": None,
        "PLOT_DIR_NAME": "plot",
        "PLOT_OUT_DIR": None,
        "DATASET_DIR_UPDATE": False,
        "PROTOCOL_N_PREVS": 21,
        "PROTOCOL_REPEATS": 100,
        "SAMPLE_SIZE": 1000,
        "PLOT_ESTIMATORS": [],
        "PLOT_STDEV": False,
    }
    _keys = list(_default_env.keys())

    def __init__(self):
        self.exec = []
        self.confs = []
        self.load_conf()
        self._stack = C.deque([self.__getdict()])

    def __setdict(self, d):
        for k, v in d.items():
            super().__setattr__(k, v)

    def __getdict(self):
        return {k: self.__getattribute__(k) for k in environ._keys}

    def __setattr__(self, __name: str, __value: Any) -> None:
        if __name in environ._keys:
            self._stack[-1][__name] = __value
        super().__setattr__(__name, __value)

    def load_conf(self):
        self.__setdict(environ._default_env)

        with open("conf.yaml", "r") as f:
            confs = yaml.safe_load(f)["exec"]

        _global = confs["global"]
        _estimators = set()
        for pc in confs["plot_confs"].values():
            _estimators = _estimators.union(set(pc["PLOT_ESTIMATORS"]))
        _global["COMP_ESTIMATORS"] = list(_estimators)

        self.__setdict(_global)

        self.confs = confs["confs"]
        self.plot_confs = confs["plot_confs"]

    def get_confs(self):
        self._stack.append(None)
        for _conf in self.confs:
            self._stack.pop()
            self.__setdict(self._stack[-1])
            self.__setdict(_conf)
            self._stack.append(self.__getdict())

            yield copy.deepcopy(self._stack[-1])

        self._stack.pop()

    def get_plot_confs(self):
        self._stack.append(None)
        for k, pc in self.plot_confs.items():
            self._stack.pop()
            self.__setdict(self._stack[-1])
            self.__setdict(pc)
            self._stack.append(self.__getdict())

            name = self.DATASET_NAME
            if self.DATASET_TARGET is not None:
                name += f"_{self.DATASET_TARGET}"
            name += f"_{k}"
            yield name

        self._stack.pop()

    @property
    def current(self):
        return copy.deepcopy(self.__getdict())


env = environ()

if __name__ == "__main__":
    stack = C.deque()
    stack.append(-1)

    def __gen(stack: C.deque):
        stack.append(None)
        for i in range(5):
            stack.pop()
            stack.append(i)
            yield stack[-1]

        stack.pop()

    print(stack)

    for i in __gen(stack):
        print(stack, i)

    print(stack)

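A hedged sketch of how this configuration object is meant to be consumed. The module path below is a guess, since the file name for this hunk is not shown in the diff, and a conf.yaml with an "exec" section holding "global", "confs" and "plot_confs" (each plot conf listing its "PLOT_ESTIMATORS") is assumed, as load_conf expects.

from quacc.environment import env  # hypothetical module path

for conf in env.get_confs():        # one expanded configuration per entry in "confs"
    print(conf["DATASET_NAME"], conf["DATASET_N_PREVS"])

for name in env.get_plot_confs():   # derived output names, one per plot configuration
    print(name)
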
110 quacc/error.py
@@ -1,55 +1,55 @@
import numpy as np


def from_name(err_name):
    assert err_name in ERROR_NAMES, f"unknown error {err_name}"
    callable_error = globals()[err_name]
    return callable_error


# def f1(prev):
#     # https://github.com/dice-group/gerbil/wiki/Precision,-Recall-and-F1-measure
#     if prev[0] == 0 and prev[1] == 0 and prev[2] == 0:
#         return 1.0
#     elif prev[0] == 0 and prev[1] > 0 and prev[2] == 0:
#         return 0.0
#     elif prev[0] == 0 and prev[1] == 0 and prev[2] > 0:
#         return float('NaN')
#     else:
#         recall = prev[0] / (prev[0] + prev[1])
#         precision = prev[0] / (prev[0] + prev[2])
#         return 2 * (precision * recall) / (precision + recall)


def f1(prev):
    den = (2 * prev[3]) + prev[1] + prev[2]
    if den == 0:
        return 0.0
    else:
        return (2 * prev[3]) / den


def f1e(prev):
    return 1 - f1(prev)


def acc(prev: np.ndarray) -> float:
    return (prev[0] + prev[3]) / np.sum(prev)


def accd(true_prevs: np.ndarray, estim_prevs: np.ndarray) -> np.ndarray:
    vacc = np.vectorize(acc, signature="(m)->()")
    a_tp = vacc(true_prevs)
    a_ep = vacc(estim_prevs)
    return np.abs(a_tp - a_ep)


def maccd(true_prevs: np.ndarray, estim_prevs: np.ndarray) -> float:
    return accd(true_prevs, estim_prevs).mean()


ACCURACY_ERROR = {maccd}
ACCURACY_ERROR_SINGLE = {accd}
ACCURACY_ERROR_NAMES = {func.__name__ for func in ACCURACY_ERROR}
ACCURACY_ERROR_SINGLE_NAMES = {func.__name__ for func in ACCURACY_ERROR_SINGLE}
ERROR_NAMES = ACCURACY_ERROR_NAMES | ACCURACY_ERROR_SINGLE_NAMES

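A worked example (illustrative numbers, not from the diff) of the error functions above, with the extended prevalence ordered as [True 0, False 1, False 0, True 1] to match the encoding in quacc/data.py.

import numpy as np
from quacc.error import acc, f1

prev = np.asarray([0.45, 0.05, 0.10, 0.40])
print(acc(prev))  # (0.45 + 0.40) / 1.0 = 0.85
print(f1(prev))   # 2*0.40 / (2*0.40 + 0.05 + 0.10) = 0.8421...
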
@@ -1,34 +1,34 @@
from typing import Callable, Union

import numpy as np
from quapy.protocol import AbstractProtocol, OnLabelledCollectionProtocol

import quacc as qc

from ..method.base import BaseAccuracyEstimator


def evaluate(
    estimator: BaseAccuracyEstimator,
    protocol: AbstractProtocol,
    error_metric: Union[Callable | str],
) -> float:
    if isinstance(error_metric, str):
        error_metric = qc.error.from_name(error_metric)

    collator_bck_ = protocol.collator
    protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")

    estim_prevs, true_prevs = [], []
    for sample in protocol():
        e_sample = estimator.extend(sample)
        estim_prev = estimator.estimate(e_sample.X, ext=True)
        estim_prevs.append(estim_prev)
        true_prevs.append(e_sample.prevalence())

    protocol.collator = collator_bck_

    true_prevs = np.array(true_prevs)
    estim_prevs = np.array(estim_prevs)

    return error_metric(true_prevs, estim_prevs)

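A hedged sketch of wiring evaluate() to quapy's APP protocol; `my_estimator` (an already fit BaseAccuracyEstimator) and `test_set` are assumptions, and the "maccd" metric name comes from quacc/error.py.

from quapy.protocol import APP

protocol = APP(test_set, sample_size=1000, repeats=100)
error = evaluate(my_estimator, protocol, error_metric="maccd")
print(error)
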
@@ -1,299 +1,299 @@
from functools import wraps
|
from functools import wraps
|
||||||
from statistics import mean
|
from statistics import mean
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import sklearn.metrics as metrics
|
import sklearn.metrics as metrics
|
||||||
from quapy.data import LabelledCollection
|
from quapy.data import LabelledCollection
|
||||||
from quapy.protocol import AbstractStochasticSeededProtocol
|
from quapy.protocol import AbstractStochasticSeededProtocol
|
||||||
from scipy.sparse import issparse
|
from scipy.sparse import issparse
|
||||||
from sklearn.base import BaseEstimator
|
from sklearn.base import BaseEstimator
|
||||||
from sklearn.model_selection import cross_validate
|
from sklearn.model_selection import cross_validate
|
||||||
|
|
||||||
import baselines.atc as atc
|
import baselines.atc as atc
|
||||||
import baselines.doc as doc
|
import baselines.doc as doc
|
||||||
import baselines.impweight as iw
|
import baselines.impweight as iw
|
||||||
import baselines.rca as rcalib
|
import baselines.rca as rcalib
|
||||||
|
|
||||||
from .report import EvaluationReport
|
from .report import EvaluationReport
|
||||||
|
|
||||||
_baselines = {}
|
_baselines = {}
|
||||||
|
|
||||||
|
|
||||||
def baseline(func):
|
def baseline(func):
|
||||||
@wraps(func)
|
@wraps(func)
|
||||||
def wrapper(c_model, validation, protocol):
|
def wrapper(c_model, validation, protocol):
|
||||||
return func(c_model, validation, protocol)
|
return func(c_model, validation, protocol)
|
||||||
|
|
||||||
_baselines[func.__name__] = wrapper
|
_baselines[func.__name__] = wrapper
|
||||||
|
|
||||||
return wrapper
|
return wrapper
|
||||||
|
|
||||||
|
|
||||||
@baseline
|
@baseline
|
||||||
def kfcv(
|
def kfcv(
|
||||||
c_model: BaseEstimator,
|
c_model: BaseEstimator,
|
||||||
validation: LabelledCollection,
|
validation: LabelledCollection,
|
||||||
protocol: AbstractStochasticSeededProtocol,
|
protocol: AbstractStochasticSeededProtocol,
|
||||||
predict_method="predict",
|
predict_method="predict",
|
||||||
):
|
):
|
||||||
c_model_predict = getattr(c_model, predict_method)
|
c_model_predict = getattr(c_model, predict_method)
|
||||||
|
|
||||||
scoring = ["accuracy", "f1_macro"]
|
scoring = ["accuracy", "f1_macro"]
|
||||||
scores = cross_validate(c_model, validation.X, validation.y, scoring=scoring)
|
scores = cross_validate(c_model, validation.X, validation.y, scoring=scoring)
|
||||||
acc_score = mean(scores["test_accuracy"])
|
acc_score = mean(scores["test_accuracy"])
|
||||||
f1_score = mean(scores["test_f1_macro"])
|
f1_score = mean(scores["test_f1_macro"])
|
||||||
|
|
||||||
report = EvaluationReport(name="kfcv")
|
report = EvaluationReport(name="kfcv")
|
||||||
for test in protocol():
|
for test in protocol():
|
||||||
test_preds = c_model_predict(test.X)
|
test_preds = c_model_predict(test.X)
|
||||||
meta_acc = abs(acc_score - metrics.accuracy_score(test.y, test_preds))
|
meta_acc = abs(acc_score - metrics.accuracy_score(test.y, test_preds))
|
||||||
meta_f1 = abs(f1_score - metrics.f1_score(test.y, test_preds))
|
meta_f1 = abs(f1_score - metrics.f1_score(test.y, test_preds))
|
||||||
report.append_row(
|
report.append_row(
|
||||||
test.prevalence(),
|
test.prevalence(),
|
||||||
acc_score=acc_score,
|
acc_score=acc_score,
|
||||||
f1_score=f1_score,
|
f1_score=f1_score,
|
||||||
acc=meta_acc,
|
acc=meta_acc,
|
||||||
f1=meta_f1,
|
f1=meta_f1,
|
||||||
)
|
)
|
||||||
|
|
||||||
return report
|
return report
|
||||||
|
|
||||||
|
|
||||||
@baseline
|
@baseline
|
||||||
def ref(
|
def ref(
|
||||||
c_model: BaseEstimator,
|
c_model: BaseEstimator,
|
||||||
validation: LabelledCollection,
|
validation: LabelledCollection,
|
||||||
protocol: AbstractStochasticSeededProtocol,
|
protocol: AbstractStochasticSeededProtocol,
|
||||||
):
|
):
|
||||||
c_model_predict = getattr(c_model, "predict")
|
c_model_predict = getattr(c_model, "predict")
|
||||||
report = EvaluationReport(name="ref")
|
report = EvaluationReport(name="ref")
|
||||||
for test in protocol():
|
for test in protocol():
|
||||||
test_preds = c_model_predict(test.X)
|
test_preds = c_model_predict(test.X)
|
||||||
report.append_row(
|
report.append_row(
|
||||||
test.prevalence(),
|
test.prevalence(),
|
||||||
acc_score=metrics.accuracy_score(test.y, test_preds),
|
acc_score=metrics.accuracy_score(test.y, test_preds),
|
||||||
f1_score=metrics.f1_score(test.y, test_preds),
|
f1_score=metrics.f1_score(test.y, test_preds),
|
||||||
)
|
)
|
||||||
|
|
||||||
return report
|
return report
|
||||||
|
|
||||||
|
|
||||||
@baseline
|
@baseline
|
||||||
def atc_mc(
|
def atc_mc(
|
||||||
c_model: BaseEstimator,
|
c_model: BaseEstimator,
|
||||||
validation: LabelledCollection,
|
validation: LabelledCollection,
|
||||||
protocol: AbstractStochasticSeededProtocol,
|
protocol: AbstractStochasticSeededProtocol,
|
||||||
predict_method="predict_proba",
|
predict_method="predict_proba",
|
||||||
):
|
):
|
||||||
"""garg"""
|
"""garg"""
|
||||||
c_model_predict = getattr(c_model, predict_method)
|
c_model_predict = getattr(c_model, predict_method)
|
||||||
|
|
||||||
## Load ID validation data probs and labels
|
## Load ID validation data probs and labels
|
||||||
val_probs, val_labels = c_model_predict(validation.X), validation.y
|
val_probs, val_labels = c_model_predict(validation.X), validation.y
|
||||||
|
|
||||||
## score function, e.g., negative entropy or argmax confidence
|
## score function, e.g., negative entropy or argmax confidence
|
||||||
val_scores = atc.get_max_conf(val_probs)
|
val_scores = atc.get_max_conf(val_probs)
|
||||||
val_preds = np.argmax(val_probs, axis=-1)
|
val_preds = np.argmax(val_probs, axis=-1)
|
||||||
_, atc_thres = atc.find_ATC_threshold(val_scores, val_labels == val_preds)
|
_, atc_thres = atc.find_ATC_threshold(val_scores, val_labels == val_preds)
|
||||||
|
|
||||||
report = EvaluationReport(name="atc_mc")
|
report = EvaluationReport(name="atc_mc")
|
||||||
for test in protocol():
|
for test in protocol():
|
||||||
## Load OOD test data probs
|
## Load OOD test data probs
|
||||||
test_probs = c_model_predict(test.X)
|
test_probs = c_model_predict(test.X)
|
||||||
test_preds = np.argmax(test_probs, axis=-1)
|
test_preds = np.argmax(test_probs, axis=-1)
|
||||||
test_scores = atc.get_max_conf(test_probs)
|
test_scores = atc.get_max_conf(test_probs)
|
||||||
atc_accuracy = atc.get_ATC_acc(atc_thres, test_scores)
|
atc_accuracy = atc.get_ATC_acc(atc_thres, test_scores)
|
||||||
meta_acc = abs(atc_accuracy - metrics.accuracy_score(test.y, test_preds))
|
meta_acc = abs(atc_accuracy - metrics.accuracy_score(test.y, test_preds))
|
||||||
f1_score = atc.get_ATC_f1(atc_thres, test_scores, test_probs)
|
f1_score = atc.get_ATC_f1(atc_thres, test_scores, test_probs)
|
||||||
meta_f1 = abs(f1_score - metrics.f1_score(test.y, test_preds))
|
meta_f1 = abs(f1_score - metrics.f1_score(test.y, test_preds))
|
||||||
report.append_row(
|
report.append_row(
|
||||||
test.prevalence(),
|
test.prevalence(),
|
||||||
acc=meta_acc,
|
acc=meta_acc,
|
||||||
acc_score=atc_accuracy,
|
acc_score=atc_accuracy,
|
||||||
f1_score=f1_score,
|
f1_score=f1_score,
|
||||||
f1=meta_f1,
|
f1=meta_f1,
|
||||||
)
|
)
|
||||||
|
|
||||||
return report
|
return report
|
||||||
|
|
||||||
|
|
||||||
@baseline
def atc_ne(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict_proba",
):
    """ATC-NE: Average Thresholded Confidence with the negative-entropy score (Garg et al.)."""
    c_model_predict = getattr(c_model, predict_method)

    ## Load ID validation data probs and labels
    val_probs, val_labels = c_model_predict(validation.X), validation.y

    ## score function, e.g., negative entropy or argmax confidence
    val_scores = atc.get_entropy(val_probs)
    val_preds = np.argmax(val_probs, axis=-1)
    _, atc_thres = atc.find_ATC_threshold(val_scores, val_labels == val_preds)

    report = EvaluationReport(name="atc_ne")
    for test in protocol():
        ## Load OOD test data probs
        test_probs = c_model_predict(test.X)
        test_preds = np.argmax(test_probs, axis=-1)
        test_scores = atc.get_entropy(test_probs)
        atc_accuracy = atc.get_ATC_acc(atc_thres, test_scores)
        meta_acc = abs(atc_accuracy - metrics.accuracy_score(test.y, test_preds))
        f1_score = atc.get_ATC_f1(atc_thres, test_scores, test_probs)
        meta_f1 = abs(f1_score - metrics.f1_score(test.y, test_preds))
        report.append_row(
            test.prevalence(),
            acc=meta_acc,
            acc_score=atc_accuracy,
            f1_score=f1_score,
            f1=meta_f1,
        )

    return report


@baseline
def doc_feat(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict_proba",
):
    c_model_predict = getattr(c_model, predict_method)

    val_probs, val_labels = c_model_predict(validation.X), validation.y
    val_scores = np.max(val_probs, axis=-1)
    val_preds = np.argmax(val_probs, axis=-1)
    v1acc = np.mean(val_preds == val_labels) * 100

    report = EvaluationReport(name="doc_feat")
    for test in protocol():
        test_probs = c_model_predict(test.X)
        test_preds = np.argmax(test_probs, axis=-1)
        test_scores = np.max(test_probs, axis=-1)
        score = (v1acc + doc.get_doc(val_scores, test_scores)) / 100.0
        meta_acc = abs(score - metrics.accuracy_score(test.y, test_preds))
        report.append_row(test.prevalence(), acc=meta_acc, acc_score=score)

    return report


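# Editor's note (illustration, not part of the diff): doc_feat above applies a
# DoC-style correction: the validation accuracy (0-100 scale) is shifted by
# doc.get_doc(val_scores, test_scores) and rescaled to [0, 1]. Assuming get_doc
# returns the scaled difference between mean test confidence and mean validation
# confidence, the computation amounts to this sketch (not the repo's doc module).
import numpy as np

def doc_estimate_sketch(val_acc_pct, val_scores, test_scores):
    # Assumed behaviour: shift the ID validation accuracy by the difference of
    # mean max-confidence between the OOD test sample and the validation set.
    shift = (np.mean(test_scores) - np.mean(val_scores)) * 100.0
    return (val_acc_pct + shift) / 100.0
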
@baseline
def rca(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    """Reverse Classification Accuracy (RCA), as in elsahar19."""
    c_model_predict = getattr(c_model, predict_method)
    val_pred1 = c_model_predict(validation.X)

    report = EvaluationReport(name="rca")
    for test in protocol():
        try:
            test_pred = c_model_predict(test.X)
            c_model2 = rcalib.clone_fit(c_model, test.X, test_pred)
            c_model2_predict = getattr(c_model2, predict_method)
            val_pred2 = c_model2_predict(validation.X)
            rca_score = 1.0 - rcalib.get_score(val_pred1, val_pred2, validation.y)
            meta_score = abs(rca_score - metrics.accuracy_score(test.y, test_pred))
            report.append_row(test.prevalence(), acc=meta_score, acc_score=rca_score)
        except ValueError:
            report.append_row(
                test.prevalence(), acc=float("nan"), acc_score=float("nan")
            )

    return report


@baseline
def rca_star(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    """RCA* variant of Reverse Classification Accuracy, as in elsahar19."""
    c_model_predict = getattr(c_model, predict_method)
    validation1, validation2 = validation.split_stratified(
        train_prop=0.5, random_state=0
    )
    val1_pred = c_model_predict(validation1.X)
    c_model1 = rcalib.clone_fit(c_model, validation1.X, val1_pred)
    c_model1_predict = getattr(c_model1, predict_method)
    val2_pred1 = c_model1_predict(validation2.X)

    report = EvaluationReport(name="rca_star")
    for test in protocol():
        try:
            test_pred = c_model_predict(test.X)
            c_model2 = rcalib.clone_fit(c_model, test.X, test_pred)
            c_model2_predict = getattr(c_model2, predict_method)
            val2_pred2 = c_model2_predict(validation2.X)
            rca_star_score = 1.0 - rcalib.get_score(
                val2_pred1, val2_pred2, validation2.y
            )
            meta_score = abs(rca_star_score - metrics.accuracy_score(test.y, test_pred))
            report.append_row(
                test.prevalence(), acc=meta_score, acc_score=rca_star_score
            )
        except ValueError:
            report.append_row(
                test.prevalence(), acc=float("nan"), acc_score=float("nan")
            )

    return report


@baseline
def logreg(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    c_model_predict = getattr(c_model, predict_method)

    val_preds = c_model_predict(validation.X)

    report = EvaluationReport(name="logreg")
    for test in protocol():
        wx = iw.logreg(validation.X, validation.y, test.X)
        test_preds = c_model_predict(test.X)
        estim_acc = iw.get_acc(val_preds, validation.y, wx)
        true_acc = metrics.accuracy_score(test.y, test_preds)
        meta_score = abs(estim_acc - true_acc)
        report.append_row(test.prevalence(), acc=meta_score, acc_score=estim_acc)

    return report


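# Editor's note (illustration, not part of the diff): logreg above and kdex2
# below both follow the importance-weighting recipe: estimate density-ratio
# weights w(x) for the validation points (via a domain-discriminating logistic
# regression or a kernel density estimate), then reweight validation correctness
# to estimate test accuracy. The sketch below shows the weighted-accuracy step
# that iw.get_acc is assumed to perform; the ratio estimation itself is omitted.
import numpy as np

def weighted_accuracy_sketch(val_preds, val_labels, weights):
    # Each validation point counts in proportion to its estimated density ratio
    # w(x), so the weighted accuracy mimics accuracy under the test distribution.
    correct = (np.asarray(val_preds) == np.asarray(val_labels)).astype(float)
    weights = np.asarray(weights, dtype=float)
    return float(np.sum(weights * correct) / np.sum(weights))
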
@baseline
def kdex2(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    c_model_predict = getattr(c_model, predict_method)

    val_preds = c_model_predict(validation.X)
    log_likelihood_val = iw.kdex2_lltr(validation.X)
    Xval = validation.X.toarray() if issparse(validation.X) else validation.X

    report = EvaluationReport(name="kdex2")
    for test in protocol():
        Xte = test.X.toarray() if issparse(test.X) else test.X
        wx = iw.kdex2_weights(Xval, Xte, log_likelihood_val)
        test_preds = c_model_predict(Xte)
        estim_acc = iw.get_acc(val_preds, validation.y, wx)
        true_acc = metrics.accuracy_score(test.y, test_preds)
        meta_score = abs(estim_acc - true_acc)
        report.append_row(test.prevalence(), acc=meta_score, acc_score=estim_acc)

    return report

@@ -1,128 +1,128 @@
import multiprocessing
import time
from traceback import print_exception as traceback
from typing import List

import numpy as np
import pandas as pd
import quapy as qp

from quacc.dataset import Dataset
from quacc.environment import env
from quacc.evaluation import baseline, method
from quacc.evaluation.report import CompReport, DatasetReport, EvaluationReport
from quacc.evaluation.worker import estimate_worker
from quacc.logger import Logger

pd.set_option("display.float_format", "{:.4f}".format)
qp.environ["SAMPLE_SIZE"] = env.SAMPLE_SIZE


class CompEstimatorName_:
    def __init__(self, ce):
        self.ce = ce

    def __getitem__(self, e: str | List[str]):
        if isinstance(e, str):
            return self.ce._CompEstimator__get(e)[0]
        elif isinstance(e, list):
            return list(self.ce._CompEstimator__get(e).keys())


class CompEstimatorFunc_:
    def __init__(self, ce):
        self.ce = ce

    def __getitem__(self, e: str | List[str]):
        if isinstance(e, str):
            return self.ce._CompEstimator__get(e)[1]
        elif isinstance(e, list):
            return list(self.ce._CompEstimator__get(e).values())


class CompEstimator:
    __dict = method._methods | baseline._baselines

    def __get(cls, e: str | List[str]):
        if isinstance(e, str):
            try:
                return (e, cls.__dict[e])
            except KeyError:
                raise KeyError(f"Invalid estimator: estimator {e} does not exist")
        elif isinstance(e, list):
            _subtr = np.setdiff1d(e, list(cls.__dict.keys()))
            if len(_subtr) > 0:
                raise KeyError(
                    f"Invalid estimator: estimator {_subtr[0]} does not exist"
                )

            e_fun = {k: fun for k, fun in cls.__dict.items() if k in e}
            if "ref" not in e:
                e_fun["ref"] = cls.__dict["ref"]

            return e_fun

    @property
    def name(self):
        return CompEstimatorName_(self)

    @property
    def func(self):
        return CompEstimatorFunc_(self)


CE = CompEstimator()


def evaluate_comparison(dataset: Dataset, estimators=None) -> EvaluationReport:
    log = Logger.logger()
    # with multiprocessing.Pool(1) as pool:
    with multiprocessing.Pool(len(estimators)) as pool:
        dr = DatasetReport(dataset.name)
        log.info(f"dataset {dataset.name}")
        for d in dataset():
            log.info(
                f"Dataset sample {d.train_prev[1]:.2f} of dataset {dataset.name} started"
            )
            tstart = time.time()
            tasks = [
                (estim, d.train, d.validation, d.test) for estim in CE.func[estimators]
            ]
            results = [
                pool.apply_async(estimate_worker, t, {"_env": env, "q": Logger.queue()})
                for t in tasks
            ]

            results_got = []
            for _r in results:
                try:
                    r = _r.get()
                    if r["result"] is not None:
                        results_got.append(r)
                except Exception as e:
                    log.warning(
                        f"Dataset sample {d.train_prev[1]:.2f} of dataset {dataset.name} failed. Exception: {e}"
                    )

            tend = time.time()
            times = {r["name"]: r["time"] for r in results_got}
            times["tot"] = tend - tstart
            log.info(
                f"Dataset sample {d.train_prev[1]:.2f} of dataset {dataset.name} finished [took {times['tot']:.4f}s]"
            )
            try:
                cr = CompReport(
                    [r["result"] for r in results_got],
                    name=dataset.name,
                    train_prev=d.train_prev,
                    valid_prev=d.validation_prev,
                    times=times,
                )
            except Exception as e:
                log.warning(
                    f"Dataset sample {d.train_prev[1]:.2f} of dataset {dataset.name} failed. Exception: {e}"
                )
                traceback(e)
                cr = None
            dr += cr
        return dr

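# Editor's note (illustration, not part of the diff): CE.name and CE.func index
# the merged registry of quacc methods and baselines either by a single name or
# by a list of names (a "ref" entry is appended automatically when a requested
# list omits it). The estimator names used here are just examples of registered keys.
fn = CE.func["atc_mc"]               # callable registered under "atc_mc"
names = CE.name[["atc_mc", "ref"]]   # validated list of estimator names
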
@@ -1,305 +1,305 @@
import inspect
from functools import wraps

import numpy as np
from quapy.method.aggregative import PACC, SLD, CC
from quapy.protocol import UPP, AbstractProtocol
from sklearn.linear_model import LogisticRegression

import quacc as qc
from quacc.evaluation.report import EvaluationReport
from quacc.method.model_selection import BQAEgsq, GridSearchAE, MCAEgsq

from ..method.base import BQAE, MCAE, BaseAccuracyEstimator

_methods = {}
_sld_param_grid = {
    "q__classifier__C": np.logspace(-3, 3, 7),
    "q__classifier__class_weight": [None, "balanced"],
    "q__recalib": [None, "bcts"],
    "q__exact_train_prev": [True],
    "confidence": [None, "max_conf", "entropy"],
}
_pacc_param_grid = {
    "q__classifier__C": np.logspace(-3, 3, 7),
    "q__classifier__class_weight": [None, "balanced"],
    "confidence": [None, "max_conf", "entropy"],
}


def method(func):
    @wraps(func)
    def wrapper(c_model, validation, protocol):
        return func(c_model, validation, protocol)

    _methods[func.__name__] = wrapper

    return wrapper


def evaluation_report(
    estimator: BaseAccuracyEstimator,
    protocol: AbstractProtocol,
) -> EvaluationReport:
    method_name = inspect.stack()[1].function
    report = EvaluationReport(name=method_name)
    for sample in protocol():
        e_sample = estimator.extend(sample)
        estim_prev = estimator.estimate(e_sample.X, ext=True)
        acc_score = qc.error.acc(estim_prev)
        f1_score = qc.error.f1(estim_prev)
        report.append_row(
            sample.prevalence(),
            acc_score=acc_score,
            acc=abs(qc.error.acc(e_sample.prevalence()) - acc_score),
            f1_score=f1_score,
            f1=abs(qc.error.f1(e_sample.prevalence()) - f1_score),
        )

    return report


@method
def bin_sld(c_model, validation, protocol) -> EvaluationReport:
    est = BQAE(c_model, SLD(LogisticRegression())).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mul_sld(c_model, validation, protocol) -> EvaluationReport:
    est = MCAE(c_model, SLD(LogisticRegression())).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def binmc_sld(c_model, validation, protocol) -> EvaluationReport:
    est = BQAE(
        c_model,
        SLD(LogisticRegression()),
        confidence="max_conf",
    ).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mulmc_sld(c_model, validation, protocol) -> EvaluationReport:
    est = MCAE(
        c_model,
        SLD(LogisticRegression()),
        confidence="max_conf",
    ).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def binne_sld(c_model, validation, protocol) -> EvaluationReport:
    est = BQAE(
        c_model,
        SLD(LogisticRegression()),
        confidence="entropy",
    ).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mulne_sld(c_model, validation, protocol) -> EvaluationReport:
    est = MCAE(
        c_model,
        SLD(LogisticRegression()),
        confidence="entropy",
    ).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def bin_sld_gs(c_model, validation, protocol) -> EvaluationReport:
    v_train, v_val = validation.split_stratified(0.6, random_state=0)
    model = BQAE(c_model, SLD(LogisticRegression()))
    est = GridSearchAE(
        model=model,
        param_grid=_sld_param_grid,
        refit=False,
        protocol=UPP(v_val, repeats=100),
        verbose=True,
    ).fit(v_train)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mul_sld_gs(c_model, validation, protocol) -> EvaluationReport:
    v_train, v_val = validation.split_stratified(0.6, random_state=0)
    model = MCAE(c_model, SLD(LogisticRegression()))
    est = GridSearchAE(
        model=model,
        param_grid=_sld_param_grid,
        refit=False,
        protocol=UPP(v_val, repeats=100),
        verbose=True,
    ).fit(v_train)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def bin_sld_gsq(c_model, validation, protocol) -> EvaluationReport:
    est = BQAEgsq(
        c_model,
        SLD(LogisticRegression()),
        param_grid={
            "classifier__C": np.logspace(-3, 3, 7),
            "classifier__class_weight": [None, "balanced"],
            "recalib": [None, "bcts", "vs"],
        },
        refit=False,
        verbose=False,
    ).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mul_sld_gsq(c_model, validation, protocol) -> EvaluationReport:
    est = MCAEgsq(
        c_model,
        SLD(LogisticRegression()),
        param_grid={
            "classifier__C": np.logspace(-3, 3, 7),
            "classifier__class_weight": [None, "balanced"],
            "recalib": [None, "bcts", "vs"],
        },
        refit=False,
        verbose=False,
    ).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def bin_pacc(c_model, validation, protocol) -> EvaluationReport:
    est = BQAE(c_model, PACC(LogisticRegression())).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mul_pacc(c_model, validation, protocol) -> EvaluationReport:
    est = MCAE(c_model, PACC(LogisticRegression())).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def binmc_pacc(c_model, validation, protocol) -> EvaluationReport:
    est = BQAE(c_model, PACC(LogisticRegression()), confidence="max_conf").fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mulmc_pacc(c_model, validation, protocol) -> EvaluationReport:
    est = MCAE(c_model, PACC(LogisticRegression()), confidence="max_conf").fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def binne_pacc(c_model, validation, protocol) -> EvaluationReport:
    est = BQAE(c_model, PACC(LogisticRegression()), confidence="entropy").fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mulne_pacc(c_model, validation, protocol) -> EvaluationReport:
    est = MCAE(c_model, PACC(LogisticRegression()), confidence="entropy").fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def bin_pacc_gs(c_model, validation, protocol) -> EvaluationReport:
    v_train, v_val = validation.split_stratified(0.6, random_state=0)
    model = BQAE(c_model, PACC(LogisticRegression()))
    est = GridSearchAE(
        model=model,
        param_grid=_pacc_param_grid,
        refit=False,
        protocol=UPP(v_val, repeats=100),
        verbose=False,
    ).fit(v_train)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mul_pacc_gs(c_model, validation, protocol) -> EvaluationReport:
    v_train, v_val = validation.split_stratified(0.6, random_state=0)
    model = MCAE(c_model, PACC(LogisticRegression()))
    est = GridSearchAE(
        model=model,
        param_grid=_pacc_param_grid,
        refit=False,
        protocol=UPP(v_val, repeats=100),
        verbose=False,
    ).fit(v_train)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def bin_cc(c_model, validation, protocol) -> EvaluationReport:
    est = BQAE(c_model, CC(LogisticRegression())).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mul_cc(c_model, validation, protocol) -> EvaluationReport:
    est = MCAE(c_model, CC(LogisticRegression())).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )

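# Editor's note (illustration, not part of the diff): new estimation methods are
# registered by decorating a function with @method, exactly like the entries
# above. The snippet below is a hypothetical extra entry; the quantifier
# configuration is only an example.
@method
def bin_sld_noexact(c_model, validation, protocol) -> EvaluationReport:
    # Any quapy aggregative quantifier can be wrapped in BQAE/MCAE and is
    # registered in _methods under the function's name by the decorator.
    est = BQAE(c_model, SLD(LogisticRegression(), exact_train_prev=False)).fit(validation)
    return evaluation_report(estimator=est, protocol=protocol)
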
File diff suppressed because it is too large
@@ -1,44 +1,44 @@
import time
from traceback import print_exception as traceback

import quapy as qp
from quapy.protocol import APP
from sklearn.linear_model import LogisticRegression

from quacc.logger import SubLogger


def estimate_worker(_estimate, train, validation, test, _env=None, q=None):
    qp.environ["SAMPLE_SIZE"] = _env.SAMPLE_SIZE
    SubLogger.setup(q)
    log = SubLogger.logger()

    model = LogisticRegression()

    model.fit(*train.Xy)
    protocol = APP(
        test,
        n_prevalences=_env.PROTOCOL_N_PREVS,
        repeats=_env.PROTOCOL_REPEATS,
        return_type="labelled_collection",
    )
    start = time.time()
    try:
        result = _estimate(model, validation, protocol)
    except Exception as e:
        log.warning(f"Method {_estimate.__name__} failed. Exception: {e}")
        traceback(e)
        return {
            "name": _estimate.__name__,
            "result": None,
            "time": 0,
        }

    end = time.time()
    log.info(f"{_estimate.__name__} finished [took {end-start:.4f}s]")

    return {
        "name": _estimate.__name__,
        "result": result,
        "time": end - start,
    }

272 quacc/logger.py
@@ -1,136 +1,136 @@
import logging
import logging.handlers
import multiprocessing
import threading
from pathlib import Path


class Logger:
    __logger_file = "quacc.log"
    __logger_name = "queue_logger"
    __manager = None
    __queue = None
    __thread = None
    __setup = False
    __handlers = []

    @classmethod
    def __logger_listener(cls, q):
        while True:
            record = q.get()
            if record is None:
                break
            root = logging.getLogger("listener")
            root.handle(record)

    @classmethod
    def setup(cls):
        if cls.__setup:
            return

        # setup root
        root = logging.getLogger("listener")
        root.setLevel(logging.DEBUG)
        rh = logging.FileHandler(cls.__logger_file, mode="a")
        rh.setLevel(logging.DEBUG)
        root.addHandler(rh)

        # setup logger
        if cls.__manager is None:
            cls.__manager = multiprocessing.Manager()

        if cls.__queue is None:
            cls.__queue = cls.__manager.Queue()

        logger = logging.getLogger(cls.__logger_name)
        logger.setLevel(logging.DEBUG)
        qh = logging.handlers.QueueHandler(cls.__queue)
        qh.setLevel(logging.DEBUG)
        qh.setFormatter(
            logging.Formatter(
                fmt="%(asctime)s| %(levelname)-8s %(message)s",
                datefmt="%d/%m/%y %H:%M:%S",
            )
        )
        logger.addHandler(qh)

        # start listener
        cls.__thread = threading.Thread(
            target=cls.__logger_listener,
            args=(cls.__queue,),
        )
        cls.__thread.start()

        cls.__setup = True

    @classmethod
    def add_handler(cls, path: Path):
        root = logging.getLogger("listener")
        rh = logging.FileHandler(path, mode="a")
        rh.setLevel(logging.DEBUG)
        cls.__handlers.append(rh)
        root.addHandler(rh)

    @classmethod
    def clear_handlers(cls):
        root = logging.getLogger("listener")
        for h in cls.__handlers:
            root.removeHandler(h)
        cls.__handlers.clear()

    @classmethod
    def queue(cls):
        if not cls.__setup:
            cls.setup()

        return cls.__queue

    @classmethod
    def logger(cls):
        if not cls.__setup:
            cls.setup()

        return logging.getLogger(cls.__logger_name)

    @classmethod
    def close(cls):
        if cls.__setup and cls.__thread is not None:
            root = logging.getLogger("listener")
            root.info("-" * 100)
            cls.__queue.put(None)
            cls.__thread.join()
            # cls.__manager.close()


class SubLogger:
    __queue = None
    __setup = False

    @classmethod
    def setup(cls, q):
        if cls.__setup:
            return

        cls.__queue = q

        # setup root
        root = logging.getLogger()
        root.setLevel(logging.DEBUG)
        rh = logging.handlers.QueueHandler(q)
        rh.setLevel(logging.DEBUG)
        rh.setFormatter(
            logging.Formatter(
                fmt="%(asctime)s| %(levelname)-12s%(message)s",
                datefmt="%d/%m/%y %H:%M:%S",
            )
        )
        root.addHandler(rh)

        cls.__setup = True

    @classmethod
    def logger(cls):
        if not cls.__setup:
            return None

        return logging.getLogger()

150 quacc/main.py
@@ -1,75 +1,75 @@
from sys import platform
from traceback import print_exception as traceback

import quacc.evaluation.comp as comp
from quacc.dataset import Dataset
from quacc.environment import env
from quacc.logger import Logger
from quacc.utils import create_dataser_dir

CE = comp.CompEstimator()


def toast():
    if platform == "win32":
        import win11toast

        win11toast.notify("Comp", "Completed Execution")


def estimate_comparison():
    log = Logger.logger()
    for conf in env.get_confs():
        dataset = Dataset(
            env.DATASET_NAME,
            target=env.DATASET_TARGET,
            n_prevalences=env.DATASET_N_PREVS,
            prevs=env.DATASET_PREVS,
        )
        create_dataser_dir(dataset.name, update=env.DATASET_DIR_UPDATE)
        Logger.add_handler(env.OUT_DIR / f"{dataset.name}.log")
        try:
            dr = comp.evaluate_comparison(
                dataset,
                estimators=CE.name[env.COMP_ESTIMATORS],
            )
        except Exception as e:
            log.error(f"Evaluation over {dataset.name} failed. Exception: {e}")
            traceback(e)
        for plot_conf in env.get_plot_confs():
            for m in env.METRICS:
                output_path = env.OUT_DIR / f"{plot_conf}_{m}.md"
                try:
                    _repr = dr.to_md(
                        conf=plot_conf,
                        metric=m,
                        estimators=CE.name[env.PLOT_ESTIMATORS],
                        stdev=env.PLOT_STDEV,
                    )
                    with open(output_path, "w") as f:
                        f.write(_repr)
                except Exception as e:
                    log.error(
                        f"Failed while saving configuration {plot_conf} of {dataset.name}. Exception: {e}"
                    )
                    traceback(e)
        Logger.clear_handlers()

    # print(df.to_latex(float_format="{:.4f}".format))
    # print(utils.avg_group_report(df).to_latex(float_format="{:.4f}".format))


def main():
    log = Logger.logger()
    try:
        estimate_comparison()
    except Exception as e:
        log.error(f"estimate comparison failed. Exception: {e}")
        traceback(e)

    toast()
    Logger.close()


if __name__ == "__main__":
    main()

@@ -1,120 +1,120 @@
from copy import deepcopy
from time import time

import numpy as np
import win11toast
from quapy.method.aggregative import SLD
from quapy.protocol import APP, UPP
from sklearn.linear_model import LogisticRegression

import quacc as qc
from quacc.dataset import Dataset
from quacc.error import acc
from quacc.evaluation.baseline import ref
from quacc.evaluation.method import mulmc_sld
from quacc.evaluation.report import CompReport, EvaluationReport
from quacc.method.base import MCAE, BinaryQuantifierAccuracyEstimator
from quacc.method.model_selection import GridSearchAE


def test_gs():
    d = Dataset(name="rcv1", target="CCAT", n_prevalences=1).get_raw()

    classifier = LogisticRegression()
    classifier.fit(*d.train.Xy)

    quantifier = SLD(LogisticRegression())
    # estimator = MultiClassAccuracyEstimator(classifier, quantifier)
    estimator = BinaryQuantifierAccuracyEstimator(classifier, quantifier)

    v_train, v_val = d.validation.split_stratified(0.6, random_state=0)
    gs_protocol = UPP(v_val, sample_size=1000, repeats=100)
    gs_estimator = GridSearchAE(
        model=deepcopy(estimator),
        param_grid={
            "q__classifier__C": np.logspace(-3, 3, 7),
            "q__classifier__class_weight": [None, "balanced"],
            "q__recalib": [None, "bcts", "ts"],
        },
        refit=False,
        protocol=gs_protocol,
        verbose=True,
    ).fit(v_train)

    estimator.fit(d.validation)

    tstart = time()
    erb, ergs = EvaluationReport("base"), EvaluationReport("gs")
    protocol = APP(
        d.test,
        sample_size=1000,
        n_prevalences=21,
        repeats=100,
        return_type="labelled_collection",
    )
    for sample in protocol():
        e_sample = gs_estimator.extend(sample)
        estim_prev_b = estimator.estimate(e_sample.X, ext=True)
        estim_prev_gs = gs_estimator.estimate(e_sample.X, ext=True)
        erb.append_row(
            sample.prevalence(),
            acc=abs(acc(e_sample.prevalence()) - acc(estim_prev_b)),
        )
        ergs.append_row(
            sample.prevalence(),
            acc=abs(acc(e_sample.prevalence()) - acc(estim_prev_gs)),
        )

    cr = CompReport(
        [erb, ergs],
        "test",
        train_prev=d.train_prev,
        valid_prev=d.validation_prev,
    )

    print(cr.table())
    print(f"[took {time() - tstart:.3f}s]")
    win11toast.notify("Test", "completed")


def test_mc():
    d = Dataset(name="rcv1", target="CCAT", prevs=[0.9]).get()[0]
    classifier = LogisticRegression().fit(*d.train.Xy)
    protocol = APP(
        d.test,
        sample_size=1000,
        repeats=100,
        n_prevalences=21,
        return_type="labelled_collection",
    )

    ref_er = ref(classifier, d.validation, protocol)
    mulmc_er = mulmc_sld(classifier, d.validation, protocol)

    cr = CompReport(
        [mulmc_er, ref_er],
        name="test_mc",
        train_prev=d.train_prev,
        valid_prev=d.validation_prev,
    )

    with open("test_mc.md", "w") as f:
        f.write(cr.data().to_markdown())


def test_et():
    d = Dataset(name="imdb", prevs=[0.5]).get()[0]
    classifier = LogisticRegression().fit(*d.train.Xy)
    estimator = MCAE(
        classifier,
        SLD(LogisticRegression(), exact_train_prev=False),
        confidence="max_conf",
    ).fit(d.validation)
    e_test = estimator.extend(d.test)
    ep = estimator.estimate(e_test.X, ext=True)
    print(f"{qc.error.acc(ep) = }")
    print(f"{qc.error.acc(e_test.prevalence()) = }")


if __name__ == "__main__":
    test_et()

@@ -1,177 +1,177 @@
import math
from abc import abstractmethod
from copy import deepcopy
from typing import List

import numpy as np
from quapy.data import LabelledCollection
from quapy.method.aggregative import BaseQuantifier
from scipy.sparse import csr_matrix
from sklearn.base import BaseEstimator

from quacc.data import ExtendedCollection


class BaseAccuracyEstimator(BaseQuantifier):
    def __init__(
        self,
        classifier: BaseEstimator,
        quantifier: BaseQuantifier,
        confidence=None,
    ):
        self.__check_classifier(classifier)
        self.quantifier = quantifier
        self.confidence = confidence

    def __check_classifier(self, classifier):
        if not hasattr(classifier, "predict_proba"):
            raise ValueError(
                f"Passed classifier {classifier.__class__.__name__} cannot predict probabilities."
            )
        self.classifier = classifier

    def __get_confidence(self):
        def max_conf(probas):
            _mc = np.max(probas, axis=-1)
            _min = 1.0 / probas.shape[1]
            _norm_mc = (_mc - _min) / (1.0 - _min)
            return _norm_mc

        def entropy(probas):
            _ent = np.sum(np.multiply(probas, np.log(probas + 1e-20)), axis=1)
            return _ent

        if self.confidence is None:
            return None

        __confs = {
            "max_conf": max_conf,
            "entropy": entropy,
        }
        return __confs.get(self.confidence, None)

    def __get_ext(self, pred_proba):
        _ext = pred_proba
        _f_conf = self.__get_confidence()
        if _f_conf is not None:
            _confs = _f_conf(pred_proba).reshape((len(pred_proba), 1))
            _ext = np.concatenate((_confs, pred_proba), axis=1)

        return _ext

    def extend(self, coll: LabelledCollection, pred_proba=None) -> ExtendedCollection:
        if pred_proba is None:
            pred_proba = self.classifier.predict_proba(coll.X)

        _ext = self.__get_ext(pred_proba)
        return ExtendedCollection.extend_collection(coll, pred_proba=_ext)

    def _extend_instances(self, instances: np.ndarray | csr_matrix, pred_proba=None):
        if pred_proba is None:
            pred_proba = self.classifier.predict_proba(instances)

        _ext = self.__get_ext(pred_proba)
        return ExtendedCollection.extend_instances(instances, _ext)

    @abstractmethod
    def fit(self, train: LabelledCollection | ExtendedCollection):
        ...

    @abstractmethod
    def estimate(self, instances, ext=False) -> np.ndarray:
        ...


class MultiClassAccuracyEstimator(BaseAccuracyEstimator):
    def __init__(
        self,
        classifier: BaseEstimator,
        quantifier: BaseQuantifier,
        confidence: str = None,
    ):
        super().__init__(
            classifier=classifier,
            quantifier=quantifier,
            confidence=confidence,
        )
        self.e_train = None

    def fit(self, train: LabelledCollection):
        self.e_train = self.extend(train)

        self.quantifier.fit(self.e_train)

        return self

    def estimate(self, instances, ext=False) -> np.ndarray:
        e_inst = instances if ext else self._extend_instances(instances)

        estim_prev = self.quantifier.quantify(e_inst)
        return self._check_prevalence_classes(estim_prev, self.quantifier.classes_)

    def _check_prevalence_classes(self, estim_prev, estim_classes) -> np.ndarray:
        true_classes = self.e_train.classes_
        for _cls in true_classes:
            if _cls not in estim_classes:
                estim_prev = np.insert(estim_prev, _cls, [0.0], axis=0)
        return estim_prev


class BinaryQuantifierAccuracyEstimator(BaseAccuracyEstimator):
    def __init__(
        self,
        classifier: BaseEstimator,
        quantifier: BaseAccuracyEstimator,
        confidence: str = None,
    ):
        super().__init__(
            classifier=classifier,
            quantifier=quantifier,
            confidence=confidence,
        )
        self.quantifiers = []
        self.e_trains = []

    def fit(self, train: LabelledCollection | ExtendedCollection):
        self.e_train = self.extend(train)

        self.n_classes = self.e_train.n_classes
        self.e_trains = self.e_train.split_by_pred()

        self.quantifiers = []
        for train in self.e_trains:
            quant = deepcopy(self.quantifier)
            quant.fit(train)
            self.quantifiers.append(quant)

        return self

    def estimate(self, instances, ext=False):
        # TODO: test
        e_inst = instances if ext else self._extend_instances(instances)

        _ncl = int(math.sqrt(self.n_classes))
        s_inst, norms = ExtendedCollection.split_inst_by_pred(_ncl, e_inst)
        estim_prevs = self._quantify_helper(s_inst, norms)

        estim_prev = np.array([prev_row for prev_row in zip(*estim_prevs)]).flatten()
        return estim_prev

    def _quantify_helper(
        self,
        s_inst: List[np.ndarray | csr_matrix],
        norms: List[float],
    ):
        estim_prevs = []
        for quant, inst, norm in zip(self.quantifiers, s_inst, norms):
            if inst.shape[0] > 0:
                estim_prevs.append(quant.quantify(inst) * norm)
            else:
                estim_prevs.append(np.asarray([0.0, 0.0]))

        return estim_prevs


BAE = BaseAccuracyEstimator
MCAE = MultiClassAccuracyEstimator
BQAE = BinaryQuantifierAccuracyEstimator
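As a quick reference (not part of the committed diff), here is a minimal sketch of what the max_conf and entropy confidence measures defined above compute on a toy posterior matrix; the probability values are made up for illustration:

import numpy as np

probas = np.array([[0.9, 0.1], [0.6, 0.4]])  # hypothetical posteriors for two samples

# max_conf: highest posterior, rescaled so a uniform posterior maps to 0 and a crisp one to 1
_mc = np.max(probas, axis=-1)            # [0.9, 0.6]
_min = 1.0 / probas.shape[1]             # 0.5 for a binary problem
norm_mc = (_mc - _min) / (1.0 - _min)    # [0.8, 0.2]

# entropy (as coded above): sum of p*log(p), i.e. the negative Shannon entropy
ent = np.sum(probas * np.log(probas + 1e-20), axis=1)
print(norm_mc, ent)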
@@ -1,307 +1,307 @@
import itertools
from copy import deepcopy
from time import time
from typing import Callable, Union

import numpy as np
import quapy as qp
from quapy.data import LabelledCollection
from quapy.model_selection import GridSearchQ
from quapy.protocol import UPP, AbstractProtocol, OnLabelledCollectionProtocol
from sklearn.base import BaseEstimator

import quacc as qc
import quacc.error
from quacc.data import ExtendedCollection
from quacc.evaluation import evaluate
from quacc.logger import SubLogger
from quacc.method.base import (
    BaseAccuracyEstimator,
    BinaryQuantifierAccuracyEstimator,
    MultiClassAccuracyEstimator,
)


class GridSearchAE(BaseAccuracyEstimator):
    def __init__(
        self,
        model: BaseAccuracyEstimator,
        param_grid: dict,
        protocol: AbstractProtocol,
        error: Union[Callable, str] = qc.error.maccd,
        refit=True,
        # timeout=-1,
        # n_jobs=None,
        verbose=False,
    ):
        self.model = model
        self.param_grid = self.__normalize_params(param_grid)
        self.protocol = protocol
        self.refit = refit
        # self.timeout = timeout
        # self.n_jobs = qp._get_njobs(n_jobs)
        self.verbose = verbose
        self.__check_error(error)
        assert isinstance(protocol, AbstractProtocol), "unknown protocol"

    def _sout(self, msg):
        if self.verbose:
            print(f"[{self.__class__.__name__}]: {msg}")

    def __normalize_params(self, params):
        __remap = {}
        for key in params.keys():
            k, delim, sub_key = key.partition("__")
            if delim and k == "q":
                __remap[key] = f"quantifier__{sub_key}"

        return {(__remap[k] if k in __remap else k): v for k, v in params.items()}

    def __check_error(self, error):
        if error in qc.error.ACCURACY_ERROR:
            self.error = error
        elif isinstance(error, str):
            self.error = qc.error.from_name(error)
        elif hasattr(error, "__call__"):
            self.error = error
        else:
            raise ValueError(
                f"unexpected error type; must either be a callable function or a str representing\n"
                f"the name of an error function in {qc.error.ACCURACY_ERROR_NAMES}"
            )

    def fit(self, training: LabelledCollection):
        """Learning routine. Fits the model with all combinations of hyperparameters and selects the one minimizing
        the error metric.

        :param training: the training set on which to optimize the hyperparameters
        :return: self
        """
        params_keys = list(self.param_grid.keys())
        params_values = list(self.param_grid.values())

        protocol = self.protocol

        self.param_scores_ = {}
        self.best_score_ = None

        tinit = time()

        hyper = [
            dict(zip(params_keys, val)) for val in itertools.product(*params_values)
        ]

        # self._sout(f"starting model selection with {self.n_jobs =}")
        self._sout("starting model selection")

        scores = [self.__params_eval(params, training) for params in hyper]

        for params, score, model in scores:
            if score is not None:
                if self.best_score_ is None or score < self.best_score_:
                    self.best_score_ = score
                    self.best_params_ = params
                    self.best_model_ = model
                self.param_scores_[str(params)] = score
            else:
                self.param_scores_[str(params)] = "timeout"

        tend = time() - tinit

        if self.best_score_ is None:
            raise TimeoutError("no combination of hyperparameters seems to work")

        self._sout(
            f"optimization finished: best params {self.best_params_} (score={self.best_score_:.5f}) "
            f"[took {tend:.4f}s]"
        )
        log = SubLogger.logger()
        log.debug(
            f"[{self.model.__class__.__name__}] "
            f"optimization finished: best params {self.best_params_} (score={self.best_score_:.5f}) "
            f"[took {tend:.4f}s]"
        )

        if self.refit:
            if isinstance(protocol, OnLabelledCollectionProtocol):
                self._sout("refitting on the whole development set")
                self.best_model_.fit(training + protocol.get_labelled_collection())
            else:
                raise RuntimeWarning(
                    f'"refit" was requested, but the protocol does not '
                    f"implement the {OnLabelledCollectionProtocol.__name__} interface"
                )

        return self

    def __params_eval(self, params, training):
        protocol = self.protocol
        error = self.error

        # if self.timeout > 0:

        #     def handler(signum, frame):
        #         raise TimeoutError()

        #     signal.signal(signal.SIGALRM, handler)

        tinit = time()

        # if self.timeout > 0:
        #     signal.alarm(self.timeout)

        try:
            model = deepcopy(self.model)
            # overrides default parameters with the parameters being explored at this iteration
            model.set_params(**params)
            # print({k: v for k, v in model.get_params().items() if k in params})
            model.fit(training)
            score = evaluate(model, protocol=protocol, error_metric=error)

            ttime = time() - tinit
            self._sout(
                f"hyperparams={params}\t got score {score:.5f} [took {ttime:.4f}s]"
            )

            # if self.timeout > 0:
            #     signal.alarm(0)
        # except TimeoutError:
        #     self._sout(f"timeout ({self.timeout}s) reached for config {params}")
        #     score = None
        except ValueError as e:
            self._sout(f"the combination of hyperparameters {params} is invalid")
            raise e
        except Exception as e:
            self._sout(f"something went wrong for config {params}; skipping:")
            self._sout(f"\tException: {e}")
            score = None

        return params, score, model

    def extend(self, coll: LabelledCollection, pred_proba=None) -> ExtendedCollection:
        assert hasattr(self, "best_model_"), "extend called before fit"
        return self.best_model().extend(coll, pred_proba=pred_proba)

    def estimate(self, instances, ext=False):
        """Estimate class prevalence values using the best model found after calling the :meth:`fit` method.

        :param instances: sample containing the instances
        :return: an ndarray of shape `(n_classes)` with class prevalence estimates according to the best model found
            by the model selection process.
        """

        assert hasattr(self, "best_model_"), "estimate called before fit"
        return self.best_model().estimate(instances, ext=ext)

    def set_params(self, **parameters):
        """Sets the hyper-parameters to explore.

        :param parameters: a dictionary with keys the parameter names and values the list of values to explore
        """
        self.param_grid = parameters

    def get_params(self, deep=True):
        """Returns the dictionary of hyper-parameters to explore (`param_grid`)

        :param deep: Unused
        :return: the dictionary `param_grid`
        """
        return self.param_grid

    def best_model(self):
        """
        Returns the best model found after calling the :meth:`fit` method, i.e., the one trained on the combination
        of hyper-parameters that minimized the error function.

        :return: a trained quantifier
        """
        if hasattr(self, "best_model_"):
            return self.best_model_
        raise ValueError("best_model called before fit")


class MCAEgsq(MultiClassAccuracyEstimator):
    def __init__(
        self,
        classifier: BaseEstimator,
        quantifier: BaseAccuracyEstimator,
        param_grid: dict,
        error: Union[Callable, str] = qp.error.mae,
        refit=True,
        timeout=-1,
        n_jobs=None,
        verbose=False,
    ):
        self.param_grid = param_grid
        self.refit = refit
        self.timeout = timeout
        self.n_jobs = n_jobs
        self.verbose = verbose
        self.error = error
        super().__init__(classifier, quantifier)

    def fit(self, train: LabelledCollection):
        self.e_train = self.extend(train)
        t_train, t_val = self.e_train.split_stratified(0.6, random_state=0)
        self.quantifier = GridSearchQ(
            deepcopy(self.quantifier),
            param_grid=self.param_grid,
            protocol=UPP(t_val, repeats=100),
            error=self.error,
            refit=self.refit,
            timeout=self.timeout,
            n_jobs=self.n_jobs,
            verbose=self.verbose,
        ).fit(self.e_train)

        return self

    def estimate(self, instances, ext=False) -> np.ndarray:
        e_inst = instances if ext else self._extend_instances(instances)
        estim_prev = self.quantifier.quantify(e_inst)
        return self._check_prevalence_classes(
            estim_prev, self.quantifier.best_model().classes_
        )


class BQAEgsq(BinaryQuantifierAccuracyEstimator):
    def __init__(
        self,
        classifier: BaseEstimator,
        quantifier: BaseAccuracyEstimator,
        param_grid: dict,
        error: Union[Callable, str] = qp.error.mae,
        refit=True,
        timeout=-1,
        n_jobs=None,
        verbose=False,
    ):
        self.param_grid = param_grid
        self.refit = refit
        self.timeout = timeout
        self.n_jobs = n_jobs
        self.verbose = verbose
        self.error = error
        super().__init__(classifier=classifier, quantifier=quantifier)

    def fit(self, train: LabelledCollection):
        self.e_train = self.extend(train)

        self.n_classes = self.e_train.n_classes
        self.e_trains = self.e_train.split_by_pred()

        self.quantifiers = []
        for e_train in self.e_trains:
            t_train, t_val = e_train.split_stratified(0.6, random_state=0)
            quantifier = GridSearchQ(
                model=deepcopy(self.quantifier),
                param_grid=self.param_grid,
                protocol=UPP(t_val, repeats=100),
                error=self.error,
                refit=self.refit,
                timeout=self.timeout,
                n_jobs=self.n_jobs,
                verbose=self.verbose,
            ).fit(t_train)
            self.quantifiers.append(quantifier)

        return self
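For context, a hedged usage sketch of GridSearchAE as defined above. The collections train and val, the grid values and the protocol settings are illustrative assumptions, not taken from the repository; note that "q__*" keys are remapped to "quantifier__*" by __normalize_params:

# Hypothetical usage; assumes `train` and `val` are quapy LabelledCollection objects.
from quapy.method.aggregative import SLD
from quapy.protocol import APP
from sklearn.linear_model import LogisticRegression

classifier = LogisticRegression().fit(*train.Xy)
gs = GridSearchAE(
    model=MultiClassAccuracyEstimator(classifier, SLD(LogisticRegression())),
    param_grid={
        "q__classifier__C": [0.1, 1.0, 10.0],  # remapped to quantifier__classifier__C
        "confidence": [None, "max_conf"],
    },
    protocol=APP(val, sample_size=500, n_prevalences=11, repeats=10,
                 return_type="labelled_collection"),
    verbose=True,
).fit(train)
estim_prev = gs.estimate(val.X)  # delegates to the best model found during the search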
478 quacc/plot.py
@@ -1,239 +1,239 @@
from pathlib import Path

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from cycler import cycler

from quacc.environment import env

matplotlib.use("agg")


def _get_markers(n: int):
    ls = "ovx+sDph*^1234X><.Pd"
    if n > len(ls):
        ls = ls * (n // len(ls) + 1)  # integer repeat factor
    return list(ls)[:n]


def plot_delta(
    base_prevs,
    columns,
    data,
    *,
    stdevs=None,
    pos_class=1,
    metric="acc",
    name="default",
    train_prev=None,
    legend=True,
    avg=None,
) -> Path:
    _base_title = "delta_stdev" if stdevs is not None else "delta"
    if train_prev is not None:
        t_prev_pos = int(round(train_prev[pos_class] * 100))
        title = f"{_base_title}_{name}_{t_prev_pos}_{metric}"
    else:
        title = f"{_base_title}_{name}_avg_{avg}_{metric}"

    fig, ax = plt.subplots()
    ax.set_aspect("auto")
    ax.grid()

    NUM_COLORS = len(data)
    cm = plt.get_cmap("tab10")
    if NUM_COLORS > 10:
        cm = plt.get_cmap("tab20")
    cy = cycler(color=[cm(i) for i in range(NUM_COLORS)])

    base_prevs = base_prevs[:, pos_class]
    for method, deltas, _cy in zip(columns, data, cy):
        ax.plot(
            base_prevs,
            deltas,
            label=method,
            color=_cy["color"],
            linestyle="-",
            marker="o",
            markersize=3,
            zorder=2,
        )
        if stdevs is not None:
            _col_idx = np.where(columns == method)[0]
            stdev = stdevs[_col_idx].flatten()
            # keep only indices where both delta and stdev are defined
            nn_idx = np.intersect1d(
                np.where(~np.isnan(deltas))[0],
                np.where(~np.isnan(stdev))[0],
            )
            _bps, _ds, _st = base_prevs[nn_idx], deltas[nn_idx], stdev[nn_idx]
            ax.fill_between(
                _bps,
                _ds - _st,
                _ds + _st,
                color=_cy["color"],
                alpha=0.25,
            )

    x_label = "test" if avg is None or avg == "train" else "train"
    ax.set(
        xlabel=f"{x_label} prevalence",
        ylabel=metric,
        title=title,
    )

    if legend:
        ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
    output_path = env.PLOT_OUT_DIR / f"{title}.png"
    fig.savefig(output_path, bbox_inches="tight")

    return output_path


def plot_diagonal(
    reference,
    columns,
    data,
    *,
    pos_class=1,
    metric="acc",
    name="default",
    train_prev=None,
    legend=True,
):
    if train_prev is not None:
        t_prev_pos = int(round(train_prev[pos_class] * 100))
        title = f"diagonal_{name}_{t_prev_pos}_{metric}"
    else:
        title = f"diagonal_{name}_{metric}"

    fig, ax = plt.subplots()
    ax.set_aspect("auto")
    ax.grid()
    ax.set_aspect("equal")

    NUM_COLORS = len(data)
    cm = plt.get_cmap("tab10")
    if NUM_COLORS > 10:
        cm = plt.get_cmap("tab20")
    cy = cycler(
        color=[cm(i) for i in range(NUM_COLORS)],
        marker=_get_markers(NUM_COLORS),
    )

    reference = np.array(reference)
    x_ticks = np.unique(reference)
    x_ticks.sort()

    for deltas, _cy in zip(data, cy):
        ax.plot(
            reference,
            deltas,
            color=_cy["color"],
            linestyle="None",
            marker=_cy["marker"],
            markersize=3,
            zorder=2,
            alpha=0.25,
        )

    # ensure limits are equal for both axes
    _alims = np.stack(((ax.get_xlim(), ax.get_ylim())), axis=-1)
    _lims = np.array([f(ls) for f, ls in zip([np.min, np.max], _alims)])
    ax.set(xlim=tuple(_lims), ylim=tuple(_lims))

    for method, deltas, _cy in zip(columns, data, cy):
        slope, interc = np.polyfit(reference, deltas, 1)
        y_lr = np.array([slope * x + interc for x in _lims])
        ax.plot(
            _lims,
            y_lr,
            label=method,
            color=_cy["color"],
            linestyle="-",
            markersize=0,
            zorder=1,
        )

    # plot reference line
    ax.plot(
        _lims,
        _lims,
        color="black",
        linestyle="--",
        markersize=0,
        zorder=1,
    )

    ax.set(xlabel=f"true {metric}", ylabel=f"estim. {metric}", title=title)

    if legend:
        ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
    output_path = env.PLOT_OUT_DIR / f"{title}.png"
    fig.savefig(output_path, bbox_inches="tight")
    return output_path


def plot_shift(
    shift_prevs,
    columns,
    data,
    *,
    counts=None,
    pos_class=1,
    metric="acc",
    name="default",
    train_prev=None,
    legend=True,
) -> Path:
    if train_prev is not None:
        t_prev_pos = int(round(train_prev[pos_class] * 100))
        title = f"shift_{name}_{t_prev_pos}_{metric}"
    else:
        title = f"shift_{name}_avg_{metric}"

    fig, ax = plt.subplots()
    ax.set_aspect("auto")
    ax.grid()

    NUM_COLORS = len(data)
    cm = plt.get_cmap("tab10")
    if NUM_COLORS > 10:
        cm = plt.get_cmap("tab20")
    cy = cycler(color=[cm(i) for i in range(NUM_COLORS)])

    shift_prevs = shift_prevs[:, pos_class]
    for method, shifts, _cy in zip(columns, data, cy):
        ax.plot(
            shift_prevs,
            shifts,
            label=method,
            color=_cy["color"],
            linestyle="-",
            marker="o",
            markersize=3,
            zorder=2,
        )
        if counts is not None:
            _col_idx = np.where(columns == method)[0]
            count = counts[_col_idx].flatten()
            for prev, shift, cnt in zip(shift_prevs, shifts, count):
                label = f"{cnt}"
                plt.annotate(
                    label,
                    (prev, shift),
                    textcoords="offset points",
                    xytext=(0, 10),
                    ha="center",
                    color=_cy["color"],
                    fontsize=12.0,
                )

    ax.set(xlabel="dataset shift", ylabel=metric, title=title)

    if legend:
        ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
    output_path = env.PLOT_OUT_DIR / f"{title}.png"
    fig.savefig(output_path, bbox_inches="tight")

    return output_path
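Below is a minimal, self-contained sketch (an illustration, not repository code) of calling plot_delta from this module with synthetic data; it assumes env.PLOT_OUT_DIR can be pointed at an existing directory, and the method names and delta values are invented:

import numpy as np
from pathlib import Path

from quacc.environment import env
from quacc.plot import plot_delta

env.PLOT_OUT_DIR = Path(".")  # hypothetical: save the figure to the current directory

base_prevs = np.array([[1 - p, p] for p in np.linspace(0.1, 0.9, 9)])  # synthetic test prevalences
data = np.array([
    np.abs(np.linspace(-0.10, 0.10, 9)),  # fake deltas for a method called "bin_sld"
    np.abs(np.linspace(-0.05, 0.15, 9)),  # fake deltas for a method called "mul_sld"
])
columns = np.array(["bin_sld", "mul_sld"])

out = plot_delta(base_prevs, columns, data, metric="acc", name="demo",
                 train_prev=np.array([0.5, 0.5]))
print(out)  # path of the saved png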
118 quacc/utils.py
@@ -1,59 +1,59 @@
import functools
import os
import shutil
from pathlib import Path

import pandas as pd

from quacc.environment import env


def combine_dataframes(dfs, df_index=[]) -> pd.DataFrame:
    if len(dfs) < 1:
        raise ValueError
    if len(dfs) == 1:
        return dfs[0]
    df = dfs[0]
    for ndf in dfs[1:]:
        df = df.join(ndf.set_index(df_index), on=df_index)

    return df


def avg_group_report(df: pd.DataFrame) -> pd.DataFrame:
    def _reduce_func(s1, s2):
        return {(n1, n2): v + s2[(n1, n2)] for ((n1, n2), v) in s1.items()}

    lst = df.to_dict(orient="records")[1:-1]
    summed_series = functools.reduce(_reduce_func, lst)
    idx = df.columns.drop([("base", "T"), ("base", "F")])
    avg_report = {
        (n1, n2): (v / len(lst))
        for ((n1, n2), v) in summed_series.items()
        if n1 != "base"
    }
    return pd.DataFrame([avg_report], columns=idx)


def fmt_line_md(s):
    return f"> {s} \n"


def create_dataser_dir(dir_name, update=False):
    base_out_dir = Path(env.OUT_DIR_NAME)
    if not base_out_dir.exists():
        os.mkdir(base_out_dir)

    dataset_dir = base_out_dir / dir_name
    env.OUT_DIR = dataset_dir
    if update:
        if not dataset_dir.exists():
            os.mkdir(dataset_dir)
    else:
        shutil.rmtree(dataset_dir, ignore_errors=True)
        os.mkdir(dataset_dir)

    plot_dir_path = dataset_dir / "plot"
    env.PLOT_OUT_DIR = plot_dir_path
    if not plot_dir_path.exists():
        os.mkdir(plot_dir_path)
80 roadmap.md
@@ -1,40 +1,40 @@

## Roadmap

#### quantifier domain

- single multilabel quantifier
- vector of binary quantifiers

| quantifier       |                |                |
|:----------------:|:--------------:|:--------------:|
| true quantifier  | true positive  | false positive |
| false quantifier | false negative | true negative  |

#### dataset split

- train | test
  - classifier C is fit on train
  - quantifier Q is fit on cross validation of C over train
- train | validation | test
  - classifier C is fit on train
  - quantifier Q is fit on validation

#### classifier origin

- black box
- crystal box

#### test metrics

- f1_score
- K

#### models

- classifier
- quantifier
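The second split strategy listed in the roadmap above (train | validation | test) is the one exercised by test_et() earlier in this commit; a hedged sketch of that flow, assuming the Dataset, MCAE, SLD, qc and LogisticRegression imports from that test script:

d = Dataset(name="imdb", prevs=[0.5]).get()[0]      # train | validation | test
classifier = LogisticRegression().fit(*d.train.Xy)  # classifier C is fit on train
estimator = MCAE(                                   # quantifier Q is fit on validation
    classifier, SLD(LogisticRegression(), exact_train_prev=False)
).fit(d.validation)
e_test = estimator.extend(d.test)                   # evaluation happens on test
print(qc.error.acc(estimator.estimate(e_test.X, ext=True)))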
4202 test_mc.md
File diff suppressed because it is too large
@ -1,225 +1,225 @@
|
||||||
import pytest
|
import pytest
|
||||||
from quacc.data import ExClassManager as ECM, ExtendedCollection
|
from quacc.data import ExClassManager as ECM, ExtendedCollection
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import scipy.sparse as sp
|
import scipy.sparse as sp
|
||||||
|
|
||||||
|
|
||||||
class TestExClassManager:
|
class TestExClassManager:
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"true_class,pred_class,result",
|
"true_class,pred_class,result",
|
||||||
[
|
[
|
||||||
(0, 0, 0),
|
(0, 0, 0),
|
||||||
(0, 1, 1),
|
(0, 1, 1),
|
||||||
(1, 0, 2),
|
(1, 0, 2),
|
||||||
(1, 1, 3),
|
(1, 1, 3),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_get_ex(self, true_class, pred_class, result):
|
def test_get_ex(self, true_class, pred_class, result):
|
||||||
ncl = 2
|
ncl = 2
|
||||||
assert ECM.get_ex(ncl, true_class, pred_class) == result
|
assert ECM.get_ex(ncl, true_class, pred_class) == result
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"ex_class,result",
|
"ex_class,result",
|
||||||
[
|
[
|
||||||
(0, 0),
|
(0, 0),
|
||||||
(1, 1),
|
(1, 1),
|
||||||
(2, 0),
|
(2, 0),
|
||||||
(3, 1),
|
(3, 1),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_get_pred(self, ex_class, result):
|
def test_get_pred(self, ex_class, result):
|
||||||
ncl = 2
|
ncl = 2
|
||||||
assert ECM.get_pred(ncl, ex_class) == result
|
assert ECM.get_pred(ncl, ex_class) == result
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"ex_class,result",
|
"ex_class,result",
|
||||||
[
|
[
|
||||||
(0, 0),
|
(0, 0),
|
||||||
(1, 0),
|
(1, 0),
|
||||||
(2, 1),
|
(2, 1),
|
||||||
(3, 1),
|
(3, 1),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_get_true(self, ex_class, result):
|
def test_get_true(self, ex_class, result):
|
||||||
ncl = 2
|
ncl = 2
|
||||||
assert ECM.get_true(ncl, ex_class) == result
|
assert ECM.get_true(ncl, ex_class) == result
|
||||||
|
|
||||||
|
|
||||||
class TestExtendedCollection:
|
class TestExtendedCollection:
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"instances,result",
|
"instances,result",
|
||||||
[
|
[
|
||||||
(
|
(
|
||||||
np.asarray(
|
np.asarray(
|
||||||
[[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
|
[[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
|
||||||
),
|
),
|
||||||
[np.asarray([1, 3]), np.asarray([0, 2])],
|
[np.asarray([1, 3]), np.asarray([0, 2])],
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
sp.csr_matrix(
|
sp.csr_matrix(
|
||||||
[[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
|
[[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
|
||||||
),
|
),
|
||||||
[np.asarray([1, 3]), np.asarray([0, 2])],
|
[np.asarray([1, 3]), np.asarray([0, 2])],
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
|
np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
|
||||||
[np.asarray([], dtype=int), np.asarray([0, 1])],
|
[np.asarray([], dtype=int), np.asarray([0, 1])],
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
|
sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
|
||||||
[np.asarray([], dtype=int), np.asarray([0, 1])],
|
[np.asarray([], dtype=int), np.asarray([0, 1])],
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
|
np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
|
||||||
[np.asarray([0, 1]), np.asarray([], dtype=int)],
|
[np.asarray([0, 1]), np.asarray([], dtype=int)],
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
|
sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
|
||||||
[np.asarray([0, 1]), np.asarray([], dtype=int)],
|
[np.asarray([0, 1]), np.asarray([], dtype=int)],
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test__split_index_by_pred(self, instances, result):
|
def test__split_index_by_pred(self, instances, result):
|
||||||
ncl = 2
|
ncl = 2
|
||||||
assert all(
|
assert all(
|
||||||
np.array_equal(a, b)
|
np.array_equal(a, b)
|
||||||
for (a, b) in zip(
|
for (a, b) in zip(
|
||||||
ExtendedCollection._split_index_by_pred(ncl, instances),
|
ExtendedCollection._split_index_by_pred(ncl, instances),
|
||||||
result,
|
result,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"instances,s_inst,norms",
|
"instances,s_inst,norms",
|
||||||
[
|
[
|
||||||
(
|
(
|
||||||
np.asarray(
|
np.asarray(
|
||||||
[[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
|
[[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
|
||||||
),
|
),
|
||||||
[
|
[
|
||||||
np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
|
np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
|
||||||
np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
|
np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
|
||||||
],
|
],
|
||||||
[0.5, 0.5],
|
[0.5, 0.5],
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
sp.csr_matrix(
|
sp.csr_matrix(
|
||||||
[[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
|
[[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
|
||||||
),
|
),
|
||||||
[
|
[
|
||||||
sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
|
sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
|
||||||
sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
|
sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
|
||||||
],
|
],
|
||||||
[0.5, 0.5],
|
[0.5, 0.5],
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
|
np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
|
||||||
[
|
[
|
||||||
np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
|
np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
|
||||||
np.asarray([], dtype=int),
|
np.asarray([], dtype=int),
|
||||||
],
|
],
|
||||||
[1.0, 0.0],
|
[1.0, 0.0],
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
|
sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
|
||||||
[
|
[
|
||||||
sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
|
sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
|
||||||
sp.csr_matrix([], dtype=int),
|
sp.csr_matrix([], dtype=int),
|
||||||
],
|
],
|
||||||
[1.0, 0.0],
|
[1.0, 0.0],
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
|
np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
|
||||||
[
|
[
|
||||||
np.asarray([], dtype=int),
|
np.asarray([], dtype=int),
|
||||||
np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
|
np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
|
||||||
],
|
],
|
||||||
[0.0, 1.0],
|
[0.0, 1.0],
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
|
sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
|
||||||
[
|
[
|
||||||
sp.csr_matrix([], dtype=int),
|
sp.csr_matrix([], dtype=int),
|
||||||
sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
|
sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
|
||||||
],
|
],
|
||||||
[0.0, 1.0],
|
[0.0, 1.0],
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_split_inst_by_pred(self, instances, s_inst, norms):
|
def test_split_inst_by_pred(self, instances, s_inst, norms):
|
||||||
ncl = 2
|
ncl = 2
|
||||||
_s_inst, _norms = ExtendedCollection.split_inst_by_pred(ncl, instances)
|
_s_inst, _norms = ExtendedCollection.split_inst_by_pred(ncl, instances)
|
||||||
if isinstance(s_inst, np.ndarray):
|
if isinstance(s_inst, np.ndarray):
|
||||||
assert all(np.array_equal(a, b) for (a, b) in zip(_s_inst, s_inst))
|
assert all(np.array_equal(a, b) for (a, b) in zip(_s_inst, s_inst))
|
||||||
if isinstance(s_inst, sp.csr_matrix):
|
if isinstance(s_inst, sp.csr_matrix):
|
||||||
assert all((a != b).nnz == 0 for (a, b) in zip(_s_inst, s_inst))
|
assert all((a != b).nnz == 0 for (a, b) in zip(_s_inst, s_inst))
|
||||||
assert all(a == b for (a, b) in zip(_norms, norms))
|
assert all(a == b for (a, b) in zip(_norms, norms))
|
||||||
|
|
||||||
    @pytest.mark.parametrize(
        "instances,labels,inst0,lbl0,inst1,lbl1",
        [
            (
                np.asarray(
                    [[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
                ),
                np.asarray([3, 0, 1, 2]),
                np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                np.asarray([0, 1]),
                np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                np.asarray([1, 0]),
            ),
            (
                sp.csr_matrix(
                    [[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
                ),
                np.asarray([3, 0, 1, 2]),
                sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                np.asarray([0, 1]),
                sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                np.asarray([1, 0]),
            ),
            (
                np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                np.asarray([3, 1]),
                np.asarray([], dtype=int),
                np.asarray([], dtype=int),
                np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                np.asarray([1, 0]),
            ),
            (
                sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                np.asarray([3, 1]),
                sp.csr_matrix(np.empty((0, 0), dtype=int)),
                np.asarray([], dtype=int),
                sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                np.asarray([1, 0]),
            ),
            (
                np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                np.asarray([0, 2]),
                np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                np.asarray([0, 1]),
                np.asarray([], dtype=int),
                np.asarray([], dtype=int),
            ),
            (
                sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                np.asarray([0, 2]),
                sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                np.asarray([0, 1]),
                sp.csr_matrix(np.empty((0, 0), dtype=int)),
                np.asarray([], dtype=int),
            ),
        ],
    )
    def test_split_by_pred(self, instances, labels, inst0, lbl0, inst1, lbl1):
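        # each resulting collection is compared element-wise with the expected instances and labels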
        ec = ExtendedCollection(instances, labels, classes=range(0, 4))
        [ec0, ec1] = ec.split_by_pred()
        if isinstance(instances, np.ndarray):
            assert np.array_equal(ec0.X, inst0)
            assert np.array_equal(ec1.X, inst1)
        if isinstance(instances, sp.csr_matrix):
            assert (ec0.X != inst0).nnz == 0
            assert (ec1.X != inst1).nnz == 0
        assert np.array_equal(ec0.y, lbl0)
        assert np.array_equal(ec1.y, lbl1)

@@ -1,3 +1,3 @@
class TestDataset:
    pass

@@ -1,12 +1,12 @@
from sklearn.linear_model import LogisticRegression

from quacc.dataset import Dataset
from quacc.evaluation.baseline import kfcv


class TestBaseline:
    def test_kfcv(self):
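        # kfcv is expected to return a report containing (at least) an f1_score entry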
        spambase = Dataset("spambase", n_prevalences=1).get_raw()
        c_model = LogisticRegression()
        c_model.fit(spambase.train.X, spambase.train.y)
        assert "f1_score" in kfcv(c_model, spambase.validation)

@@ -1,66 +1,66 @@
import numpy as np
import pytest
import scipy.sparse as sp
from sklearn.linear_model import LogisticRegression

from quacc.method.base import BinaryQuantifierAccuracyEstimator


class TestBQAE:
    @pytest.mark.parametrize(
        "instances,preds0,preds1,result",
        [
            (
                np.asarray(
                    [[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
                ),
                np.asarray([0.3, 0.7]),
                np.asarray([0.4, 0.6]),
                np.asarray([0.15, 0.2, 0.35, 0.3]),
            ),
            (
                sp.csr_matrix(
                    [[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
                ),
                np.asarray([0.3, 0.7]),
                np.asarray([0.4, 0.6]),
                np.asarray([0.15, 0.2, 0.35, 0.3]),
            ),
            (
                np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                np.asarray([0.3, 0.7]),
                np.asarray([0.4, 0.6]),
                np.asarray([0.0, 0.4, 0.0, 0.6]),
            ),
            (
                sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                np.asarray([0.3, 0.7]),
                np.asarray([0.4, 0.6]),
                np.asarray([0.0, 0.4, 0.0, 0.6]),
            ),
            (
                np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                np.asarray([0.3, 0.7]),
                np.asarray([0.4, 0.6]),
                np.asarray([0.3, 0.0, 0.7, 0.0]),
            ),
            (
                sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                np.asarray([0.3, 0.7]),
                np.asarray([0.4, 0.6]),
                np.asarray([0.3, 0.0, 0.7, 0.0]),
            ),
        ],
    )
    def test_estimate_ndarray(self, mocker, instances, preds0, preds1, result):
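        # quantify() of both inner quantifiers is mocked, so estimate() only has to
        # work with the fixed prevalence vectors preds0 and preds1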
        estimator = BinaryQuantifierAccuracyEstimator(LogisticRegression())
        estimator.n_classes = 4
        with mocker.patch.object(estimator.q_model_0, "quantify"), mocker.patch.object(
            estimator.q_model_1, "quantify"
        ):
            estimator.q_model_0.quantify.return_value = preds0
            estimator.q_model_1.quantify.return_value = preds1
            assert np.array_equal(
                estimator.estimate(instances, ext=True),
                result,
            )

@@ -1,2 +1,2 @@
class TestMCAE:
    pass