logger implemented

This commit is contained in:
Lorenzo Volpi 2023-10-28 16:14:37 +02:00
parent 568f200e3f
commit 345807977c
9 changed files with 114 additions and 96 deletions

.gitignore
View File

@@ -12,4 +12,6 @@ elsahar19_rca/__pycache__/*
 *.coverage
 .coverage
 scp_sync.py
 out/*
+output/*
+*.log

View File

@@ -7,6 +7,7 @@ debug_conf: &debug_conf
   datasets:
     - DATASET_NAME: rcv1
       DATASET_TARGET: CCAT
+    - DATASET_NAME: imdb

   plot_confs:
     debug:
@@ -49,14 +50,14 @@ main_conf: &main_conf
   DATASET_N_PREVS: 9
   datasets:
-    - DATASET_NAME: rcv1
-      DATASET_TARGET: CCAT
+    - DATASET_NAME: imdb

   datasets_bck:
     - DATASET_NAME: rcv1
       DATASET_TARGET: GCAT
     - DATASET_NAME: rcv1
       DATASET_TARGET: MCAT
-    - DATASET_NAME: imdb
+    - DATASET_NAME: rcv1
+      DATASET_TARGET: CCAT

   plot_confs:
     gs_vs_atc:
@@ -99,4 +100,4 @@ main_conf: &main_conf
       - atc_ne
       - doc_feat

-exec: *debug_conf
+exec: *main_conf

View File

@@ -1,26 +0,0 @@
-dataset rcv1_CCAT
-28/10/23 00:45:46| INFO: dataset rcv1_CCAT
-Dataset sample 0.50 of dataset rcv1_CCAT started
-Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
-28/10/23 00:45:50| INFO: Dataset sample 0.50 of dataset rcv1_CCAT started
-Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
-28/10/23 00:45:51| ERROR: Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
-Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
-28/10/23 00:45:52| ERROR: Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
-Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
-28/10/23 00:45:52| ERROR: Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
-Dataset sample 0.50 of dataset rcv1_CCAT finished [took 1.8041s
-28/10/23 00:45:52| ERROR: Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
-28/10/23 00:45:52| INFO: Dataset sample 0.50 of dataset rcv1_CCAT finished [took 1.8041s
-Configuration rcv1_CCAT_1prevs failed. Exception: too many indices for array: array is 1-dimensional, but 2 were indexed
-28/10/23 00:45:52| ERROR: Configuration rcv1_CCAT_1prevs failed. Exception: too many indices for array: array is 1-dimensional, but 2 were indexed
-dataset rcv1_CCAT
-28/10/23 00:47:52| INFO: dataset rcv1_CCAT
-Dataset sample 0.50 of dataset rcv1_CCAT started
-Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
-28/10/23 00:47:56| INFO: Dataset sample 0.50 of dataset rcv1_CCAT started
-Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
-28/10/23 00:47:57| ERROR: Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
-Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
-Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
-Dataset sample 0.50 of dataset rcv1_CCAT finished [took 1.7186s
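The repeated "Queue objects should only be shared between processes through inheritance" errors in this removed log are the failure the logging changes below work around: a plain multiprocessing.Queue cannot be pickled and passed to pool workers as an argument, while a Manager().Queue() proxy can. A minimal standalone sketch (not code from this repository) illustrating the difference:

import multiprocessing


def worker(q):
    # put one message on the queue received as an argument
    q.put("hello from worker")


if __name__ == "__main__":
    with multiprocessing.Pool(1) as pool:
        # raw_q = multiprocessing.Queue()
        # pool.apply(worker, (raw_q,))  # RuntimeError: Queue objects should only be
        #                               # shared between processes through inheritance
        manager = multiprocessing.Manager()
        q = manager.Queue()  # proxy object, safe to pass to pool workers
        pool.apply(worker, (q,))
        print(q.get())  # -> "hello from worker"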

View File

@@ -34,9 +34,17 @@ class DatasetSample:
 class Dataset:
-    def __init__(self, name, n_prevalences=9, target=None):
+    def __init__(self, name, n_prevalences=9, prevs=None, target=None):
         self._name = name
         self._target = target
+
+        self.prevs = None
+        if prevs is not None:
+            prevs = np.unique([p for p in prevs if p > 0.0 and p < 1.0])
+            if prevs.shape[0] > 0:
+                self.prevs = np.sort(prevs)
+                self.n_prevs = self.prevs.shape[0]
+
         self.n_prevs = n_prevalences

     def __spambase(self):
@@ -92,10 +100,14 @@ class Dataset:
         )

         # sample prevalences
-        prevalences = np.linspace(0.0, 1.0, num=self.n_prevs + 1, endpoint=False)[1:]
-        at_size = min(math.floor(len(all_train) * 0.5 / p) for p in prevalences)
+        if self.prevs is not None:
+            prevs = self.prevs
+        else:
+            prevs = np.linspace(0.0, 1.0, num=self.n_prevs + 1, endpoint=False)[1:]
+        at_size = min(math.floor(len(all_train) * 0.5 / p) for p in prevs)

         datasets = []
-        for p in prevalences:
+        for p in prevs:
             all_train_sampled = all_train.sampling(at_size, p, random_state=0)
             train, validation = all_train_sampled.split_stratified(
                 train_prop=TRAIN_VAL_PROP, random_state=0
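The new prevs argument lets the caller pin the training prevalences explicitly instead of deriving them from n_prevalences via np.linspace. A hypothetical usage sketch follows (the prevalence values are illustrative; only the keyword arguments shown in the diff above are assumed):

from quacc.dataset import Dataset

# previous behaviour: 9 evenly spaced prevalences in (0, 1)
d_auto = Dataset("rcv1", target="CCAT", n_prevalences=9)

# new behaviour: explicit prevalences; values outside (0, 1) are dropped,
# duplicates removed and the rest sorted, as in the __init__ shown above
d_fixed = Dataset("rcv1", target="CCAT", prevs=[0.2, 0.5, 0.5, 0.8, 1.0])
# d_fixed.prevs -> array([0.2, 0.5, 0.8])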

View File

@@ -8,6 +8,7 @@ defalut_env = {
     "PLOT_ESTIMATORS": [],
     "PLOT_STDEV": False,
     "DATASET_N_PREVS": 9,
+    "DATASET_PREVS": None,
     "OUT_DIR_NAME": "output",
     "OUT_DIR": None,
     "PLOT_DIR_NAME": "plot",

View File

@@ -1,21 +1,21 @@
 import multiprocessing
 import time
+import traceback
 from typing import List

 import pandas as pd
 import quapy as qp
-from quapy.protocol import APP
-from sklearn.linear_model import LogisticRegression

 from quacc.dataset import Dataset
 from quacc.environment import env
 from quacc.evaluation import baseline, method
 from quacc.evaluation.report import CompReport, DatasetReport, EvaluationReport
-from quacc.logger import Logger, SubLogger
+from quacc.evaluation.worker import estimate_worker
+from quacc.logging import Logger

-qp.environ["SAMPLE_SIZE"] = env.SAMPLE_SIZE
 pd.set_option("display.float_format", "{:.4f}".format)
+qp.environ["SAMPLE_SIZE"] = env.SAMPLE_SIZE
+
+log = Logger.logger()

 class CompEstimator:
@@ -40,45 +40,9 @@ class CompEstimator:
 CE = CompEstimator

-def fit_and_estimate(_estimate, train, validation, test, _env=None, q=None):
-    _env = env if _env is None else _env
-    SubLogger.setup(q)
-    log = SubLogger.logger()
-    model = LogisticRegression()
-    model.fit(*train.Xy)
-    protocol = APP(
-        test,
-        n_prevalences=_env.PROTOCOL_N_PREVS,
-        repeats=_env.PROTOCOL_REPEATS,
-        return_type="labelled_collection",
-    )
-    start = time.time()
-    try:
-        result = _estimate(model, validation, protocol)
-    except Exception as e:
-        log.error(f"Method {_estimate.__name__} failed. Exception: {e}")
-        return {
-            "name": _estimate.__name__,
-            "result": None,
-            "time": 0,
-        }
-    end = time.time()
-    log.info(f"{_estimate.__name__} finished [took {end-start:.4f}s]")
-    return {
-        "name": _estimate.__name__,
-        "result": result,
-        "time": end - start,
-    }

 def evaluate_comparison(
     dataset: Dataset, estimators=["OUR_BIN_SLD", "OUR_MUL_SLD"]
 ) -> EvaluationReport:
-    log = Logger.logger()
     # with multiprocessing.Pool(1) as pool:
     with multiprocessing.Pool(len(estimators)) as pool:
         dr = DatasetReport(dataset.name)
@@ -90,9 +54,7 @@ def evaluate_comparison(
             tstart = time.time()
             tasks = [(estim, d.train, d.validation, d.test) for estim in CE[estimators]]
             results = [
-                pool.apply_async(
-                    fit_and_estimate, t, {"_env": env, "q": Logger.queue()}
-                )
+                pool.apply_async(estimate_worker, t, {"_env": env, "q": Logger.queue()})
                 for t in tasks
             ]
@@ -103,7 +65,7 @@ def evaluate_comparison(
                    if r["result"] is not None:
                        results_got.append(r)
                except Exception as e:
-                    log.error(
+                    log.warning(
                        f"Dataset sample {d.train_prev[1]:.2f} of dataset {dataset.name} failed. Exception: {e}"
                    )
@@ -111,14 +73,21 @@ def evaluate_comparison(
             times = {r["name"]: r["time"] for r in results_got}
             times["tot"] = tend - tstart
             log.info(
-                f"Dataset sample {d.train_prev[1]:.2f} of dataset {dataset.name} finished [took {times['tot']:.4f}s"
+                f"Dataset sample {d.train_prev[1]:.2f} of dataset {dataset.name} finished [took {times['tot']:.4f}s]"
             )
-            dr += CompReport(
-                [r["result"] for r in results_got],
-                name=dataset.name,
-                train_prev=d.train_prev,
-                valid_prev=d.validation_prev,
-                times=times,
-            )
+            try:
+                cr = CompReport(
+                    [r["result"] for r in results_got],
+                    name=dataset.name,
+                    train_prev=d.train_prev,
+                    valid_prev=d.validation_prev,
+                    times=times,
+                )
+            except Exception as e:
+                log.warning(
+                    f"Dataset sample {d.train_prev[1]:.2f} of dataset {dataset.name} failed. Exception: {e}"
+                )
+                traceback(e)
+                cr = None
+            dr += cr
     return dr

View File

@@ -0,0 +1,42 @@
+import time
+
+import quapy as qp
+from quapy.protocol import APP
+from sklearn.linear_model import LogisticRegression
+
+from quacc.logging import SubLogger
+
+
+def estimate_worker(_estimate, train, validation, test, _env=None, q=None):
+    qp.environ["SAMPLE_SIZE"] = _env.SAMPLE_SIZE
+    SubLogger.setup(q)
+    log = SubLogger.logger()
+
+    model = LogisticRegression()
+    model.fit(*train.Xy)
+    protocol = APP(
+        test,
+        n_prevalences=_env.PROTOCOL_N_PREVS,
+        repeats=_env.PROTOCOL_REPEATS,
+        return_type="labelled_collection",
+    )
+    start = time.time()
+    try:
+        result = _estimate(model, validation, protocol)
+    except Exception as e:
+        log.warning(f"Method {_estimate.__name__} failed. Exception: {e}")
+        return {
+            "name": _estimate.__name__,
+            "result": None,
+            "time": 0,
+        }
+    end = time.time()
+    log.info(f"{_estimate.__name__} finished [took {end-start:.4f}s]")
+
+    return {
+        "name": _estimate.__name__,
+        "result": result,
+        "time": end - start,
+    }
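estimate_worker now lives in its own module, so it can be pickled by name when dispatched through multiprocessing. Its return contract is a dict with "name", "result" and "time", where a failed method returns result=None; that is how evaluate_comparison filters results in the diff above. A condensed sketch of that dispatch pattern (run is a hypothetical helper; the apply_async kwargs mirror the comp.py changes):

import multiprocessing

from quacc.environment import env
from quacc.evaluation.worker import estimate_worker
from quacc.logging import Logger


def run(tasks):
    # each task is a tuple (estimator_fn, train, validation, test)
    with multiprocessing.Pool(len(tasks)) as pool:
        async_results = [
            pool.apply_async(estimate_worker, t, {"_env": env, "q": Logger.queue()})
            for t in tasks
        ]
        # keep only the workers that produced a result; failures return result=None
        return [r for r in (ar.get() for ar in async_results) if r["result"] is not None]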

View File

@@ -7,6 +7,7 @@ import threading
 class Logger:
     __logger_file = "quacc.log"
     __logger_name = "queue_logger"
+    __manager = None
     __queue = None
     __thread = None
     __setup = False
@@ -17,7 +18,7 @@ class Logger:
             record = q.get()
             if record is None:
                 break
-            root = logging.getLogger()
+            root = logging.getLogger("listener")
             root.handle(record)

     @classmethod
@@ -26,13 +27,19 @@ class Logger:
             return

         # setup root
-        root = logging.getLogger()
+        root = logging.getLogger("listener")
+        root.setLevel(logging.DEBUG)
         rh = logging.FileHandler(cls.__logger_file, mode="a")
+        rh.setLevel(logging.DEBUG)
         root.addHandler(rh)
+        root.info("-" * 100)

         # setup logger
+        if cls.__manager is None:
+            cls.__manager = multiprocessing.Manager()
         if cls.__queue is None:
-            cls.__queue = multiprocessing.Queue()
+            cls.__queue = cls.__manager.Queue()

         logger = logging.getLogger(cls.__logger_name)
         logger.setLevel(logging.DEBUG)
@@ -70,9 +77,11 @@ class Logger:
         return logging.getLogger(cls.__logger_name)

     @classmethod
-    def join_listener(cls):
+    def close(cls):
         if cls.__setup and cls.__thread is not None:
+            cls.__queue.put(None)
             cls.__thread.join()
+        # cls.__manager.close()

 class SubLogger:
@@ -88,7 +97,9 @@ class SubLogger:
         # setup root
         root = logging.getLogger()
+        root.setLevel(logging.DEBUG)

         rh = logging.handlers.QueueHandler(q)
+        rh.setLevel(logging.DEBUG)
         rh.setFormatter(
             logging.Formatter(
                 fmt="%(asctime)s| %(levelname)s: %(message)s",

View File

@@ -4,7 +4,7 @@ from sys import platform
 import quacc.evaluation.comp as comp
 from quacc.dataset import Dataset
 from quacc.environment import env
-from quacc.logger import Logger
+from quacc.logging import Logger
 from quacc.utils import create_dataser_dir

 log = Logger.logger()
@@ -24,6 +24,7 @@ def estimate_comparison():
         env.DATASET_NAME,
         target=env.DATASET_TARGET,
         n_prevalences=env.DATASET_N_PREVS,
+        prevs=env.DATASET_PREVS,
     )
     try:
         dr = comp.evaluate_comparison(dataset, estimators=env.COMP_ESTIMATORS)
@@ -48,9 +49,14 @@ def estimate_comparison():
 def main():
-    estimate_comparison()
+    try:
+        estimate_comparison()
+    except Exception as e:
+        log.error(f"estimate comparison failed. Exceprion: {e}")
+        traceback(e)
     toast()
-    Logger.join_listener()
+    Logger.close()

 if __name__ == "__main__":