logger added, bugs to fix

This commit is contained in:
Lorenzo Volpi 2023-10-28 00:56:49 +02:00
parent 3345514c99
commit 568f200e3f
5 changed files with 153 additions and 15 deletions

View File

@ -99,4 +99,4 @@ main_conf: &main_conf
- atc_ne
- doc_feat
exec: *main_conf
exec: *debug_conf

26
quacc.log Normal file
View File

@ -0,0 +1,26 @@
dataset rcv1_CCAT
28/10/23 00:45:46| INFO: dataset rcv1_CCAT
Dataset sample 0.50 of dataset rcv1_CCAT started
Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
28/10/23 00:45:50| INFO: Dataset sample 0.50 of dataset rcv1_CCAT started
Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
28/10/23 00:45:51| ERROR: Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
28/10/23 00:45:52| ERROR: Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
28/10/23 00:45:52| ERROR: Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
Dataset sample 0.50 of dataset rcv1_CCAT finished [took 1.8041s
28/10/23 00:45:52| ERROR: Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
28/10/23 00:45:52| INFO: Dataset sample 0.50 of dataset rcv1_CCAT finished [took 1.8041s
Configuration rcv1_CCAT_1prevs failed. Exception: too many indices for array: array is 1-dimensional, but 2 were indexed
28/10/23 00:45:52| ERROR: Configuration rcv1_CCAT_1prevs failed. Exception: too many indices for array: array is 1-dimensional, but 2 were indexed
dataset rcv1_CCAT
28/10/23 00:47:52| INFO: dataset rcv1_CCAT
Dataset sample 0.50 of dataset rcv1_CCAT started
Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
28/10/23 00:47:56| INFO: Dataset sample 0.50 of dataset rcv1_CCAT started
Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
28/10/23 00:47:57| ERROR: Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
Dataset sample 0.50 of dataset rcv1_CCAT finished [took 1.7186s

View File

@ -1,9 +1,7 @@
import logging as log
import multiprocessing
import time
from typing import List
import numpy as np
import pandas as pd
import quapy as qp
from quapy.protocol import APP
@ -13,6 +11,7 @@ from quacc.dataset import Dataset
from quacc.environment import env
from quacc.evaluation import baseline, method
from quacc.evaluation.report import CompReport, DatasetReport, EvaluationReport
from quacc.logger import Logger, SubLogger
qp.environ["SAMPLE_SIZE"] = env.SAMPLE_SIZE
@ -41,8 +40,11 @@ class CompEstimator:
CE = CompEstimator
def fit_and_estimate(_estimate, train, validation, test, _env=None):
def fit_and_estimate(_estimate, train, validation, test, _env=None, q=None):
_env = env if _env is None else _env
SubLogger.setup(q)
log = SubLogger.logger()
model = LogisticRegression()
model.fit(*train.Xy)
@ -76,16 +78,22 @@ def fit_and_estimate(_estimate, train, validation, test, _env=None):
def evaluate_comparison(
dataset: Dataset, estimators=["OUR_BIN_SLD", "OUR_MUL_SLD"]
) -> EvaluationReport:
log = Logger.logger()
# with multiprocessing.Pool(1) as pool:
with multiprocessing.Pool(len(estimators)) as pool:
dr = DatasetReport(dataset.name)
log.info(f"dataset {dataset.name}")
for d in dataset():
log.info(f"train prev.: {np.around(d.train_prev, decimals=2)}")
log.info(
f"Dataset sample {d.train_prev[1]:.2f} of dataset {dataset.name} started"
)
tstart = time.time()
tasks = [(estim, d.train, d.validation, d.test) for estim in CE[estimators]]
results = [
pool.apply_async(fit_and_estimate, t, {"_env": env}) for t in tasks
pool.apply_async(
fit_and_estimate, t, {"_env": env, "q": Logger.queue()}
)
for t in tasks
]
results_got = []
@ -96,14 +104,14 @@ def evaluate_comparison(
results_got.append(r)
except Exception as e:
log.error(
f"Dataset sample {d.train[1]:.2f} of dataset {dataset.name} failed. Exception: {e}"
f"Dataset sample {d.train_prev[1]:.2f} of dataset {dataset.name} failed. Exception: {e}"
)
tend = time.time()
times = {r["name"]: r["time"] for r in results_got}
times["tot"] = tend - tstart
log.info(
f"Dataset sample {d.train[1]:.2f} of dataset {dataset.name} finished [took {times['tot']:.4f}s"
f"Dataset sample {d.train_prev[1]:.2f} of dataset {dataset.name} finished [took {times['tot']:.4f}s"
)
dr += CompReport(
[r["result"] for r in results_got],

107
quacc/logger.py Normal file
View File

@ -0,0 +1,107 @@
import logging
import logging.handlers
import multiprocessing
import threading
class Logger:
    """Main-process logging front end backed by a queue and a listener thread.

    Records logged through :meth:`logger` are pushed onto a shared queue by a
    ``QueueHandler``; a background thread pops them and hands them to the root
    logger, whose ``FileHandler`` appends them to ``quacc.log``.  The queue
    returned by :meth:`queue` can be given to worker processes so that they
    feed the same listener.

    The class is used as a namespace: all state lives in class attributes and
    every method is a classmethod.
    """

    __logger_file = "quacc.log"
    __logger_name = "queue_logger"
    # Manager that owns the shared queue; kept referenced so that its server
    # process stays alive for as long as the queue is in use.
    __manager = None
    __queue = None
    __thread = None
    __setup = False

    @classmethod
    def __logger_listener(cls, q):
        """Forward records from *q* to the root logger until ``None`` arrives."""
        root = logging.getLogger()
        while True:
            record = q.get()
            if record is None:
                # Sentinel sent by join_listener(): stop listening.
                break
            root.handle(record)

    @classmethod
    def setup(cls):
        """Install the handlers and start the listener thread (idempotent)."""
        if cls.__setup:
            return

        # Root logger: the only place where records are written to disk.
        root = logging.getLogger()
        root.addHandler(logging.FileHandler(cls.__logger_file, mode="a"))

        if cls.__queue is None:
            # A plain multiprocessing.Queue cannot be pickled as a task
            # argument ("Queue objects should only be shared between
            # processes through inheritance"); a manager queue is a proxy
            # that can be passed to pool workers.
            cls.__manager = multiprocessing.Manager()
            cls.__queue = cls.__manager.Queue()

        logger = logging.getLogger(cls.__logger_name)
        logger.setLevel(logging.DEBUG)
        # Without this every record would also propagate straight to the
        # root FileHandler and be written twice: once unformatted, and once
        # formatted after its trip through the queue.
        logger.propagate = False

        queue_handler = logging.handlers.QueueHandler(cls.__queue)
        queue_handler.setLevel(logging.DEBUG)
        # The formatter sits on the queue handler, so records are formatted
        # before being enqueued; the file handler has no formatter of its own.
        queue_handler.setFormatter(
            logging.Formatter(
                fmt="%(asctime)s| %(levelname)s: %(message)s",
                datefmt="%d/%m/%y %H:%M:%S",
            )
        )
        logger.addHandler(queue_handler)

        cls.__thread = threading.Thread(
            target=cls.__logger_listener,
            args=(cls.__queue,),
        )
        cls.__thread.start()
        cls.__setup = True

    @classmethod
    def queue(cls):
        """Return the shared record queue, running :meth:`setup` if needed."""
        if not cls.__setup:
            cls.setup()
        return cls.__queue

    @classmethod
    def logger(cls):
        """Return the queue-backed logger, running :meth:`setup` if needed."""
        if not cls.__setup:
            cls.setup()
        return logging.getLogger(cls.__logger_name)

    @classmethod
    def join_listener(cls):
        """Stop the listener thread and wait for it to finish.

        Does nothing when :meth:`setup` has never run.  The ``None`` sentinel
        is enqueued behind any pending records, so those are handled before
        the thread exits.  Records logged after this call are no longer
        consumed.
        """
        if cls.__setup and cls.__thread is not None:
            if cls.__thread.is_alive():
                # The listener only leaves its loop on the sentinel; joining
                # without sending it would block forever.
                cls.__queue.put(None)
            cls.__thread.join()
class SubLogger:
    """Worker-side counterpart of the queue logging set-up.

    :meth:`setup` attaches a ``QueueHandler`` for the given queue to the
    root logger of the current process; :meth:`logger` then returns that
    root logger.  All state lives in class attributes, so the set-up happens
    at most once per process.
    """

    __queue = None
    __setup = False

    @classmethod
    def setup(cls, q):
        """Route this process's root logger into *q* (first call wins).

        Args:
            q: queue that receives the formatted records, or ``None`` when
                no queue is available.  With ``None`` no handler is
                installed (a ``QueueHandler`` around ``None`` would fail on
                every record) and the root logger is left as it is.
        """
        if cls.__setup:
            return
        cls.__queue = q

        if q is not None:
            handler = logging.handlers.QueueHandler(q)
            handler.setFormatter(
                logging.Formatter(
                    fmt="%(asctime)s| %(levelname)s: %(message)s",
                    datefmt="%d/%m/%y %H:%M:%S",
                )
            )
            # NOTE(review): the root level is not changed here, so records
            # below the root's effective level (WARNING unless configured
            # elsewhere) never reach the queue -- confirm this is intended.
            logging.getLogger().addHandler(handler)

        cls.__setup = True

    @classmethod
    def logger(cls):
        """Return the root logger, or ``None`` if :meth:`setup` has not run."""
        if not cls.__setup:
            return None
        return logging.getLogger()

View File

@ -1,12 +1,14 @@
import logging as log
import traceback
from sys import platform
import quacc.evaluation.comp as comp
from quacc.dataset import Dataset
from quacc.environment import env
from quacc.logger import Logger
from quacc.utils import create_dataser_dir
log = Logger.logger()
def toast():
if platform == "win32":
@ -46,14 +48,9 @@ def estimate_comparison():
def main():
log.basicConfig(
filename="quacc.log",
filemode="a",
format="%(asctime)s| %(levelname)s: %(message)s",
datefmt="%d/%m/%y %H:%M:%S",
)
estimate_comparison()
toast()
Logger.join_listener()
if __name__ == "__main__":