logger added, bugs to fix

This commit is contained in:
Lorenzo Volpi 2023-10-28 00:56:49 +02:00
parent 3345514c99
commit 568f200e3f
5 changed files with 153 additions and 15 deletions

View File

@ -99,4 +99,4 @@ main_conf: &main_conf
- atc_ne - atc_ne
- doc_feat - doc_feat
exec: *main_conf exec: *debug_conf

26
quacc.log Normal file
View File

@ -0,0 +1,26 @@
dataset rcv1_CCAT
28/10/23 00:45:46| INFO: dataset rcv1_CCAT
Dataset sample 0.50 of dataset rcv1_CCAT started
Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
28/10/23 00:45:50| INFO: Dataset sample 0.50 of dataset rcv1_CCAT started
Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
28/10/23 00:45:51| ERROR: Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
28/10/23 00:45:52| ERROR: Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
28/10/23 00:45:52| ERROR: Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
Dataset sample 0.50 of dataset rcv1_CCAT finished [took 1.8041s
28/10/23 00:45:52| ERROR: Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
28/10/23 00:45:52| INFO: Dataset sample 0.50 of dataset rcv1_CCAT finished [took 1.8041s
Configuration rcv1_CCAT_1prevs failed. Exception: too many indices for array: array is 1-dimensional, but 2 were indexed
28/10/23 00:45:52| ERROR: Configuration rcv1_CCAT_1prevs failed. Exception: too many indices for array: array is 1-dimensional, but 2 were indexed
dataset rcv1_CCAT
28/10/23 00:47:52| INFO: dataset rcv1_CCAT
Dataset sample 0.50 of dataset rcv1_CCAT started
Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
28/10/23 00:47:56| INFO: Dataset sample 0.50 of dataset rcv1_CCAT started
Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
28/10/23 00:47:57| ERROR: Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
Dataset sample 0.50 of dataset rcv1_CCAT finished [took 1.7186s

View File

@ -1,9 +1,7 @@
import logging as log
import multiprocessing import multiprocessing
import time import time
from typing import List from typing import List
import numpy as np
import pandas as pd import pandas as pd
import quapy as qp import quapy as qp
from quapy.protocol import APP from quapy.protocol import APP
@ -13,6 +11,7 @@ from quacc.dataset import Dataset
from quacc.environment import env from quacc.environment import env
from quacc.evaluation import baseline, method from quacc.evaluation import baseline, method
from quacc.evaluation.report import CompReport, DatasetReport, EvaluationReport from quacc.evaluation.report import CompReport, DatasetReport, EvaluationReport
from quacc.logger import Logger, SubLogger
qp.environ["SAMPLE_SIZE"] = env.SAMPLE_SIZE qp.environ["SAMPLE_SIZE"] = env.SAMPLE_SIZE
@ -41,8 +40,11 @@ class CompEstimator:
CE = CompEstimator CE = CompEstimator
def fit_and_estimate(_estimate, train, validation, test, _env=None): def fit_and_estimate(_estimate, train, validation, test, _env=None, q=None):
_env = env if _env is None else _env _env = env if _env is None else _env
SubLogger.setup(q)
log = SubLogger.logger()
model = LogisticRegression() model = LogisticRegression()
model.fit(*train.Xy) model.fit(*train.Xy)
@ -76,16 +78,22 @@ def fit_and_estimate(_estimate, train, validation, test, _env=None):
def evaluate_comparison( def evaluate_comparison(
dataset: Dataset, estimators=["OUR_BIN_SLD", "OUR_MUL_SLD"] dataset: Dataset, estimators=["OUR_BIN_SLD", "OUR_MUL_SLD"]
) -> EvaluationReport: ) -> EvaluationReport:
log = Logger.logger()
# with multiprocessing.Pool(1) as pool: # with multiprocessing.Pool(1) as pool:
with multiprocessing.Pool(len(estimators)) as pool: with multiprocessing.Pool(len(estimators)) as pool:
dr = DatasetReport(dataset.name) dr = DatasetReport(dataset.name)
log.info(f"dataset {dataset.name}") log.info(f"dataset {dataset.name}")
for d in dataset(): for d in dataset():
log.info(f"train prev.: {np.around(d.train_prev, decimals=2)}") log.info(
f"Dataset sample {d.train_prev[1]:.2f} of dataset {dataset.name} started"
)
tstart = time.time() tstart = time.time()
tasks = [(estim, d.train, d.validation, d.test) for estim in CE[estimators]] tasks = [(estim, d.train, d.validation, d.test) for estim in CE[estimators]]
results = [ results = [
pool.apply_async(fit_and_estimate, t, {"_env": env}) for t in tasks pool.apply_async(
fit_and_estimate, t, {"_env": env, "q": Logger.queue()}
)
for t in tasks
] ]
results_got = [] results_got = []
@ -96,14 +104,14 @@ def evaluate_comparison(
results_got.append(r) results_got.append(r)
except Exception as e: except Exception as e:
log.error( log.error(
f"Dataset sample {d.train[1]:.2f} of dataset {dataset.name} failed. Exception: {e}" f"Dataset sample {d.train_prev[1]:.2f} of dataset {dataset.name} failed. Exception: {e}"
) )
tend = time.time() tend = time.time()
times = {r["name"]: r["time"] for r in results_got} times = {r["name"]: r["time"] for r in results_got}
times["tot"] = tend - tstart times["tot"] = tend - tstart
log.info( log.info(
f"Dataset sample {d.train[1]:.2f} of dataset {dataset.name} finished [took {times['tot']:.4f}s" f"Dataset sample {d.train_prev[1]:.2f} of dataset {dataset.name} finished [took {times['tot']:.4f}s"
) )
dr += CompReport( dr += CompReport(
[r["result"] for r in results_got], [r["result"] for r in results_got],

107
quacc/logger.py Normal file
View File

@ -0,0 +1,107 @@
import logging
import logging.handlers
import multiprocessing
import threading
class Logger:
    """Main-process logging hub: funnels records through a shared queue.

    ``setup`` attaches a FileHandler to the root logger (appending to
    ``quacc.log``) and starts a listener thread that drains the queue and
    hands each record to root. Worker processes obtain the queue via
    :meth:`queue` and attach to it with ``SubLogger.setup``.
    """

    __logger_file = "quacc.log"
    __logger_name = "queue_logger"
    __manager = None
    __queue = None
    __thread = None
    __setup = False

    @classmethod
    def __logger_listener(cls, q):
        """Drain *q* until the ``None`` sentinel, forwarding records to root."""
        while True:
            record = q.get()
            if record is None:
                break
            logging.getLogger().handle(record)

    @classmethod
    def setup(cls):
        """Idempotently configure the file handler, queue and listener thread."""
        if cls.__setup:
            return
        # Root writes the (already formatted by QueueHandler.prepare) records.
        root = logging.getLogger()
        root.addHandler(logging.FileHandler(cls.__logger_file, mode="a"))
        if cls.__queue is None:
            # A Manager queue proxy is picklable, so it survives being passed
            # as a Pool.apply_async argument; a bare multiprocessing.Queue
            # raises "Queue objects should only be shared between processes
            # through inheritance" (the repeated errors in quacc.log).
            cls.__manager = multiprocessing.Manager()
            cls.__queue = cls.__manager.Queue()
        logger = logging.getLogger(cls.__logger_name)
        logger.setLevel(logging.DEBUG)
        # Without this, every record ALSO propagates to root and is written
        # a second time, unformatted — the duplicated line pairs in quacc.log.
        logger.propagate = False
        qh = logging.handlers.QueueHandler(cls.__queue)
        qh.setLevel(logging.DEBUG)
        qh.setFormatter(
            logging.Formatter(
                fmt="%(asctime)s| %(levelname)s: %(message)s",
                datefmt="%d/%m/%y %H:%M:%S",
            )
        )
        logger.addHandler(qh)
        # Listener thread; daemon so a missed join_listener cannot hang exit.
        cls.__thread = threading.Thread(
            target=cls.__logger_listener,
            args=(cls.__queue,),
            daemon=True,
        )
        cls.__thread.start()
        cls.__setup = True

    @classmethod
    def queue(cls):
        """Return the shared record queue, running setup on first use."""
        if not cls.__setup:
            cls.setup()
        return cls.__queue

    @classmethod
    def logger(cls):
        """Return the queue-backed logger, running setup on first use."""
        if not cls.__setup:
            cls.setup()
        return logging.getLogger(cls.__logger_name)

    @classmethod
    def join_listener(cls):
        """Stop the listener thread after it has drained the queue."""
        if cls.__setup and cls.__thread is not None:
            # The listener loop only exits on the None sentinel; the original
            # never sent it, so join() blocked forever.
            cls.__queue.put(None)
            cls.__thread.join()
            cls.__thread = None
class SubLogger:
    """Worker-process logging setup: forwards root records to a queue.

    A worker calls :meth:`setup` once with the queue handed over from the
    main process (``Logger.queue()``); afterwards :meth:`logger` returns
    the process root logger, whose records travel through the queue to the
    main-process listener.
    """

    __queue = None
    __setup = False

    @classmethod
    def setup(cls, q):
        """Attach a QueueHandler for *q* to this process' root logger (once).

        :param q: queue shared with the main process' listener thread.
        """
        if cls.__setup:
            return
        cls.__queue = q
        root = logging.getLogger()
        # Root defaults to WARNING, which would silently drop the INFO
        # records this project emits — lower it so they reach the queue.
        root.setLevel(logging.DEBUG)
        rh = logging.handlers.QueueHandler(q)
        rh.setFormatter(
            logging.Formatter(
                fmt="%(asctime)s| %(levelname)s: %(message)s",
                datefmt="%d/%m/%y %H:%M:%S",
            )
        )
        root.addHandler(rh)
        cls.__setup = True

    @classmethod
    def logger(cls):
        """Return the process root logger, or ``None`` before setup."""
        if not cls.__setup:
            return None
        return logging.getLogger()

View File

@ -1,12 +1,14 @@
import logging as log
import traceback import traceback
from sys import platform from sys import platform
import quacc.evaluation.comp as comp import quacc.evaluation.comp as comp
from quacc.dataset import Dataset from quacc.dataset import Dataset
from quacc.environment import env from quacc.environment import env
from quacc.logger import Logger
from quacc.utils import create_dataser_dir from quacc.utils import create_dataser_dir
log = Logger.logger()
def toast(): def toast():
if platform == "win32": if platform == "win32":
@ -46,14 +48,9 @@ def estimate_comparison():
def main(): def main():
log.basicConfig(
filename="quacc.log",
filemode="a",
format="%(asctime)s| %(levelname)s: %(message)s",
datefmt="%d/%m/%y %H:%M:%S",
)
estimate_comparison() estimate_comparison()
toast() toast()
Logger.join_listener()
if __name__ == "__main__": if __name__ == "__main__":