# QuAcc/quacc/utils.py

# 86 lines
# 2.2 KiB
# Python

import functools
import os
import shutil
from pathlib import Path
from urllib.request import urlretrieve
import pandas as pd
from tqdm import tqdm
from quacc.environment import env
def combine_dataframes(dfs, df_index=None) -> pd.DataFrame:
    """Join several dataframes side by side on a shared set of key columns.

    The first dataframe is the left side of every join; each following
    dataframe is re-indexed on ``df_index`` and joined onto the running
    result using those same columns as the join key.

    Args:
        dfs: non-empty sequence of dataframes to combine.
        df_index: column label(s) used as join key. ``None`` (the default)
            is treated as an empty list, matching the previous ``[]`` default.

    Returns:
        ``dfs[0]`` itself when only one dataframe is given, otherwise the
        result of joining all dataframes in order.

    Raises:
        ValueError: if ``dfs`` is empty.
    """
    # A fresh list per call instead of a shared mutable default argument.
    keys = [] if df_index is None else df_index

    if len(dfs) < 1:
        raise ValueError("combine_dataframes requires at least one dataframe")
    if len(dfs) == 1:
        return dfs[0]

    combined = dfs[0]
    for other in dfs[1:]:
        combined = combined.join(other.set_index(keys), on=keys)
    return combined
def avg_group_report(df: pd.DataFrame) -> pd.DataFrame:
    """Average the non-``"base"`` columns of ``df`` over its inner rows.

    The first and the last row of ``df`` are left out of the computation.
    Column labels are expected to be 2-tuples (two-level columns); every
    column whose first level is ``"base"`` is excluded from the result.

    Returns:
        A single-row dataframe whose columns are ``df.columns`` without
        ``("base", "T")`` and ``("base", "F")``.

    Raises:
        TypeError: from ``functools.reduce`` when no inner rows are left
            (i.e. ``df`` has fewer than three rows).
    """

    def _add_rows(acc, row):
        # Element-wise sum of two records that share the same keys.
        merged = {}
        for (group, metric), total in acc.items():
            merged[(group, metric)] = total + row[(group, metric)]
        return merged

    # Records without the first and the last row.
    inner_rows = df.to_dict(orient="records")[1:-1]
    totals = functools.reduce(_add_rows, inner_rows)
    kept_columns = df.columns.drop([("base", "T"), ("base", "F")])

    row_count = len(inner_rows)
    averages = {}
    for (group, metric), total in totals.items():
        if group == "base":
            continue
        averages[(group, metric)] = total / row_count
    return pd.DataFrame([averages], columns=kept_columns)
def fmt_line_md(s):
    """Return ``s`` as one Markdown blockquote line.

    The value is prefixed with ``"> "`` and followed by a space and a
    newline character.
    """
    return "> {} \n".format(s)
def create_dataser_dir(dir_name, update=False, create_md=False):
    """Prepare the output directory of a dataset and record it on ``env``.

    The directory is ``env.OUT_DIR_NAME / dir_name``; the base directory is
    created when missing. ``env.OUT_DIR`` is set to the dataset directory
    and, when ``create_md`` is true, ``env.PLOT_OUT_DIR`` is set to its
    ``plot`` sub-directory.

    Args:
        dir_name: name of the dataset directory, relative to the base
            output directory.
        update: when true, an existing dataset directory is kept as is;
            when false, it is removed and recreated empty.
        create_md: when true, also create the ``plot`` sub-directory.

    Raises:
        FileExistsError: when ``update`` is false and the old directory
            could not be removed (removal errors themselves are ignored).
    """
    base_out_dir = Path(env.OUT_DIR_NAME)
    os.makedirs(base_out_dir, exist_ok=True)

    dataset_dir = base_out_dir / dir_name
    env.OUT_DIR = dataset_dir

    if update:
        # Keep existing content; exist_ok avoids the exists()/mkdir() race
        # and makedirs also handles a nested dir_name.
        os.makedirs(dataset_dir, exist_ok=True)
    else:
        # Start from scratch. No exist_ok here on purpose: if the removal
        # silently failed, stale content must not be reused unnoticed.
        shutil.rmtree(dataset_dir, ignore_errors=True)
        os.makedirs(dataset_dir)

    if create_md:
        plot_dir_path = dataset_dir / "plot"
        env.PLOT_OUT_DIR = plot_dir_path
        os.makedirs(plot_dir_path, exist_ok=True)
def get_quacc_home():
    """Return the ``~/quacc_home`` directory, creating it when missing.

    Returns:
        The user-expanded ``Path`` of the directory.
    """
    quacc_home = Path("~/quacc_home")
    quacc_home = quacc_home.expanduser()
    os.makedirs(quacc_home, exist_ok=True)
    return quacc_home
class TqdmUpTo(tqdm):
    """``tqdm`` progress bar that can be advanced to an absolute position."""

    def update_to(self, b=1, bsize=1, tsize=None):
        """Move the bar to ``b * bsize`` and optionally reset its total.

        ``download_file`` passes this method to ``urlretrieve`` as its
        ``reporthook``.

        Args:
            b: multiplier of ``bsize`` giving the absolute position.
            bsize: size of one unit counted by ``b``.
            tsize: new total for the bar; left untouched when ``None``.
        """
        if tsize is not None:
            self.total = tsize
        # update() takes an increment, so convert the absolute position
        # into the distance from the current counter.
        position = b * bsize
        self.update(position - self.n)
def download_file(url: str, downloaded_path: Path):
    """Download ``url`` to ``downloaded_path`` while showing a progress bar.

    Args:
        url: address of the resource to fetch.
        downloaded_path: destination file; its name labels the bar.
    """
    bar_options = {
        "unit": "B",
        "unit_scale": True,
        "unit_divisor": 1024,
        "miniters": 1,
        "desc": downloaded_path.name,
    }
    with TqdmUpTo(**bar_options) as progress:
        urlretrieve(
            url,
            filename=downloaded_path,
            reporthook=progress.update_to,
        )