From adb41ffb35c71bfce4941e5a3ba52e256299e67a Mon Sep 17 00:00:00 2001
From: Lorenzo Volpi
Date: Sun, 5 Nov 2023 14:17:03 +0100
Subject: [PATCH] baselines code updated

---
Review note: the added lines were edited after export (docstrings, a local
rename in kliep(), one explanatory comment). Hunk line counts and the
diffstat below are updated to match; the post-image blob ids on the "index"
lines were not recomputed.

 baselines/impweight.py | 24 ++++++++++++++++++++++++
 baselines/models.py    |  2 +-
 baselines/pykliep.py   |  7 ++++++-
 3 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/baselines/impweight.py b/baselines/impweight.py
index 83e7f6e..f144bce 100644
--- a/baselines/impweight.py
+++ b/baselines/impweight.py
@@ -4,6 +4,30 @@ from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import GridSearchCV
 from sklearn.neighbors import KernelDensity
 
+from baselines import densratio
+from baselines.pykliep import DensityRatioEstimator
+
+
+def kliep(Xtr, ytr, Xte):
+    """Return importance weights for Xtr from a KLIEP density-ratio fit.
+
+    Fits a default ``DensityRatioEstimator`` on ``(Xtr, Xte)`` and returns
+    its predictions on ``Xtr``. ``ytr`` is unused; it is accepted so the
+    signature matches ``logreg`` in this module.
+    """
+    estimator = DensityRatioEstimator()
+    estimator.fit(Xtr, Xte)
+    return estimator.predict(Xtr)
+
+
+def usilf(Xtr, ytr, Xte, alpha=0.0):
+    """Return density-ratio weights for Xtr computed by ``densratio``.
+
+    ``alpha`` is forwarded to ``densratio``; ``ytr`` is unused.
+    """
+    dense_ratio_obj = densratio(Xtr, Xte, alpha=alpha, verbose=False)
+    return dense_ratio_obj.compute_density_ratio(Xtr)
+
 
 def logreg(Xtr, ytr, Xte):
     # check "Direct Density Ratio Estimation for
diff --git a/baselines/models.py b/baselines/models.py
index 001f02c..a0e8c35 100644
--- a/baselines/models.py
+++ b/baselines/models.py
@@ -123,7 +123,7 @@ if __name__ == "__main__":
 
     results = []
     for sample in protocol():
-        wx = iw.logreg(d.validation.X, d.validation.y, sample.X)
+        wx = iw.kliep(d.validation.X, d.validation.y, sample.X)
         test_preds = lr.predict(sample.X)
         estim_acc = np.sum((1.0 * (val_preds == d.validation.y)) * wx) / np.sum(wx)
         true_acc = metrics.accuracy_score(sample.y, test_preds)
diff --git a/baselines/pykliep.py b/baselines/pykliep.py
index b9ccedd..8c67ea4 100644
--- a/baselines/pykliep.py
+++ b/baselines/pykliep.py
@@ -74,7 +74,12 @@ class DensityRatioEstimator:
 
         # X_test_shuffled = X_test.copy()
         X_test_shuffled = X_test.copy()
-        np.random.shuffle(X_test_shuffled)
+        # Shuffle a row-index array and gather rows with it, rather than
+        # shuffling X_test_shuffled in place. NOTE(review): presumably so
+        # inputs np.random.shuffle cannot permute in place still work - confirm.
+        X_test_index = np.arange(X_test_shuffled.shape[0])
+        np.random.shuffle(X_test_index)
+        X_test_shuffled = X_test_shuffled[X_test_index, :]
 
         j_scores = {}
 