# Patch summary (leading text before the first "diff --git" header; not part of any hunk).
#
# baselines/impweight.py
#   * Adds imports: `densratio` from `baselines`, and `DensityRatioEstimator`
#     from `baselines.pykliep`.
#   * Adds kliep(Xtr, ytr, Xte): builds a DensityRatioEstimator, fits it on
#     (Xtr, Xte) and returns its predict(Xtr) output.
#   * Adds usilf(Xtr, ytr, Xte, alpha=0.0): calls
#     densratio(Xtr, Xte, alpha=alpha, verbose=False) and returns the result's
#     compute_density_ratio(Xtr).
#   * Both new functions accept `ytr` but never read it; their parameter list
#     mirrors the existing logreg(Xtr, ytr, Xte).
#   * Inside kliep() the local variable is also named `kliep`, so it shadows
#     the function name within the function body.
#   * NOTE(review): `densratio` is imported from the `baselines` package and
#     then called directly. Confirm it resolves to a callable and not to a
#     submodule, otherwise usilf() raises TypeError at call time.
#   * NOTE(review): `usilf` is presumably meant as uLSIF - confirm the
#     intended spelling before callers start depending on the name.
#
# baselines/models.py
#   * In the `for sample in protocol()` loop, `wx` is now computed with
#     iw.kliep(...) instead of iw.logreg(...); the arguments are unchanged.
#
# baselines/pykliep.py
#   * In DensityRatioEstimator, the in-place np.random.shuffle(X_test_shuffled)
#     is replaced by shuffling an index array built with
#     np.arange(X_test_shuffled.shape[0]) and selecting rows via
#     X_test_shuffled[X_test_index, :].
#   * NOTE(review): presumably done to support inputs that in-place shuffling
#     does not handle (e.g. sparse matrices) - confirm with the author.
#
# NOTE(review): in this copy every hunk is collapsed onto one line; the
# original line breaks and indentation must be restored before it can be applied.
diff --git a/baselines/impweight.py b/baselines/impweight.py index 83e7f6e..f144bce 100644 --- a/baselines/impweight.py +++ b/baselines/impweight.py @@ -4,6 +4,20 @@ from sklearn.linear_model import LogisticRegression from sklearn.model_selection import GridSearchCV from sklearn.neighbors import KernelDensity +from baselines import densratio +from baselines.pykliep import DensityRatioEstimator + + +def kliep(Xtr, ytr, Xte): + kliep = DensityRatioEstimator() + kliep.fit(Xtr, Xte) + return kliep.predict(Xtr) + + +def usilf(Xtr, ytr, Xte, alpha=0.0): + dense_ratio_obj = densratio(Xtr, Xte, alpha=alpha, verbose=False) + return dense_ratio_obj.compute_density_ratio(Xtr) + def logreg(Xtr, ytr, Xte): # check "Direct Density Ratio Estimation for diff --git a/baselines/models.py b/baselines/models.py index 001f02c..a0e8c35 100644 --- a/baselines/models.py +++ b/baselines/models.py @@ -123,7 +123,7 @@ if __name__ == "__main__": results = [] for sample in protocol(): - wx = iw.logreg(d.validation.X, d.validation.y, sample.X) + wx = iw.kliep(d.validation.X, d.validation.y, sample.X) test_preds = lr.predict(sample.X) estim_acc = np.sum((1.0 * (val_preds == d.validation.y)) * wx) / np.sum(wx) true_acc = metrics.accuracy_score(sample.y, test_preds) diff --git a/baselines/pykliep.py b/baselines/pykliep.py index b9ccedd..8c67ea4 100644 --- a/baselines/pykliep.py +++ b/baselines/pykliep.py @@ -74,7 +74,9 @@ class DensityRatioEstimator: # X_test_shuffled = X_test.copy() X_test_shuffled = X_test.copy() - np.random.shuffle(X_test_shuffled) + X_test_index = np.arange(X_test_shuffled.shape[0]) + np.random.shuffle(X_test_index) + X_test_shuffled = X_test_shuffled[X_test_index, :] j_scores = {}