From de8bc9a70c16676ce7a261a3a795c8d078d2d3cf Mon Sep 17 00:00:00 2001 From: "paolo.bolettieri" Date: Fri, 15 Jul 2022 17:59:07 +0200 Subject: [PATCH] added FAISS Searcher --- src/BEBLIDRescorer.py | 6 +- src/BEBLIDRescorerFAISS.py | 118 ++++++++++++++++++++ src/GroundTruthEvaluation.py | 9 +- src/LFUtilities.py | 59 ++-------- src/LFUtilitiesOld.py | 202 +++++++++++++++++++++++++++++++++++ src/Searcher.py | 15 ++- 6 files changed, 346 insertions(+), 63 deletions(-) create mode 100644 src/BEBLIDRescorerFAISS.py create mode 100644 src/LFUtilitiesOld.py diff --git a/src/BEBLIDRescorer.py b/src/BEBLIDRescorer.py index 99e5abd..cd105d0 100644 --- a/src/BEBLIDRescorer.py +++ b/src/BEBLIDRescorer.py @@ -61,8 +61,10 @@ class BEBLIDRescorer: res.sort(key=lambda result: result[1], reverse=True) return res - def add(self, lf): - self.lf.append(lf) + def add(self, kp, des, id): + # LFUtilities.save_img_lf(dest, filename, kp, des) + # LFUtilities.savez_img_lf(dest, filename, kp, des) + LFUtilities.pickle_img_lf(settings.DATASET_LF_FOLDER, id, kp, des) def remove(self, idx): self.descs = np.delete(self.descs, idx, axis=0) diff --git a/src/BEBLIDRescorerFAISS.py b/src/BEBLIDRescorerFAISS.py new file mode 100644 index 0000000..0299af9 --- /dev/null +++ b/src/BEBLIDRescorerFAISS.py @@ -0,0 +1,118 @@ +import cv2 +import numpy as np + +import LFUtilities +import BEBLIDParameters +import ImageRecognitionSettings as settings +from line_profiler_pycharm import profile +import faiss + +class BEBLIDRescorerFAISS: + + def __init__(self): + #self.lf = LFUtilities.load(settings.DATASET_BEBLID) + #self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist() + #self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True) + self.bf = cv2.DescriptorMatcher_create(cv2.DescriptorMatcher_BRUTEFORCE_HAMMING) + + def rescore_by_id(self, query_id, resultset): + #query_idx = self.ids.index(query_id) + query = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, query_id) + return self.rescore_by_img(query, resultset) + + @profile + def rescore_by_img(self, query, resultset): + max_inliers = -1 + res = [] + counter = 0 + if len(query[0]) > BEBLIDParameters.MIN_GOOD_MATCHES: + for data_id, _ in resultset: + try: + #data_el = LFUtilities.loadz_img_lf(settings.DATASET_LF_FOLDER, data_id) + data_el = LFUtilities.unpickle_img_lf(settings.DATASET_LF_FOLDER, data_id) + + if len(data_el[1]) > BEBLIDParameters.MIN_GOOD_MATCHES: + #nn_matches = self.bf.knnMatch(query[1], data_el[1], 2) + #good = [m for m, n in nn_matches if m.distance < BEBLIDParameters.NN_MATCH_RATIO * n.distance] + + # Dimension of the vectors. + d = 256 + + # Vectors to be indexed, each represented by d / 8 bytes in a nb + # i.e. the i-th vector is db[i]. + db = data_el[1] + + # Vectors to be queried from the index. + queries = query[1] + + # Initializing index. + #index = faiss.IndexBinaryFlat(d) + nbits = 64 + index = faiss.IndexBinaryHash(d, nbits) + # index = faiss.IndexBinaryHNSW(d, 256) + + # Adding the database vectors. + index.add(db) + + # Number of nearest neighbors to retrieve per query vector. + k = 2 + + # Querying the index + index.nflip = 1 + + D, I = index.search(queries, k) + + # D[i, j] contains the distance from the i-th query vector to its j-th nearest neighbor. + # I[i, j] contains the id of the j-th nearest neighbor of the i-th query vector. 
+ + f_good = (D[:, 0] < BEBLIDParameters.NN_MATCH_RATIO * D[:, 1]) + Qgood = np.asarray(np.nonzero(f_good))[0] + Igood = I[f_good, 0] + + if Qgood.size > BEBLIDParameters.MIN_GOOD_MATCHES: + # src_pts = np.float32([query[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2) + #dst_pts = np.float32([data_el[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2) + # dst_pts = data_el[0][[m.trainIdx for m in good]].reshape(-1, 1, 2) + + src_pts = np.float32([query[0][m].pt for m in Qgood]).reshape(-1, 1, 2) + #dst_pts = np.float32([data_el[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2) + dst_pts = data_el[0][[m for m in Igood]].reshape(-1, 1, 2) + + M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0) + matches_mask = mask.ravel().tolist() + # print(len(good)) + inliers = np.count_nonzero(matches_mask) + # print(inliers) + if (inliers >= BEBLIDParameters.MIN_INLIERS and inliers > max_inliers): + max_inliers = inliers + res.append((data_id, round(inliers/Qgood.size, 3))) + print(data_id) + print(f'candidate n. {counter}') + #to get just the first candidate + break + except Exception as e: + print('rescore error evaluating ' + data_id) + print(e) + pass + counter += 1 + + if res: + res.sort(key=lambda result: result[1], reverse=True) + return res + + def add(self, lf): + self.lf.append(lf) + + def remove(self, idx): + self.descs = np.delete(self.descs, idx, axis=0) + + def save(self, is_backup=False): + lf_save_file = settings.DATASET_LF + ids_file = settings.DATASET_IDS_LF + if lf_save_file != "None": + if is_backup: + lf_save_file += '.bak' + ids_file += '.bak' + + LFUtilities.save(lf_save_file, self.lf) + np.savetxt(ids_file, self.ids, fmt='%s') diff --git a/src/GroundTruthEvaluation.py b/src/GroundTruthEvaluation.py index ccded25..3826d19 100644 --- a/src/GroundTruthEvaluation.py +++ b/src/GroundTruthEvaluation.py @@ -42,20 +42,19 @@ if __name__ == '__main__': print('Analyzing images...') for path in tqdm.tqdm(paths_list): key = path.name - exprected_id = groundtruth[key] + expected_id = groundtruth[key] # print(exprected_id) try: img_file = {'image': ( 'query', open(os.path.join(path.parent, path.name), 'rb'))} - params = {'searchDeepLevel': 1} + params = {'searchDeepLevel': 3} r = requests.post(IMG_REC_SERVICE + 'searchByImg', data=params, files=img_file) res = r.json() - print(res) for i in range (0, len(res)): - if res[i][0] in exprected_id: - print("found at " + str(i)) + if res[i][0] in expected_id: + #print("found at " + str(i)) precision_at[i] = precision_at[i] + 1 except Exception as e: print("cannot process '%s'" % path) diff --git a/src/LFUtilities.py b/src/LFUtilities.py index 6d741f6..5612505 100644 --- a/src/LFUtilities.py +++ b/src/LFUtilities.py @@ -75,6 +75,14 @@ def pickle_img_lf(dest, id, keypoints, descriptors): kps, des = serialize(keypoints, descriptors) pickle.dump([kps, des], open(dest_path, 'wb')) +def delete_img_lf(dest, id): + dest_folder_name = id[0:3] + filename = id + '.dat' + dest_folder_path = os.path.join(dest, dest_folder_name) + dest_path = os.path.join(dest_folder_path, filename) + if os.path.exists(dest_path): + os.remove(dest_path) + @profile def unpickle_img_lf(lf_path, id): dest_folder_name = id[0:3] @@ -139,56 +147,5 @@ def unpickle_keypoints(array): return keypoints, np.array(descriptors) -def load(lf_path): - print('loading LF dataset ' + lf_path) - ser_dataset = pickle.load(open(lf_path, "rb")) - lf_dataset = [] - for item in ser_dataset: - kp, desc = unpickle_keypoints(item) - lf_dataset.append((kp, desc)) - return lf_dataset -def 
save(lf_data, lf_path): - data = [] - for lf in lf_data: - data.append(pickle_keypoints(lf[0], lf[1])) - pickle.dump(data, open(lf_path, 'wb')) - - -def save_img_lf(dest, id, keypoints, descriptors): - dest_folder_name = id[0:3] - filename = id + '.dat' - dest_folder_path = os.path.join(dest, dest_folder_name) - if (not os.path.exists(dest_folder_path)): - os.mkdir(dest_folder_path) - dest_path = os.path.join(dest_folder_path, filename) - data = pickle_keypoints(keypoints, descriptors) - pickle.dump(data, open(dest_path, 'wb')) - - -def load_img_lf(lf_path, id): - dest_folder_name = id[0:3] - filename = id + '.dat' - dest_folder_path = os.path.join(lf_path, dest_folder_name) - dest_path = os.path.join(dest_folder_path, filename) - data = pickle.load(open(dest_path, "rb")) - kp, desc = unpickle_keypoints(data) - return (kp, desc) - - -def load_img_lf_GPU(lf_path, id): - dest_folder_name = id[0:3] - filename = id + '.dat' - dest_folder_path = os.path.join(lf_path, dest_folder_name) - dest_path = os.path.join(dest_folder_path, filename) - data = pickle.load(open(dest_path, "rb")) - kp, desc = unpickle_keypoints(data) - - data_gpu_mat = cv2.cuda_GpuMat(np.zeros((1500,), dtype=int)) - if len(desc) > 0: - data_gpu_mat = cv2.cuda_GpuMat(desc) - desc = data_gpu_mat - - return (kp, desc) - diff --git a/src/LFUtilitiesOld.py b/src/LFUtilitiesOld.py new file mode 100644 index 0000000..906d9a7 --- /dev/null +++ b/src/LFUtilitiesOld.py @@ -0,0 +1,202 @@ +import cv2 +import numpy as np +import pickle as pickle +import os +from line_profiler_pycharm import profile + +def resize(max_side, img): + if img.shape[1] > img.shape[0]: + r = max_side / img.shape[1] + dim = (max_side, int(img.shape[0] * r)) + else: + r = max_side / img.shape[0] + dim = (int(img.shape[1] * r), max_side) + + # perform the actual resizing of the image and show it + resized = cv2.resize(img, dim, interpolation=cv2.INTER_AREA) + return resized + + +def pickle_keypoints(keypoints, descriptors): + i = 0 + temp_array = [] + for point in keypoints: + temp = (point.pt, point.size, point.angle, point.response, point.octave, + point.class_id, descriptors[i]) + i += 1 + temp_array.append(temp) + return temp_array + + +def serialize_object(obj): + return pickle.dumps(obj) + + +def deserialize_object(serialized_obj): + return pickle.loads(serialized_obj) + + +def serializeV1(keypoints, descriptors): + temp_array = [] + for point in keypoints: + kp = [point.pt, point.size, point.angle, point.response, point.octave, point.class_id] + temp_array.append(kp) + return temp_array, descriptors + + +def serialize(keypoints, descriptors): + pts = np.float32([keypoints[i].pt for i in range(0, len(keypoints))]) + return pts, descriptors + +def deserialize(ser_kp, ser_des): + keypoints = [] + #data_list = array.tolist() + for point in ser_kp: + temp_feature = cv2.KeyPoint(x=point[0][0],y=point[0][1], size=point[1], angle=point[2], response=point[3], octave=point[4], class_id=point[5]) + keypoints.append(temp_feature) + return keypoints, ser_des + + +def deserializev1(ser_kp, ser_des): + keypoints = [] + #data_list = array.tolist() + for point in ser_kp: + temp_feature = cv2.KeyPoint(x=point[0][0],y=point[0][1], size=point[1], angle=point[2], response=point[3], octave=point[4], class_id=point[5]) + keypoints.append(temp_feature) + return keypoints, ser_des + +def pickle_img_lf(dest, id, keypoints, descriptors): + dest_folder_name = id[0:3] + filename = id + '.dat' + dest_folder_path = os.path.join(dest, dest_folder_name) + if (not 
os.path.exists(dest_folder_path)): + os.mkdir(dest_folder_path) + dest_path = os.path.join(dest_folder_path, filename) + kps, des = serialize(keypoints, descriptors) + pickle.dump([kps, des], open(dest_path, 'wb')) + +def delete_img_lf(dest, id): + dest_folder_name = id[0:3] + filename = id + '.dat' + dest_folder_path = os.path.join(dest, dest_folder_name) + dest_path = os.path.join(dest_folder_path, filename) + if os.path.exists(dest_path): + os.remove(dest_path) + +@profile +def unpickle_img_lf(lf_path, id): + dest_folder_name = id[0:3] + filename = id + '.dat' + dest_folder_path = os.path.join(lf_path, dest_folder_name) + dest_path = os.path.join(dest_folder_path, filename) + kps, des = pickle.load((open(dest_path, "rb"))) + return kps, des + + +@profile +def loadz_img_lf(lf_path, id): + dest_folder_name = id[0:3] + filename = id + '.dat.npz' + dest_folder_path = os.path.join(lf_path, dest_folder_name) + dest_path = os.path.join(dest_folder_path, filename) + data = np.load(dest_path, allow_pickle=False) + kps = data.f.kps + des = data.f.des + #kps = data['kps'] + #des = data['des'] + #kp, desc = deserialize(data['kps'], data['des']) + return kps, des + + +def savez_img_lf(dest, id, keypoints, descriptors): + dest_folder_name = id[0:3] + filename = id + '.dat' + dest_folder_path = os.path.join(dest, dest_folder_name) + if (not os.path.exists(dest_folder_path)): + os.mkdir(dest_folder_path) + dest_path = os.path.join(dest_folder_path, filename) + kps, des = serialize(keypoints, descriptors) + #np.savez(dest_path, data) + np.savez(dest_path, kps=kps, des=des) + + +@profile +def loadz_img_lf(lf_path, id): + dest_folder_name = id[0:3] + filename = id + '.dat.npz' + dest_folder_path = os.path.join(lf_path, dest_folder_name) + dest_path = os.path.join(dest_folder_path, filename) + data = np.load(dest_path, allow_pickle=False) + kps = data.f.kps + des = data.f.des + #kps = data['kps'] + #des = data['des'] + #kp, desc = deserialize(data['kps'], data['des']) + return kps, des + + +def unpickle_keypoints(array): + keypoints = [] + descriptors = [] + data_list = array.tolist() + for point in array: + temp_feature = cv2.KeyPoint(x=point[0][0],y=point[0][1], size=point[1], angle=point[2], response=point[3], octave=point[4], class_id=point[5]) + temp_descriptor = point[6] + keypoints.append(temp_feature) + descriptors.append(temp_descriptor) + return keypoints, np.array(descriptors) + + +def load(lf_path): + print('loading LF dataset ' + lf_path) + ser_dataset = pickle.load(open(lf_path, "rb")) + lf_dataset = [] + for item in ser_dataset: + kp, desc = unpickle_keypoints(item) + lf_dataset.append((kp, desc)) + return lf_dataset + + +def save(lf_data, lf_path): + data = [] + for lf in lf_data: + data.append(pickle_keypoints(lf[0], lf[1])) + pickle.dump(data, open(lf_path, 'wb')) + + +def save_img_lf(dest, id, keypoints, descriptors): + dest_folder_name = id[0:3] + filename = id + '.dat' + dest_folder_path = os.path.join(dest, dest_folder_name) + if (not os.path.exists(dest_folder_path)): + os.mkdir(dest_folder_path) + dest_path = os.path.join(dest_folder_path, filename) + data = pickle_keypoints(keypoints, descriptors) + pickle.dump(data, open(dest_path, 'wb')) + + +def load_img_lf(lf_path, id): + dest_folder_name = id[0:3] + filename = id + '.dat' + dest_folder_path = os.path.join(lf_path, dest_folder_name) + dest_path = os.path.join(dest_folder_path, filename) + data = pickle.load(open(dest_path, "rb")) + kp, desc = unpickle_keypoints(data) + return (kp, desc) + + +def load_img_lf_GPU(lf_path, id): + 
dest_folder_name = id[0:3] + filename = id + '.dat' + dest_folder_path = os.path.join(lf_path, dest_folder_name) + dest_path = os.path.join(dest_folder_path, filename) + data = pickle.load(open(dest_path, "rb")) + kp, desc = unpickle_keypoints(data) + + data_gpu_mat = cv2.cuda_GpuMat(np.zeros((1500,), dtype=int)) + if len(desc) > 0: + data_gpu_mat = cv2.cuda_GpuMat(desc) + desc = data_gpu_mat + + return (kp, desc) + diff --git a/src/Searcher.py b/src/Searcher.py index bd1aaa4..2260b1a 100644 --- a/src/Searcher.py +++ b/src/Searcher.py @@ -5,12 +5,14 @@ import pickle as pickle import LFUtilities import ImageRecognitionSettings as settings #from BEBLIDRescorerDB import BEBLIDRescorerDB +#from BEBLIDRescorerFAISS import BEBLIDRescorerFAISS from BEBLIDRescorer import BEBLIDRescorer import SearcherParameters from FAISSSearchEngine import FAISSSearchEngine import FeatureExtractor as fe -import BEBLIDExtractorQ as lf +import BEBLIDExtractorQ as lfQ +import BEBLIDExtractorD as lfD from line_profiler_pycharm import profile @@ -30,9 +32,12 @@ class Searcher: self.save(True) desc = fe.extract(img_file) - #orb = lf.extract(img_file) self.search_engine.add(desc, id) - #self.rescorer.add(orb) + + lf = lfD.extract(img_file) + self.rescorer.add(lf, id) + + #orb = lf.extract(img_file) self.save() print('added ' + id) @@ -62,7 +67,7 @@ class Searcher: query_desc = fe.extract(query_img) res = self.search_engine.search_by_img(query_desc, kq) if search_deep_level > 0: - query_lf = lf.extract(query_img) + query_lf = lfQ.extract(query_img) res_lf = self.rescorer.rescore_by_img(query_lf, res) #res = res_lf if res_lf else res[:k] res = res_lf if res_lf else res[:k] @@ -71,4 +76,4 @@ class Searcher: def save(self, is_backup=False): self.search_engine.save(is_backup) - #self.rescorer.save(is_backup) + self.rescorer.save(is_backup)
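
For reference, the core of rescore_by_img in the new BEBLIDRescorerFAISS swaps the cv2 brute-force knnMatch for a FAISS binary index over the packed 256-bit BEBLID descriptors, followed by Lowe's ratio test on the two nearest Hamming distances. The standalone sketch below illustrates that pattern with random data in place of real descriptors; the 0.8 ratio is an assumed stand-in for BEBLIDParameters.NN_MATCH_RATIO, and IndexBinaryFlat is used instead of IndexBinaryHash to keep the example exact and parameter-free.

    import numpy as np
    import faiss

    d = 256                 # descriptor size in bits (32 bytes per vector, as with BEBLID)
    nb, nq = 1000, 50       # hypothetical database / query sizes

    rng = np.random.default_rng(0)
    db = rng.integers(0, 256, size=(nb, d // 8), dtype=np.uint8)       # packed binary descriptors
    queries = rng.integers(0, 256, size=(nq, d // 8), dtype=np.uint8)

    index = faiss.IndexBinaryFlat(d)    # exact Hamming search over packed descriptors
    index.add(db)

    D, I = index.search(queries, 2)     # two nearest neighbours per query, for the ratio test

    NN_MATCH_RATIO = 0.8                # assumed value; the real one lives in BEBLIDParameters
    good = D[:, 0] < NN_MATCH_RATIO * D[:, 1]   # best distance must clearly beat the runner-up

    print(f'{good.sum()} / {nq} queries pass the ratio test')
    print('matched database row ids:', I[good, 0])

In the patch itself the surviving query/database index pairs (Qgood, Igood) are then passed to cv2.findHomography with RANSAC, and a candidate is kept only when the inlier count reaches BEBLIDParameters.MIN_INLIERS.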