From 7484f767b3092a2af71a582e013accadac212364 Mon Sep 17 00:00:00 2001 From: "paolo.bolettieri" Date: Tue, 12 Jul 2022 18:02:34 +0200 Subject: [PATCH] added FAISS Searcher --- src/BEBLIDBulkExtraction.py | 2 +- src/BEBLIDBulkExtractionFromFileList.py | 2 +- ...BEBLIDExtractor.py => BEBLIDExtractorD.py} | 4 +- src/BEBLIDExtractorQ.py | 19 ++++ src/BEBLIDParameters.py | 9 +- src/BEBLIDRescorer.py | 15 +-- src/GroundTruthEvaluation.py | 4 +- src/LFBulkExtraction.py | 2 +- src/LFBulkExtraction4File.py | 9 +- src/LFBulkExtractionToDB.py | 2 +- src/LFUtilities.py | 94 ++++++++++++++++++- src/Searcher.py | 17 ++-- 12 files changed, 151 insertions(+), 28 deletions(-) rename src/{BEBLIDExtractor.py => BEBLIDExtractorD.py} (72%) create mode 100644 src/BEBLIDExtractorQ.py diff --git a/src/BEBLIDBulkExtraction.py b/src/BEBLIDBulkExtraction.py index 16e0ded..b33dc5a 100644 --- a/src/BEBLIDBulkExtraction.py +++ b/src/BEBLIDBulkExtraction.py @@ -2,7 +2,7 @@ from pathlib import Path import tqdm import LFUtilities -import BEBLIDExtractor as lf +import BEBLIDExtractorQ as lf import argparse import os diff --git a/src/BEBLIDBulkExtractionFromFileList.py b/src/BEBLIDBulkExtractionFromFileList.py index 44f7062..c53f8af 100644 --- a/src/BEBLIDBulkExtractionFromFileList.py +++ b/src/BEBLIDBulkExtractionFromFileList.py @@ -2,7 +2,7 @@ from pathlib import Path import tqdm import LFUtilities -import BEBLIDExtractor as lf +import BEBLIDExtractorQ as lf import argparse import os diff --git a/src/BEBLIDExtractor.py b/src/BEBLIDExtractorD.py similarity index 72% rename from src/BEBLIDExtractor.py rename to src/BEBLIDExtractorD.py index 3982331..efa9e9d 100644 --- a/src/BEBLIDExtractor.py +++ b/src/BEBLIDExtractorD.py @@ -7,12 +7,12 @@ import LFUtilities import BEBLIDParameters as params -detector = cv2.ORB_create(params.KEYPOINTS) +detector = cv2.ORB_create(params.KEYPOINTS_D) descriptor = cv2.xfeatures2d.BEBLID_create(0.75, 101) def extract(img_path): - img = LFUtilities.resize(params.IMG_SIZE, cv2.imread(img_path)) + img = LFUtilities.resize(params.IMG_SIZE_D, cv2.imread(img_path)) kp = detector.detect(img, None) kp, des = descriptor.compute(img, kp) return (kp, des) diff --git a/src/BEBLIDExtractorQ.py b/src/BEBLIDExtractorQ.py new file mode 100644 index 0000000..b8ba127 --- /dev/null +++ b/src/BEBLIDExtractorQ.py @@ -0,0 +1,19 @@ +import cv2 +from pathlib import Path +import tqdm +import pickle +import os +import LFUtilities + +import BEBLIDParameters as params + +detector = cv2.ORB_create(params.KEYPOINTS_Q) +descriptor = cv2.xfeatures2d.BEBLID_create(0.75, 101) + + +def extract(img_path): + img = LFUtilities.resize(params.IMG_SIZE_Q, cv2.imread(img_path)) + kp = detector.detect(img, None) + kp, des = descriptor.compute(img, kp) + return (kp, des) + diff --git a/src/BEBLIDParameters.py b/src/BEBLIDParameters.py index 91142ff..5bdf0f5 100644 --- a/src/BEBLIDParameters.py +++ b/src/BEBLIDParameters.py @@ -1,5 +1,8 @@ NN_MATCH_RATIO = 0.8 -MIN_GOOD_MATCHES = 22 +MIN_GOOD_MATCHES = 20 MIN_INLIERS = 15 -KEYPOINTS = 800 -IMG_SIZE = 500 \ No newline at end of file +KEYPOINTS_D = 250 +IMG_SIZE_D = 500 +KEYPOINTS_Q = 800 +IMG_SIZE_Q = 500 +K_REORDERING = 1000 \ No newline at end of file diff --git a/src/BEBLIDRescorer.py b/src/BEBLIDRescorer.py index 16da1f7..99e5abd 100644 --- a/src/BEBLIDRescorer.py +++ b/src/BEBLIDRescorer.py @@ -4,7 +4,7 @@ import numpy as np import LFUtilities import BEBLIDParameters import ImageRecognitionSettings as settings - +from line_profiler_pycharm import profile class BEBLIDRescorer: @@ -19,24 +19,27 @@ class BEBLIDRescorer: query = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, query_id) return self.rescore_by_img(query, resultset) + @profile def rescore_by_img(self, query, resultset): max_inliers = -1 res = [] counter = 0 - if len(query[0]) > 0: + if len(query[0]) > BEBLIDParameters.MIN_GOOD_MATCHES: for data_id, _ in resultset: try: - data_el = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, data_id) + #data_el = LFUtilities.loadz_img_lf(settings.DATASET_LF_FOLDER, data_id) + data_el = LFUtilities.unpickle_img_lf(settings.DATASET_LF_FOLDER, data_id) - if len(data_el[1]) > 0: + if len(data_el[1]) > BEBLIDParameters.MIN_GOOD_MATCHES: nn_matches = self.bf.knnMatch(query[1], data_el[1], 2) good = [m for m, n in nn_matches if m.distance < BEBLIDParameters.NN_MATCH_RATIO * n.distance] if len(good) > BEBLIDParameters.MIN_GOOD_MATCHES: src_pts = np.float32([query[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2) - dst_pts = np.float32([data_el[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2) + #dst_pts = np.float32([data_el[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2) + dst_pts = data_el[0][[m.trainIdx for m in good]].reshape(-1, 1, 2) - M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 3.0) + M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0) matches_mask = mask.ravel().tolist() # print(len(good)) inliers = np.count_nonzero(matches_mask) diff --git a/src/GroundTruthEvaluation.py b/src/GroundTruthEvaluation.py index 74a649d..ed353cd 100644 --- a/src/GroundTruthEvaluation.py +++ b/src/GroundTruthEvaluation.py @@ -43,7 +43,7 @@ if __name__ == '__main__': for path in tqdm.tqdm(paths_list): key = path.name exprected_id = groundtruth[key] - print(exprected_id) + # print(exprected_id) try: img_file = {'image': ( 'query', open(os.path.join(path.parent, path.name), 'rb'))} @@ -53,7 +53,7 @@ if __name__ == '__main__': res = r.json() for i in range (0, len(res)): - print(res[i][0]) + #print(res[i][0]) if res[i][0] in exprected_id: precision_at[i] = precision_at[i] + 1 except Exception as e: diff --git a/src/LFBulkExtraction.py b/src/LFBulkExtraction.py index 34bb8f5..5a68e9d 100644 --- a/src/LFBulkExtraction.py +++ b/src/LFBulkExtraction.py @@ -2,7 +2,7 @@ from pathlib import Path import tqdm import LFUtilities -import BEBLIDExtractor as lf +import BEBLIDExtractorQ as lf import argparse import os diff --git a/src/LFBulkExtraction4File.py b/src/LFBulkExtraction4File.py index 556ecf5..29cd4a5 100644 --- a/src/LFBulkExtraction4File.py +++ b/src/LFBulkExtraction4File.py @@ -2,7 +2,7 @@ from pathlib import Path import tqdm import LFUtilities -import BEBLIDExtractor as lf +import BEBLIDExtractorD as lf import argparse import os @@ -24,9 +24,12 @@ if __name__ == '__main__': try: kp, des = lf.extract(os.path.join(path.parent, path.name)) filename = os.path.splitext(path.name)[0] - LFUtilities.save_img_lf(dest, filename, kp, des) - except: + #LFUtilities.save_img_lf(dest, filename, kp, des) + #LFUtilities.savez_img_lf(dest, filename, kp, des) + LFUtilities.pickle_img_lf(dest, filename, kp, des) + except Exception as e: print("cannot process '%s'" % path) + print(e) pass print('lf extracted.') diff --git a/src/LFBulkExtractionToDB.py b/src/LFBulkExtractionToDB.py index 91c9475..a821441 100644 --- a/src/LFBulkExtractionToDB.py +++ b/src/LFBulkExtractionToDB.py @@ -2,7 +2,7 @@ from pathlib import Path import tqdm import LFUtilities -import BEBLIDExtractor as lf +import BEBLIDExtractorQ as lf import argparse import os from LFDB import LFDB diff --git a/src/LFUtilities.py b/src/LFUtilities.py index 7ef48a4..6d741f6 100644 --- a/src/LFUtilities.py +++ b/src/LFUtilities.py @@ -2,7 +2,7 @@ import cv2 import numpy as np import pickle as pickle import os - +from line_profiler_pycharm import profile def resize(max_side, img): if img.shape[1] > img.shape[0]: @@ -36,9 +36,101 @@ def deserialize_object(serialized_obj): return pickle.loads(serialized_obj) +def serializeV1(keypoints, descriptors): + temp_array = [] + for point in keypoints: + kp = [point.pt, point.size, point.angle, point.response, point.octave, point.class_id] + temp_array.append(kp) + return temp_array, descriptors + + +def serialize(keypoints, descriptors): + pts = np.float32([keypoints[i].pt for i in range(0, len(keypoints))]) + return pts, descriptors + +def deserialize(ser_kp, ser_des): + keypoints = [] + #data_list = array.tolist() + for point in ser_kp: + temp_feature = cv2.KeyPoint(x=point[0][0],y=point[0][1], size=point[1], angle=point[2], response=point[3], octave=point[4], class_id=point[5]) + keypoints.append(temp_feature) + return keypoints, ser_des + + +def deserializev1(ser_kp, ser_des): + keypoints = [] + #data_list = array.tolist() + for point in ser_kp: + temp_feature = cv2.KeyPoint(x=point[0][0],y=point[0][1], size=point[1], angle=point[2], response=point[3], octave=point[4], class_id=point[5]) + keypoints.append(temp_feature) + return keypoints, ser_des + +def pickle_img_lf(dest, id, keypoints, descriptors): + dest_folder_name = id[0:3] + filename = id + '.dat' + dest_folder_path = os.path.join(dest, dest_folder_name) + if (not os.path.exists(dest_folder_path)): + os.mkdir(dest_folder_path) + dest_path = os.path.join(dest_folder_path, filename) + kps, des = serialize(keypoints, descriptors) + pickle.dump([kps, des], open(dest_path, 'wb')) + +@profile +def unpickle_img_lf(lf_path, id): + dest_folder_name = id[0:3] + filename = id + '.dat' + dest_folder_path = os.path.join(lf_path, dest_folder_name) + dest_path = os.path.join(dest_folder_path, filename) + kps, des = pickle.load((open(dest_path, "rb"))) + return kps, des + + +@profile +def loadz_img_lf(lf_path, id): + dest_folder_name = id[0:3] + filename = id + '.dat.npz' + dest_folder_path = os.path.join(lf_path, dest_folder_name) + dest_path = os.path.join(dest_folder_path, filename) + data = np.load(dest_path, allow_pickle=False) + kps = data.f.kps + des = data.f.des + #kps = data['kps'] + #des = data['des'] + #kp, desc = deserialize(data['kps'], data['des']) + return kps, des + + +def savez_img_lf(dest, id, keypoints, descriptors): + dest_folder_name = id[0:3] + filename = id + '.dat' + dest_folder_path = os.path.join(dest, dest_folder_name) + if (not os.path.exists(dest_folder_path)): + os.mkdir(dest_folder_path) + dest_path = os.path.join(dest_folder_path, filename) + kps, des = serialize(keypoints, descriptors) + #np.savez(dest_path, data) + np.savez(dest_path, kps=kps, des=des) + + +@profile +def loadz_img_lf(lf_path, id): + dest_folder_name = id[0:3] + filename = id + '.dat.npz' + dest_folder_path = os.path.join(lf_path, dest_folder_name) + dest_path = os.path.join(dest_folder_path, filename) + data = np.load(dest_path, allow_pickle=False) + kps = data.f.kps + des = data.f.des + #kps = data['kps'] + #des = data['des'] + #kp, desc = deserialize(data['kps'], data['des']) + return kps, des + + def unpickle_keypoints(array): keypoints = [] descriptors = [] + data_list = array.tolist() for point in array: temp_feature = cv2.KeyPoint(x=point[0][0],y=point[0][1], size=point[1], angle=point[2], response=point[3], octave=point[4], class_id=point[5]) temp_descriptor = point[6] diff --git a/src/Searcher.py b/src/Searcher.py index c84606d..b5cec40 100644 --- a/src/Searcher.py +++ b/src/Searcher.py @@ -4,22 +4,24 @@ import pickle as pickle import LFUtilities import ImageRecognitionSettings as settings -from BEBLIDRescorerDB import BEBLIDRescorerDB -#from BEBLIDRescorerGPU import BEBLIDRescorerGPU +#from BEBLIDRescorerDB import BEBLIDRescorerDB +from BEBLIDRescorer import BEBLIDRescorer +import BEBLIDParameters + from FAISSSearchEngine import FAISSSearchEngine import FeatureExtractor as fe -import BEBLIDExtractor as lf +import BEBLIDExtractorQ as lf +from line_profiler_pycharm import profile class Searcher: - K_REORDERING = 1000 def __init__(self): # self.dataset = h5py.File(settings.dataset_file, 'r')['rmac'][...] # np.save('/media/Data/data/beni_culturali/deploy/dataset', self.dataset) self.search_engine = FAISSSearchEngine() - self.rescorer = BEBLIDRescorerDB() + self.rescorer = BEBLIDRescorer() def get_id(self, idx): return self.search_engine.get_id(idx) @@ -45,17 +47,18 @@ class Searcher: def search_by_id(self, query_id, k=10, rescorer=False): kq = k if rescorer: - kq = self.K_REORDERING + kq = BEBLIDParameters.K_REORDERING res = self.search_engine.search_by_id(query_id, kq) if rescorer: res_lf = self.rescorer.rescore_by_id(query_id, res) res = res_lf if res_lf else res[:k] return res + @profile def search_by_img(self, query_img, k=10, rescorer=False): kq = k if rescorer: - kq = self.K_REORDERING + kq = BEBLIDParameters.K_REORDERING query_desc = fe.extract(query_img) res = self.search_engine.search_by_img(query_desc, kq) if rescorer: