added FAISS Searcher
parent 8fced50627
commit de8bc9a70c
@@ -61,8 +61,10 @@ class BEBLIDRescorer:
        res.sort(key=lambda result: result[1], reverse=True)
        return res

-    def add(self, lf):
-        self.lf.append(lf)
+    def add(self, kp, des, id):
+        # LFUtilities.save_img_lf(dest, filename, kp, des)
+        # LFUtilities.savez_img_lf(dest, filename, kp, des)
+        LFUtilities.pickle_img_lf(settings.DATASET_LF_FOLDER, id, kp, des)

    def remove(self, idx):
        self.descs = np.delete(self.descs, idx, axis=0)
@@ -0,0 +1,118 @@
import cv2
import numpy as np

import LFUtilities
import BEBLIDParameters
import ImageRecognitionSettings as settings
from line_profiler_pycharm import profile
import faiss


class BEBLIDRescorerFAISS:

    def __init__(self):
        #self.lf = LFUtilities.load(settings.DATASET_BEBLID)
        #self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
        #self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
        self.bf = cv2.DescriptorMatcher_create(cv2.DescriptorMatcher_BRUTEFORCE_HAMMING)

    def rescore_by_id(self, query_id, resultset):
        #query_idx = self.ids.index(query_id)
        query = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, query_id)
        return self.rescore_by_img(query, resultset)

    @profile
    def rescore_by_img(self, query, resultset):
        max_inliers = -1
        res = []
        counter = 0
        if len(query[0]) > BEBLIDParameters.MIN_GOOD_MATCHES:
            for data_id, _ in resultset:
                try:
                    #data_el = LFUtilities.loadz_img_lf(settings.DATASET_LF_FOLDER, data_id)
                    data_el = LFUtilities.unpickle_img_lf(settings.DATASET_LF_FOLDER, data_id)

                    if len(data_el[1]) > BEBLIDParameters.MIN_GOOD_MATCHES:
                        #nn_matches = self.bf.knnMatch(query[1], data_el[1], 2)
                        #good = [m for m, n in nn_matches if m.distance < BEBLIDParameters.NN_MATCH_RATIO * n.distance]

                        # Dimension of the vectors (in bits).
                        d = 256

                        # Vectors to be indexed, each represented by d / 8 bytes,
                        # i.e. the i-th vector is db[i].
                        db = data_el[1]

                        # Vectors to be queried from the index.
                        queries = query[1]

                        # Initializing index.
                        #index = faiss.IndexBinaryFlat(d)
                        nbits = 64
                        index = faiss.IndexBinaryHash(d, nbits)
                        # index = faiss.IndexBinaryHNSW(d, 256)

                        # Adding the database vectors.
                        index.add(db)

                        # Number of nearest neighbors to retrieve per query vector.
                        k = 2

                        # Querying the index.
                        index.nflip = 1

                        D, I = index.search(queries, k)

                        # D[i, j] contains the distance from the i-th query vector to its j-th nearest neighbor.
                        # I[i, j] contains the id of the j-th nearest neighbor of the i-th query vector.

                        f_good = (D[:, 0] < BEBLIDParameters.NN_MATCH_RATIO * D[:, 1])
                        Qgood = np.asarray(np.nonzero(f_good))[0]
                        Igood = I[f_good, 0]

                        if Qgood.size > BEBLIDParameters.MIN_GOOD_MATCHES:
                            # src_pts = np.float32([query[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
                            # dst_pts = np.float32([data_el[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
                            # dst_pts = data_el[0][[m.trainIdx for m in good]].reshape(-1, 1, 2)

                            src_pts = np.float32([query[0][m].pt for m in Qgood]).reshape(-1, 1, 2)
                            #dst_pts = np.float32([data_el[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
                            dst_pts = data_el[0][[m for m in Igood]].reshape(-1, 1, 2)

                            M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
                            matches_mask = mask.ravel().tolist()
                            # print(len(good))
                            inliers = np.count_nonzero(matches_mask)
                            # print(inliers)
                            if inliers >= BEBLIDParameters.MIN_INLIERS and inliers > max_inliers:
                                max_inliers = inliers
                                res.append((data_id, round(inliers / Qgood.size, 3)))
                                print(data_id)
                                print(f'candidate n. {counter}')
                                # to get just the first candidate
                                break
                except Exception as e:
                    print('rescore error evaluating ' + data_id)
                    print(e)
                    pass
                counter += 1

        if res:
            res.sort(key=lambda result: result[1], reverse=True)
        return res

    def add(self, lf):
        self.lf.append(lf)

    def remove(self, idx):
        self.descs = np.delete(self.descs, idx, axis=0)

    def save(self, is_backup=False):
        lf_save_file = settings.DATASET_LF
        ids_file = settings.DATASET_IDS_LF
        if lf_save_file != "None":
            if is_backup:
                lf_save_file += '.bak'
                ids_file += '.bak'

            LFUtilities.save(lf_save_file, self.lf)
            np.savetxt(ids_file, self.ids, fmt='%s')
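For reference, the matching scheme used in rescore_by_img above can be exercised in isolation. The sketch below is illustrative only: random uint8 descriptors stand in for BEBLID features, and the ratio threshold is an assumption mirroring the role of BEBLIDParameters.NN_MATCH_RATIO, not a value from the repository. It builds a FAISS binary index over one image's descriptors, runs a 2-NN Hamming search, applies the ratio test, and verifies the surviving matches with a RANSAC homography, as the rescorer does.

import cv2
import faiss
import numpy as np

rng = np.random.default_rng(0)
d = 256                                      # descriptor length in bits, as in the code above
db = rng.integers(0, 256, (500, d // 8), dtype=np.uint8)       # stand-in "database image" descriptors
queries = rng.integers(0, 256, (300, d // 8), dtype=np.uint8)  # stand-in query descriptors

index = faiss.IndexBinaryFlat(d)             # exact Hamming search; IndexBinaryHash(d, 64) is the hashed variant used above
index.add(db)
D, I = index.search(queries, 2)              # distances and ids of the 2 nearest neighbours per query descriptor

ratio = 0.8                                  # assumed stand-in for BEBLIDParameters.NN_MATCH_RATIO
good = D[:, 0] < ratio * D[:, 1]             # Lowe-style ratio test on Hamming distances
q_idx = np.nonzero(good)[0]                  # query keypoints that passed the test
t_idx = I[good, 0]                           # indices of their matched database keypoints

# With real keypoints, the matched coordinates would feed the homography check:
src_pts = rng.random((q_idx.size, 1, 2), dtype=np.float32)    # placeholder keypoint coordinates
dst_pts = rng.random((t_idx.size, 1, 2), dtype=np.float32)
if q_idx.size >= 4:                          # findHomography needs at least 4 correspondences
    M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
    inliers = int(np.count_nonzero(mask)) if mask is not None else 0
    print(f'{q_idx.size} ratio-test matches, {inliers} RANSAC inliers')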
@@ -42,20 +42,19 @@ if __name__ == '__main__':
    print('Analyzing images...')
    for path in tqdm.tqdm(paths_list):
        key = path.name
-        exprected_id = groundtruth[key]
+        expected_id = groundtruth[key]
        # print(exprected_id)
        try:
            img_file = {'image': (
                'query', open(os.path.join(path.parent, path.name), 'rb'))}
-            params = {'searchDeepLevel': 1}
+            params = {'searchDeepLevel': 3}

            r = requests.post(IMG_REC_SERVICE + 'searchByImg', data=params, files=img_file)
            res = r.json()
            print(res)

            for i in range(0, len(res)):
-                if res[i][0] in exprected_id:
-                    print("found at " + str(i))
+                if res[i][0] in expected_id:
+                    #print("found at " + str(i))
                    precision_at[i] = precision_at[i] + 1
        except Exception as e:
            print("cannot process '%s'" % path)
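For context, the loop above bumps precision_at[i] whenever the ground-truth id shows up at rank i of the response. A minimal sketch of turning those per-rank hit counts into precision@k over the whole run (the counts and query total below are made-up example numbers, and precision_at is assumed to be the counter list initialized earlier in the script):

import numpy as np

precision_at = [42, 7, 3, 1, 0]           # example: hits at ranks 0..4 accumulated by the loop above
n_queries = 60                            # example: number of query images processed

hits_within_k = np.cumsum(precision_at)   # queries whose expected id appears within the top k results
for k, hits in enumerate(hits_within_k, start=1):
    print(f'precision@{k}: {hits / n_queries:.3f}')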
@@ -75,6 +75,14 @@ def pickle_img_lf(dest, id, keypoints, descriptors):
    kps, des = serialize(keypoints, descriptors)
    pickle.dump([kps, des], open(dest_path, 'wb'))

+def delete_img_lf(dest, id):
+    dest_folder_name = id[0:3]
+    filename = id + '.dat'
+    dest_folder_path = os.path.join(dest, dest_folder_name)
+    dest_path = os.path.join(dest_folder_path, filename)
+    if os.path.exists(dest_path):
+        os.remove(dest_path)
+
@profile
def unpickle_img_lf(lf_path, id):
    dest_folder_name = id[0:3]
@@ -139,56 +147,5 @@ def unpickle_keypoints(array):
    return keypoints, np.array(descriptors)


-def load(lf_path):
-    print('loading LF dataset ' + lf_path)
-    ser_dataset = pickle.load(open(lf_path, "rb"))
-    lf_dataset = []
-    for item in ser_dataset:
-        kp, desc = unpickle_keypoints(item)
-        lf_dataset.append((kp, desc))
-    return lf_dataset
-
-
-def save(lf_data, lf_path):
-    data = []
-    for lf in lf_data:
-        data.append(pickle_keypoints(lf[0], lf[1]))
-    pickle.dump(data, open(lf_path, 'wb'))
-
-
-def save_img_lf(dest, id, keypoints, descriptors):
-    dest_folder_name = id[0:3]
-    filename = id + '.dat'
-    dest_folder_path = os.path.join(dest, dest_folder_name)
-    if (not os.path.exists(dest_folder_path)):
-        os.mkdir(dest_folder_path)
-    dest_path = os.path.join(dest_folder_path, filename)
-    data = pickle_keypoints(keypoints, descriptors)
-    pickle.dump(data, open(dest_path, 'wb'))
-
-
-def load_img_lf(lf_path, id):
-    dest_folder_name = id[0:3]
-    filename = id + '.dat'
-    dest_folder_path = os.path.join(lf_path, dest_folder_name)
-    dest_path = os.path.join(dest_folder_path, filename)
-    data = pickle.load(open(dest_path, "rb"))
-    kp, desc = unpickle_keypoints(data)
-    return (kp, desc)
-
-
-def load_img_lf_GPU(lf_path, id):
-    dest_folder_name = id[0:3]
-    filename = id + '.dat'
-    dest_folder_path = os.path.join(lf_path, dest_folder_name)
-    dest_path = os.path.join(dest_folder_path, filename)
-    data = pickle.load(open(dest_path, "rb"))
-    kp, desc = unpickle_keypoints(data)
-
-    data_gpu_mat = cv2.cuda_GpuMat(np.zeros((1500,), dtype=int))
-    if len(desc) > 0:
-        data_gpu_mat = cv2.cuda_GpuMat(desc)
-    desc = data_gpu_mat
-
-    return (kp, desc)
@@ -0,0 +1,202 @@
import cv2
import numpy as np
import pickle as pickle
import os
from line_profiler_pycharm import profile


def resize(max_side, img):
    if img.shape[1] > img.shape[0]:
        r = max_side / img.shape[1]
        dim = (max_side, int(img.shape[0] * r))
    else:
        r = max_side / img.shape[0]
        dim = (int(img.shape[1] * r), max_side)

    # perform the actual resizing of the image and show it
    resized = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
    return resized


def pickle_keypoints(keypoints, descriptors):
    i = 0
    temp_array = []
    for point in keypoints:
        temp = (point.pt, point.size, point.angle, point.response, point.octave,
                point.class_id, descriptors[i])
        i += 1
        temp_array.append(temp)
    return temp_array


def serialize_object(obj):
    return pickle.dumps(obj)


def deserialize_object(serialized_obj):
    return pickle.loads(serialized_obj)


def serializeV1(keypoints, descriptors):
    temp_array = []
    for point in keypoints:
        kp = [point.pt, point.size, point.angle, point.response, point.octave, point.class_id]
        temp_array.append(kp)
    return temp_array, descriptors


def serialize(keypoints, descriptors):
    pts = np.float32([keypoints[i].pt for i in range(0, len(keypoints))])
    return pts, descriptors


def deserialize(ser_kp, ser_des):
    keypoints = []
    #data_list = array.tolist()
    for point in ser_kp:
        temp_feature = cv2.KeyPoint(x=point[0][0], y=point[0][1], size=point[1], angle=point[2], response=point[3], octave=point[4], class_id=point[5])
        keypoints.append(temp_feature)
    return keypoints, ser_des


def deserializev1(ser_kp, ser_des):
    keypoints = []
    #data_list = array.tolist()
    for point in ser_kp:
        temp_feature = cv2.KeyPoint(x=point[0][0], y=point[0][1], size=point[1], angle=point[2], response=point[3], octave=point[4], class_id=point[5])
        keypoints.append(temp_feature)
    return keypoints, ser_des


def pickle_img_lf(dest, id, keypoints, descriptors):
    dest_folder_name = id[0:3]
    filename = id + '.dat'
    dest_folder_path = os.path.join(dest, dest_folder_name)
    if not os.path.exists(dest_folder_path):
        os.mkdir(dest_folder_path)
    dest_path = os.path.join(dest_folder_path, filename)
    kps, des = serialize(keypoints, descriptors)
    pickle.dump([kps, des], open(dest_path, 'wb'))


def delete_img_lf(dest, id):
    dest_folder_name = id[0:3]
    filename = id + '.dat'
    dest_folder_path = os.path.join(dest, dest_folder_name)
    dest_path = os.path.join(dest_folder_path, filename)
    if os.path.exists(dest_path):
        os.remove(dest_path)


@profile
def unpickle_img_lf(lf_path, id):
    dest_folder_name = id[0:3]
    filename = id + '.dat'
    dest_folder_path = os.path.join(lf_path, dest_folder_name)
    dest_path = os.path.join(dest_folder_path, filename)
    kps, des = pickle.load(open(dest_path, "rb"))
    return kps, des


@profile
def loadz_img_lf(lf_path, id):
    dest_folder_name = id[0:3]
    filename = id + '.dat.npz'
    dest_folder_path = os.path.join(lf_path, dest_folder_name)
    dest_path = os.path.join(dest_folder_path, filename)
    data = np.load(dest_path, allow_pickle=False)
    kps = data.f.kps
    des = data.f.des
    #kps = data['kps']
    #des = data['des']
    #kp, desc = deserialize(data['kps'], data['des'])
    return kps, des


def savez_img_lf(dest, id, keypoints, descriptors):
    dest_folder_name = id[0:3]
    filename = id + '.dat'
    dest_folder_path = os.path.join(dest, dest_folder_name)
    if not os.path.exists(dest_folder_path):
        os.mkdir(dest_folder_path)
    dest_path = os.path.join(dest_folder_path, filename)
    kps, des = serialize(keypoints, descriptors)
    #np.savez(dest_path, data)
    np.savez(dest_path, kps=kps, des=des)


@profile
def loadz_img_lf(lf_path, id):
    dest_folder_name = id[0:3]
    filename = id + '.dat.npz'
    dest_folder_path = os.path.join(lf_path, dest_folder_name)
    dest_path = os.path.join(dest_folder_path, filename)
    data = np.load(dest_path, allow_pickle=False)
    kps = data.f.kps
    des = data.f.des
    #kps = data['kps']
    #des = data['des']
    #kp, desc = deserialize(data['kps'], data['des'])
    return kps, des


def unpickle_keypoints(array):
    keypoints = []
    descriptors = []
    data_list = array.tolist()
    for point in array:
        temp_feature = cv2.KeyPoint(x=point[0][0], y=point[0][1], size=point[1], angle=point[2], response=point[3], octave=point[4], class_id=point[5])
        temp_descriptor = point[6]
        keypoints.append(temp_feature)
        descriptors.append(temp_descriptor)
    return keypoints, np.array(descriptors)


def load(lf_path):
    print('loading LF dataset ' + lf_path)
    ser_dataset = pickle.load(open(lf_path, "rb"))
    lf_dataset = []
    for item in ser_dataset:
        kp, desc = unpickle_keypoints(item)
        lf_dataset.append((kp, desc))
    return lf_dataset


def save(lf_data, lf_path):
    data = []
    for lf in lf_data:
        data.append(pickle_keypoints(lf[0], lf[1]))
    pickle.dump(data, open(lf_path, 'wb'))


def save_img_lf(dest, id, keypoints, descriptors):
    dest_folder_name = id[0:3]
    filename = id + '.dat'
    dest_folder_path = os.path.join(dest, dest_folder_name)
    if not os.path.exists(dest_folder_path):
        os.mkdir(dest_folder_path)
    dest_path = os.path.join(dest_folder_path, filename)
    data = pickle_keypoints(keypoints, descriptors)
    pickle.dump(data, open(dest_path, 'wb'))


def load_img_lf(lf_path, id):
    dest_folder_name = id[0:3]
    filename = id + '.dat'
    dest_folder_path = os.path.join(lf_path, dest_folder_name)
    dest_path = os.path.join(dest_folder_path, filename)
    data = pickle.load(open(dest_path, "rb"))
    kp, desc = unpickle_keypoints(data)
    return (kp, desc)


def load_img_lf_GPU(lf_path, id):
    dest_folder_name = id[0:3]
    filename = id + '.dat'
    dest_folder_path = os.path.join(lf_path, dest_folder_name)
    dest_path = os.path.join(dest_folder_path, filename)
    data = pickle.load(open(dest_path, "rb"))
    kp, desc = unpickle_keypoints(data)

    data_gpu_mat = cv2.cuda_GpuMat(np.zeros((1500,), dtype=int))
    if len(desc) > 0:
        data_gpu_mat = cv2.cuda_GpuMat(desc)
    desc = data_gpu_mat

    return (kp, desc)
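As a quick usage sketch of the per-image helpers above (assuming the module is importable as LFUtilities, that line_profiler_pycharm is installed since the module imports it, and that a local test image exists at the path shown; ORB stands in here for the BEBLID extractor used elsewhere in the project):

import os
import cv2
import LFUtilities

img = cv2.imread('query.jpg', cv2.IMREAD_GRAYSCALE)   # any local test image
orb = cv2.ORB_create(500)                             # stand-in detector/descriptor
keypoints, descriptors = orb.detectAndCompute(img, None)

dataset_folder = '/tmp/lf_dataset'                    # hypothetical stand-in for settings.DATASET_LF_FOLDER
os.makedirs(dataset_folder, exist_ok=True)

img_id = 'abc123'                                     # files are bucketed under the first 3 characters of the id
LFUtilities.pickle_img_lf(dataset_folder, img_id, keypoints, descriptors)

# unpickle_img_lf returns the serialized form: an (N, 2) float32 array of keypoint
# coordinates plus the descriptor matrix, not cv2.KeyPoint objects.
pts, des = LFUtilities.unpickle_img_lf(dataset_folder, img_id)
print(pts.shape, des.shape)

LFUtilities.delete_img_lf(dataset_folder, img_id)     # removes /tmp/lf_dataset/abc/abc123.dat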
@@ -5,12 +5,14 @@ import pickle as pickle
import LFUtilities
import ImageRecognitionSettings as settings
#from BEBLIDRescorerDB import BEBLIDRescorerDB
#from BEBLIDRescorerFAISS import BEBLIDRescorerFAISS
from BEBLIDRescorer import BEBLIDRescorer
import SearcherParameters

from FAISSSearchEngine import FAISSSearchEngine
import FeatureExtractor as fe
-import BEBLIDExtractorQ as lf
+import BEBLIDExtractorQ as lfQ
+import BEBLIDExtractorD as lfD
from line_profiler_pycharm import profile
@@ -30,9 +32,12 @@ class Searcher:
            self.save(True)

        desc = fe.extract(img_file)
-        #orb = lf.extract(img_file)
        self.search_engine.add(desc, id)
-        #self.rescorer.add(orb)
+
+        lf = lfD.extract(img_file)
+        self.rescorer.add(lf, id)
+
+        #orb = lf.extract(img_file)

        self.save()
        print('added ' + id)
@@ -62,7 +67,7 @@ class Searcher:
        query_desc = fe.extract(query_img)
        res = self.search_engine.search_by_img(query_desc, kq)
        if search_deep_level > 0:
-            query_lf = lf.extract(query_img)
+            query_lf = lfQ.extract(query_img)
            res_lf = self.rescorer.rescore_by_img(query_lf, res)
-            #res = res_lf if res_lf else res[:k]
+            res = res_lf if res_lf else res[:k]
@@ -71,4 +76,4 @@ class Searcher:

    def save(self, is_backup=False):
        self.search_engine.save(is_backup)
-        #self.rescorer.save(is_backup)
+        self.rescorer.save(is_backup)