added FAISS Searcher

2022-07-08 18:19:41 +02:00 · 2022-07-08 18:19:41 +02:00 · 2761ccbe95
parent 647a8778ba
commit 2761ccbe95
26 changed files with 772 additions and 466 deletions
--- a/5
+++ b/5
@ -7,7 +7,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
        git \
        wget \
        nano \
-        unzip
+        unzip \
+        sqlite3 \
+        libsqlite3-dev
+        

 RUN pip install numpy tornado flask-restful pillow numpy matplotlib tqdm scikit-learn h5py requests faiss-cpu==1.7.2
 ADD . /workspace
--- a/run.sh
+++ b/run.sh
@ -1 +1 @@
-docker run --net=host -p 8190:8190 -v /media/data2/data/swoads/data:/workspace/data -it image-recognition:swoads python3 /workspace/src/beniculturali.py /workspace/data/conf/img_rec_conf.json
+docker run --net=host -p 8190:8190 -v /media/data2/data/swoads/data:/workspace/data -it image-recognition:swoads python3 /workspace/src/ImageRecognitionService.py /workspace/data/conf/img_rec_conf.json
--- a/src/BEBLIDExtractor.py
+++ b/src/BEBLIDExtractor.py
@ -8,7 +8,7 @@ import LFUtilities
 import BEBLIDParameters as params

 detector = cv2.ORB_create(params.KEYPOINTS)
-descriptor = cv2.xfeatures2d.BEBLID_create(0.75)
+descriptor = cv2.xfeatures2d.BEBLID_create(0.75, 101)


 def extract(img_path):
--- a/src/BEBLIDParameters.py
+++ b/src/BEBLIDParameters.py
@ -1,5 +1,5 @@
 NN_MATCH_RATIO = 0.8
-MIN_GOOD_MATCHES = 12
-MIN_INLIERS = 10
-KEYPOINTS = 500
+MIN_GOOD_MATCHES = 22
+MIN_INLIERS = 15
+KEYPOINTS = 800
 IMG_SIZE = 500
--- a/src/BEBLIDRescorer.py
+++ b/src/BEBLIDRescorer.py
@ -3,7 +3,7 @@ import numpy as np

 import LFUtilities
 import BEBLIDParameters
-import beniculturaliSettings as settings
+import ImageRecognitionSettings as settings


 class BEBLIDRescorer:
@ -15,38 +15,44 @@ class BEBLIDRescorer:
        self.bf = cv2.DescriptorMatcher_create(cv2.DescriptorMatcher_BRUTEFORCE_HAMMING)

    def rescore_by_id(self, query_id, resultset):
-        query_idx = self.ids.index(query_id)
-        query = LFUtilities.load_img_lf(settings.DATASET_BEBLID, query_id)
+        #query_idx = self.ids.index(query_id)
+        query = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, query_id)
        return self.rescore_by_img(query, resultset)

    def rescore_by_img(self, query, resultset):
        max_inliers = -1
        res = []
        counter = 0
-        for data_id, _ in resultset:
-            try:
-                data_el = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, data_id)
+        if len(query[0]) > 0:
+            for data_id, _ in resultset:
+                try:
+                    data_el = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, data_id)

-                nn_matches = self.bf.knnMatch(query[1], data_el[1], 2)
-                good = [m for m, n in nn_matches if m.distance < BEBLIDParameters.NN_MATCH_RATIO * n.distance]
+                    if len(data_el[1]) > 0:
+                        nn_matches = self.bf.knnMatch(query[1], data_el[1], 2)
+                        good = [m for m, n in nn_matches if m.distance < BEBLIDParameters.NN_MATCH_RATIO * n.distance]

-                if len(good) > BEBLIDParameters.MIN_GOOD_MATCHES:
-                    src_pts = np.float32([query[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
-                    dst_pts = np.float32([data_el[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
+                        if len(good) > BEBLIDParameters.MIN_GOOD_MATCHES:
+                            src_pts = np.float32([query[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
+                            dst_pts = np.float32([data_el[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2)

-                    M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 1.0)
-                    matches_mask = mask.ravel().tolist()
-                    # print(len(good))
-                    inliers = np.count_nonzero(matches_mask)
-                    # print(inliers)
-                    if (inliers >= BEBLIDParameters.MIN_INLIERS and inliers > max_inliers):
-                        max_inliers = inliers
-                        res.append((data_id, round(inliers/len(good), 3)))
-                        print(f'candidate n.  {counter}')
-            except:
-                print('rescore error evaluating ' + data_id)
-                pass
-            counter += 1
+                            M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 3.0)
+                            matches_mask = mask.ravel().tolist()
+                            # print(len(good))
+                            inliers = np.count_nonzero(matches_mask)
+                            # print(inliers)
+                            if (inliers >= BEBLIDParameters.MIN_INLIERS and inliers > max_inliers):
+                                max_inliers = inliers
+                                res.append((data_id, round(inliers/len(good), 3)))
+                                print(data_id)
+                                print(f'candidate n.  {counter}')
+                                #to get just the first candidate
+                                break
+                except Exception as e:
+                    print('rescore error evaluating ' + data_id)
+                    print(e)
+                    pass
+                counter += 1

        if res:
            res.sort(key=lambda result: result[1], reverse=True)
--- a/src/BEBLIDRescorerDB.py
+++ b/src/BEBLIDRescorerDB.py
@ -0,0 +1,81 @@
+import cv2
+import numpy as np
+
+import LFUtilities
+import BEBLIDParameters
+import ImageRecognitionSettings as settings
+from LFDB import LFDB
+
+
+class BEBLIDRescorerDB:
+    """Re-rank a candidate list by BEBLID local-feature matching.
+
+    Candidate features are read from the LFDB store opened at
+    ``settings.DB_LF``; query features are supplied by the caller or loaded
+    from ``settings.DATASET_LF_FOLDER``.
+    """
+
+    def __init__(self):
+        # Brute-force Hamming matcher used for the 2-NN ratio test below.
+        self.bf = cv2.DescriptorMatcher_create(cv2.DescriptorMatcher_BRUTEFORCE_HAMMING)
+        self.lf_db = LFDB(settings.DB_LF)
+
+    def rescore_by_id(self, query_id, resultset):
+        """Load the stored local features of ``query_id`` and rescore ``resultset`` with them."""
+        query = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, query_id)
+        return self.rescore_by_img(query, resultset)
+
+    def rescore_by_img(self, query, resultset):
+        """Return the first candidate that is geometrically consistent with the query.
+
+        ``query`` is indexed as ``(keypoints, descriptors)`` and ``resultset``
+        is iterated as ``(data_id, score)`` pairs. Candidates are tested in
+        order and the scan stops at the first one that passes, so the result
+        is either an empty list or a single ``(data_id, inlier_ratio)`` tuple.
+        A failure on one candidate is printed and the scan continues.
+        """
+        res = []
+        # Without query keypoints there is nothing to match against.
+        if len(query[0]) == 0:
+            return res
+
+        for counter, (data_id, _) in enumerate(resultset):
+            try:
+                blob = self.lf_db.get(data_id)
+                serialized_obj = LFUtilities.deserialize_object(blob)
+                data_el = LFUtilities.unpickle_keypoints(serialized_obj)
+                if len(data_el[1]) == 0:
+                    continue
+
+                nn_matches = self.bf.knnMatch(query[1], data_el[1], 2)
+                # Ratio test. A match list can hold fewer than two neighbours
+                # (e.g. a candidate with a single descriptor); such entries
+                # are ignored instead of breaking the tuple unpacking.
+                good = [
+                    pair[0] for pair in nn_matches
+                    if len(pair) == 2
+                    and pair[0].distance < BEBLIDParameters.NN_MATCH_RATIO * pair[1].distance
+                ]
+                if len(good) <= BEBLIDParameters.MIN_GOOD_MATCHES:
+                    continue
+
+                src_pts = np.float32([query[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
+                dst_pts = np.float32([data_el[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
+
+                M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 3.0)
+                if mask is None:
+                    # No homography could be estimated for this candidate.
+                    continue
+
+                inliers = np.count_nonzero(mask)
+                if inliers >= BEBLIDParameters.MIN_INLIERS:
+                    res.append((data_id, round(inliers/len(good), 3)))
+                    print(data_id)
+                    print(f'candidate n.  {counter}')
+                    # Only the first accepted candidate is wanted.
+                    break
+            except Exception as e:
+                # Best effort: report the candidate and keep scanning. The
+                # f-string avoids a second failure when data_id is not a str.
+                print(f'rescore error evaluating {data_id}')
+                print(e)
+
+        return res
+
+    # NOTE(review): add/remove/save below read self.lf, self.descs, self.ids,
+    # settings.DATASET_LF and settings.DATASET_IDS_LF, none of which are
+    # assigned in this class or visible in the settings hunk of this commit.
+    # They look like leftovers from the in-memory rescorer; confirm whether
+    # anything still calls them.
+    def add(self, lf):
+        self.lf.append(lf)
+
+    def remove(self, idx):
+        self.descs = np.delete(self.descs, idx, axis=0)
+
+    def save(self, is_backup=False):
+        lf_save_file = settings.DATASET_LF
+        ids_file = settings.DATASET_IDS_LF
+        if lf_save_file != "None":
+            if is_backup:
+                lf_save_file += '.bak'
+                ids_file += '.bak'
+
+            LFUtilities.save(lf_save_file, self.lf)
+            np.savetxt(ids_file, self.ids, fmt='%s')
--- a/src/BEBLIDRescorerGPU.py
+++ b/src/BEBLIDRescorerGPU.py
@ -0,0 +1,75 @@
+import cv2
+import numpy as np
+
+import LFUtilities
+import BEBLIDParameters
+import ImageRecognitionSettings as settings
+
+
+class BEBLIDRescorerGPU:
+    """Variant of the BEBLID rescorer that matches descriptors with OpenCV's CUDA brute-force matcher."""
+
+    def __init__(self):
+        #self.lf = LFUtilities.load(settings.DATASET_BEBLID)
+        #self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
+        #self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
+        #self.bf = cv2.DescriptorMatcher_create(cv2.DescriptorMatcher_BRUTEFORCE_HAMMING)
+        # NOTE(review): the CPU matcher-type constant is passed to the CUDA factory; confirm it is the norm type the CUDA API expects.
+        self.bf = cv2.cuda.DescriptorMatcher_createBFMatcher(cv2.DescriptorMatcher_BRUTEFORCE_HAMMING)
+
+    def rescore_by_id(self, query_id, resultset):
+        """Load the query features through LFUtilities.load_img_lf_GPU and rescore ``resultset``."""
+        #query_idx = self.ids.index(query_id)
+        query = LFUtilities.load_img_lf_GPU(settings.DATASET_LF_FOLDER, query_id)
+        return self.rescore_by_img(query, resultset)
+
+    def rescore_by_img(self, query, resultset):
+        """Return a list with the first candidate passing the match and inlier thresholds, or an empty list.
+
+        ``query`` is indexed as ``(keypoints, descriptors)``; ``resultset`` is
+        iterated as ``(data_id, score)`` pairs. Errors on a single candidate
+        are printed and the scan moves on.
+        """
+        # max_inliers is still -1 whenever it is compared, because the loop stops at the first accepted candidate.
+        max_inliers = -1
+        res = []
+        counter = 0
+        for data_id, _ in resultset:
+            try:
+                data_el = LFUtilities.load_img_lf_GPU(settings.DATASET_LF_FOLDER, data_id)
+
+                # 2-NN matching followed by the ratio test on the two nearest distances.
+                nn_matches = self.bf.knnMatch(query[1], data_el[1], 2)
+                good = [m for m, n in nn_matches if m.distance < BEBLIDParameters.NN_MATCH_RATIO * n.distance]
+
+                if len(good) > BEBLIDParameters.MIN_GOOD_MATCHES:
+                    src_pts = np.float32([query[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
+                    dst_pts = np.float32([data_el[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
+
+                    # RANSAC reprojection threshold is 1.0 here.
+                    M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 1.0)
+                    matches_mask = mask.ravel().tolist()
+                    # print(len(good))
+                    inliers = np.count_nonzero(matches_mask)
+                    # print(inliers)
+                    if (inliers >= BEBLIDParameters.MIN_INLIERS and inliers > max_inliers):
+                        max_inliers = inliers
+                        # The score is the fraction of good matches that are RANSAC inliers.
+                        res.append((data_id, round(inliers/len(good), 3)))
+                        print(data_id)
+                        print(f'candidate n.  {counter}')
+                        #to get just the first candidate
+                        break
+            except Exception as e:
+                # Best effort: any failure (missing file, matcher error, mask of None) skips this candidate.
+                print('rescore error evaluating ' + data_id)
+                print(e)
+                pass
+            counter += 1
+
+        if res:
+            res.sort(key=lambda result: result[1], reverse=True)
+        return res
+
+    # NOTE(review): add/remove/save read self.lf, self.descs and self.ids, which this class never assigns; confirm they are still needed.
+    def add(self, lf):
+        self.lf.append(lf)
+
+    def remove(self, idx):
+        self.descs = np.delete(self.descs, idx, axis=0)
+
+    def save(self, is_backup=False):
+        lf_save_file = settings.DATASET_LF
+        ids_file = settings.DATASET_IDS_LF
+        if lf_save_file != "None":
+            if is_backup:
+                lf_save_file += '.bak'
+                ids_file += '.bak'
+
+            LFUtilities.save(lf_save_file, self.lf)
+            np.savetxt(ids_file, self.ids, fmt='%s')
--- a/src/BEBLIDRescorerV2.py
+++ b/src/BEBLIDRescorerV2.py
@ -0,0 +1,76 @@
+import cv2
+import numpy as np
+
+import LFUtilities
+import BEBLIDParameters
+import ImageRecognitionSettings as settings
+
+
+class BEBLIDRescorer:
+    """Variant of the BEBLID rescorer built on cv2.BFMatcher, reading candidate features from the LF folder."""
+
+    def __init__(self):
+        #self.lf = LFUtilities.load(settings.DATASET_BEBLID)
+        #self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
+        # NOTE(review): crossCheck=True is combined with knnMatch(..., 2) in rescore_by_img; OpenCV describes
+        # cross-checking as an alternative to the ratio test and may reject k > 1 — confirm this matcher works as intended.
+        self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
+        #self.bf = cv2.DescriptorMatcher_create(cv2.DescriptorMatcher_BRUTEFORCE_HAMMING)
+
+    def rescore_by_id(self, query_id, resultset):
+        """Load the stored local features of ``query_id`` and rescore ``resultset`` with them."""
+        #query_idx = self.ids.index(query_id)
+        query = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, query_id)
+        return self.rescore_by_img(query, resultset)
+
+    def rescore_by_img(self, query, resultset):
+        """Return a list with the first candidate passing the match and inlier thresholds, or an empty list.
+
+        ``query`` is indexed as ``(keypoints, descriptors)``; ``resultset`` is
+        iterated as ``(data_id, score)`` pairs. Errors on a single candidate
+        are printed and the scan moves on.
+        """
+        # max_inliers is still -1 whenever it is compared, because the loop stops at the first accepted candidate.
+        max_inliers = -1
+        res = []
+        counter = 0
+        # Skip matching entirely when the query has no keypoints.
+        if len(query[0]) > 0:
+            for data_id, _ in resultset:
+                try:
+                    data_el = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, data_id)
+
+                    if len(data_el[1]) > 0:
+                        # 2-NN matching followed by the ratio test on the two nearest distances.
+                        nn_matches = self.bf.knnMatch(query[1], data_el[1], 2)
+                        good = [m for m, n in nn_matches if m.distance < BEBLIDParameters.NN_MATCH_RATIO * n.distance]
+
+                        if len(good) > BEBLIDParameters.MIN_GOOD_MATCHES:
+                            src_pts = np.float32([query[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
+                            dst_pts = np.float32([data_el[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
+
+                            # RANSAC reprojection threshold is 5.0 in this variant.
+                            M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
+                            matches_mask = mask.ravel().tolist()
+                            # print(len(good))
+                            inliers = np.count_nonzero(matches_mask)
+                            # print(inliers)
+                            if (inliers >= BEBLIDParameters.MIN_INLIERS and inliers > max_inliers):
+                                max_inliers = inliers
+                                # The score is the fraction of good matches that are RANSAC inliers.
+                                res.append((data_id, round(inliers/len(good), 3)))
+                                print(data_id)
+                                print(f'candidate n.  {counter}')
+                                #to get just the first candidate
+                                break
+                except Exception as e:
+                    # Best effort: any failure (missing file, matcher error, mask of None) skips this candidate.
+                    print('rescore error evaluating ' + data_id)
+                    print(e)
+                    pass
+                counter += 1
+
+        if res:
+            res.sort(key=lambda result: result[1], reverse=True)
+        return res
+
+    # NOTE(review): add/remove/save read self.lf, self.descs and self.ids, which this class never assigns; confirm they are still needed.
+    def add(self, lf):
+        self.lf.append(lf)
+
+    def remove(self, idx):
+        self.descs = np.delete(self.descs, idx, axis=0)
+
+    def save(self, is_backup=False):
+        lf_save_file = settings.DATASET_LF
+        ids_file = settings.DATASET_IDS_LF
+        if lf_save_file != "None":
+            if is_backup:
+                lf_save_file += '.bak'
+                ids_file += '.bak'
+
+            LFUtilities.save(lf_save_file, self.lf)
+            np.savetxt(ids_file, self.ids, fmt='%s')
--- a/src/BeniCulturaliRescorer.py
+++ b/src/BeniCulturaliRescorer.py
@ -1,66 +0,0 @@
-import cv2
-import numpy as np
-
-import LFUtilities
-import beniculturaliSettings as settings
-
-
-class BeniCulturaliRescorer:
-
-    def __init__(self):
-        self.lf = LFUtilities.load(settings.DATASET_LF)
-        self.ids = np.loadtxt(settings.DATASET_IDS_LF, dtype=str).tolist()
-
-        self.orb = cv2.ORB_create()
-        self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
-
-    def rescore_by_id(self, query_id, resultset):
-        query_idx = self.ids.index(query_id)
-        return self.rescore_by_img(self.lf[query_idx], resultset)
-
-    def rescore_by_img(self, query, resultset):
-        max_inliers = -1
-        res = []
-
-        for data_id, _ in resultset:
-            data_idx = self.ids.index(data_id)
-            try:
-                data_el = self.lf[data_idx]
-                matches = self.bf.match(query[1], data_el[1])
-                good = [m for m in matches if m.distance <= LFUtilities.THRESHOLD]
-                if len(good) > LFUtilities.MIN_GOOD_MATCHES:
-                    src_pts = np.float32([query[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
-                    dst_pts = np.float32([data_el[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
-
-                    M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 1.0)
-                    matches_mask = mask.ravel().tolist()
-                    # print(len(good))
-                    inliers = np.count_nonzero(matches_mask)
-                    # print(inliers)
-                    if (inliers >= LFUtilities.MIN_INLIERS and inliers > max_inliers):
-                        max_inliers = inliers
-                        res.append((data_id, inliers))
-            except:
-                print('rescore error evaluating ' + data_id)
-                pass
-
-        if res:
-            res.sort(key=lambda result: result[1], reverse=True)
-            return res
-
-    def add(self, lf):
-        self.lf.append(lf)
-
-    def remove(self, idx):
-        self.descs = np.delete(self.descs, idx, axis=0)
-
-    def save(self, is_backup=False):
-        lf_save_file = settings.DATASET_LF
-        ids_file = settings.DATASET_IDS_LF
-        if lf_save_file != "None":
-            if is_backup:
-                lf_save_file += '.bak'
-                ids_file += '.bak'
-
-            LFUtilities.save(lf_save_file, self.lf)
-            np.savetxt(ids_file, self.ids, fmt='%s')
--- a/src/BeniCulturaliSearchEngine.py
+++ b/src/BeniCulturaliSearchEngine.py
@ -1,60 +0,0 @@
-import numpy as np
-import beniculturaliSettings as settings
-
-
-class BeniCulturaliSearchEngine:
-
-
-    def __init__(self):
-        #self.dataset = h5py.File(settings.dataset_file, 'r')['rmac'][...]
-
-        #np.save('/media/Data/data/beni_culturali/deploy/dataset', self.dataset)
-        self.descs = np.load(settings.DATASET)
-        #self.desc1 = np.load(settings.DATASET1)
-        #self.desc2 = np.load(settings.DATASET2)
-
-        #self.descs = (self.desc1 + self.desc2) / 2
-        #self.descs /= np.linalg.norm(self.descs, axis=1, keepdims=True)
-        self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
-
-
-    def get_id(self, idx):
-        return self.ids[idx]
-
-
-    def add(self, desc, id):
-        self.ids.append(id)
-        self.descs = np.vstack((self.descs, desc))
-        self.save()
-
-
-    def remove(self, id):
-        idx = self.ids.index(id)
-        del self.ids[idx]
-        self.descs = np.delete(self.descs, idx, axis=0)
-
-
-    def search_by_id(self, query_id, k=10):
-        query_idx = self.ids.index(query_id)
-        return self.search_by_img(self.descs[query_idx], k)
-
-    def search_by_img(self, query, k=10):
-        print('----------query features-------')
-        print(query)
-        dot_product = np.dot(self.descs, query)
-        idx = dot_product.argsort()[::-1][:k]
-        res = []
-        for i in idx:
-            res.append((self.ids[i], round(float(dot_product[i]), 3)))
-        return res
-
-    def save(self, is_backup=False):
-        descs_file = settings.DATASET
-        ids_file = settings.DATASET_IDS
-
-        if is_backup:
-            descs_file += '.bak'
-            ids_file += '.bak'
-
-        np.save(descs_file, self.descs)
-        np.savetxt(ids_file, self.ids, fmt='%s')
--- a/src/BeniCulturaliSearcher.py
+++ b/src/BeniCulturaliSearcher.py
@ -1,68 +0,0 @@
-import cv2
-import numpy as np
-import pickle as pickle
-
-import LFUtilities
-import beniculturaliSettings as settings
-from BeniCulturaliRescorer import BeniCulturaliRescorer
-from BeniCulturaliSearchEngine import BeniCulturaliSearchEngine
-import FeatureExtractor as fe
-#import ORBExtractor as lf
-
-
-class BeniCulturaliSearcher:
-    K_REORDERING = 15
-
-    def __init__(self):
-        # self.dataset = h5py.File(settings.dataset_file, 'r')['rmac'][...]
-
-        # np.save('/media/Data/data/beni_culturali/deploy/dataset', self.dataset)
-        self.search_engine = BeniCulturaliSearchEngine()
-        #self.rescorer = BeniCulturaliRescorer()
-
-    def get_id(self, idx):
-        return self.search_engine.get_id(idx)
-
-    def add(self, img_file, id):
-        self.save(True)
-
-        desc = fe.extract(img_file)
-        #orb = lf.extract(img_file)
-        self.search_engine.add(desc, id)
-        #self.rescorer.add(orb)
-
-        self.save()
-        print('added ' + id)
-
-    def remove(self, id):
-        self.save(True)
-        self.search_engine.remove(id)
-        #self.rescorer.remove(idx)
-        self.save()
-        print('removed ' + id)
-
-    def search_by_id(self, query_id, k=10, rescorer=False):
-        kq = k
-        if rescorer:
-            kq = self.K_REORDERING
-        res = self.search_engine.search_by_id(query_id, kq)
-       # if rescorer:
-       #     res_lf = self.rescorer.rescore_by_id(query_id, res)
-       #     res = res_lf if res_lf else res[:k]
-        return res
-
-    def search_by_img(self, query_img, k=10, rescorer=False):
-        kq = k
-        if rescorer:
-            kq = self.K_REORDERING
-        query_desc = fe.extract(query_img)
-        res = self.search_engine.search_by_img(query_desc, kq)
-        #if rescorer:
-        #    query_lf = lf.extract(query_img)
-        #    res_lf = self.rescorer.rescore_by_img(query_lf, res)
-        #    res = res_lf if res_lf else res[:k]
-        return res
-
-    def save(self, is_backup=False):
-        self.search_engine.save(is_backup)
-        #self.rescorer.save(is_backup)
--- a/src/BulkSearch.py
+++ b/src/BulkSearch.py
@ -0,0 +1,38 @@
+import requests
+
+from pathlib import Path
+import tqdm
+
+import argparse
+import os
+
+
+IMG_REC_SERVICE = 'http://localhost:8290/bcir/'
+
+if __name__ == '__main__':
+    # Posts every file found under ``src`` to the searchByImg endpoint and
+    # writes one ';'-separated row per image to ``dest``: the file name
+    # followed by the flattened values of the JSON response.
+    parser = argparse.ArgumentParser(description='Img Recognition Bulk Analysis')
+    parser.add_argument('src', type=str, help='img src folder path')
+    parser.add_argument('dest', type=str, help='dest file path')
+
+    args = parser.parse_args()
+    src = args.src
+    dest = args.dest
+
+    # '*.*' also matches directories whose name contains a dot; keep files only.
+    paths_list = [p for p in Path(src).rglob('*.*') if p.is_file()]
+
+    print('Analyzing images...')
+    with open(dest, 'w', encoding='UTF8') as f:
+        for path in tqdm.tqdm(paths_list):
+            try:
+                # The context manager closes each image once the request is
+                # done, so a large folder does not exhaust file descriptors.
+                with open(os.path.join(path.parent, path.name), 'rb') as img:
+                    img_file = {'image': ('query', img)}
+                    r = requests.post(IMG_REC_SERVICE + 'searchByImg', files=img_file)
+                res = r.json()
+                tmp = ';'.join([str(i) for x in res for i in x])
+                row = path.name + ";" + tmp
+                f.write(row + '\n')
+            except Exception as e:
+                # Best effort: report the image and continue with the next one.
+                print("cannot process '%s'" % path)
+                print(e)
--- a/src/FAISSSearchEngine.py
+++ b/src/FAISSSearchEngine.py
@ -1,5 +1,5 @@
 import numpy as np
-import beniculturaliSettings as settings
+import ImageRecognitionSettings as settings
 import faiss


--- a/src/FeatureExtractor.py
+++ b/src/FeatureExtractor.py
@ -1,5 +1,5 @@
 import numpy as np
-import beniculturaliSettings as settings
+import ImageRecognitionSettings as settings
 import requests


--- a/src/GroundTruthEvaluation.py
+++ b/src/GroundTruthEvaluation.py
@ -0,0 +1,63 @@
+import requests
+
+from pathlib import Path
+import tqdm
+
+import argparse
+import os
+
+
+IMG_REC_SERVICE = 'http://localhost:8290/bcir/'
+
+groundtruth_file = '/media/ssd2/data/swoads/workdir/data/groundtruth_no_ext.txt'
+
+# precision_at[i] counts the queries whose expected id matched the result at rank i.
+precision_at = [0] * 10
+
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Img Recognition Bulk Analysis')
+    parser.add_argument('src', type=str, help='img src folder path')
+
+    args = parser.parse_args()
+    src = args.src
+
+    # Ground truth file: one "<image file name>,<expected id(s)>" entry per line.
+    groundtruth = {}
+    with open(groundtruth_file, 'r') as f:
+        for line in f:
+            line = line.rstrip()  # removes trailing whitespace and '\n' chars
+
+            if "," not in line: continue  # skips blanks and lines without a ','
+            if line.startswith("#"): continue  # skips comment lines that contain a ','
+
+            k, v = line.split(",", 1)
+            groundtruth[k] = v
+
+    paths = Path(src).rglob('*.*')
+    paths_list = list(paths)
+
+
+    print('Analyzing images...')
+    for path in tqdm.tqdm(paths_list):
+        key = path.name
+        expected_id = groundtruth.get(key)
+        if expected_id is None:
+            # An image without a ground-truth entry is reported and skipped;
+            # a plain groundtruth[key] would abort the whole evaluation.
+            print("no groundtruth for '%s'" % path)
+            continue
+        print(expected_id)
+        try:
+            params = {'rescorer':'true'}
+            # Close the image as soon as the request has been sent.
+            with open(os.path.join(path.parent, path.name), 'rb') as img:
+                img_file = {'image': ('query', img)}
+                r = requests.post(IMG_REC_SERVICE + 'searchByImg', data=params, files=img_file)
+            res = r.json()
+
+            for i, hit in enumerate(res):
+                print(hit[0])
+                # NOTE(review): expected_id is the raw text after the first
+                # ',', so this is a substring test — confirm that is intended.
+                # Ranks beyond len(precision_at) are not counted.
+                if i < len(precision_at) and hit[0] in expected_id:
+                    precision_at[i] = precision_at[i] + 1
+        except Exception as e:
+            # Best effort: report the image and continue with the next one.
+            print("cannot process '%s'" % path)
+            print(e)
+    print(precision_at)
--- a/src/ImageRecognitionService.py
+++ b/src/ImageRecognitionService.py
@ -9,8 +9,7 @@ import urllib

 #from BeniCulturaliSearcher import BeniCulturaliSearcher
 from Searcher import Searcher
-from BeniCulturaliSearchEngine import BeniCulturaliSearchEngine
-import beniculturaliSettings as settings
+import ImageRecognitionSettings as settings
 import uuid
 import os, os.path
 import tornado.wsgi
@ -66,9 +65,9 @@ def get_res(results, query_url=None):
@app.route('/bcir/searchById')
 def search_by_id():
    id = request.args.get('id')
-    rescorer = False
-    if request.args.get("rescorer") == 'true':
-        rescorer = True
+    rescorer = True
+    if request.args.get("rescorer") == 'false':
+        rescorer = False
    results = searcher.search_by_id(id, settings.k, rescorer)
    query_url = None
    if request.args.get("tohtml") is not None:
@ -84,9 +83,9 @@ def search_by_img():

    file = request.files['image']
    img_file = post_to_file(file)
-    rescorer = False
-    if request.form.get("rescorer") == 'true':
-        rescorer = True
+    rescorer = True
+    if request.form.get("rescorer") == 'false':
+        rescorer = False
    #dest_file = uuid.uuid4().hex + ".jpg"
    #dest_path = settings.logs + "/" + dest_file
    #file.save(dest_path)
@ -103,9 +102,9 @@ def search_by_img():
@app.route('/bcir/searchByURL')
 def search_by_url():
    url = request.args.get('url')
-    rescorer = False
-    if request.args.get("rescorer") == 'true':
-        rescorer = True
+    rescorer = True
+    if request.args.get("rescorer") == 'false':
+        rescorer = False
    img_file = url_to_file(url)
   # query = cv2.imdecode(image, cv2.IMREAD_COLOR)
   # dest_file = uuid.uuid4().hex + ".jpg"
@ -155,6 +154,17 @@ def download_file(filename):

    return send_from_directory(settings.img_folder, filename, as_attachment=False)

+@app.route('/bcir/queries/<path:filename>')
+def queries(filename):
+    """Serve a file stored below ``settings.working_folder``.
+
+    ``filename`` must look like ``<folder>/<name>``: the first path segment
+    selects a sub-folder of the working folder and the remainder is the file
+    sent from it. Anything else is answered with 404.
+    """
+    from flask import abort
+
+    folder, sep, name = filename.partition('/')
+    # A path with no folder part used to fail with an IndexError, and a '.'
+    # or '..' folder would point send_from_directory at or above the working
+    # folder, so both are rejected up front.
+    if not sep or not name or folder in ('', '.', '..'):
+        abort(404)
+
+    # send_from_directory refuses any ``name`` that resolves outside the directory.
+    return send_from_directory(settings.working_folder + '/' + folder, name, as_attachment=False)
+
 """
 if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Reading configuration file')
--- a/src/ImageRecognitionSettings.py
+++ b/src/ImageRecognitionSettings.py
@ -2,7 +2,7 @@ import json
 import os

 def load_setting(conf_file):
-    global port, feature_extractor, k, img_folder, logs, working_folder, data_folder, DATASET, DATASET1, DATASET2, DATASET_LF_FOLDER, DATASET_IDS, DATASET_IDS_LF
+    global port, feature_extractor, k, img_folder, logs, working_folder, data_folder, DATASET, DATASET_LF_FOLDER, DATASET_IDS, DB_LF

    with open(conf_file) as settings_file:

@ -20,11 +20,9 @@ def load_setting(conf_file):
            os.mkdir(data_folder)

        DATASET = os.path.join(data_folder, 'dataset.npy')
-        #DATASET1 = os.path.join(data_folder, 'dataset_resized.npy')
-        #DATASET2 = os.path.join(data_folder, 'dataset_bw.npy')
        DATASET_LF_FOLDER = os.path.join(data_folder, 'lf')
        DATASET_IDS = os.path.join(data_folder, 'dataset.ids')
-        #DATASET_IDS_LF = os.path.join(data_folder, 'dataset_lf.ids')
+        DB_LF = os.path.join(data_folder, 'sqlite_lf/lf.db')

        img_folder = settings['img_folder']
        logs = os.path.join(working_folder, settings['log_folder'])
--- a/src/LFBulkExtractionToDB.py
+++ b/src/LFBulkExtractionToDB.py
@ -0,0 +1,40 @@
+from pathlib import Path
+import tqdm
+
+import LFUtilities
+import BEBLIDExtractor as lf
+import argparse
+import os
+from LFDB import LFDB
+
+
+if __name__ == '__main__':
+    # Command line: <image source folder> <LF DB file>.
+    arg_parser = argparse.ArgumentParser(description='LF bulk extraction')
+    arg_parser.add_argument('src', type=str, help='img src folder path')
+    arg_parser.add_argument('dest', type=str, help='LF DB file')
+    cli = arg_parser.parse_args()
+
+    lf_db = LFDB(cli.dest)
+    image_paths = list(Path(cli.src).rglob('*.*'))
+
+    print('Extracting lf...')
+    for path in tqdm.tqdm(image_paths):
+        try:
+            # Extract keypoints/descriptors, pickle them into a blob and
+            # store the blob under the file name without its extension.
+            img_path = os.path.join(path.parent, path.name)
+            kp, des = lf.extract(img_path)
+            blob = LFUtilities.serialize_object(LFUtilities.pickle_keypoints(kp, des))
+            doc_id = os.path.splitext(path.name)[0]
+            lf_db.put(doc_id, blob)
+        except Exception as e:
+            print("cannot process '%s'" % path)
+            print(e)
+
+    # Inserts are only persisted by this single commit at the end.
+    lf_db.commit()
+    lf_db.close()
+    print('lf extracted.')
--- a/src/LFDB.py
+++ b/src/LFDB.py
@ -0,0 +1,55 @@
+import os
+import sqlite3
+from sqlite3 import Error
+from werkzeug.datastructures import FileStorage
+
+
+class LFDB:
+    """Small wrapper around a SQLite connection holding local-feature blobs in table ``lf``.
+
+    NOTE(review): nothing here creates the ``lf`` table; it is assumed to
+    exist already in the database file — confirm how it is provisioned.
+    """
+
+    def __init__(self, db_path):
+        # check_same_thread=False allows the connection to be used from
+        # threads other than the one that created it.
+        self.conn = sqlite3.connect(db_path, check_same_thread=False)
+        self.conn.text_factory = str
+
+    def close(self):
+        """Close the connection if one is open."""
+        if self.conn:
+            self.conn.close()
+
+    def put(self, docId, features):
+        """Insert one ``(docId, features)`` row.
+
+        The row is only persisted once ``commit()`` is called. SQLite errors
+        are printed and swallowed.
+        """
+        try:
+            insert_file = '''INSERT INTO lf(docId, features) VALUES(?, ?)'''
+            self.conn.execute(insert_file, (docId, features,))
+        except Error as e:
+            print(e)
+
+    def commit(self):
+        """Commit pending inserts; SQLite errors are printed and swallowed."""
+        try:
+            if self.conn:
+                self.conn.commit()
+                print("committing...")
+        except Error as e:
+            print(e)
+
+    def get(self, docId):
+        """Return the stored blob for ``docId``, or ``None`` when there is none.
+
+        ``None`` is also returned when the query fails (the error is
+        printed). Previously both cases ended in an UnboundLocalError because
+        ``blob`` was only assigned inside the row loop.
+        """
+        blob = None
+        try:
+            cur = self.conn.cursor()
+            try:
+                sql_fetch_blob_query = """SELECT * from lf where docId = ?"""
+                cur.execute(sql_fetch_blob_query, (docId,))
+                rows = cur.fetchall()
+            finally:
+                cur.close()
+            if rows:
+                # The blob is read from the third column; when several rows
+                # share the docId the last one wins, as before.
+                blob = rows[-1][2]
+        except sqlite3.Error as error:
+            print("[INFO] : Failed to read blob data from sqlite table", error)
+        return blob
+
--- a/src/LFUtilities.py
+++ b/src/LFUtilities.py
@ -3,6 +3,7 @@ import numpy as np
 import pickle as pickle
 import os

+
 def resize(max_side, img):
    if img.shape[1] > img.shape[0]:
        r = max_side / img.shape[1]
@ -27,6 +28,14 @@ def pickle_keypoints(keypoints, descriptors):
    return temp_array


+def serialize_object(obj):
+    """Return ``obj`` pickled into a bytes string."""
+    payload = pickle.dumps(obj)
+    return payload
+
+
+def deserialize_object(serialized_obj):
+    """Rebuild the object encoded in the pickled bytes ``serialized_obj``.
+
+    NOTE(review): unpickling can execute arbitrary code, so only pass bytes
+    that come from a trusted source.
+    """
+    restored = pickle.loads(serialized_obj)
+    return restored
+
+
 def unpickle_keypoints(array):
    keypoints = []
    descriptors = []
@ -74,3 +83,20 @@ def load_img_lf(lf_path, id):
    data = pickle.load(open(dest_path, "rb"))
    kp, desc = unpickle_keypoints(data)
    return (kp, desc)
+
+
+def load_img_lf_GPU(lf_path, id):
+    """Load the local features of image ``id`` and wrap its descriptors in a GpuMat.
+
+    The features are read from ``<lf_path>/<first 3 chars of id>/<id>.dat``.
+
+    Returns:
+        A ``(kp, desc)`` tuple, where ``kp`` comes from ``unpickle_keypoints``
+        and ``desc`` is a ``cv2.cuda_GpuMat``.
+    """
+    dest_path = os.path.join(lf_path, id[0:3], id + '.dat')
+    # NOTE(review): unpickling can execute arbitrary code; only read trusted files.
+    with open(dest_path, "rb") as lf_file:  # closes the handle instead of leaking it
+        data = pickle.load(lf_file)
+    kp, desc = unpickle_keypoints(data)
+
+    if len(desc) > 0:
+        desc = cv2.cuda_GpuMat(desc)
+    else:
+        # Placeholder so the caller always receives a GpuMat; it is now built
+        # only when needed rather than on every call.
+        desc = cv2.cuda_GpuMat(np.zeros((1500,), dtype=int))
+
+    return (kp, desc)
+
--- a/src/Searcher.py
+++ b/src/Searcher.py
@ -3,8 +3,9 @@ import numpy as np
 import pickle as pickle

 import LFUtilities
-import beniculturaliSettings as settings
-from BEBLIDRescorer import BEBLIDRescorer
+import ImageRecognitionSettings as settings
+from BEBLIDRescorerDB import BEBLIDRescorerDB
+#from BEBLIDRescorerGPU import BEBLIDRescorerGPU
 from FAISSSearchEngine import FAISSSearchEngine
 import FeatureExtractor as fe
 import BEBLIDExtractor as lf
@ -18,7 +19,7 @@ class Searcher:

        # np.save('/media/Data/data/beni_culturali/deploy/dataset', self.dataset)
        self.search_engine = FAISSSearchEngine()
-        self.rescorer = BEBLIDRescorer()
+        self.rescorer = BEBLIDRescorerDB()

    def get_id(self, idx):
        return self.search_engine.get_id(idx)
--- a/src/TestClient.py
+++ b/src/TestClient.py
@ -9,7 +9,7 @@ import urllib


 from BeniCulturaliSearchEngine import BeniCulturaliSearchEngine
-import beniculturaliSettings as settings
+import ImageRecognitionSettings as settings
 import uuid
 import requests

--- a/src/extract_lf.sh
+++ b/src/extract_lf.sh
@ -0,0 +1,14 @@
+#!/bin/bash
+# Run local-feature extraction on the path given as $1 (relative to
+# IMG_FOLDER), passing DATA_FOLDER as the second argument.
+IMG_FOLDER=/workspace/workdir
+DATA_FOLDER=/workspace/workdir/data/lf
+
+# -p: create missing parents and do not fail when the folder already exists.
+mkdir -p "$DATA_FOLDER"
+
+python3 /workspace/src/LFBulkExtraction4File.py "$IMG_FOLDER/$1" "$DATA_FOLDER"
+
+chmod 777 "$DATA_FOLDER"/*
+
+echo "Done"
--- a/src/extract_lf_db.sh
+++ b/src/extract_lf_db.sh
@ -0,0 +1,12 @@
+#!/bin/bash
+# Run DB feature extraction on $1 (path relative to IMG_FOLDER) with DB_PATH.
+IMG_FOLDER=/workspace/workdir
+DB_PATH=/workspace/workdir/data/sqlite_lf/lf.db
+
+mkdir -p "$(dirname "$DB_PATH")"  # SQLite cannot create a file in a missing folder
+python3 /workspace/src/LFBulkExtractionToDB.py "$IMG_FOLDER/$1" "$DB_PATH"
+
+# DB_PATH is a file, not a folder: the old "$DB_PATH/*" glob matched nothing.
+chmod 777 "$DB_PATH"
+
+echo "Done"
--- a/src/templates/index_with_randoms.html
+++ b/src/templates/index_with_randoms.html
@ -31,7 +31,7 @@
 					    <td valign="top">
 						<input type="hidden" value="" name="" id="objId">
 						<input type="hidden" value="true" name="tohtml">
-						<input type="text" value="true" name="rescorer">
+						<input type="hidden" value="true" name="rescorer">

 						<input style="display: none;" id="urlToUpload" name="url" type="text" size="49" onclick="" onchange="document.getElementById('queryImage').value=''">
 					    <input  id="imageToUpload" name="image" type="file" size="38" onclick="" onchange="document.getElementById('queryImage').value=''">
--- a/src/templates/search.html
+++ b/src/templates/search.html
@ -36,6 +36,8 @@
 									<td valign="top">
 										<input type="hidden" value="" name="" id="objId">
 										<input type="hidden" value="true" name="tohtml">
+										<input type="hidden" value="true" name="rescorer">
+

 										<input style="display: none;" id="urlToUpload" name="url" type="text" size="49" onclick="" onchange="document.getElementById('queryImage').value=''">
 										<input  id="imageToUpload" name="image" type="file" size="38" onclick="" onchange="document.getElementById('queryImage').value=''">