From aeafcfe21981df2e28553091cd3ba939a64ce474 Mon Sep 17 00:00:00 2001 From: "paolo.bolettieri" Date: Tue, 19 Jul 2022 18:39:50 +0200 Subject: [PATCH] added FAISS Searcher --- src/BEBLIDRescorer.py | 26 +++-- src/FAISSSearchEngine.py | 88 ++++++++++---- src/FAISSSearchEngine2.py | 120 +++++++++++++++++++ src/GFUtilities.py | 36 ++++++ src/ImageRecognitionService.py | 125 ++++++++++++++------ src/ImageRecognitionSettings.py | 10 +- src/LFUtilities.py | 199 ++++++++------------------------ src/Searcher.py | 41 ++++--- src/SearcherParameters.py | 1 - src/TestClient.py | 78 ++++++++----- 10 files changed, 457 insertions(+), 267 deletions(-) create mode 100644 src/FAISSSearchEngine2.py create mode 100644 src/GFUtilities.py diff --git a/src/BEBLIDRescorer.py b/src/BEBLIDRescorer.py index cd105d0..538c9a2 100644 --- a/src/BEBLIDRescorer.py +++ b/src/BEBLIDRescorer.py @@ -5,6 +5,7 @@ import LFUtilities import BEBLIDParameters import ImageRecognitionSettings as settings from line_profiler_pycharm import profile +import logging class BEBLIDRescorer: @@ -16,7 +17,8 @@ class BEBLIDRescorer: def rescore_by_id(self, query_id, resultset): #query_idx = self.ids.index(query_id) - query = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, query_id) + query = LFUtilities.unpickle_img_lf(settings.DATASET_LF_FOLDER, query_id) + return self.rescore_by_img(query, resultset) @profile @@ -47,13 +49,13 @@ class BEBLIDRescorer: if (inliers >= BEBLIDParameters.MIN_INLIERS and inliers > max_inliers): max_inliers = inliers res.append((data_id, round(inliers/len(good), 3))) - print(data_id) - print(f'candidate n. {counter}') + logging.info(data_id) + logging.info(f'candidate n. {counter}') #to get just the first candidate break except Exception as e: - print('rescore error evaluating ' + data_id) - print(e) + logging.error('rescore error evaluating ' + data_id) + logging.error(e) pass counter += 1 @@ -61,16 +63,18 @@ class BEBLIDRescorer: res.sort(key=lambda result: result[1], reverse=True) return res - def add(self, kp, des, id): + @staticmethod + def add(doc_id, kp, des): # LFUtilities.save_img_lf(dest, filename, kp, des) # LFUtilities.savez_img_lf(dest, filename, kp, des) - LFUtilities.pickle_img_lf(settings.DATASET_LF_FOLDER, id, kp, des) + LFUtilities.pickle_img_lf(settings.DATASET_LF_FOLDER, doc_id, kp, des) - def remove(self, idx): - self.descs = np.delete(self.descs, idx, axis=0) + @staticmethod + def remove(doc_id): + LFUtilities.delete_img_lf(settings.DATASET_LF_FOLDER, doc_id) def save(self, is_backup=False): - lf_save_file = settings.DATASET_LF + """lf_save_file = settings.DATASET_LF ids_file = settings.DATASET_IDS_LF if lf_save_file != "None": if is_backup: @@ -78,4 +82,4 @@ class BEBLIDRescorer: ids_file += '.bak' LFUtilities.save(lf_save_file, self.lf) - np.savetxt(ids_file, self.ids, fmt='%s') + np.savetxt(ids_file, self.ids, fmt='%s')""" diff --git a/src/FAISSSearchEngine.py b/src/FAISSSearchEngine.py index 71c3048..3a95581 100644 --- a/src/FAISSSearchEngine.py +++ b/src/FAISSSearchEngine.py @@ -1,11 +1,16 @@ import numpy as np import ImageRecognitionSettings as settings import faiss +from shutil import copyfile +import logging +import threading +import glob +import fileinput +import os class FAISSSearchEngine: - def __init__(self): #self.dataset = h5py.File(settings.dataset_file, 'r')['rmac'][...] 
@@ -16,32 +21,58 @@ class FAISSSearchEngine:
         #self.descs = (self.desc1 + self.desc2) / 2
         #self.descs /= np.linalg.norm(self.descs, axis=1, keepdims=True)
-        self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
+        #self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
+        self.ids = np.loadtxt(fileinput.input(sorted(glob.glob(settings.DATASET_GF_FOLDER + '/*.ids'))), dtype=str).tolist()
+        self.init_faiss_index()
+        self.lock = threading.Lock()
+
+    def init_faiss_index(self):
         # create an index with inner product similarity
         dim = 2048  # dimensionality of the features
         metric = faiss.METRIC_INNER_PRODUCT
         self.index = faiss.index_factory(dim, 'Flat', metric)
-        # add the vectors to the index
-        self.index.add(self.descs) # my_database is a numpy array of shape N x dim, where N is the number of vectors to index
-
+        for desc_file in sorted(glob.glob(settings.DATASET_GF_FOLDER + '/*.npy')):
+            # add the vectors to the index
+            tmp = np.load(desc_file)
+            self.index.add(tmp)  # tmp is a numpy array of shape N x dim, where N is the number of vectors to index

     def get_id(self, idx):
         return self.ids[idx]

+    def get_indexed_ids(self):
+        return self.ids

-    def add(self, desc, id):
-        self.ids.append(id)
-        self.descs = np.vstack((self.descs, desc))
-        self.save()
-
-
-    def remove(self, id):
-        idx = self.ids.index(id)
-        del self.ids[idx]
-        self.descs = np.delete(self.descs, idx, axis=0)
+    def add(self, desc, doc_id):
+        try:
+            self.lock.acquire()
+            # self.ids.append(doc_id)
+            # self.descs = np.vstack((self.descs, desc))
+            np.savetxt(os.path.join(settings.DATASET_GF_FOLDER, doc_id + '.ids'), [doc_id], fmt='%s')
+            np.save(os.path.join(settings.DATASET_GF_FOLDER, doc_id + '.npy'), desc)
+        finally:
+            self.lock.release()

+    def remove(self, doc_id):
+        try:
+            self.lock.acquire()
+            id_filename = os.path.join(settings.DATASET_GF_FOLDER, doc_id + '.ids')
+            numpy_filename = os.path.join(settings.DATASET_GF_FOLDER, doc_id + '.npy')
+            if os.path.exists(id_filename):
+                os.remove(id_filename)
+            if os.path.exists(numpy_filename):
+                os.remove(numpy_filename)
+            else:
+                idx = self.ids.index(doc_id)
+                del self.ids[idx]
+                self.descs = np.delete(self.descs, idx, axis=0)
+                descs_file = settings.DATASET
+                ids_file = settings.DATASET_IDS
+                np.save(descs_file, self.descs)
+                np.savetxt(ids_file, self.ids, fmt='%s')
+        finally:
+            self.lock.release()

     def search_by_id(self, query_id, k=10):
         query_idx = self.ids.index(query_id)
@@ -49,9 +80,9 @@ class FAISSSearchEngine:

     def search_by_img(self, query, k=10):
         print('----------query features-------')
-        print(query)
+        logging.info(query)
         queries = np.reshape(query, (-1, 2048))
-        print(queries)
+        logging.debug(queries)
         scores, indexes = self.index.search(queries, k)
         #dot_product = np.dot(self.descs, query)
         #idx = dot_product.argsort()[::-1][:k]
@@ -63,10 +94,21 @@ class FAISSSearchEngine:
     def save(self, is_backup=False):
         descs_file = settings.DATASET
         ids_file = settings.DATASET_IDS
+        try:
+            self.lock.acquire()
+            """if is_backup:
+                descs_file_backup = descs_file + '.bak'
+                ids_file_backup = ids_file + '.bak'
+                copyfile(descs_file, descs_file_backup)
+                copyfile(ids_file, ids_file_backup)
+                logging.info('Backup features created')
-        if is_backup:
-            descs_file += '.bak'
-            ids_file += '.bak'
-
-        np.save(descs_file, self.descs)
-        np.savetxt(ids_file, self.ids, fmt='%s')
+            np.save(descs_file, self.descs)
+            np.savetxt(ids_file, self.ids, fmt='%s')
+            logging.info('Storing features done')"""
+            self.init_faiss_index()
+        except Exception as e:
+            logging.error('Error, unable to store features')
+            logging.error(e)
+        finally:
self.lock.release() diff --git a/src/FAISSSearchEngine2.py b/src/FAISSSearchEngine2.py new file mode 100644 index 0000000..15aacfc --- /dev/null +++ b/src/FAISSSearchEngine2.py @@ -0,0 +1,120 @@ +import numpy as np +import ImageRecognitionSettings as settings +import faiss +from shutil import copyfile +import logging +import threading +import glob +import fileinput +import os +import GFUtilities + +class FAISSSearchEngine: + + def __init__(self): + #self.dataset = h5py.File(settings.dataset_file, 'r')['rmac'][...] + + #np.save('/media/Data/data/beni_culturali/deploy/dataset', self.dataset) + #self.descs = np.load(settings.DATASET) + #self.desc1 = np.load(settings.DATASET1) + #self.desc2 = np.load(settings.DATASET2) + + #self.descs = (self.desc1 + self.desc2) / 2 + #self.descs /= np.linalg.norm(self.descs, axis=1, keepdims=True) + #self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist() + #self.ids = np.loadtxt(fileinput.input(sorted(glob.glob(settings.DATASET_GF_FOLDER + '/*.ids')))) + self.lock = threading.Lock() + self.init_faiss_index() + + + def init_faiss_index(self): + # create an index with inner product similarity + dim = 2048 # dimensionality of the features + metric = faiss.METRIC_INNER_PRODUCT + self.index = faiss.index_factory(dim, 'Flat', metric) + self.descs = np.load(settings.DATASET) + self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist() + self.index.add(self.descs) + + for desc_file in sorted(glob.glob(settings.DATASET_GF_FOLDER + '/**/*.dat', recursive=True)): + # add the vectors to the index + img_id = os.path.splitext(os.path.basename(desc_file))[0] + img_desc = GFUtilities.unpickle_img_gf(settings.DATASET_GF_FOLDER, img_id) + self.ids.append(img_id) + self.index.add(img_desc) # my_database is a numpy array of shape N x dim, where N is the number of vectors to index + + + def get_id(self, idx): + return self.ids[idx] + + def get_indexed_ids(self): + return self.ids + + def exists(self, doc_id): + return doc_id in self.ids + + def add(self, desc, doc_id): + try: + self.lock.acquire() + # self.ids.append(doc_id) + # self.descs = np.vstack((self.descs, desc)) + GFUtilities.pickle_img_gf(settings.DATASET_GF_FOLDER, doc_id, desc) + finally: + self.lock.release() + + def remove(self, doc_id): + try: + self.lock.acquire() + if not GFUtilities.delete_img_gf( settings.DATASET_GF_FOLDER, doc_id): + try: + idx = self.ids.index(doc_id) + del self.ids[idx] + self.descs = np.delete(self.descs, idx, axis=0) + descs_file = settings.DATASET + ids_file = settings.DATASET_IDS + np.save(descs_file, self.descs) + np.savetxt(ids_file, self.ids, fmt='%s') + except ValueError as e: + logging.error('Error, unable to retrieve and delete ' + doc_id) + logging.error(e) + finally: + self.lock.release() + + def search_by_id(self, query_id, k=10): + query_idx = self.ids.index(query_id) + return self.search_by_img(self.descs[query_idx], k) + + def search_by_img(self, query, k=10): + print('----------query features-------') + logging.info(query) + queries = np.reshape(query, (-1, 2048)) + logging.debug(queries) + scores, indexes = self.index.search(queries, k) + #dot_product = np.dot(self.descs, query) + #idx = dot_product.argsort()[::-1][:k] + res = [] + for (i,j) in zip(indexes[0], scores[0]): + res.append((self.ids[i], round(float(j), 3))) + return res + + def save(self, is_backup=False): + descs_file = settings.DATASET + ids_file = settings.DATASET_IDS + try: + self.lock.acquire() + """if is_backup: + descs_file_backup = descs_file +'.bak' + ids_file_backup = ids_file + '.bak' + 
copyfile(descs_file, descs_file_backup) + copyfile(ids_file, ids_file_backup) + logging.info('Backup features created') + + np.save(descs_file, self.descs) + np.savetxt(ids_file, self.ids, fmt='%s') + logging.info('Storing features done')""" + self.init_faiss_index() + except Exception as e: + logging.error('Error, unable to storing features') + logging.error(e) + finally: + self.lock.release() diff --git a/src/GFUtilities.py b/src/GFUtilities.py new file mode 100644 index 0000000..32ff226 --- /dev/null +++ b/src/GFUtilities.py @@ -0,0 +1,36 @@ +import cv2 +import numpy as np +import pickle as pickle +import os + + +def pickle_img_gf(dest, id, descriptors): + dest_folder_name = id[0:3] + filename = id + '.dat' + dest_folder_path = os.path.join(dest, dest_folder_name) + if (not os.path.exists(dest_folder_path)): + os.mkdir(dest_folder_path) + dest_path = os.path.join(dest_folder_path, filename) + pickle.dump(descriptors, open(dest_path, 'wb')) + + +def unpickle_img_gf(dest, id): + dest_folder_name = id[0:3] + filename = id + '.dat' + dest_folder_path = os.path.join(dest, dest_folder_name) + dest_path = os.path.join(dest_folder_path, filename) + des = pickle.load((open(dest_path, "rb"))) + return des + + +def delete_img_gf(dest, id): + dest_folder_name = id[0:3] + filename = id + '.dat' + dest_folder_path = os.path.join(dest, dest_folder_name) + dest_path = os.path.join(dest_folder_path, filename) + if os.path.exists(dest_path): + os.remove(dest_path) + if not os.listdir(dest_folder_path): + os.rmdir(dest_folder_path) + return True + return False diff --git a/src/ImageRecognitionService.py b/src/ImageRecognitionService.py index 4b3e89f..a658d25 100644 --- a/src/ImageRecognitionService.py +++ b/src/ImageRecognitionService.py @@ -15,6 +15,8 @@ import os, os.path import tornado.wsgi import tornado.httpserver import argparse +import logging +import base64 app = Flask(__name__) @@ -65,13 +67,9 @@ def get_res(results, query_url=None): @app.route('/bcir/searchById') def search_by_id(): id = request.args.get('id') - try: - searchDeepLevel = int(request.args.get("searchDeepLevel")) - except Exception as e: - print(e) - print('Setting default deep level 1') - search_deep_level = 1 - results = searcher.search_by_id(id, settings.k, searchDeepLevel) + k, threshold, search_deep_level = get_parameters(request.args.get("k"), request.args.get("threshold"), + request.args.get("searchDeepLevel")) + results = searcher.search_by_id(id, k, threshold, search_deep_level) query_url = None if request.args.get("tohtml") is not None: query_url = id + ".jpg" @@ -86,12 +84,8 @@ def search_by_img(): file = request.files['image'] img_file = post_to_file(file) - try: - search_deep_level = int(request.form.get("searchDeepLevel")) - except Exception as e: - print(e) - print('Setting default deep level 1') - search_deep_level = 1 + + k, threshold, search_deep_level = get_parameters(request.form.get("k"), request.form.get("threshold"), request.form.get("searchDeepLevel")) #dest_file = uuid.uuid4().hex + ".jpg" #dest_path = settings.logs + "/" + dest_file @@ -99,24 +93,48 @@ def search_by_img(): #files = {'image': (dest_file, open(dest_path, 'rb'))} #r = requests.post(settings.rmac_service, files=files) #results = search_engine.search_by_img(np.array(r.json()), settings.k) - results = searcher.search_by_img(img_file, settings.k, search_deep_level) + results = searcher.search_by_img(img_file, k, threshold, search_deep_level) query_url = None if request.form.get("tohtml") is not None: query_url = "" return get_res(results, 
query_url) +@app.route('/bcir/searchByImgB64', methods=['POST']) +def search_by_img_base64(): + image = request.form.get('image') + if image: + img_file = base64_to_file(image) + else: + flash('No img sent') + return redirect(request.url) + + k, threshold, search_deep_level = get_parameters(request.form.get("k"), request.form.get("threshold"), request.form.get("searchDeepLevel")) + + results = searcher.search_by_img(img_file, k, threshold, search_deep_level) + query_url = None + if request.form.get("tohtml") is not None: + query_url = "" + return get_res(results, query_url) + + +def base64_to_file(image_base64): + ext = ".png" + dest_file = uuid.uuid4().hex + ext + dest_path = settings.logs + "/" + dest_file + with open(dest_path, "wb") as image_file: + byte_content = base64.b64decode(image_base64) + image_file.write(byte_content) + return dest_path + @app.route('/bcir/searchByURL') def search_by_url(): url = request.args.get('url') - try: - search_deep_level = int(request.args.get("searchDeepLevel")) - except Exception as e: - print(e) - print('Setting default deep level 1') - search_deep_level = 1 img_file = url_to_file(url) + + k, threshold, search_deep_level = get_parameters(request.args.get("k"), request.args.get("threshold"), + request.args.get("searchDeepLevel")) # query = cv2.imdecode(image, cv2.IMREAD_COLOR) # dest_file = uuid.uuid4().hex + ".jpg" # dest_path = settings.logs + "/" + dest_file @@ -124,12 +142,49 @@ def search_by_url(): # files = {'image': open(dest_path, 'rb')} # r = requests.post(settings.rmac_service, files=files) # results = search_engine.search_by_img(np.array(r.json()), settings.k) - results = searcher.search_by_img(img_file, settings.k, search_deep_level) + results = searcher.search_by_img(img_file, k, threshold, search_deep_level) query_url = None if request.args.get("tohtml") is not None: query_url = url return get_res(results, query_url) + +def get_parameters(k, threshold, search_deep_level): + try: + threshold = float(threshold) + except Exception as e: + logging.error(e) + threshold = settings.SEARCH_THRESHOLD + logging.error('Setting default threshold value to ' + str(threshold)) + try: + k = int(k) + except Exception as e: + logging.error(e) + k = settings.k + logging.error('Setting default k value to ' + str(k)) + + try: + search_deep_level = int(search_deep_level) + except Exception as e: + logging.error(e) + search_deep_level = settings.SEARCH_DEEP_LEVEL + logging.error('Setting default deep level to ' + str(search_deep_level)) + + return k, threshold, search_deep_level + + +@app.route('/bcir/getIds') +def get_indexed_ids(): + json_ids = json.dumps(searcher.get_indexed_ids()) + return json_ids + + +@app.route('/bcir/exists') +def exists(): + doc_id = request.args.get('id') + return json.dumps(searcher.exists(doc_id)) + + @app.route('/bcir/addImg', methods=['POST']) def add_img(): if 'image' not in request.files: @@ -137,42 +192,46 @@ def add_img(): return redirect(request.url) try: file = request.files['image'] - id = request.files['image'].filename - id, _ = os.path.splitext(id) + doc_id = request.files['image'].filename + doc_id, _ = os.path.splitext(doc_id) img_file = post_to_file(file) - searcher.add(img_file, id) + searcher.add(img_file, doc_id) json_res = json.dumps("done") return json_res - except: + except Exception as e: + logging.error('Unable to add ' + doc_id + 'to the index') + logging.error(e) abort(500) @app.route('/bcir/rmImg') def remove_img(): try: - id = request.args.get('id') - searcher.remove(id) + doc_id = 
request.args.get('id') + searcher.remove(doc_id) json_res = json.dumps("done") return json_res - except: + except Exception as e: + logging.error('Unable to remove ' + doc_id + 'to the index') + logging.error(e) abort(500) @app.route('/bcir/') def download_file(filename): - print(filename) + logging.debug(filename) values = filename.split('/') - print(values) + logging.debug(values) return send_from_directory(settings.img_folder, filename, as_attachment=False) @app.route('/bcir/queries/') def queries(filename): - print(filename) + logging.debug(filename) values = filename.split('/') folder = values[0] name = values[1] - print(folder) - print(name) + logging.debug(folder) + logging.debug(name) return send_from_directory(settings.working_folder + '/' + folder, name, as_attachment=False) diff --git a/src/ImageRecognitionSettings.py b/src/ImageRecognitionSettings.py index c9e2cee..e019621 100644 --- a/src/ImageRecognitionSettings.py +++ b/src/ImageRecognitionSettings.py @@ -2,7 +2,7 @@ import json import os def load_setting(conf_file): - global port, feature_extractor, k, img_folder, logs, working_folder, data_folder, DATASET, DATASET_LF_FOLDER, DATASET_IDS, DB_LF + global port, feature_extractor, k, img_folder, logs, working_folder, data_folder, DATASET_LF_FOLDER, DATASET_GF_FOLDER, DATASET, DATASET_IDS, DB_LF, SEARCH_THRESHOLD, SEARCH_DEEP_LEVEL with open(conf_file) as settings_file: @@ -19,10 +19,14 @@ def load_setting(conf_file): if not os.path.isdir(data_folder): os.mkdir(data_folder) - DATASET = os.path.join(data_folder, 'dataset.npy') DATASET_LF_FOLDER = os.path.join(data_folder, 'lf') - DATASET_IDS = os.path.join(data_folder, 'dataset.ids') + DATASET_GF_FOLDER = os.path.join(data_folder, 'gf') + DATASET = os.path.join(DATASET_GF_FOLDER, 'dataset.npy') + DATASET_IDS = os.path.join(DATASET_GF_FOLDER, 'dataset.ids') DB_LF = os.path.join(data_folder, 'sqlite_lf/lf.db') + SEARCH_THRESHOLD = settings['search_th'] + SEARCH_DEEP_LEVEL = settings['search_dl'] + img_folder = settings['img_folder'] logs = os.path.join(working_folder, settings['log_folder']) diff --git a/src/LFUtilities.py b/src/LFUtilities.py index 5612505..19f40e4 100644 --- a/src/LFUtilities.py +++ b/src/LFUtilities.py @@ -1,151 +1,48 @@ -import cv2 -import numpy as np -import pickle as pickle -import os -from line_profiler_pycharm import profile - -def resize(max_side, img): - if img.shape[1] > img.shape[0]: - r = max_side / img.shape[1] - dim = (max_side, int(img.shape[0] * r)) - else: - r = max_side / img.shape[0] - dim = (int(img.shape[1] * r), max_side) - - # perform the actual resizing of the image and show it - resized = cv2.resize(img, dim, interpolation=cv2.INTER_AREA) - return resized - - -def pickle_keypoints(keypoints, descriptors): - i = 0 - temp_array = [] - for point in keypoints: - temp = (point.pt, point.size, point.angle, point.response, point.octave, - point.class_id, descriptors[i]) - i += 1 - temp_array.append(temp) - return temp_array - - -def serialize_object(obj): - return pickle.dumps(obj) - - -def deserialize_object(serialized_obj): - return pickle.loads(serialized_obj) - - -def serializeV1(keypoints, descriptors): - temp_array = [] - for point in keypoints: - kp = [point.pt, point.size, point.angle, point.response, point.octave, point.class_id] - temp_array.append(kp) - return temp_array, descriptors - - -def serialize(keypoints, descriptors): - pts = np.float32([keypoints[i].pt for i in range(0, len(keypoints))]) - return pts, descriptors - -def deserialize(ser_kp, ser_des): - keypoints = [] - 
#data_list = array.tolist() - for point in ser_kp: - temp_feature = cv2.KeyPoint(x=point[0][0],y=point[0][1], size=point[1], angle=point[2], response=point[3], octave=point[4], class_id=point[5]) - keypoints.append(temp_feature) - return keypoints, ser_des - - -def deserializev1(ser_kp, ser_des): - keypoints = [] - #data_list = array.tolist() - for point in ser_kp: - temp_feature = cv2.KeyPoint(x=point[0][0],y=point[0][1], size=point[1], angle=point[2], response=point[3], octave=point[4], class_id=point[5]) - keypoints.append(temp_feature) - return keypoints, ser_des - -def pickle_img_lf(dest, id, keypoints, descriptors): - dest_folder_name = id[0:3] - filename = id + '.dat' - dest_folder_path = os.path.join(dest, dest_folder_name) - if (not os.path.exists(dest_folder_path)): - os.mkdir(dest_folder_path) - dest_path = os.path.join(dest_folder_path, filename) - kps, des = serialize(keypoints, descriptors) - pickle.dump([kps, des], open(dest_path, 'wb')) - -def delete_img_lf(dest, id): - dest_folder_name = id[0:3] - filename = id + '.dat' - dest_folder_path = os.path.join(dest, dest_folder_name) - dest_path = os.path.join(dest_folder_path, filename) - if os.path.exists(dest_path): - os.remove(dest_path) - -@profile -def unpickle_img_lf(lf_path, id): - dest_folder_name = id[0:3] - filename = id + '.dat' - dest_folder_path = os.path.join(lf_path, dest_folder_name) - dest_path = os.path.join(dest_folder_path, filename) - kps, des = pickle.load((open(dest_path, "rb"))) - return kps, des - - -@profile -def loadz_img_lf(lf_path, id): - dest_folder_name = id[0:3] - filename = id + '.dat.npz' - dest_folder_path = os.path.join(lf_path, dest_folder_name) - dest_path = os.path.join(dest_folder_path, filename) - data = np.load(dest_path, allow_pickle=False) - kps = data.f.kps - des = data.f.des - #kps = data['kps'] - #des = data['des'] - #kp, desc = deserialize(data['kps'], data['des']) - return kps, des - - -def savez_img_lf(dest, id, keypoints, descriptors): - dest_folder_name = id[0:3] - filename = id + '.dat' - dest_folder_path = os.path.join(dest, dest_folder_name) - if (not os.path.exists(dest_folder_path)): - os.mkdir(dest_folder_path) - dest_path = os.path.join(dest_folder_path, filename) - kps, des = serialize(keypoints, descriptors) - #np.savez(dest_path, data) - np.savez(dest_path, kps=kps, des=des) - - -@profile -def loadz_img_lf(lf_path, id): - dest_folder_name = id[0:3] - filename = id + '.dat.npz' - dest_folder_path = os.path.join(lf_path, dest_folder_name) - dest_path = os.path.join(dest_folder_path, filename) - data = np.load(dest_path, allow_pickle=False) - kps = data.f.kps - des = data.f.des - #kps = data['kps'] - #des = data['des'] - #kp, desc = deserialize(data['kps'], data['des']) - return kps, des - - -def unpickle_keypoints(array): - keypoints = [] - descriptors = [] - data_list = array.tolist() - for point in array: - temp_feature = cv2.KeyPoint(x=point[0][0],y=point[0][1], size=point[1], angle=point[2], response=point[3], octave=point[4], class_id=point[5]) - temp_descriptor = point[6] - keypoints.append(temp_feature) - descriptors.append(temp_descriptor) - return keypoints, np.array(descriptors) - - - - +import cv2 +import numpy as np +import pickle as pickle +import os + + +def resize(max_side, img): + if img.shape[1] > img.shape[0]: + r = max_side / img.shape[1] + dim = (max_side, int(img.shape[0] * r)) + else: + r = max_side / img.shape[0] + dim = (int(img.shape[1] * r), max_side) + + # perform the actual resizing of the image and show it + resized = cv2.resize(img, dim, 
interpolation=cv2.INTER_AREA) + return resized + + +def pickle_img_lf(dest, id, keypoints, descriptors): + dest_folder_name = id[0:3] + filename = id + '.dat' + dest_folder_path = os.path.join(dest, dest_folder_name) + if (not os.path.exists(dest_folder_path)): + os.mkdir(dest_folder_path) + dest_path = os.path.join(dest_folder_path, filename) + kps = np.float32([keypoints[i].pt for i in range(0, len(keypoints))]) + pickle.dump([kps, descriptors], open(dest_path, 'wb')) + + +def unpickle_img_lf(lf_path, id): + dest_folder_name = id[0:3] + filename = id + '.dat' + dest_folder_path = os.path.join(lf_path, dest_folder_name) + dest_path = os.path.join(dest_folder_path, filename) + kps, des = pickle.load((open(dest_path, "rb"))) + return kps, des + + +def delete_img_lf(dest, id): + dest_folder_name = id[0:3] + filename = id + '.dat' + dest_folder_path = os.path.join(dest, dest_folder_name) + dest_path = os.path.join(dest_folder_path, filename) + if os.path.exists(dest_path): + os.remove(dest_path) + if not os.listdir(dest_folder_path): + os.rmdir(dest_folder_path) diff --git a/src/Searcher.py b/src/Searcher.py index 2260b1a..dd053e8 100644 --- a/src/Searcher.py +++ b/src/Searcher.py @@ -9,11 +9,11 @@ import ImageRecognitionSettings as settings from BEBLIDRescorer import BEBLIDRescorer import SearcherParameters -from FAISSSearchEngine import FAISSSearchEngine +from FAISSSearchEngine2 import FAISSSearchEngine import FeatureExtractor as fe import BEBLIDExtractorQ as lfQ import BEBLIDExtractorD as lfD -from line_profiler_pycharm import profile +import logging class Searcher: @@ -25,31 +25,34 @@ class Searcher: self.search_engine = FAISSSearchEngine() self.rescorer = BEBLIDRescorer() + def get_indexed_ids(self): + return self.search_engine.get_indexed_ids() + def get_id(self, idx): return self.search_engine.get_id(idx) - def add(self, img_file, id): - self.save(True) + def exists(self, doc_id): + return self.search_engine.exists(doc_id) + def add(self, img_file, doc_id): desc = fe.extract(img_file) - self.search_engine.add(desc, id) + self.search_engine.add(desc, doc_id) - lf = lfD.extract(img_file) - self.rescorer.add(lf, id) + kp, des = lfD.extract(img_file) + self.rescorer.add(doc_id, kp, des) #orb = lf.extract(img_file) - self.save() - print('added ' + id) - - def remove(self, id): self.save(True) - self.search_engine.remove(id) - #self.rescorer.remove(idx) - self.save() - print('removed ' + id) + logging.info('added ' + doc_id) - def search_by_id(self, query_id, k=10, search_deep_level=1): + def remove(self, doc_id): + self.search_engine.remove(doc_id) + self.rescorer.remove(doc_id) + self.save(True) + logging.info('removed ' + doc_id) + + def search_by_id(self, query_id, k=10, search_threshold=0.25, search_deep_level=1): kq = k if search_deep_level > 0: kq = SearcherParameters.SEARCH_DEEP_K[search_deep_level] @@ -57,10 +60,10 @@ class Searcher: if search_deep_level > 0: res_lf = self.rescorer.rescore_by_id(query_id, res) res = res_lf if res_lf else res[:k] + res = [result for result in res if result[1] >= search_threshold] return res - @profile - def search_by_img(self, query_img, k=10, search_deep_level=1): + def search_by_img(self, query_img, k=10, search_threshold=0.25, search_deep_level=1): kq = k if search_deep_level: kq = SearcherParameters.SEARCH_DEEP_K[search_deep_level] @@ -71,7 +74,7 @@ class Searcher: res_lf = self.rescorer.rescore_by_img(query_lf, res) #res = res_lf if res_lf else res[:k] res = res_lf if res_lf else res[:k] - res = [result for result in res if result[1] >= 
SearcherParameters.GEM_THRESHOLD]
+        res = [result for result in res if result[1] >= search_threshold]
         return res

     def save(self, is_backup=False):
diff --git a/src/SearcherParameters.py b/src/SearcherParameters.py
index 2595672..b16ac90 100644
--- a/src/SearcherParameters.py
+++ b/src/SearcherParameters.py
@@ -1,2 +1 @@
 SEARCH_DEEP_K = [0, 1000, 2000, 5000, 10000, 30000, 100000]
-GEM_THRESHOLD = 0.25
diff --git a/src/TestClient.py b/src/TestClient.py
index b9045f6..8c3d2d3 100644
--- a/src/TestClient.py
+++ b/src/TestClient.py
@@ -1,47 +1,73 @@
-from flask import Flask, request, redirect, url_for, flash, render_template, send_from_directory
-from random import randint
-import cv2
-import io
-import numpy as np
-import json
-
-import urllib
-
-
-from BeniCulturaliSearchEngine import BeniCulturaliSearchEngine
-import ImageRecognitionSettings as settings
-import uuid
 import requests
+import base64
-import os, os.path
+BASE_URL = 'http://bilioso.isti.cnr.it:8290/bcir/'
-BASE_URL = 'http://bilioso.isti.cnr.it:8190/bcir/'
-payload = {'id': '54b019e5ed5082b0938b14c4-IMG357781'}
+# ------Get indexed IDs------
+r = requests.get(BASE_URL + 'getIds')
+print(r.json())
+
+# ------Check if an ID exists------
+payload = {'id': '54b3298aed5082b093939ea1-IMG881380'}
+r = requests.get(BASE_URL + 'exists', params=payload)
+print(r.json())
+
+# ------Searching by ID------
+print('Searching by ID')
+payload = {'id': '54b3298aed5082b093939ea1-IMG881380', 'k': 10, 'threshold': 0.25, 'searchDeepLevel': 1}
 r = requests.get(BASE_URL + 'searchById', params=payload)
 print(r.json())

-files = {'image': ('query', open('/media/Data/data/test/tpc_test/img/data_test/54b019e5ed5082b0938b14c4-IMG357781.jpg', 'rb'))}
-r = requests.post(BASE_URL + 'searchByImg', files=files)
+# ------Searching by IMG------
+print('Searching by IMG')
+files = {'image': ('query', open('/media/data2/data/swoads/workdir/img/ImmaginiComparazioni/ACC130111[1].jpg', 'rb'))}
+data = {'k': 10, 'threshold': 0.25, 'searchDeepLevel': 1}
+r = requests.post(BASE_URL + 'searchByImg', data=data, files=files)
 print(r.json())

-payload = {'url': 'http://bilioso.isti.cnr.it:8190/bcir/54b019e5ed5082b0938b14c4-IMG357781.jpg'}
+# ------Searching by IMG Base64------
+print('Searching by IMG Base64')
+with open('/media/data2/data/swoads/workdir/img/ImmaginiComparazioni/ACC130111[1].jpg', "rb") as img_file:
+    b64_encoding = base64.b64encode(img_file.read())
+data = {'image': b64_encoding, 'k': 10, 'threshold': 0.25, 'searchDeepLevel': 1}
+r = requests.post(BASE_URL + 'searchByImgB64', data=data)
+print(r.json())
+
+# ------Searching by URL------
+print('Searching by URL')
+payload = {'url': 'http://bilioso.isti.cnr.it:8290/bcir/54b3298aed5082b093939ea1-IMG881380.jpg', 'k': 10,
+           'threshold': 0.25, 'searchDeepLevel': 1}
 r = requests.get(BASE_URL + 'searchByURL', params=payload)
 print(r.json())

-files = {'image': ('prova', open('/media/Data/data/test/tpc_test/img/data_test/54b019e5ed5082b0938b14c4-IMG357781.jpg', 'rb'))}
-#files = {'image': ('prova', open('/media/Data/data/beni_culturali/deploy/dataset_ids.bak', 'rb'))}
+# ------Adding newIMG------
+print('Adding newIMG')
+files = {'image': ('newIMG', open('/media/ssd2/data/swoads/workdir/img/ImmaginiComparazioni/ACC97468-149[3].jpg', 'rb'))}
 r = requests.post(BASE_URL + 'addImg', files=files)
 s = r.json()
 print(r.json())

-files = {'image': ('query', 
open('/media/Data/data/test/tpc_test/img/data_test/54b019e5ed5082b0938b14c4-IMG357781.jpg', 'rb'))} -r = requests.post(BASE_URL + 'searchByImg', files=files) +# ------Searching by newIMG------ +print('Searching by newIMG') +files = {'image': ( +'query', open('/media/ssd2/data/swoads/workdir/img/ImmaginiComparazioni/ACC97468-149[3].jpg', 'rb'))} +data = {'k': 10, 'threshold': 0.25, 'searchDeepLevel': 1} + +r = requests.post(BASE_URL + 'searchByImg', data=data, files=files) print(r.json()) -payload = {'id': 'prova'} +# ------Removing newIMG------ +print('Removing newIMG') +payload = {'id': 'newIMG'} r = requests.get(BASE_URL + 'rmImg', params=payload) print(r.json()) -files = {'image': ('query', open('/media/Data/data/test/tpc_test/img/data_test/54b019e5ed5082b0938b14c4-IMG357781.jpg', 'rb'))} -r = requests.post(BASE_URL + 'searchByImg', files=files) +# ------Searching by newIMG now removed from the index------ +print('Searching by newIMG now removed from the index') +files = {'image': ( +'query', open('/media/ssd2/data/swoads/workdir/img/ImmaginiComparazioni/ACC97468-149[3].jpg', 'rb'))} +data = {'k': 10, 'threshold': 0.25, 'searchDeepLevel': 1} +r = requests.post(BASE_URL + 'searchByImg', data=data, files=files) print(r.json())
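Note for reviewers: the new search engines build the global-feature index with a flat FAISS inner-product index (see init_faiss_index() in FAISSSearchEngine2.py). Below is a minimal, self-contained sketch of that pattern; the dimensionality, factory string and metric are the ones used in the patch, while the random descriptors and the top-10 query are purely illustrative (the service loads real 2048-d features via GFUtilities).

import numpy as np
import faiss

dim = 2048  # feature dimensionality used by the service
index = faiss.index_factory(dim, 'Flat', faiss.METRIC_INNER_PRODUCT)

# Illustrative descriptors: N x dim, L2-normalised so the inner product behaves like cosine similarity.
descs = np.random.rand(100, dim).astype('float32')
descs /= np.linalg.norm(descs, axis=1, keepdims=True)
index.add(descs)  # add the whole batch to the index

# Queries must be reshaped to (n_queries, dim), as in search_by_img().
query = descs[0].reshape(1, dim)
scores, indexes = index.search(query, 10)  # similarity scores and positions of the 10 nearest vectors
print(indexes[0], scores[0])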