added FAISS Searcher

Paolo Bolettieri 2022-07-19 18:39:50 +02:00
parent de8bc9a70c
commit aeafcfe219
10 changed files with 457 additions and 267 deletions

src/BEBLIDRescorer.py
View File

@@ -5,6 +5,7 @@ import LFUtilities
 import BEBLIDParameters
 import ImageRecognitionSettings as settings
 from line_profiler_pycharm import profile
+import logging

 class BEBLIDRescorer:
@@ -16,7 +17,8 @@ class BEBLIDRescorer:
     def rescore_by_id(self, query_id, resultset):
         #query_idx = self.ids.index(query_id)
-        query = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, query_id)
+        query = LFUtilities.unpickle_img_lf(settings.DATASET_LF_FOLDER, query_id)
         return self.rescore_by_img(query, resultset)

     @profile
@@ -47,13 +49,13 @@ class BEBLIDRescorer:
                     if (inliers >= BEBLIDParameters.MIN_INLIERS and inliers > max_inliers):
                         max_inliers = inliers
                         res.append((data_id, round(inliers/len(good), 3)))
-                        print(data_id)
-                        print(f'candidate n. {counter}')
+                        logging.info(data_id)
+                        logging.info(f'candidate n. {counter}')
                         #to get just the first candidate
                         break
             except Exception as e:
-                print('rescore error evaluating ' + data_id)
-                print(e)
+                logging.error('rescore error evaluating ' + data_id)
+                logging.error(e)
                 pass
             counter += 1
@@ -61,16 +63,18 @@ class BEBLIDRescorer:
         res.sort(key=lambda result: result[1], reverse=True)
         return res

-    def add(self, kp, des, id):
+    @staticmethod
+    def add(doc_id, kp, des):
         # LFUtilities.save_img_lf(dest, filename, kp, des)
         # LFUtilities.savez_img_lf(dest, filename, kp, des)
-        LFUtilities.pickle_img_lf(settings.DATASET_LF_FOLDER, id, kp, des)
+        LFUtilities.pickle_img_lf(settings.DATASET_LF_FOLDER, doc_id, kp, des)

-    def remove(self, idx):
-        self.descs = np.delete(self.descs, idx, axis=0)
+    @staticmethod
+    def remove(doc_id):
+        LFUtilities.delete_img_lf(settings.DATASET_LF_FOLDER, doc_id)

     def save(self, is_backup=False):
-        lf_save_file = settings.DATASET_LF
+        """lf_save_file = settings.DATASET_LF
         ids_file = settings.DATASET_IDS_LF
         if lf_save_file != "None":
             if is_backup:
@@ -78,4 +82,4 @@ class BEBLIDRescorer:
                 ids_file += '.bak'
             LFUtilities.save(lf_save_file, self.lf)
-            np.savetxt(ids_file, self.ids, fmt='%s')
+            np.savetxt(ids_file, self.ids, fmt='%s')"""
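
For context on the score the rescorer appends above, round(inliers/len(good), 3) is the fraction of ratio-test matches that survive RANSAC geometric verification. A minimal sketch with OpenCV, assuming kps_q/kps_d are (x, y) point arrays as stored by LFUtilities and des_q/des_d are binary descriptors; the function name and thresholds are illustrative, not the project's exact code:

    import cv2
    import numpy as np

    def inlier_ratio(kps_q, des_q, kps_d, des_d, ratio=0.8):
        # Hamming distance suits binary descriptors such as BEBLID/ORB
        matcher = cv2.BFMatcher(cv2.NORM_HAMMING)
        pairs = [p for p in matcher.knnMatch(des_q, des_d, k=2) if len(p) == 2]
        good = [m for m, n in pairs if m.distance < ratio * n.distance]  # Lowe ratio test
        if len(good) < 4:  # findHomography needs at least 4 correspondences
            return 0.0
        src = np.float32([kps_q[m.queryIdx] for m in good]).reshape(-1, 1, 2)
        dst = np.float32([kps_d[m.trainIdx] for m in good]).reshape(-1, 1, 2)
        _, mask = cv2.findHomography(src, dst, cv2.RANSAC, 5.0)
        inliers = int(mask.sum()) if mask is not None else 0
        return round(inliers / len(good), 3)  # same form as the score appended above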

src/FAISSSearchEngine.py
View File

@@ -1,11 +1,16 @@
 import numpy as np
 import ImageRecognitionSettings as settings
 import faiss
+from shutil import copyfile
+import logging
+import threading
+import glob
+import fileinput
+import os

 class FAISSSearchEngine:

     def __init__(self):
         #self.dataset = h5py.File(settings.dataset_file, 'r')['rmac'][...]
@@ -16,32 +21,58 @@ class FAISSSearchEngine:
         #self.descs = (self.desc1 + self.desc2) / 2
         #self.descs /= np.linalg.norm(self.descs, axis=1, keepdims=True)
-        self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
+        #self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
+        self.ids = np.loadtxt(fileinput.input(sorted(glob.glob(settings.DATASET_GF_FOLDER + '/*.ids'))))
+        self.init_faiss_index()
+        self.lock = threading.Lock()

+    def init_faiss_index(self):
         # create an index with inner product similarity
         dim = 2048  # dimensionality of the features
         metric = faiss.METRIC_INNER_PRODUCT
         self.index = faiss.index_factory(dim, 'Flat', metric)
-        # add the vectors to the index
-        self.index.add(self.descs)  # my_database is a numpy array of shape N x dim, where N is the number of vectors to index
+        for desc_file in sorted(glob.glob(settings.DATASET_GF_FOLDER + '/*.npy')):
+            # add the vectors to the index
+            tmp = np.load(desc_file)
+            self.index.add(tmp)  # my_database is a numpy array of shape N x dim, where N is the number of vectors to index

     def get_id(self, idx):
         return self.ids[idx]

+    def get_indexed_ids(self):
+        return self.ids

-    def add(self, desc, id):
-        self.ids.append(id)
-        self.descs = np.vstack((self.descs, desc))
-        self.save()
+    def add(self, desc, doc_id):
+        try:
+            self.lock.acquire()
+            # self.ids.append(doc_id)
+            # self.descs = np.vstack((self.descs, desc))
+            np.savetxt(os.path.join(settings.DATASET_GF_FOLDER, doc_id + '.ids'), self.ids, fmt='%s')
+            np.save(os.path.join(settings.DATASET_GF_FOLDER, doc_id + '.npy'), desc)
+        finally:
+            self.lock.release()

-    def remove(self, id):
-        idx = self.ids.index(id)
-        del self.ids[idx]
-        self.descs = np.delete(self.descs, idx, axis=0)
+    def remove(self, doc_id):
+        try:
+            self.lock.acquire()
+            id_filename = os.path.join(settings.DATASET_GF_FOLDER, doc_id + '.ids')
+            numpy_filename = os.path.join(settings.DATASET_GF_FOLDER, doc_id + '.npy')
+            if os.path.exists(id_filename):
+                os.remove(id_filename)
+            if os.path.exists(numpy_filename):
+                os.remove(numpy_filename)
+            else:
+                idx = self.ids.index(doc_id)
+                del self.ids[idx]
+                self.descs = np.delete(self.descs, idx, axis=0)
+                descs_file = settings.DATASET
+                ids_file = settings.DATASET_IDS
+                np.save(descs_file, self.descs)
+                np.savetxt(ids_file, self.ids, fmt='%s')
+        finally:
+            self.lock.release()

     def search_by_id(self, query_id, k=10):
         query_idx = self.ids.index(query_id)
@@ -49,9 +80,9 @@ class FAISSSearchEngine:

     def search_by_img(self, query, k=10):
         print('----------query features-------')
-        print(query)
+        logging.info(query)
         queries = np.reshape(query, (-1, 2048))
-        print(queries)
+        logging.debug(queries)
         scores, indexes = self.index.search(queries, k)
         #dot_product = np.dot(self.descs, query)
         #idx = dot_product.argsort()[::-1][:k]
@@ -63,10 +94,21 @@ class FAISSSearchEngine:
     def save(self, is_backup=False):
         descs_file = settings.DATASET
         ids_file = settings.DATASET_IDS
-        if is_backup:
-            descs_file += '.bak'
-            ids_file += '.bak'
-        np.save(descs_file, self.descs)
-        np.savetxt(ids_file, self.ids, fmt='%s')
+        try:
+            self.lock.acquire()
+            """if is_backup:
+                descs_file_backup = descs_file + '.bak'
+                ids_file_backup = ids_file + '.bak'
+                copyfile(descs_file, descs_file_backup)
+                copyfile(ids_file, ids_file_backup)
+                logging.info('Backup features created')
+            np.save(descs_file, self.descs)
+            np.savetxt(ids_file, self.ids, fmt='%s')
+            logging.info('Storing features done')"""
+            self.init_faiss_index()
+        except Exception as e:
+            logging.error('Error, unable to store features')
+            logging.error(e)
+        finally:
+            self.lock.release()
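
For readers unfamiliar with the FAISS calls used here: a minimal, self-contained sketch of the same flat inner-product index, assuming L2-normalized 2048-d float32 descriptors so that inner product equals cosine similarity (the data below is random and purely illustrative):

    import faiss
    import numpy as np

    dim = 2048
    index = faiss.index_factory(dim, 'Flat', faiss.METRIC_INNER_PRODUCT)

    # FAISS expects float32; normalize rows so inner product == cosine similarity
    db = np.random.rand(1000, dim).astype('float32')
    db /= np.linalg.norm(db, axis=1, keepdims=True)
    index.add(db)  # N x dim matrix, one row per indexed image

    scores, indexes = index.search(db[:1], 10)  # top-10 neighbours of the first vector
    print(indexes[0], scores[0])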

src/FAISSSearchEngine2.py (new file, 120 lines)
View File

@@ -0,0 +1,120 @@
+import numpy as np
+import ImageRecognitionSettings as settings
+import faiss
+from shutil import copyfile
+import logging
+import threading
+import glob
+import fileinput
+import os
+import GFUtilities
+
+class FAISSSearchEngine:
+
+    def __init__(self):
+        #self.dataset = h5py.File(settings.dataset_file, 'r')['rmac'][...]
+        #np.save('/media/Data/data/beni_culturali/deploy/dataset', self.dataset)
+        #self.descs = np.load(settings.DATASET)
+        #self.desc1 = np.load(settings.DATASET1)
+        #self.desc2 = np.load(settings.DATASET2)
+        #self.descs = (self.desc1 + self.desc2) / 2
+        #self.descs /= np.linalg.norm(self.descs, axis=1, keepdims=True)
+        #self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
+        #self.ids = np.loadtxt(fileinput.input(sorted(glob.glob(settings.DATASET_GF_FOLDER + '/*.ids'))))
+        self.lock = threading.Lock()
+        self.init_faiss_index()
+
+    def init_faiss_index(self):
+        # create an index with inner product similarity
+        dim = 2048  # dimensionality of the features
+        metric = faiss.METRIC_INNER_PRODUCT
+        self.index = faiss.index_factory(dim, 'Flat', metric)
+        self.descs = np.load(settings.DATASET)
+        self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
+        self.index.add(self.descs)
+        for desc_file in sorted(glob.glob(settings.DATASET_GF_FOLDER + '/**/*.dat', recursive=True)):
+            # add the vectors to the index
+            img_id = os.path.splitext(os.path.basename(desc_file))[0]
+            img_desc = GFUtilities.unpickle_img_gf(settings.DATASET_GF_FOLDER, img_id)
+            self.ids.append(img_id)
+            self.index.add(img_desc)  # my_database is a numpy array of shape N x dim, where N is the number of vectors to index
+
+    def get_id(self, idx):
+        return self.ids[idx]
+
+    def get_indexed_ids(self):
+        return self.ids
+
+    def exists(self, doc_id):
+        return doc_id in self.ids
+
+    def add(self, desc, doc_id):
+        try:
+            self.lock.acquire()
+            # self.ids.append(doc_id)
+            # self.descs = np.vstack((self.descs, desc))
+            GFUtilities.pickle_img_gf(settings.DATASET_GF_FOLDER, doc_id, desc)
+        finally:
+            self.lock.release()
+
+    def remove(self, doc_id):
+        try:
+            self.lock.acquire()
+            if not GFUtilities.delete_img_gf(settings.DATASET_GF_FOLDER, doc_id):
+                try:
+                    idx = self.ids.index(doc_id)
+                    del self.ids[idx]
+                    self.descs = np.delete(self.descs, idx, axis=0)
+                    descs_file = settings.DATASET
+                    ids_file = settings.DATASET_IDS
+                    np.save(descs_file, self.descs)
+                    np.savetxt(ids_file, self.ids, fmt='%s')
+                except ValueError as e:
+                    logging.error('Error, unable to retrieve and delete ' + doc_id)
+                    logging.error(e)
+        finally:
+            self.lock.release()
+
+    def search_by_id(self, query_id, k=10):
+        query_idx = self.ids.index(query_id)
+        return self.search_by_img(self.descs[query_idx], k)
+
+    def search_by_img(self, query, k=10):
+        print('----------query features-------')
+        logging.info(query)
+        queries = np.reshape(query, (-1, 2048))
+        logging.debug(queries)
+        scores, indexes = self.index.search(queries, k)
+        #dot_product = np.dot(self.descs, query)
+        #idx = dot_product.argsort()[::-1][:k]
+        res = []
+        for (i, j) in zip(indexes[0], scores[0]):
+            res.append((self.ids[i], round(float(j), 3)))
+        return res
+
+    def save(self, is_backup=False):
+        descs_file = settings.DATASET
+        ids_file = settings.DATASET_IDS
+        try:
+            self.lock.acquire()
+            """if is_backup:
+                descs_file_backup = descs_file + '.bak'
+                ids_file_backup = ids_file + '.bak'
+                copyfile(descs_file, descs_file_backup)
+                copyfile(ids_file, ids_file_backup)
+                logging.info('Backup features created')
+            np.save(descs_file, self.descs)
+            np.savetxt(ids_file, self.ids, fmt='%s')
+            logging.info('Storing features done')"""
+            self.init_faiss_index()
+        except Exception as e:
+            logging.error('Error, unable to store features')
+            logging.error(e)
+        finally:
+            self.lock.release()
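
A usage sketch of the engine above. It assumes settings has been loaded from a config file and that the base dataset.npy/dataset.ids files already exist; the config path and document id are made up for illustration:

    import numpy as np
    import ImageRecognitionSettings as settings
    from FAISSSearchEngine2 import FAISSSearchEngine

    settings.load_setting('conf.json')  # hypothetical config path
    engine = FAISSSearchEngine()

    desc = np.random.rand(1, 2048).astype('float32')  # stand-in for a real global feature
    engine.add(desc, '54b3298aed5082b093939ea1-IMG000001')  # pickled under gf/54b/
    engine.save()  # rebuilds the in-memory index from dataset.npy plus the pickled files

    print(engine.search_by_img(desc, k=5))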

src/GFUtilities.py (new file, 36 lines)
View File

@@ -0,0 +1,36 @@
+import cv2
+import numpy as np
+import pickle as pickle
+import os
+
+def pickle_img_gf(dest, id, descriptors):
+    dest_folder_name = id[0:3]
+    filename = id + '.dat'
+    dest_folder_path = os.path.join(dest, dest_folder_name)
+    if (not os.path.exists(dest_folder_path)):
+        os.mkdir(dest_folder_path)
+    dest_path = os.path.join(dest_folder_path, filename)
+    pickle.dump(descriptors, open(dest_path, 'wb'))
+
+def unpickle_img_gf(dest, id):
+    dest_folder_name = id[0:3]
+    filename = id + '.dat'
+    dest_folder_path = os.path.join(dest, dest_folder_name)
+    dest_path = os.path.join(dest_folder_path, filename)
+    des = pickle.load(open(dest_path, "rb"))
+    return des
+
+def delete_img_gf(dest, id):
+    dest_folder_name = id[0:3]
+    filename = id + '.dat'
+    dest_folder_path = os.path.join(dest, dest_folder_name)
+    dest_path = os.path.join(dest_folder_path, filename)
+    if os.path.exists(dest_path):
+        os.remove(dest_path)
+        if not os.listdir(dest_folder_path):
+            os.rmdir(dest_folder_path)
+        return True
+    return False
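
The id[0:3] prefix shards the pickled features into subfolders so that no single directory accumulates every file. A round-trip sketch under an assumed temporary folder (the document id is invented):

    import tempfile
    import numpy as np
    import GFUtilities

    gf_root = tempfile.mkdtemp()
    doc_id = '54b3298aed5082b093939ea1-IMG881380'

    desc = np.random.rand(1, 2048).astype('float32')
    GFUtilities.pickle_img_gf(gf_root, doc_id, desc)  # written to <gf_root>/54b/<doc_id>.dat
    restored = GFUtilities.unpickle_img_gf(gf_root, doc_id)
    assert np.array_equal(desc, restored)
    print(GFUtilities.delete_img_gf(gf_root, doc_id))  # True; also removes the now-empty 54b/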

View File

@@ -15,6 +15,8 @@ import os, os.path
 import tornado.wsgi
 import tornado.httpserver
 import argparse
+import logging
+import base64

 app = Flask(__name__)
@@ -65,13 +67,9 @@ def get_res(results, query_url=None):
 @app.route('/bcir/searchById')
 def search_by_id():
     id = request.args.get('id')
-    try:
-        searchDeepLevel = int(request.args.get("searchDeepLevel"))
-    except Exception as e:
-        print(e)
-        print('Setting default deep level 1')
-        search_deep_level = 1
-    results = searcher.search_by_id(id, settings.k, searchDeepLevel)
+    k, threshold, search_deep_level = get_parameters(request.args.get("k"), request.args.get("threshold"),
+                                                     request.args.get("searchDeepLevel"))
+    results = searcher.search_by_id(id, k, threshold, search_deep_level)
     query_url = None
     if request.args.get("tohtml") is not None:
         query_url = id + ".jpg"
@@ -86,12 +84,8 @@ def search_by_img():
     file = request.files['image']
     img_file = post_to_file(file)

-    try:
-        search_deep_level = int(request.form.get("searchDeepLevel"))
-    except Exception as e:
-        print(e)
-        print('Setting default deep level 1')
-        search_deep_level = 1
+    k, threshold, search_deep_level = get_parameters(request.form.get("k"), request.form.get("threshold"), request.form.get("searchDeepLevel"))

     #dest_file = uuid.uuid4().hex + ".jpg"
     #dest_path = settings.logs + "/" + dest_file
@@ -99,24 +93,48 @@ def search_by_img():
     #files = {'image': (dest_file, open(dest_path, 'rb'))}
     #r = requests.post(settings.rmac_service, files=files)
     #results = search_engine.search_by_img(np.array(r.json()), settings.k)
-    results = searcher.search_by_img(img_file, settings.k, search_deep_level)
+    results = searcher.search_by_img(img_file, k, threshold, search_deep_level)
     query_url = None
     if request.form.get("tohtml") is not None:
         query_url = ""
     return get_res(results, query_url)

+@app.route('/bcir/searchByImgB64', methods=['POST'])
+def search_by_img_base64():
+    image = request.form.get('image')
+    if image:
+        img_file = base64_to_file(image)
+    else:
+        flash('No img sent')
+        return redirect(request.url)
+    k, threshold, search_deep_level = get_parameters(request.form.get("k"), request.form.get("threshold"), request.form.get("searchDeepLevel"))
+    results = searcher.search_by_img(img_file, k, threshold, search_deep_level)
+    query_url = None
+    if request.form.get("tohtml") is not None:
+        query_url = ""
+    return get_res(results, query_url)
+
+def base64_to_file(image_base64):
+    ext = ".png"
+    dest_file = uuid.uuid4().hex + ext
+    dest_path = settings.logs + "/" + dest_file
+    with open(dest_path, "wb") as image_file:
+        byte_content = base64.b64decode(image_base64)
+        image_file.write(byte_content)
+    return dest_path

 @app.route('/bcir/searchByURL')
 def search_by_url():
     url = request.args.get('url')
-    try:
-        search_deep_level = int(request.args.get("searchDeepLevel"))
-    except Exception as e:
-        print(e)
-        print('Setting default deep level 1')
-        search_deep_level = 1
     img_file = url_to_file(url)
+    k, threshold, search_deep_level = get_parameters(request.args.get("k"), request.args.get("threshold"),
+                                                     request.args.get("searchDeepLevel"))
     # query = cv2.imdecode(image, cv2.IMREAD_COLOR)
     # dest_file = uuid.uuid4().hex + ".jpg"
     # dest_path = settings.logs + "/" + dest_file
@@ -124,12 +142,49 @@ def search_by_url():
     # files = {'image': open(dest_path, 'rb')}
     # r = requests.post(settings.rmac_service, files=files)
     # results = search_engine.search_by_img(np.array(r.json()), settings.k)
-    results = searcher.search_by_img(img_file, settings.k, search_deep_level)
+    results = searcher.search_by_img(img_file, k, threshold, search_deep_level)
     query_url = None
     if request.args.get("tohtml") is not None:
         query_url = url
     return get_res(results, query_url)

+def get_parameters(k, threshold, search_deep_level):
+    try:
+        threshold = float(threshold)
+    except Exception as e:
+        logging.error(e)
+        threshold = settings.SEARCH_THRESHOLD
+        logging.error('Setting default threshold value to ' + str(threshold))
+    try:
+        k = int(k)
+    except Exception as e:
+        logging.error(e)
+        k = settings.k
+        logging.error('Setting default k value to ' + str(k))
+    try:
+        search_deep_level = int(search_deep_level)
+    except Exception as e:
+        logging.error(e)
+        search_deep_level = settings.SEARCH_DEEP_LEVEL
+        logging.error('Setting default deep level to ' + str(search_deep_level))
+    return k, threshold, search_deep_level
+
+@app.route('/bcir/getIds')
+def get_indexed_ids():
+    json_ids = json.dumps(searcher.get_indexed_ids())
+    return json_ids
+
+@app.route('/bcir/exists')
+def exists():
+    doc_id = request.args.get('id')
+    return json.dumps(searcher.exists(doc_id))
 @app.route('/bcir/addImg', methods=['POST'])
 def add_img():
     if 'image' not in request.files:
@@ -137,42 +192,46 @@ def add_img():
         return redirect(request.url)
     try:
         file = request.files['image']
-        id = request.files['image'].filename
-        id, _ = os.path.splitext(id)
+        doc_id = request.files['image'].filename
+        doc_id, _ = os.path.splitext(doc_id)
         img_file = post_to_file(file)
-        searcher.add(img_file, id)
+        searcher.add(img_file, doc_id)
         json_res = json.dumps("done")
         return json_res
-    except:
+    except Exception as e:
+        logging.error('Unable to add ' + doc_id + ' to the index')
+        logging.error(e)
         abort(500)

 @app.route('/bcir/rmImg')
 def remove_img():
     try:
-        id = request.args.get('id')
-        searcher.remove(id)
+        doc_id = request.args.get('id')
+        searcher.remove(doc_id)
         json_res = json.dumps("done")
         return json_res
-    except:
+    except Exception as e:
+        logging.error('Unable to remove ' + doc_id + ' from the index')
+        logging.error(e)
         abort(500)

 @app.route('/bcir/<path:filename>')
 def download_file(filename):
-    print(filename)
+    logging.debug(filename)
     values = filename.split('/')
-    print(values)
+    logging.debug(values)
     return send_from_directory(settings.img_folder, filename, as_attachment=False)

 @app.route('/bcir/queries/<path:filename>')
 def queries(filename):
-    print(filename)
+    logging.debug(filename)
     values = filename.split('/')
     folder = values[0]
     name = values[1]
-    print(folder)
-    print(name)
+    logging.debug(folder)
+    logging.debug(name)
     return send_from_directory(settings.working_folder + '/' + folder, name, as_attachment=False)

src/ImageRecognitionSettings.py
View File

@@ -2,7 +2,7 @@ import json
 import os

 def load_setting(conf_file):
-    global port, feature_extractor, k, img_folder, logs, working_folder, data_folder, DATASET, DATASET_LF_FOLDER, DATASET_IDS, DB_LF
+    global port, feature_extractor, k, img_folder, logs, working_folder, data_folder, DATASET_LF_FOLDER, DATASET_GF_FOLDER, DATASET, DATASET_IDS, DB_LF, SEARCH_THRESHOLD, SEARCH_DEEP_LEVEL
     with open(conf_file) as settings_file:
@@ -19,10 +19,14 @@ def load_setting(conf_file):
     if not os.path.isdir(data_folder):
         os.mkdir(data_folder)

-    DATASET = os.path.join(data_folder, 'dataset.npy')
     DATASET_LF_FOLDER = os.path.join(data_folder, 'lf')
-    DATASET_IDS = os.path.join(data_folder, 'dataset.ids')
+    DATASET_GF_FOLDER = os.path.join(data_folder, 'gf')
+    DATASET = os.path.join(DATASET_GF_FOLDER, 'dataset.npy')
+    DATASET_IDS = os.path.join(DATASET_GF_FOLDER, 'dataset.ids')
     DB_LF = os.path.join(data_folder, 'sqlite_lf/lf.db')
+    SEARCH_THRESHOLD = settings['search_th']
+    SEARCH_DEEP_LEVEL = settings['search_dl']

     img_folder = settings['img_folder']
     logs = os.path.join(working_folder, settings['log_folder'])
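
load_setting reads these values from a JSON file; a minimal sketch of the keys touched by this commit, written from Python for concreteness. The schema is partial and assumed: real deployments also set port, k, the working folders and the other fields read elsewhere in load_setting:

    import json

    conf = {
        "img_folder": "/path/to/images",  # assumed example values
        "log_folder": "logs",
        "search_th": 0.25,  # read into SEARCH_THRESHOLD
        "search_dl": 1,     # read into SEARCH_DEEP_LEVEL
    }
    with open('conf.json', 'w') as f:
        json.dump(conf, f, indent=4)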

src/LFUtilities.py
View File

@@ -1,151 +1,48 @@
 import cv2
 import numpy as np
 import pickle as pickle
 import os
-from line_profiler_pycharm import profile

 def resize(max_side, img):
     if img.shape[1] > img.shape[0]:
         r = max_side / img.shape[1]
         dim = (max_side, int(img.shape[0] * r))
     else:
         r = max_side / img.shape[0]
         dim = (int(img.shape[1] * r), max_side)
     # perform the actual resizing of the image and show it
     resized = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
     return resized

-def pickle_keypoints(keypoints, descriptors):
-    i = 0
-    temp_array = []
-    for point in keypoints:
-        temp = (point.pt, point.size, point.angle, point.response, point.octave,
-                point.class_id, descriptors[i])
-        i += 1
-        temp_array.append(temp)
-    return temp_array
-
-def serialize_object(obj):
-    return pickle.dumps(obj)
-
-def deserialize_object(serialized_obj):
-    return pickle.loads(serialized_obj)
-
-def serializeV1(keypoints, descriptors):
-    temp_array = []
-    for point in keypoints:
-        kp = [point.pt, point.size, point.angle, point.response, point.octave, point.class_id]
-        temp_array.append(kp)
-    return temp_array, descriptors
-
-def serialize(keypoints, descriptors):
-    pts = np.float32([keypoints[i].pt for i in range(0, len(keypoints))])
-    return pts, descriptors
-
-def deserialize(ser_kp, ser_des):
-    keypoints = []
-    #data_list = array.tolist()
-    for point in ser_kp:
-        temp_feature = cv2.KeyPoint(x=point[0][0], y=point[0][1], size=point[1], angle=point[2], response=point[3], octave=point[4], class_id=point[5])
-        keypoints.append(temp_feature)
-    return keypoints, ser_des
-
-def deserializev1(ser_kp, ser_des):
-    keypoints = []
-    #data_list = array.tolist()
-    for point in ser_kp:
-        temp_feature = cv2.KeyPoint(x=point[0][0], y=point[0][1], size=point[1], angle=point[2], response=point[3], octave=point[4], class_id=point[5])
-        keypoints.append(temp_feature)
-    return keypoints, ser_des
-
-def pickle_img_lf(dest, id, keypoints, descriptors):
-    dest_folder_name = id[0:3]
-    filename = id + '.dat'
-    dest_folder_path = os.path.join(dest, dest_folder_name)
-    if (not os.path.exists(dest_folder_path)):
-        os.mkdir(dest_folder_path)
-    dest_path = os.path.join(dest_folder_path, filename)
-    kps, des = serialize(keypoints, descriptors)
-    pickle.dump([kps, des], open(dest_path, 'wb'))
-
-def delete_img_lf(dest, id):
-    dest_folder_name = id[0:3]
-    filename = id + '.dat'
-    dest_folder_path = os.path.join(dest, dest_folder_name)
-    dest_path = os.path.join(dest_folder_path, filename)
-    if os.path.exists(dest_path):
-        os.remove(dest_path)
-
-@profile
-def unpickle_img_lf(lf_path, id):
-    dest_folder_name = id[0:3]
-    filename = id + '.dat'
-    dest_folder_path = os.path.join(lf_path, dest_folder_name)
-    dest_path = os.path.join(dest_folder_path, filename)
-    kps, des = pickle.load(open(dest_path, "rb"))
-    return kps, des
-
-@profile
-def loadz_img_lf(lf_path, id):
-    dest_folder_name = id[0:3]
-    filename = id + '.dat.npz'
-    dest_folder_path = os.path.join(lf_path, dest_folder_name)
-    dest_path = os.path.join(dest_folder_path, filename)
-    data = np.load(dest_path, allow_pickle=False)
-    kps = data.f.kps
-    des = data.f.des
-    #kps = data['kps']
-    #des = data['des']
-    #kp, desc = deserialize(data['kps'], data['des'])
-    return kps, des
-
-def savez_img_lf(dest, id, keypoints, descriptors):
-    dest_folder_name = id[0:3]
-    filename = id + '.dat'
-    dest_folder_path = os.path.join(dest, dest_folder_name)
-    if (not os.path.exists(dest_folder_path)):
-        os.mkdir(dest_folder_path)
-    dest_path = os.path.join(dest_folder_path, filename)
-    kps, des = serialize(keypoints, descriptors)
-    #np.savez(dest_path, data)
-    np.savez(dest_path, kps=kps, des=des)
-
-@profile
-def loadz_img_lf(lf_path, id):
-    dest_folder_name = id[0:3]
-    filename = id + '.dat.npz'
-    dest_folder_path = os.path.join(lf_path, dest_folder_name)
-    dest_path = os.path.join(dest_folder_path, filename)
-    data = np.load(dest_path, allow_pickle=False)
-    kps = data.f.kps
-    des = data.f.des
-    #kps = data['kps']
-    #des = data['des']
-    #kp, desc = deserialize(data['kps'], data['des'])
-    return kps, des
-
-def unpickle_keypoints(array):
-    keypoints = []
-    descriptors = []
-    data_list = array.tolist()
-    for point in array:
-        temp_feature = cv2.KeyPoint(x=point[0][0], y=point[0][1], size=point[1], angle=point[2], response=point[3], octave=point[4], class_id=point[5])
-        temp_descriptor = point[6]
-        keypoints.append(temp_feature)
-        descriptors.append(temp_descriptor)
-    return keypoints, np.array(descriptors)
+def pickle_img_lf(dest, id, keypoints, descriptors):
+    dest_folder_name = id[0:3]
+    filename = id + '.dat'
+    dest_folder_path = os.path.join(dest, dest_folder_name)
+    if (not os.path.exists(dest_folder_path)):
+        os.mkdir(dest_folder_path)
+    dest_path = os.path.join(dest_folder_path, filename)
+    kps = np.float32([keypoints[i].pt for i in range(0, len(keypoints))])
+    pickle.dump([kps, descriptors], open(dest_path, 'wb'))
+
+def unpickle_img_lf(lf_path, id):
+    dest_folder_name = id[0:3]
+    filename = id + '.dat'
+    dest_folder_path = os.path.join(lf_path, dest_folder_name)
+    dest_path = os.path.join(dest_folder_path, filename)
+    kps, des = pickle.load(open(dest_path, "rb"))
+    return kps, des
+
+def delete_img_lf(dest, id):
+    dest_folder_name = id[0:3]
+    filename = id + '.dat'
+    dest_folder_path = os.path.join(dest, dest_folder_name)
+    dest_path = os.path.join(dest_folder_path, filename)
+    if os.path.exists(dest_path):
+        os.remove(dest_path)
+        if not os.listdir(dest_folder_path):
+            os.rmdir(dest_folder_path)
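
Note that the new pickle_img_lf keeps only the np.float32 keypoint coordinates and drops the size/angle/octave fields the removed pickle_keypoints preserved; that is enough for homography-based verification but not for rebuilding full cv2.KeyPoint objects. A round-trip sketch under an assumed temporary folder (ORB stands in for the project's BEBLID pipeline):

    import tempfile
    import cv2
    import numpy as np
    import LFUtilities

    lf_root = tempfile.mkdtemp()
    doc_id = '54b3298aed5082b093939ea1-IMG881380'

    img = np.random.randint(0, 255, (480, 640), dtype=np.uint8)
    orb = cv2.ORB_create()
    kp, des = orb.detectAndCompute(img, None)

    LFUtilities.pickle_img_lf(lf_root, doc_id, kp, des)
    kps, des2 = LFUtilities.unpickle_img_lf(lf_root, doc_id)
    print(kps.shape)  # (n_keypoints, 2): x, y only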

src/Searcher.py
View File

@@ -9,11 +9,11 @@ import ImageRecognitionSettings as settings
 from BEBLIDRescorer import BEBLIDRescorer
 import SearcherParameters

-from FAISSSearchEngine import FAISSSearchEngine
+from FAISSSearchEngine2 import FAISSSearchEngine
 import FeatureExtractor as fe
 import BEBLIDExtractorQ as lfQ
 import BEBLIDExtractorD as lfD
-from line_profiler_pycharm import profile
+import logging

 class Searcher:
@@ -25,31 +25,34 @@ class Searcher:
         self.search_engine = FAISSSearchEngine()
         self.rescorer = BEBLIDRescorer()

+    def get_indexed_ids(self):
+        return self.search_engine.get_indexed_ids()

     def get_id(self, idx):
         return self.search_engine.get_id(idx)

+    def exists(self, doc_id):
+        return self.search_engine.exists(doc_id)

-    def add(self, img_file, id):
-        self.save(True)
+    def add(self, img_file, doc_id):
         desc = fe.extract(img_file)
-        self.search_engine.add(desc, id)
-        lf = lfD.extract(img_file)
-        self.rescorer.add(lf, id)
+        self.search_engine.add(desc, doc_id)
+        kp, des = lfD.extract(img_file)
+        self.rescorer.add(doc_id, kp, des)
         #orb = lf.extract(img_file)
-        self.save()
-        print('added ' + id)
-
-    def remove(self, id):
-        self.save(True)
-        self.search_engine.remove(id)
-        #self.rescorer.remove(idx)
-        self.save()
-        print('removed ' + id)
+        self.save(True)
+        logging.info('added ' + doc_id)

-    def search_by_id(self, query_id, k=10, search_deep_level=1):
+    def remove(self, doc_id):
+        self.search_engine.remove(doc_id)
+        self.rescorer.remove(doc_id)
+        self.save(True)
+        logging.info('removed ' + doc_id)
+
+    def search_by_id(self, query_id, k=10, search_threshold=0.25, search_deep_level=1):
         kq = k
         if search_deep_level > 0:
             kq = SearcherParameters.SEARCH_DEEP_K[search_deep_level]
@@ -57,10 +60,10 @@ class Searcher:
         if search_deep_level > 0:
             res_lf = self.rescorer.rescore_by_id(query_id, res)
             res = res_lf if res_lf else res[:k]
+        res = [result for result in res if result[1] >= search_threshold]
         return res

-    @profile
-    def search_by_img(self, query_img, k=10, search_deep_level=1):
+    def search_by_img(self, query_img, k=10, search_threshold=0.25, search_deep_level=1):
         kq = k
         if search_deep_level:
             kq = SearcherParameters.SEARCH_DEEP_K[search_deep_level]
@@ -71,7 +74,7 @@ class Searcher:
             res_lf = self.rescorer.rescore_by_img(query_lf, res)
             #res = res_lf if res_lf else res[:k]
             res = res_lf if res_lf else res[:k]
-        res = [result for result in res if result[1] >= SearcherParameters.GEM_THRESHOLD]
+        res = [result for result in res if result[1] >= search_threshold]
         return res

     def save(self, is_backup=False):

src/SearcherParameters.py
View File

@@ -1,2 +1 @@
 SEARCH_DEEP_K = [0, 1000, 2000, 5000, 10000, 30000, 100000]
-GEM_THRESHOLD = 0.25

View File

@@ -1,47 +1,73 @@
-from flask import Flask, request, redirect, url_for, flash, render_template, send_from_directory
-from random import randint
-import cv2
-import io
-import numpy as np
-import json
-import urllib
-from BeniCulturaliSearchEngine import BeniCulturaliSearchEngine
-import ImageRecognitionSettings as settings
-import uuid
 import requests
+import base64

-import os, os.path
-
-BASE_URL = 'http://bilioso.isti.cnr.it:8190/bcir/'
+BASE_URL = 'http://bilioso.isti.cnr.it:8290/bcir/'

-payload = {'id': '54b019e5ed5082b0938b14c4-IMG357781'}
+# ------Get indexed IDS------
+payload = {'id': '54b3298aed5082b093939ea1-IMG881380', 'k': 10, 'threshold': 0.25, 'searchDeepLevel': 1}
+r = requests.get(BASE_URL + 'getIds')
+print(r.json())
+
+# ------Check if an ID exists------
+payload = {'id': '54b3298aed5082b093939ea1-IMG881380'}
+r = requests.get(BASE_URL + 'exists', params=payload)
+print(r.json())
+
+# ------Searching by ID------
+print('Searching by ID')
+payload = {'id': '54b3298aed5082b093939ea1-IMG881380', 'k': 10, 'threshold': 0.25, 'searchDeepLevel': 1}
 r = requests.get(BASE_URL + 'searchById', params=payload)
 print(r.json())

-files = {'image': ('query', open('/media/Data/data/test/tpc_test/img/data_test/54b019e5ed5082b0938b14c4-IMG357781.jpg', 'rb'))}
-r = requests.post(BASE_URL + 'searchByImg', files=files)
+# ------Searching by IMG------
+print('Searching by IMG')
+files = {'image': ('query', open('/media/data2/data/swoads/workdir/img/ImmaginiComparazioni/ACC130111[1].jpg', 'rb'))}
+data = {'k': 10, 'threshold': 0.25, 'searchDeepLevel': 1}
+r = requests.post(BASE_URL + 'searchByImg', data=data, files=files)
 print(r.json())

-payload = {'url': 'http://bilioso.isti.cnr.it:8190/bcir/54b019e5ed5082b0938b14c4-IMG357781.jpg'}
+# ------Searching by IMG Base64------
+print('Searching by IMG Base64')
+with open('/media/data2/data/swoads/workdir/img/ImmaginiComparazioni/ACC130111[1].jpg', "rb") as img_file:
+    b64_encoding = base64.b64encode(img_file.read())
+data = {'image': b64_encoding, 'k': 10, 'threshold': 0.25, 'searchDeepLevel': 1}
+r = requests.post(BASE_URL + 'searchByImgB64', data=data, files=files)
+print(r.json())
+
+# ------Searching by URL------
+print('Searching by URL')
+payload = {'url': 'http://bilioso.isti.cnr.it:8290/bcir/54b3298aed5082b093939ea1-IMG881380.jpg', 'k': 10,
+           'threshold': 0.25, 'searchDeepLevel': 1}
 r = requests.get(BASE_URL + 'searchByURL', params=payload)
 print(r.json())

-files = {'image': ('prova', open('/media/Data/data/test/tpc_test/img/data_test/54b019e5ed5082b0938b14c4-IMG357781.jpg', 'rb'))}
-#files = {'image': ('prova', open('/media/Data/data/beni_culturali/deploy/dataset_ids.bak', 'rb'))}
+# ------Adding newIMG------
+print('Adding newIMG')
+files = {'image': (
+    'newIMG', open('/media/ssd2/data/swoads/workdir/img/ImmaginiComparazioni/ACC97468-149[3].jpg', 'rb'))}
 r = requests.post(BASE_URL + 'addImg', files=files)
 s = r.json()
 print(r.json())

-files = {'image': ('query', open('/media/Data/data/test/tpc_test/img/data_test/54b019e5ed5082b0938b14c4-IMG357781.jpg', 'rb'))}
-r = requests.post(BASE_URL + 'searchByImg', files=files)
+# ------Searching by newIMG------
+print('Searching by newIMG')
+files = {'image': (
+    'query', open('/media/ssd2/data/swoads/workdir/img/ImmaginiComparazioni/ACC97468-149[3].jpg', 'rb'))}
+data = {'k': 10, 'threshold': 0.25, 'searchDeepLevel': 1}
+r = requests.post(BASE_URL + 'searchByImg', data=data, files=files)
 print(r.json())

-payload = {'id': 'prova'}
+# ------Removing newIMG------
+print('Removing newIMG')
+payload = {'id': 'newIMG'}
 r = requests.get(BASE_URL + 'rmImg', params=payload)
 print(r.json())

-files = {'image': ('query', open('/media/Data/data/test/tpc_test/img/data_test/54b019e5ed5082b0938b14c4-IMG357781.jpg', 'rb'))}
-r = requests.post(BASE_URL + 'searchByImg', files=files)
+# ------Searching by newIMG now removed from the index------
+print('Searching by newIMG now removed from the index')
+files = {'image': (
+    'query', open('/media/ssd2/data/swoads/workdir/img/ImmaginiComparazioni/ACC97468-149[3].jpg', 'rb'))}
+data = {'k': 10, 'threshold': 0.25, 'searchDeepLevel': 1}
+r = requests.post(BASE_URL + 'searchByImg', data=data, files=files)
 print(r.json())