added FAISS Searcher

This commit is contained in:
Paolo Bolettieri 2022-07-19 18:39:50 +02:00
parent de8bc9a70c
commit aeafcfe219
10 changed files with 457 additions and 267 deletions

View File

@@ -5,6 +5,7 @@ import LFUtilities
import BEBLIDParameters
import ImageRecognitionSettings as settings
from line_profiler_pycharm import profile
import logging
class BEBLIDRescorer:
@@ -16,7 +17,8 @@ class BEBLIDRescorer:
def rescore_by_id(self, query_id, resultset):
#query_idx = self.ids.index(query_id)
query = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, query_id)
query = LFUtilities.unpickle_img_lf(settings.DATASET_LF_FOLDER, query_id)
return self.rescore_by_img(query, resultset)
@profile
@@ -47,13 +49,13 @@ class BEBLIDRescorer:
if (inliers >= BEBLIDParameters.MIN_INLIERS and inliers > max_inliers):
max_inliers = inliers
res.append((data_id, round(inliers/len(good), 3)))
print(data_id)
print(f'candidate n. {counter}')
logging.info(data_id)
logging.info(f'candidate n. {counter}')
#to get just the first candidate
break
except Exception as e:
print('rescore error evaluating ' + data_id)
print(e)
logging.error('rescore error evaluating ' + data_id)
logging.error(e)
pass
counter += 1
@@ -61,16 +63,18 @@ class BEBLIDRescorer:
res.sort(key=lambda result: result[1], reverse=True)
return res
def add(self, kp, des, id):
@staticmethod
def add(doc_id, kp, des):
# LFUtilities.save_img_lf(dest, filename, kp, des)
# LFUtilities.savez_img_lf(dest, filename, kp, des)
LFUtilities.pickle_img_lf(settings.DATASET_LF_FOLDER, id, kp, des)
LFUtilities.pickle_img_lf(settings.DATASET_LF_FOLDER, doc_id, kp, des)
def remove(self, idx):
self.descs = np.delete(self.descs, idx, axis=0)
@staticmethod
def remove(doc_id):
LFUtilities.delete_img_lf(settings.DATASET_LF_FOLDER, doc_id)
def save(self, is_backup=False):
lf_save_file = settings.DATASET_LF
"""lf_save_file = settings.DATASET_LF
ids_file = settings.DATASET_IDS_LF
if lf_save_file != "None":
if is_backup:
@@ -78,4 +82,4 @@ class BEBLIDRescorer:
ids_file += '.bak'
LFUtilities.save(lf_save_file, self.lf)
np.savetxt(ids_file, self.ids, fmt='%s')
np.savetxt(ids_file, self.ids, fmt='%s')"""

View File

@@ -1,11 +1,16 @@
import numpy as np
import ImageRecognitionSettings as settings
import faiss
from shutil import copyfile
import logging
import threading
import glob
import fileinput
import os
class FAISSSearchEngine:
def __init__(self):
#self.dataset = h5py.File(settings.dataset_file, 'r')['rmac'][...]
@@ -16,32 +21,58 @@ class FAISSSearchEngine:
#self.descs = (self.desc1 + self.desc2) / 2
#self.descs /= np.linalg.norm(self.descs, axis=1, keepdims=True)
self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
#self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
self.ids = np.loadtxt(fileinput.input(sorted(glob.glob(settings.DATASET_GF_FOLDER + '/*.ids'))))
self.init_faiss_index()
self.lock = threading.Lock()
def init_faiss_index(self):
# create an index with inner product similarity
dim = 2048 # dimensionality of the features
metric = faiss.METRIC_INNER_PRODUCT
self.index = faiss.index_factory(dim, 'Flat', metric)
# add the vectors to the index
self.index.add(self.descs) # my_database is a numpy array of shape N x dim, where N is the number of vectors to index
for desc_file in sorted(glob.glob(settings.DATASET_GF_FOLDER + '/*.npy')):
# add the vectors to the index
tmp = np.load(desc_file)
self.index.add(tmp) # tmp is a numpy array of shape N x dim, where N is the number of vectors to index
def get_id(self, idx):
return self.ids[idx]
def get_indexed_ids(self):
return self.ids
def add(self, desc, id):
self.ids.append(id)
self.descs = np.vstack((self.descs, desc))
self.save()
def remove(self, id):
idx = self.ids.index(id)
del self.ids[idx]
self.descs = np.delete(self.descs, idx, axis=0)
def add(self, desc, doc_id):
try:
self.lock.acquire()
# self.ids.append(doc_id)
# self.descs = np.vstack((self.descs, desc))
np.savetxt(os.path.join(settings.DATASET_GF_FOLDER, doc_id + '.ids'), self.ids, fmt='%s')
np.save(os.path.join(settings.DATASET_GF_FOLDER, doc_id + '.npy'), desc)
finally:
self.lock.release()
def remove(self, doc_id):
try:
self.lock.acquire()
id_filename = os.path.join(settings.DATASET_GF_FOLDER, doc_id + '.ids')
numpy_filename = os.path.join(settings.DATASET_GF_FOLDER, doc_id + '.npy')
if os.path.exists(id_filename):
os.remove(id_filename)
if os.path.exists(numpy_filename):
os.remove(numpy_filename)
else:
idx = self.ids.index(doc_id)
del self.ids[idx]
self.descs = np.delete(self.descs, idx, axis=0)
descs_file = settings.DATASET
ids_file = settings.DATASET_IDS
np.save(descs_file, self.descs)
np.savetxt(ids_file, self.ids, fmt='%s')
finally:
self.lock.release()
def search_by_id(self, query_id, k=10):
query_idx = self.ids.index(query_id)
@@ -49,9 +80,9 @@ class FAISSSearchEngine:
def search_by_img(self, query, k=10):
print('----------query features-------')
print(query)
logging.info(query)
queries = np.reshape(query, (-1, 2048))
print(queries)
logging.debug(queries)
scores, indexes = self.index.search(queries, k)
#dot_product = np.dot(self.descs, query)
#idx = dot_product.argsort()[::-1][:k]
@@ -63,10 +94,21 @@ class FAISSSearchEngine:
def save(self, is_backup=False):
descs_file = settings.DATASET
ids_file = settings.DATASET_IDS
try:
self.lock.acquire()
"""if is_backup:
descs_file_backup = descs_file +'.bak'
ids_file_backup = ids_file + '.bak'
copyfile(descs_file, descs_file_backup)
copyfile(ids_file, ids_file_backup)
logging.info('Backup features created')
if is_backup:
descs_file += '.bak'
ids_file += '.bak'
np.save(descs_file, self.descs)
np.savetxt(ids_file, self.ids, fmt='%s')
np.save(descs_file, self.descs)
np.savetxt(ids_file, self.ids, fmt='%s')
logging.info('Storing features done')"""
self.init_faiss_index()
except Exception as e:
logging.error('Error, unable to store features')
logging.error(e)
finally:
self.lock.release()
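
For reference, a minimal self-contained sketch of the FAISS pattern used above: index_factory with a 'Flat' index and METRIC_INNER_PRODUCT, batch add, then search. The 2048-dim assumption mirrors init_faiss_index; the random data is purely illustrative.

import faiss
import numpy as np

dim = 2048                                   # feature dimensionality, as in init_faiss_index
index = faiss.index_factory(dim, 'Flat', faiss.METRIC_INNER_PRODUCT)

# with L2-normalized vectors, inner product is equivalent to cosine similarity
descs = np.random.rand(1000, dim).astype('float32')
descs /= np.linalg.norm(descs, axis=1, keepdims=True)
index.add(descs)                             # expects a float32 array of shape N x dim

queries = descs[:1]                          # queries must also be shaped M x dim
scores, indexes = index.search(queries, 10)  # top-10 neighbours per query row
print(indexes[0], scores[0])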

src/FAISSSearchEngine2.py Normal file (120 lines added)
View File

@@ -0,0 +1,120 @@
import numpy as np
import ImageRecognitionSettings as settings
import faiss
from shutil import copyfile
import logging
import threading
import glob
import fileinput
import os
import GFUtilities
class FAISSSearchEngine:
def __init__(self):
#self.dataset = h5py.File(settings.dataset_file, 'r')['rmac'][...]
#np.save('/media/Data/data/beni_culturali/deploy/dataset', self.dataset)
#self.descs = np.load(settings.DATASET)
#self.desc1 = np.load(settings.DATASET1)
#self.desc2 = np.load(settings.DATASET2)
#self.descs = (self.desc1 + self.desc2) / 2
#self.descs /= np.linalg.norm(self.descs, axis=1, keepdims=True)
#self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
#self.ids = np.loadtxt(fileinput.input(sorted(glob.glob(settings.DATASET_GF_FOLDER + '/*.ids'))))
self.lock = threading.Lock()
self.init_faiss_index()
def init_faiss_index(self):
# create an index with inner product similarity
dim = 2048 # dimensionality of the features
metric = faiss.METRIC_INNER_PRODUCT
self.index = faiss.index_factory(dim, 'Flat', metric)
self.descs = np.load(settings.DATASET)
self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
self.index.add(self.descs)
for desc_file in sorted(glob.glob(settings.DATASET_GF_FOLDER + '/**/*.dat', recursive=True)):
# add the vectors to the index
img_id = os.path.splitext(os.path.basename(desc_file))[0]
img_desc = GFUtilities.unpickle_img_gf(settings.DATASET_GF_FOLDER, img_id)
self.ids.append(img_id)
self.index.add(img_desc) # img_desc is a numpy array of shape N x dim, where N is the number of vectors to index
def get_id(self, idx):
return self.ids[idx]
def get_indexed_ids(self):
return self.ids
def exists(self, doc_id):
return doc_id in self.ids
def add(self, desc, doc_id):
try:
self.lock.acquire()
# self.ids.append(doc_id)
# self.descs = np.vstack((self.descs, desc))
GFUtilities.pickle_img_gf(settings.DATASET_GF_FOLDER, doc_id, desc)
finally:
self.lock.release()
def remove(self, doc_id):
try:
self.lock.acquire()
if not GFUtilities.delete_img_gf( settings.DATASET_GF_FOLDER, doc_id):
try:
idx = self.ids.index(doc_id)
del self.ids[idx]
self.descs = np.delete(self.descs, idx, axis=0)
descs_file = settings.DATASET
ids_file = settings.DATASET_IDS
np.save(descs_file, self.descs)
np.savetxt(ids_file, self.ids, fmt='%s')
except ValueError as e:
logging.error('Error, unable to retrieve and delete ' + doc_id)
logging.error(e)
finally:
self.lock.release()
def search_by_id(self, query_id, k=10):
query_idx = self.ids.index(query_id)
return self.search_by_img(self.descs[query_idx], k)
def search_by_img(self, query, k=10):
print('----------query features-------')
logging.info(query)
queries = np.reshape(query, (-1, 2048))
logging.debug(queries)
scores, indexes = self.index.search(queries, k)
#dot_product = np.dot(self.descs, query)
#idx = dot_product.argsort()[::-1][:k]
res = []
for (i,j) in zip(indexes[0], scores[0]):
res.append((self.ids[i], round(float(j), 3)))
return res
def save(self, is_backup=False):
descs_file = settings.DATASET
ids_file = settings.DATASET_IDS
try:
self.lock.acquire()
"""if is_backup:
descs_file_backup = descs_file +'.bak'
ids_file_backup = ids_file + '.bak'
copyfile(descs_file, descs_file_backup)
copyfile(ids_file, ids_file_backup)
logging.info('Backup features created')
np.save(descs_file, self.descs)
np.savetxt(ids_file, self.ids, fmt='%s')
logging.info('Storing features done')"""
self.init_faiss_index()
except Exception as e:
logging.error('Error, unable to store features')
logging.error(e)
finally:
self.lock.release()
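
The engine above persists each newly added descriptor as a per-document pickle under DATASET_GF_FOLDER and rebuilds the whole index from the base dataset plus those pickles whenever save() runs. A hedged usage sketch of that lifecycle (the document id and descriptor are illustrative, and settings must already point at existing data folders):

import numpy as np
from FAISSSearchEngine2 import FAISSSearchEngine

engine = FAISSSearchEngine()

desc = np.random.rand(1, 2048).astype('float32')  # stand-in for an extracted global feature
engine.add(desc, 'example-doc-id')                # pickled under DATASET_GF_FOLDER/exa/
engine.save()                                     # re-runs init_faiss_index, picking up the new pickle

print(engine.exists('example-doc-id'))            # True once re-indexed
print(engine.search_by_img(desc, k=5))            # the new vector should rank first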

src/GFUtilities.py Normal file (36 lines added)
View File

@@ -0,0 +1,36 @@
import cv2
import numpy as np
import pickle as pickle
import os
def pickle_img_gf(dest, id, descriptors):
dest_folder_name = id[0:3]
filename = id + '.dat'
dest_folder_path = os.path.join(dest, dest_folder_name)
if (not os.path.exists(dest_folder_path)):
os.mkdir(dest_folder_path)
dest_path = os.path.join(dest_folder_path, filename)
pickle.dump(descriptors, open(dest_path, 'wb'))
def unpickle_img_gf(dest, id):
dest_folder_name = id[0:3]
filename = id + '.dat'
dest_folder_path = os.path.join(dest, dest_folder_name)
dest_path = os.path.join(dest_folder_path, filename)
des = pickle.load((open(dest_path, "rb")))
return des
def delete_img_gf(dest, id):
dest_folder_name = id[0:3]
filename = id + '.dat'
dest_folder_path = os.path.join(dest, dest_folder_name)
dest_path = os.path.join(dest_folder_path, filename)
if os.path.exists(dest_path):
os.remove(dest_path)
if not os.listdir(dest_folder_path):
os.rmdir(dest_folder_path)
return True
return False
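
GFUtilities shards per-image descriptor files into sub-folders named after the first three characters of the id, mirroring LFUtilities. A small round-trip sketch (the folder and id are illustrative):

import os
import numpy as np
import GFUtilities

os.makedirs('/tmp/gf', exist_ok=True)                       # pickle_img_gf only creates the shard folder
desc = np.random.rand(1, 2048).astype('float32')
GFUtilities.pickle_img_gf('/tmp/gf', '54b-example', desc)   # written to /tmp/gf/54b/54b-example.dat
restored = GFUtilities.unpickle_img_gf('/tmp/gf', '54b-example')
assert np.array_equal(desc, restored)
GFUtilities.delete_img_gf('/tmp/gf', '54b-example')         # also removes /tmp/gf/54b if left empty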

View File

@@ -15,6 +15,8 @@ import os, os.path
import tornado.wsgi
import tornado.httpserver
import argparse
import logging
import base64
app = Flask(__name__)
@@ -65,13 +67,9 @@ def get_res(results, query_url=None):
@app.route('/bcir/searchById')
def search_by_id():
id = request.args.get('id')
try:
searchDeepLevel = int(request.args.get("searchDeepLevel"))
except Exception as e:
print(e)
print('Setting default deep level 1')
search_deep_level = 1
results = searcher.search_by_id(id, settings.k, searchDeepLevel)
k, threshold, search_deep_level = get_parameters(request.args.get("k"), request.args.get("threshold"),
request.args.get("searchDeepLevel"))
results = searcher.search_by_id(id, k, threshold, search_deep_level)
query_url = None
if request.args.get("tohtml") is not None:
query_url = id + ".jpg"
@@ -86,12 +84,8 @@ def search_by_img():
file = request.files['image']
img_file = post_to_file(file)
try:
search_deep_level = int(request.form.get("searchDeepLevel"))
except Exception as e:
print(e)
print('Setting default deep level 1')
search_deep_level = 1
k, threshold, search_deep_level = get_parameters(request.form.get("k"), request.form.get("threshold"), request.form.get("searchDeepLevel"))
#dest_file = uuid.uuid4().hex + ".jpg"
#dest_path = settings.logs + "/" + dest_file
@@ -99,24 +93,48 @@ def search_by_img():
#files = {'image': (dest_file, open(dest_path, 'rb'))}
#r = requests.post(settings.rmac_service, files=files)
#results = search_engine.search_by_img(np.array(r.json()), settings.k)
results = searcher.search_by_img(img_file, settings.k, search_deep_level)
results = searcher.search_by_img(img_file, k, threshold, search_deep_level)
query_url = None
if request.form.get("tohtml") is not None:
query_url = ""
return get_res(results, query_url)
@app.route('/bcir/searchByImgB64', methods=['POST'])
def search_by_img_base64():
image = request.form.get('image')
if image:
img_file = base64_to_file(image)
else:
flash('No img sent')
return redirect(request.url)
k, threshold, search_deep_level = get_parameters(request.form.get("k"), request.form.get("threshold"), request.form.get("searchDeepLevel"))
results = searcher.search_by_img(img_file, k, threshold, search_deep_level)
query_url = None
if request.form.get("tohtml") is not None:
query_url = ""
return get_res(results, query_url)
def base64_to_file(image_base64):
ext = ".png"
dest_file = uuid.uuid4().hex + ext
dest_path = settings.logs + "/" + dest_file
with open(dest_path, "wb") as image_file:
byte_content = base64.b64decode(image_base64)
image_file.write(byte_content)
return dest_path
@app.route('/bcir/searchByURL')
def search_by_url():
url = request.args.get('url')
try:
search_deep_level = int(request.args.get("searchDeepLevel"))
except Exception as e:
print(e)
print('Setting default deep level 1')
search_deep_level = 1
img_file = url_to_file(url)
k, threshold, search_deep_level = get_parameters(request.args.get("k"), request.args.get("threshold"),
request.args.get("searchDeepLevel"))
# query = cv2.imdecode(image, cv2.IMREAD_COLOR)
# dest_file = uuid.uuid4().hex + ".jpg"
# dest_path = settings.logs + "/" + dest_file
@@ -124,12 +142,49 @@ def search_by_url():
# files = {'image': open(dest_path, 'rb')}
# r = requests.post(settings.rmac_service, files=files)
# results = search_engine.search_by_img(np.array(r.json()), settings.k)
results = searcher.search_by_img(img_file, settings.k, search_deep_level)
results = searcher.search_by_img(img_file, k, threshold, search_deep_level)
query_url = None
if request.args.get("tohtml") is not None:
query_url = url
return get_res(results, query_url)
def get_parameters(k, threshold, search_deep_level):
try:
threshold = float(threshold)
except Exception as e:
logging.error(e)
threshold = settings.SEARCH_THRESHOLD
logging.error('Setting default threshold value to ' + str(threshold))
try:
k = int(k)
except Exception as e:
logging.error(e)
k = settings.k
logging.error('Setting default k value to ' + str(k))
try:
search_deep_level = int(search_deep_level)
except Exception as e:
logging.error(e)
search_deep_level = settings.SEARCH_DEEP_LEVEL
logging.error('Setting default deep level to ' + str(search_deep_level))
return k, threshold, search_deep_level
@app.route('/bcir/getIds')
def get_indexed_ids():
json_ids = json.dumps(searcher.get_indexed_ids())
return json_ids
@app.route('/bcir/exists')
def exists():
doc_id = request.args.get('id')
return json.dumps(searcher.exists(doc_id))
@app.route('/bcir/addImg', methods=['POST'])
def add_img():
if 'image' not in request.files:
@@ -137,42 +192,46 @@ def add_img():
return redirect(request.url)
try:
file = request.files['image']
id = request.files['image'].filename
id, _ = os.path.splitext(id)
doc_id = request.files['image'].filename
doc_id, _ = os.path.splitext(doc_id)
img_file = post_to_file(file)
searcher.add(img_file, id)
searcher.add(img_file, doc_id)
json_res = json.dumps("done")
return json_res
except:
except Exception as e:
logging.error('Unable to add ' + doc_id + ' to the index')
logging.error(e)
abort(500)
@app.route('/bcir/rmImg')
def remove_img():
try:
id = request.args.get('id')
searcher.remove(id)
doc_id = request.args.get('id')
searcher.remove(doc_id)
json_res = json.dumps("done")
return json_res
except:
except Exception as e:
logging.error('Unable to remove ' + doc_id + ' from the index')
logging.error(e)
abort(500)
@app.route('/bcir/<path:filename>')
def download_file(filename):
print(filename)
logging.debug(filename)
values = filename.split('/')
print(values)
logging.debug(values)
return send_from_directory(settings.img_folder, filename, as_attachment=False)
@app.route('/bcir/queries/<path:filename>')
def queries(filename):
print(filename)
logging.debug(filename)
values = filename.split('/')
folder = values[0]
name = values[1]
print(folder)
print(name)
logging.debug(folder)
logging.debug(name)
return send_from_directory(settings.working_folder + '/' + folder, name, as_attachment=False)

View File

@@ -2,7 +2,7 @@ import json
import os
def load_setting(conf_file):
global port, feature_extractor, k, img_folder, logs, working_folder, data_folder, DATASET, DATASET_LF_FOLDER, DATASET_IDS, DB_LF
global port, feature_extractor, k, img_folder, logs, working_folder, data_folder, DATASET_LF_FOLDER, DATASET_GF_FOLDER, DATASET, DATASET_IDS, DB_LF, SEARCH_THRESHOLD, SEARCH_DEEP_LEVEL
with open(conf_file) as settings_file:
@@ -19,10 +19,14 @@ def load_setting(conf_file):
if not os.path.isdir(data_folder):
os.mkdir(data_folder)
DATASET = os.path.join(data_folder, 'dataset.npy')
DATASET_LF_FOLDER = os.path.join(data_folder, 'lf')
DATASET_IDS = os.path.join(data_folder, 'dataset.ids')
DATASET_GF_FOLDER = os.path.join(data_folder, 'gf')
DATASET = os.path.join(DATASET_GF_FOLDER, 'dataset.npy')
DATASET_IDS = os.path.join(DATASET_GF_FOLDER, 'dataset.ids')
DB_LF = os.path.join(data_folder, 'sqlite_lf/lf.db')
SEARCH_THRESHOLD = settings['search_th']
SEARCH_DEEP_LEVEL = settings['search_dl']
img_folder = settings['img_folder']
logs = os.path.join(working_folder, settings['log_folder'])

View File

@@ -1,151 +1,48 @@
import cv2
import numpy as np
import pickle as pickle
import os
from line_profiler_pycharm import profile
def resize(max_side, img):
if img.shape[1] > img.shape[0]:
r = max_side / img.shape[1]
dim = (max_side, int(img.shape[0] * r))
else:
r = max_side / img.shape[0]
dim = (int(img.shape[1] * r), max_side)
# perform the actual resizing of the image and show it
resized = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
return resized
def pickle_keypoints(keypoints, descriptors):
i = 0
temp_array = []
for point in keypoints:
temp = (point.pt, point.size, point.angle, point.response, point.octave,
point.class_id, descriptors[i])
i += 1
temp_array.append(temp)
return temp_array
def serialize_object(obj):
return pickle.dumps(obj)
def deserialize_object(serialized_obj):
return pickle.loads(serialized_obj)
def serializeV1(keypoints, descriptors):
temp_array = []
for point in keypoints:
kp = [point.pt, point.size, point.angle, point.response, point.octave, point.class_id]
temp_array.append(kp)
return temp_array, descriptors
def serialize(keypoints, descriptors):
pts = np.float32([keypoints[i].pt for i in range(0, len(keypoints))])
return pts, descriptors
def deserialize(ser_kp, ser_des):
keypoints = []
#data_list = array.tolist()
for point in ser_kp:
temp_feature = cv2.KeyPoint(x=point[0][0],y=point[0][1], size=point[1], angle=point[2], response=point[3], octave=point[4], class_id=point[5])
keypoints.append(temp_feature)
return keypoints, ser_des
def deserializev1(ser_kp, ser_des):
keypoints = []
#data_list = array.tolist()
for point in ser_kp:
temp_feature = cv2.KeyPoint(x=point[0][0],y=point[0][1], size=point[1], angle=point[2], response=point[3], octave=point[4], class_id=point[5])
keypoints.append(temp_feature)
return keypoints, ser_des
def pickle_img_lf(dest, id, keypoints, descriptors):
dest_folder_name = id[0:3]
filename = id + '.dat'
dest_folder_path = os.path.join(dest, dest_folder_name)
if (not os.path.exists(dest_folder_path)):
os.mkdir(dest_folder_path)
dest_path = os.path.join(dest_folder_path, filename)
kps, des = serialize(keypoints, descriptors)
pickle.dump([kps, des], open(dest_path, 'wb'))
def delete_img_lf(dest, id):
dest_folder_name = id[0:3]
filename = id + '.dat'
dest_folder_path = os.path.join(dest, dest_folder_name)
dest_path = os.path.join(dest_folder_path, filename)
if os.path.exists(dest_path):
os.remove(dest_path)
@profile
def unpickle_img_lf(lf_path, id):
dest_folder_name = id[0:3]
filename = id + '.dat'
dest_folder_path = os.path.join(lf_path, dest_folder_name)
dest_path = os.path.join(dest_folder_path, filename)
kps, des = pickle.load((open(dest_path, "rb")))
return kps, des
@profile
def loadz_img_lf(lf_path, id):
dest_folder_name = id[0:3]
filename = id + '.dat.npz'
dest_folder_path = os.path.join(lf_path, dest_folder_name)
dest_path = os.path.join(dest_folder_path, filename)
data = np.load(dest_path, allow_pickle=False)
kps = data.f.kps
des = data.f.des
#kps = data['kps']
#des = data['des']
#kp, desc = deserialize(data['kps'], data['des'])
return kps, des
def savez_img_lf(dest, id, keypoints, descriptors):
dest_folder_name = id[0:3]
filename = id + '.dat'
dest_folder_path = os.path.join(dest, dest_folder_name)
if (not os.path.exists(dest_folder_path)):
os.mkdir(dest_folder_path)
dest_path = os.path.join(dest_folder_path, filename)
kps, des = serialize(keypoints, descriptors)
#np.savez(dest_path, data)
np.savez(dest_path, kps=kps, des=des)
@profile
def loadz_img_lf(lf_path, id):
dest_folder_name = id[0:3]
filename = id + '.dat.npz'
dest_folder_path = os.path.join(lf_path, dest_folder_name)
dest_path = os.path.join(dest_folder_path, filename)
data = np.load(dest_path, allow_pickle=False)
kps = data.f.kps
des = data.f.des
#kps = data['kps']
#des = data['des']
#kp, desc = deserialize(data['kps'], data['des'])
return kps, des
def unpickle_keypoints(array):
keypoints = []
descriptors = []
data_list = array.tolist()
for point in array:
temp_feature = cv2.KeyPoint(x=point[0][0],y=point[0][1], size=point[1], angle=point[2], response=point[3], octave=point[4], class_id=point[5])
temp_descriptor = point[6]
keypoints.append(temp_feature)
descriptors.append(temp_descriptor)
return keypoints, np.array(descriptors)
import cv2
import numpy as np
import pickle as pickle
import os
def resize(max_side, img):
if img.shape[1] > img.shape[0]:
r = max_side / img.shape[1]
dim = (max_side, int(img.shape[0] * r))
else:
r = max_side / img.shape[0]
dim = (int(img.shape[1] * r), max_side)
# perform the actual resizing of the image and show it
resized = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
return resized
def pickle_img_lf(dest, id, keypoints, descriptors):
dest_folder_name = id[0:3]
filename = id + '.dat'
dest_folder_path = os.path.join(dest, dest_folder_name)
if (not os.path.exists(dest_folder_path)):
os.mkdir(dest_folder_path)
dest_path = os.path.join(dest_folder_path, filename)
kps = np.float32([keypoints[i].pt for i in range(0, len(keypoints))])
pickle.dump([kps, descriptors], open(dest_path, 'wb'))
def unpickle_img_lf(lf_path, id):
dest_folder_name = id[0:3]
filename = id + '.dat'
dest_folder_path = os.path.join(lf_path, dest_folder_name)
dest_path = os.path.join(dest_folder_path, filename)
kps, des = pickle.load((open(dest_path, "rb")))
return kps, des
def delete_img_lf(dest, id):
dest_folder_name = id[0:3]
filename = id + '.dat'
dest_folder_path = os.path.join(dest, dest_folder_name)
dest_path = os.path.join(dest_folder_path, filename)
if os.path.exists(dest_path):
os.remove(dest_path)
if not os.listdir(dest_folder_path):
os.rmdir(dest_folder_path)
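
Note that the rewritten pickle_img_lf keeps only the keypoint coordinates (an N x 2 float32 array) alongside the descriptors, rather than full cv2.KeyPoint objects. A minimal round-trip sketch under that assumption (the image path and id are illustrative):

import os
import cv2
import LFUtilities

img = cv2.imread('example.jpg', cv2.IMREAD_GRAYSCALE)       # illustrative input image
detector = cv2.ORB_create()                                  # any detector producing keypoints + descriptors
keypoints, descriptors = detector.detectAndCompute(img, None)

os.makedirs('/tmp/lf', exist_ok=True)
LFUtilities.pickle_img_lf('/tmp/lf', 'exa-mple-id', keypoints, descriptors)
kps, des = LFUtilities.unpickle_img_lf('/tmp/lf', 'exa-mple-id')
print(kps.shape, des.shape)                                  # kps is N x 2 coordinates, not cv2.KeyPoint objects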

View File

@@ -9,11 +9,11 @@ import ImageRecognitionSettings as settings
from BEBLIDRescorer import BEBLIDRescorer
import SearcherParameters
from FAISSSearchEngine import FAISSSearchEngine
from FAISSSearchEngine2 import FAISSSearchEngine
import FeatureExtractor as fe
import BEBLIDExtractorQ as lfQ
import BEBLIDExtractorD as lfD
from line_profiler_pycharm import profile
import logging
class Searcher:
@@ -25,31 +25,34 @@ class Searcher:
self.search_engine = FAISSSearchEngine()
self.rescorer = BEBLIDRescorer()
def get_indexed_ids(self):
return self.search_engine.get_indexed_ids()
def get_id(self, idx):
return self.search_engine.get_id(idx)
def add(self, img_file, id):
self.save(True)
def exists(self, doc_id):
return self.search_engine.exists(doc_id)
def add(self, img_file, doc_id):
desc = fe.extract(img_file)
self.search_engine.add(desc, id)
self.search_engine.add(desc, doc_id)
lf = lfD.extract(img_file)
self.rescorer.add(lf, id)
kp, des = lfD.extract(img_file)
self.rescorer.add(doc_id, kp, des)
#orb = lf.extract(img_file)
self.save()
print('added ' + id)
def remove(self, id):
self.save(True)
self.search_engine.remove(id)
#self.rescorer.remove(idx)
self.save()
print('removed ' + id)
logging.info('added ' + doc_id)
def search_by_id(self, query_id, k=10, search_deep_level=1):
def remove(self, doc_id):
self.search_engine.remove(doc_id)
self.rescorer.remove(doc_id)
self.save(True)
logging.info('removed ' + doc_id)
def search_by_id(self, query_id, k=10, search_threshold=0.25, search_deep_level=1):
kq = k
if search_deep_level > 0:
kq = SearcherParameters.SEARCH_DEEP_K[search_deep_level]
@@ -57,10 +60,10 @@ class Searcher:
if search_deep_level > 0:
res_lf = self.rescorer.rescore_by_id(query_id, res)
res = res_lf if res_lf else res[:k]
res = [result for result in res if result[1] >= search_threshold]
return res
@profile
def search_by_img(self, query_img, k=10, search_deep_level=1):
def search_by_img(self, query_img, k=10, search_threshold=0.25, search_deep_level=1):
kq = k
if search_deep_level:
kq = SearcherParameters.SEARCH_DEEP_K[search_deep_level]
@@ -71,7 +74,7 @@ class Searcher:
res_lf = self.rescorer.rescore_by_img(query_lf, res)
#res = res_lf if res_lf else res[:k]
res = res_lf if res_lf else res[:k]
res = [result for result in res if result[1] >= SearcherParameters.GEM_THRESHOLD]
res = [result for result in res if result[1] >= search_threshold]
return res
def save(self, is_backup=False):
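
Taken together, the Searcher now runs a two-stage pipeline: a wide FAISS pass whose candidate count kq comes from SEARCH_DEEP_K when search_deep_level > 0, an optional BEBLID rescoring pass, and a final cut at the caller-supplied search_threshold (replacing the fixed GEM_THRESHOLD). A schematic, hypothetical recreation of that control flow, assuming pre-extracted global and local query features:

def two_stage_search(search_engine, rescorer, query_gf, query_lf,
                     k=10, search_threshold=0.25, search_deep_level=1):
    SEARCH_DEEP_K = [0, 1000, 2000, 5000, 10000, 30000, 100000]
    kq = SEARCH_DEEP_K[search_deep_level] if search_deep_level > 0 else k
    res = search_engine.search_by_img(query_gf, kq)      # stage 1: global features via FAISS
    if search_deep_level > 0:
        res_lf = rescorer.rescore_by_img(query_lf, res)  # stage 2: local-feature verification
        res = res_lf if res_lf else res[:k]
    return [r for r in res if r[1] >= search_threshold]  # threshold filter replaces GEM_THRESHOLD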

View File

@@ -1,2 +1 @@
SEARCH_DEEP_K = [0, 1000, 2000, 5000, 10000, 30000, 100000]
GEM_THRESHOLD = 0.25

View File

@@ -1,47 +1,73 @@
from flask import Flask, request, redirect, url_for, flash, render_template, send_from_directory
from random import randint
import cv2
import io
import numpy as np
import json
import urllib
from BeniCulturaliSearchEngine import BeniCulturaliSearchEngine
import ImageRecognitionSettings as settings
import uuid
import requests
import base64
import os, os.path
BASE_URL = 'http://bilioso.isti.cnr.it:8290/bcir/'
BASE_URL = 'http://bilioso.isti.cnr.it:8190/bcir/'
payload = {'id': '54b019e5ed5082b0938b14c4-IMG357781'}
# ------Get indexed IDS------
payload = {'id': '54b3298aed5082b093939ea1-IMG881380', 'k': 10, 'threshold': 0.25, 'searchDeepLevel': 1}
r = requests.get(BASE_URL + 'getIds')
print(r.json())
# ------Check if an ID exists------
payload = {'id': '54b3298aed5082b093939ea1-IMG881380'}
r = requests.get(BASE_URL + 'exists', params=payload)
print(r.json())
# ------Searching by ID------
print('Searching by ID')
payload = {'id': '54b3298aed5082b093939ea1-IMG881380', 'k': 10, 'threshold': 0.25, 'searchDeepLevel': 1}
r = requests.get(BASE_URL + 'searchById', params=payload)
print(r.json())
files = {'image': ('query', open('/media/Data/data/test/tpc_test/img/data_test/54b019e5ed5082b0938b14c4-IMG357781.jpg', 'rb'))}
r = requests.post(BASE_URL + 'searchByImg', files=files)
# ------Searching by IMG------
print('Searching by IMG')
files = {'image': ('query', open('/media/data2/data/swoads/workdir/img/ImmaginiComparazioni/ACC130111[1].jpg', 'rb'))}
data = {'k': 10, 'threshold': 0.25, 'searchDeepLevel': 1}
r = requests.post(BASE_URL + 'searchByImg', data=data, files=files)
print(r.json())
payload = {'url': 'http://bilioso.isti.cnr.it:8190/bcir/54b019e5ed5082b0938b14c4-IMG357781.jpg'}
# ------Searching by IMG Base64------
print('Searching by IMG Base64')
with open('/media/data2/data/swoads/workdir/img/ImmaginiComparazioni/ACC130111[1].jpg', "rb") as img_file:
b64_encoding = base64.b64encode(img_file.read())
data = {'image':b64_encoding, 'k': 10, 'threshold': 0.25, 'searchDeepLevel': 1}
r = requests.post(BASE_URL + 'searchByImgB64', data=data)
print(r.json())
# ------Searching by URL------
print('Searching by URL')
payload = {'url': 'http://bilioso.isti.cnr.it:8290/bcir/54b3298aed5082b093939ea1-IMG881380.jpg', 'k': 10,
'threshold': 0.25, 'searchDeepLevel': 1}
r = requests.get(BASE_URL + 'searchByURL', params=payload)
print(r.json())
files = {'image': ('prova', open('/media/Data/data/test/tpc_test/img/data_test/54b019e5ed5082b0938b14c4-IMG357781.jpg', 'rb'))}
#files = {'image': ('prova', open('/media/Data/data/beni_culturali/deploy/dataset_ids.bak', 'rb'))}
# ------Adding newIMG------
print('Adding newIMG')
files = {'image': (
'newIMG', open('/media/ssd2/data/swoads/workdir/img/ImmaginiComparazioni/ACC97468-149[3].jpg', 'rb'))}
r = requests.post(BASE_URL + 'addImg', files=files)
s = r.json()
print(r.json())
files = {'image': ('query', open('/media/Data/data/test/tpc_test/img/data_test/54b019e5ed5082b0938b14c4-IMG357781.jpg', 'rb'))}
r = requests.post(BASE_URL + 'searchByImg', files=files)
# ------Searching by newIMG------
print('Searching by newIMG')
files = {'image': (
'query', open('/media/ssd2/data/swoads/workdir/img/ImmaginiComparazioni/ACC97468-149[3].jpg', 'rb'))}
data = {'k': 10, 'threshold': 0.25, 'searchDeepLevel': 1}
r = requests.post(BASE_URL + 'searchByImg', data=data, files=files)
print(r.json())
payload = {'id': 'prova'}
# ------Removing newIMG------
print('Removing newIMG')
payload = {'id': 'newIMG'}
r = requests.get(BASE_URL + 'rmImg', params=payload)
print(r.json())
files = {'image': ('query', open('/media/Data/data/test/tpc_test/img/data_test/54b019e5ed5082b0938b14c4-IMG357781.jpg', 'rb'))}
r = requests.post(BASE_URL + 'searchByImg', files=files)
# ------Searching by newIMG now removed from the index------
print('Searching by newIMG now removed from the index')
files = {'image': (
'query', open('/media/ssd2/data/swoads/workdir/img/ImmaginiComparazioni/ACC97468-149[3].jpg', 'rb'))}
data = {'k': 10, 'threshold': 0.25, 'searchDeepLevel': 1}
r = requests.post(BASE_URL + 'searchByImg', data=data, files=files)
print(r.json())