added FAISS Searcher
This commit is contained in:
parent
de8bc9a70c
commit
aeafcfe219
|
@ -5,6 +5,7 @@ import LFUtilities
|
|||
import BEBLIDParameters
|
||||
import ImageRecognitionSettings as settings
|
||||
from line_profiler_pycharm import profile
|
||||
import logging
|
||||
|
||||
class BEBLIDRescorer:
|
||||
|
||||
|
@ -16,7 +17,8 @@ class BEBLIDRescorer:
|
|||
|
||||
def rescore_by_id(self, query_id, resultset):
|
||||
#query_idx = self.ids.index(query_id)
|
||||
query = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, query_id)
|
||||
query = LFUtilities.unpickle_img_lf(settings.DATASET_LF_FOLDER, query_id)
|
||||
|
||||
return self.rescore_by_img(query, resultset)
|
||||
|
||||
@profile
|
||||
|
@ -47,13 +49,13 @@ class BEBLIDRescorer:
|
|||
if (inliers >= BEBLIDParameters.MIN_INLIERS and inliers > max_inliers):
|
||||
max_inliers = inliers
|
||||
res.append((data_id, round(inliers/len(good), 3)))
|
||||
print(data_id)
|
||||
print(f'candidate n. {counter}')
|
||||
logging.info(data_id)
|
||||
logging.info(f'candidate n. {counter}')
|
||||
#to get just the first candidate
|
||||
break
|
||||
except Exception as e:
|
||||
print('rescore error evaluating ' + data_id)
|
||||
print(e)
|
||||
logging.error('rescore error evaluating ' + data_id)
|
||||
logging.error(e)
|
||||
pass
|
||||
counter += 1
|
||||
|
||||
|
@ -61,16 +63,18 @@ class BEBLIDRescorer:
|
|||
res.sort(key=lambda result: result[1], reverse=True)
|
||||
return res
|
||||
|
||||
def add(self, kp, des, id):
|
||||
@staticmethod
|
||||
def add(doc_id, kp, des):
|
||||
# LFUtilities.save_img_lf(dest, filename, kp, des)
|
||||
# LFUtilities.savez_img_lf(dest, filename, kp, des)
|
||||
LFUtilities.pickle_img_lf(settings.DATASET_LF_FOLDER, id, kp, des)
|
||||
LFUtilities.pickle_img_lf(settings.DATASET_LF_FOLDER, doc_id, kp, des)
|
||||
|
||||
def remove(self, idx):
|
||||
self.descs = np.delete(self.descs, idx, axis=0)
|
||||
@staticmethod
|
||||
def remove(doc_id):
|
||||
LFUtilities.delete_img_lf(settings.DATASET_LF_FOLDER, doc_id)
|
||||
|
||||
def save(self, is_backup=False):
|
||||
lf_save_file = settings.DATASET_LF
|
||||
"""lf_save_file = settings.DATASET_LF
|
||||
ids_file = settings.DATASET_IDS_LF
|
||||
if lf_save_file != "None":
|
||||
if is_backup:
|
||||
|
@ -78,4 +82,4 @@ class BEBLIDRescorer:
|
|||
ids_file += '.bak'
|
||||
|
||||
LFUtilities.save(lf_save_file, self.lf)
|
||||
np.savetxt(ids_file, self.ids, fmt='%s')
|
||||
np.savetxt(ids_file, self.ids, fmt='%s')"""
|
||||
|
|
|
@ -1,11 +1,16 @@
|
|||
import numpy as np
|
||||
import ImageRecognitionSettings as settings
|
||||
import faiss
|
||||
from shutil import copyfile
|
||||
import logging
|
||||
import threading
|
||||
import glob
|
||||
import fileinput
|
||||
import os
|
||||
|
||||
|
||||
class FAISSSearchEngine:
|
||||
|
||||
|
||||
def __init__(self):
|
||||
#self.dataset = h5py.File(settings.dataset_file, 'r')['rmac'][...]
|
||||
|
||||
|
@ -16,32 +21,58 @@ class FAISSSearchEngine:
|
|||
|
||||
#self.descs = (self.desc1 + self.desc2) / 2
|
||||
#self.descs /= np.linalg.norm(self.descs, axis=1, keepdims=True)
|
||||
self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
|
||||
#self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
|
||||
self.ids = np.loadtxt(fileinput.input(sorted(glob.glob(settings.DATASET_GF_FOLDER + '/*.ids'))))
|
||||
self.init_faiss_index()
|
||||
self.lock = threading.Lock()
|
||||
|
||||
|
||||
def init_faiss_index(self):
|
||||
# create an index with inner product similarity
|
||||
dim = 2048 # dimensionality of the features
|
||||
metric = faiss.METRIC_INNER_PRODUCT
|
||||
self.index = faiss.index_factory(dim, 'Flat', metric)
|
||||
|
||||
# add the vectors to the index
|
||||
self.index.add(self.descs) # my_database is a numpy array of shape N x dim, where N is the number of vectors to index
|
||||
|
||||
for desc_file in sorted(glob.glob(settings.DATASET_GF_FOLDER + '/*.npy')):
|
||||
# add the vectors to the index
|
||||
tmp = np.load(desc_file)
|
||||
self.index.add(tmp) # my_database is a numpy array of shape N x dim, where N is the number of vectors to index
|
||||
|
||||
def get_id(self, idx):
|
||||
return self.ids[idx]
|
||||
|
||||
def get_indexed_ids(self):
|
||||
return self.ids
|
||||
|
||||
def add(self, desc, id):
|
||||
self.ids.append(id)
|
||||
self.descs = np.vstack((self.descs, desc))
|
||||
self.save()
|
||||
|
||||
|
||||
def remove(self, id):
|
||||
idx = self.ids.index(id)
|
||||
del self.ids[idx]
|
||||
self.descs = np.delete(self.descs, idx, axis=0)
|
||||
def add(self, desc, doc_id):
|
||||
try:
|
||||
self.lock.acquire()
|
||||
# self.ids.append(doc_id)
|
||||
# self.descs = np.vstack((self.descs, desc))
|
||||
np.savetxt(os.path.join(settings.DATASET_GF_FOLDER, doc_id + '.ids'), self.ids, fmt='%s')
|
||||
np.save(os.path.join(settings.DATASET_GF_FOLDER, doc_id + '.npy'), desc)
|
||||
finally:
|
||||
self.lock.release()
|
||||
|
||||
def remove(self, doc_id):
|
||||
try:
|
||||
self.lock.acquire()
|
||||
id_filename = settings.DATASET_GF_FOLDER, doc_id + '.ids'
|
||||
numpy_filename = settings.DATASET_GF_FOLDER, doc_id + '.npy'
|
||||
if os.path.exists(id_filename):
|
||||
os.remove(id_filename)
|
||||
if os.path.exists(numpy_filename):
|
||||
os.remove(numpy_filename)
|
||||
else:
|
||||
idx = self.ids.index(doc_id)
|
||||
del self.ids[idx]
|
||||
self.descs = np.delete(self.descs, idx, axis=0)
|
||||
descs_file = settings.DATASET
|
||||
ids_file = settings.DATASET_IDS
|
||||
np.save(descs_file, self.descs)
|
||||
np.savetxt(ids_file, self.ids, fmt='%s')
|
||||
finally:
|
||||
self.lock.release()
|
||||
|
||||
def search_by_id(self, query_id, k=10):
|
||||
query_idx = self.ids.index(query_id)
|
||||
|
@ -49,9 +80,9 @@ class FAISSSearchEngine:
|
|||
|
||||
def search_by_img(self, query, k=10):
|
||||
print('----------query features-------')
|
||||
print(query)
|
||||
logging.info(query)
|
||||
queries = np.reshape(query, (-1, 2048))
|
||||
print(queries)
|
||||
logging.debug(queries)
|
||||
scores, indexes = self.index.search(queries, k)
|
||||
#dot_product = np.dot(self.descs, query)
|
||||
#idx = dot_product.argsort()[::-1][:k]
|
||||
|
@ -63,10 +94,21 @@ class FAISSSearchEngine:
|
|||
def save(self, is_backup=False):
|
||||
descs_file = settings.DATASET
|
||||
ids_file = settings.DATASET_IDS
|
||||
try:
|
||||
self.lock.acquire()
|
||||
"""if is_backup:
|
||||
descs_file_backup = descs_file +'.bak'
|
||||
ids_file_backup = ids_file + '.bak'
|
||||
copyfile(descs_file, descs_file_backup)
|
||||
copyfile(ids_file, ids_file_backup)
|
||||
logging.info('Backup features created')
|
||||
|
||||
if is_backup:
|
||||
descs_file += '.bak'
|
||||
ids_file += '.bak'
|
||||
|
||||
np.save(descs_file, self.descs)
|
||||
np.savetxt(ids_file, self.ids, fmt='%s')
|
||||
np.save(descs_file, self.descs)
|
||||
np.savetxt(ids_file, self.ids, fmt='%s')
|
||||
logging.info('Storing features done')"""
|
||||
self.init_faiss_index()
|
||||
except Exception as e:
|
||||
logging.error('Error, unable to storing features')
|
||||
logging.error(e)
|
||||
finally:
|
||||
self.lock.release()
|
||||
|
|
|
@ -0,0 +1,120 @@
|
|||
import numpy as np
|
||||
import ImageRecognitionSettings as settings
|
||||
import faiss
|
||||
from shutil import copyfile
|
||||
import logging
|
||||
import threading
|
||||
import glob
|
||||
import fileinput
|
||||
import os
|
||||
import GFUtilities
|
||||
|
||||
class FAISSSearchEngine:
    """Search engine over 2048-d global image descriptors backed by a FAISS index.

    The base dataset (settings.DATASET / settings.DATASET_IDS) is loaded when
    the index is (re)built; images added afterwards are persisted one-per-file
    via GFUtilities under settings.DATASET_GF_FOLDER and folded into the index
    on the next rebuild.  self.lock serializes all mutating operations.
    """

    def __init__(self):
        #self.dataset = h5py.File(settings.dataset_file, 'r')['rmac'][...]

        #np.save('/media/Data/data/beni_culturali/deploy/dataset', self.dataset)
        #self.descs = np.load(settings.DATASET)
        #self.desc1 = np.load(settings.DATASET1)
        #self.desc2 = np.load(settings.DATASET2)

        #self.descs = (self.desc1 + self.desc2) / 2
        #self.descs /= np.linalg.norm(self.descs, axis=1, keepdims=True)
        #self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
        #self.ids = np.loadtxt(fileinput.input(sorted(glob.glob(settings.DATASET_GF_FOLDER + '/*.ids'))))
        # Create the lock before building the index so rebuilds can rely on it.
        self.lock = threading.Lock()
        self.init_faiss_index()


    def init_faiss_index(self):
        """Build a flat inner-product FAISS index from the base dataset plus
        every per-image descriptor pickled under DATASET_GF_FOLDER."""
        # create an index with inner product similarity
        dim = 2048 # dimensionality of the features
        metric = faiss.METRIC_INNER_PRODUCT
        self.index = faiss.index_factory(dim, 'Flat', metric)
        self.descs = np.load(settings.DATASET)
        self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
        self.index.add(self.descs)

        # Fold in descriptors added after the base dataset was built.
        for desc_file in sorted(glob.glob(settings.DATASET_GF_FOLDER + '/**/*.dat', recursive=True)):
            # add the vectors to the index
            img_id = os.path.splitext(os.path.basename(desc_file))[0]
            img_desc = GFUtilities.unpickle_img_gf(settings.DATASET_GF_FOLDER, img_id)
            self.ids.append(img_id)
            self.index.add(img_desc) # my_database is a numpy array of shape N x dim, where N is the number of vectors to index


    def get_id(self, idx):
        """Return the document id stored at positional index idx."""
        return self.ids[idx]

    def get_indexed_ids(self):
        """Return the list of all indexed document ids."""
        return self.ids

    def exists(self, doc_id):
        """Return True if doc_id is currently indexed."""
        return doc_id in self.ids

    def add(self, desc, doc_id):
        """Persist desc for doc_id on disk; it enters the index on the next rebuild."""
        try:
            self.lock.acquire()
            # self.ids.append(doc_id)
            # self.descs = np.vstack((self.descs, desc))
            GFUtilities.pickle_img_gf(settings.DATASET_GF_FOLDER, doc_id, desc)
        finally:
            self.lock.release()

    def remove(self, doc_id):
        """Remove doc_id: delete its per-image pickle, or — if it belongs to the
        base dataset — drop it from descs/ids and rewrite the dataset files."""
        try:
            self.lock.acquire()
            if not GFUtilities.delete_img_gf( settings.DATASET_GF_FOLDER, doc_id):
                # Not an incrementally-added image: remove from the base dataset.
                try:
                    idx = self.ids.index(doc_id)
                    del self.ids[idx]
                    self.descs = np.delete(self.descs, idx, axis=0)
                    descs_file = settings.DATASET
                    ids_file = settings.DATASET_IDS
                    np.save(descs_file, self.descs)
                    np.savetxt(ids_file, self.ids, fmt='%s')
                except ValueError as e:
                    logging.error('Error, unable to retrieve and delete ' + doc_id)
                    logging.error(e)
        finally:
            self.lock.release()

    def search_by_id(self, query_id, k=10):
        """Search using an already-indexed image as the query.

        NOTE(review): only works for ids in the base dataset — descs holds base
        descriptors only, so incrementally-added ids would mis-index; confirm.
        """
        query_idx = self.ids.index(query_id)
        return self.search_by_img(self.descs[query_idx], k)

    def search_by_img(self, query, k=10):
        """Return the top-k (id, score) pairs for a query descriptor, reshaped
        to (-1, 2048) before the FAISS search."""
        print('----------query features-------')
        logging.info(query)
        queries = np.reshape(query, (-1, 2048))
        logging.debug(queries)
        scores, indexes = self.index.search(queries, k)
        #dot_product = np.dot(self.descs, query)
        #idx = dot_product.argsort()[::-1][:k]
        res = []
        for (i,j) in zip(indexes[0], scores[0]):
            res.append((self.ids[i], round(float(j), 3)))
        return res

    def save(self, is_backup=False):
        """Rebuild the in-memory index; the on-disk persisting/backup code is
        currently disabled (kept below as a dead triple-quoted string)."""
        descs_file = settings.DATASET
        ids_file = settings.DATASET_IDS
        try:
            self.lock.acquire()
            """if is_backup:
            descs_file_backup = descs_file +'.bak'
            ids_file_backup = ids_file + '.bak'
            copyfile(descs_file, descs_file_backup)
            copyfile(ids_file, ids_file_backup)
            logging.info('Backup features created')

            np.save(descs_file, self.descs)
            np.savetxt(ids_file, self.ids, fmt='%s')
            logging.info('Storing features done')"""
            self.init_faiss_index()
        except Exception as e:
            logging.error('Error, unable to storing features')
            logging.error(e)
        finally:
            self.lock.release()
|
|
@ -0,0 +1,36 @@
|
|||
import cv2
|
||||
import numpy as np
|
||||
import pickle as pickle
|
||||
import os
|
||||
|
||||
|
||||
def pickle_img_gf(dest, id, descriptors):
    """Pickle global-feature descriptors to dest/<id[:3]>/<id>.dat.

    Args:
        dest: root folder of the global-feature store.
        id: image identifier; its first three characters pick the subfolder.
        descriptors: any picklable descriptor object (stored as-is).
    """
    dest_folder_path = os.path.join(dest, id[0:3])
    # exist_ok avoids the check-then-create race of the original os.mkdir guard
    os.makedirs(dest_folder_path, exist_ok=True)
    dest_path = os.path.join(dest_folder_path, id + '.dat')
    # 'with' guarantees the handle is closed (the original leaked it)
    with open(dest_path, 'wb') as out_file:
        pickle.dump(descriptors, out_file)
|
||||
|
||||
|
||||
def unpickle_img_gf(dest, id):
    """Load the pickled global-feature descriptors of image *id*.

    Args:
        dest: root folder of the global-feature store.
        id: image identifier; its first three characters pick the subfolder.

    Returns:
        The descriptors exactly as stored by pickle_img_gf.

    Raises:
        FileNotFoundError: if no descriptors were stored for *id*.
    """
    dest_path = os.path.join(dest, id[0:3], id + '.dat')
    # context manager closes the handle (the original left it open)
    with open(dest_path, 'rb') as in_file:
        des = pickle.load(in_file)
    return des
|
||||
|
||||
|
||||
def delete_img_gf(dest, id):
    """Delete the pickled global features of image *id*, if present.

    Also removes the three-character bucket folder when it becomes empty.

    Returns:
        True when a file was deleted, False when nothing was stored for *id*.
    """
    target = os.path.join(dest, id[0:3], id + '.dat')
    if not os.path.exists(target):
        return False
    os.remove(target)
    bucket = os.path.dirname(target)
    if not os.listdir(bucket):
        os.rmdir(bucket)
    return True
|
|
@ -15,6 +15,8 @@ import os, os.path
|
|||
import tornado.wsgi
|
||||
import tornado.httpserver
|
||||
import argparse
|
||||
import logging
|
||||
import base64
|
||||
|
||||
|
||||
app = Flask(__name__)
|
||||
|
@ -65,13 +67,9 @@ def get_res(results, query_url=None):
|
|||
@app.route('/bcir/searchById')
|
||||
def search_by_id():
|
||||
id = request.args.get('id')
|
||||
try:
|
||||
searchDeepLevel = int(request.args.get("searchDeepLevel"))
|
||||
except Exception as e:
|
||||
print(e)
|
||||
print('Setting default deep level 1')
|
||||
search_deep_level = 1
|
||||
results = searcher.search_by_id(id, settings.k, searchDeepLevel)
|
||||
k, threshold, search_deep_level = get_parameters(request.args.get("k"), request.args.get("threshold"),
|
||||
request.args.get("searchDeepLevel"))
|
||||
results = searcher.search_by_id(id, k, threshold, search_deep_level)
|
||||
query_url = None
|
||||
if request.args.get("tohtml") is not None:
|
||||
query_url = id + ".jpg"
|
||||
|
@ -86,12 +84,8 @@ def search_by_img():
|
|||
|
||||
file = request.files['image']
|
||||
img_file = post_to_file(file)
|
||||
try:
|
||||
search_deep_level = int(request.form.get("searchDeepLevel"))
|
||||
except Exception as e:
|
||||
print(e)
|
||||
print('Setting default deep level 1')
|
||||
search_deep_level = 1
|
||||
|
||||
k, threshold, search_deep_level = get_parameters(request.form.get("k"), request.form.get("threshold"), request.form.get("searchDeepLevel"))
|
||||
|
||||
#dest_file = uuid.uuid4().hex + ".jpg"
|
||||
#dest_path = settings.logs + "/" + dest_file
|
||||
|
@ -99,24 +93,48 @@ def search_by_img():
|
|||
#files = {'image': (dest_file, open(dest_path, 'rb'))}
|
||||
#r = requests.post(settings.rmac_service, files=files)
|
||||
#results = search_engine.search_by_img(np.array(r.json()), settings.k)
|
||||
results = searcher.search_by_img(img_file, settings.k, search_deep_level)
|
||||
results = searcher.search_by_img(img_file, k, threshold, search_deep_level)
|
||||
query_url = None
|
||||
if request.form.get("tohtml") is not None:
|
||||
query_url = ""
|
||||
return get_res(results, query_url)
|
||||
|
||||
|
||||
@app.route('/bcir/searchByImgB64', methods=['POST'])
|
||||
def search_by_img_base64():
|
||||
image = request.form.get('image')
|
||||
if image:
|
||||
img_file = base64_to_file(image)
|
||||
else:
|
||||
flash('No img sent')
|
||||
return redirect(request.url)
|
||||
|
||||
k, threshold, search_deep_level = get_parameters(request.form.get("k"), request.form.get("threshold"), request.form.get("searchDeepLevel"))
|
||||
|
||||
results = searcher.search_by_img(img_file, k, threshold, search_deep_level)
|
||||
query_url = None
|
||||
if request.form.get("tohtml") is not None:
|
||||
query_url = ""
|
||||
return get_res(results, query_url)
|
||||
|
||||
|
||||
def base64_to_file(image_base64):
    """Decode a base64-encoded image and write it to the logs folder.

    Args:
        image_base64: base64 string of the raw image bytes.  The file is always
            given a .png extension regardless of the actual image format —
            NOTE(review): confirm callers only send PNG-compatible payloads.

    Returns:
        str: path of the newly written file (random UUID-based name).
    """
    ext = ".png"
    dest_file = uuid.uuid4().hex + ext
    dest_path = settings.logs + "/" + dest_file
    with open(dest_path, "wb") as image_file:
        byte_content = base64.b64decode(image_base64)
        image_file.write(byte_content)
    return dest_path
|
||||
|
||||
@app.route('/bcir/searchByURL')
|
||||
def search_by_url():
|
||||
url = request.args.get('url')
|
||||
try:
|
||||
search_deep_level = int(request.args.get("searchDeepLevel"))
|
||||
except Exception as e:
|
||||
print(e)
|
||||
print('Setting default deep level 1')
|
||||
search_deep_level = 1
|
||||
|
||||
img_file = url_to_file(url)
|
||||
|
||||
k, threshold, search_deep_level = get_parameters(request.args.get("k"), request.args.get("threshold"),
|
||||
request.args.get("searchDeepLevel"))
|
||||
# query = cv2.imdecode(image, cv2.IMREAD_COLOR)
|
||||
# dest_file = uuid.uuid4().hex + ".jpg"
|
||||
# dest_path = settings.logs + "/" + dest_file
|
||||
|
@ -124,12 +142,49 @@ def search_by_url():
|
|||
# files = {'image': open(dest_path, 'rb')}
|
||||
# r = requests.post(settings.rmac_service, files=files)
|
||||
# results = search_engine.search_by_img(np.array(r.json()), settings.k)
|
||||
results = searcher.search_by_img(img_file, settings.k, search_deep_level)
|
||||
results = searcher.search_by_img(img_file, k, threshold, search_deep_level)
|
||||
query_url = None
|
||||
if request.args.get("tohtml") is not None:
|
||||
query_url = url
|
||||
return get_res(results, query_url)
|
||||
|
||||
|
||||
def get_parameters(k, threshold, search_deep_level):
    """Coerce raw request parameters to their numeric types.

    Any value that fails to parse is replaced by the configured default and
    both the parse error and the chosen fallback are logged.

    Returns:
        tuple: (k, threshold, search_deep_level) as (int, float, int).
    """
    def _coerce(raw, cast, fallback, label):
        # fallback is a zero-arg callable so settings is only read on failure,
        # matching the original's lazy access to the defaults.
        try:
            return cast(raw)
        except Exception as err:
            logging.error(err)
            value = fallback()
            logging.error('Setting default ' + label + ' to ' + str(value))
            return value

    threshold = _coerce(threshold, float, lambda: settings.SEARCH_THRESHOLD, 'threshold value')
    k = _coerce(k, int, lambda: settings.k, 'k value')
    search_deep_level = _coerce(search_deep_level, int, lambda: settings.SEARCH_DEEP_LEVEL, 'deep level')
    return k, threshold, search_deep_level
|
||||
|
||||
|
||||
@app.route('/bcir/getIds')
|
||||
def get_indexed_ids():
|
||||
json_ids = json.dumps(searcher.get_indexed_ids())
|
||||
return json_ids
|
||||
|
||||
|
||||
@app.route('/bcir/exists')
|
||||
def exists():
|
||||
doc_id = request.args.get('id')
|
||||
return json.dumps(searcher.exists(doc_id))
|
||||
|
||||
|
||||
@app.route('/bcir/addImg', methods=['POST'])
|
||||
def add_img():
|
||||
if 'image' not in request.files:
|
||||
|
@ -137,42 +192,46 @@ def add_img():
|
|||
return redirect(request.url)
|
||||
try:
|
||||
file = request.files['image']
|
||||
id = request.files['image'].filename
|
||||
id, _ = os.path.splitext(id)
|
||||
doc_id = request.files['image'].filename
|
||||
doc_id, _ = os.path.splitext(doc_id)
|
||||
img_file = post_to_file(file)
|
||||
searcher.add(img_file, id)
|
||||
searcher.add(img_file, doc_id)
|
||||
json_res = json.dumps("done")
|
||||
return json_res
|
||||
except:
|
||||
except Exception as e:
|
||||
logging.error('Unable to add ' + doc_id + 'to the index')
|
||||
logging.error(e)
|
||||
abort(500)
|
||||
|
||||
|
||||
@app.route('/bcir/rmImg')
|
||||
def remove_img():
|
||||
try:
|
||||
id = request.args.get('id')
|
||||
searcher.remove(id)
|
||||
doc_id = request.args.get('id')
|
||||
searcher.remove(doc_id)
|
||||
json_res = json.dumps("done")
|
||||
return json_res
|
||||
except:
|
||||
except Exception as e:
|
||||
logging.error('Unable to remove ' + doc_id + 'to the index')
|
||||
logging.error(e)
|
||||
abort(500)
|
||||
|
||||
@app.route('/bcir/<path:filename>')
|
||||
def download_file(filename):
|
||||
print(filename)
|
||||
logging.debug(filename)
|
||||
values = filename.split('/')
|
||||
print(values)
|
||||
logging.debug(values)
|
||||
|
||||
return send_from_directory(settings.img_folder, filename, as_attachment=False)
|
||||
|
||||
@app.route('/bcir/queries/<path:filename>')
|
||||
def queries(filename):
|
||||
print(filename)
|
||||
logging.debug(filename)
|
||||
values = filename.split('/')
|
||||
folder = values[0]
|
||||
name = values[1]
|
||||
print(folder)
|
||||
print(name)
|
||||
logging.debug(folder)
|
||||
logging.debug(name)
|
||||
|
||||
return send_from_directory(settings.working_folder + '/' + folder, name, as_attachment=False)
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@ import json
|
|||
import os
|
||||
|
||||
def load_setting(conf_file):
|
||||
global port, feature_extractor, k, img_folder, logs, working_folder, data_folder, DATASET, DATASET_LF_FOLDER, DATASET_IDS, DB_LF
|
||||
global port, feature_extractor, k, img_folder, logs, working_folder, data_folder, DATASET_LF_FOLDER, DATASET_GF_FOLDER, DATASET, DATASET_IDS, DB_LF, SEARCH_THRESHOLD, SEARCH_DEEP_LEVEL
|
||||
|
||||
with open(conf_file) as settings_file:
|
||||
|
||||
|
@ -19,10 +19,14 @@ def load_setting(conf_file):
|
|||
if not os.path.isdir(data_folder):
|
||||
os.mkdir(data_folder)
|
||||
|
||||
DATASET = os.path.join(data_folder, 'dataset.npy')
|
||||
DATASET_LF_FOLDER = os.path.join(data_folder, 'lf')
|
||||
DATASET_IDS = os.path.join(data_folder, 'dataset.ids')
|
||||
DATASET_GF_FOLDER = os.path.join(data_folder, 'gf')
|
||||
DATASET = os.path.join(DATASET_GF_FOLDER, 'dataset.npy')
|
||||
DATASET_IDS = os.path.join(DATASET_GF_FOLDER, 'dataset.ids')
|
||||
DB_LF = os.path.join(data_folder, 'sqlite_lf/lf.db')
|
||||
SEARCH_THRESHOLD = settings['search_th']
|
||||
SEARCH_DEEP_LEVEL = settings['search_dl']
|
||||
|
||||
|
||||
img_folder = settings['img_folder']
|
||||
logs = os.path.join(working_folder, settings['log_folder'])
|
||||
|
|
|
@ -1,151 +1,48 @@
|
|||
import cv2
|
||||
import numpy as np
|
||||
import pickle as pickle
|
||||
import os
|
||||
from line_profiler_pycharm import profile
|
||||
|
||||
def resize(max_side, img):
|
||||
if img.shape[1] > img.shape[0]:
|
||||
r = max_side / img.shape[1]
|
||||
dim = (max_side, int(img.shape[0] * r))
|
||||
else:
|
||||
r = max_side / img.shape[0]
|
||||
dim = (int(img.shape[1] * r), max_side)
|
||||
|
||||
# perform the actual resizing of the image and show it
|
||||
resized = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
|
||||
return resized
|
||||
|
||||
|
||||
def pickle_keypoints(keypoints, descriptors):
|
||||
i = 0
|
||||
temp_array = []
|
||||
for point in keypoints:
|
||||
temp = (point.pt, point.size, point.angle, point.response, point.octave,
|
||||
point.class_id, descriptors[i])
|
||||
i += 1
|
||||
temp_array.append(temp)
|
||||
return temp_array
|
||||
|
||||
|
||||
def serialize_object(obj):
|
||||
return pickle.dumps(obj)
|
||||
|
||||
|
||||
def deserialize_object(serialized_obj):
|
||||
return pickle.loads(serialized_obj)
|
||||
|
||||
|
||||
def serializeV1(keypoints, descriptors):
|
||||
temp_array = []
|
||||
for point in keypoints:
|
||||
kp = [point.pt, point.size, point.angle, point.response, point.octave, point.class_id]
|
||||
temp_array.append(kp)
|
||||
return temp_array, descriptors
|
||||
|
||||
|
||||
def serialize(keypoints, descriptors):
|
||||
pts = np.float32([keypoints[i].pt for i in range(0, len(keypoints))])
|
||||
return pts, descriptors
|
||||
|
||||
def deserialize(ser_kp, ser_des):
|
||||
keypoints = []
|
||||
#data_list = array.tolist()
|
||||
for point in ser_kp:
|
||||
temp_feature = cv2.KeyPoint(x=point[0][0],y=point[0][1], size=point[1], angle=point[2], response=point[3], octave=point[4], class_id=point[5])
|
||||
keypoints.append(temp_feature)
|
||||
return keypoints, ser_des
|
||||
|
||||
|
||||
def deserializev1(ser_kp, ser_des):
|
||||
keypoints = []
|
||||
#data_list = array.tolist()
|
||||
for point in ser_kp:
|
||||
temp_feature = cv2.KeyPoint(x=point[0][0],y=point[0][1], size=point[1], angle=point[2], response=point[3], octave=point[4], class_id=point[5])
|
||||
keypoints.append(temp_feature)
|
||||
return keypoints, ser_des
|
||||
|
||||
def pickle_img_lf(dest, id, keypoints, descriptors):
|
||||
dest_folder_name = id[0:3]
|
||||
filename = id + '.dat'
|
||||
dest_folder_path = os.path.join(dest, dest_folder_name)
|
||||
if (not os.path.exists(dest_folder_path)):
|
||||
os.mkdir(dest_folder_path)
|
||||
dest_path = os.path.join(dest_folder_path, filename)
|
||||
kps, des = serialize(keypoints, descriptors)
|
||||
pickle.dump([kps, des], open(dest_path, 'wb'))
|
||||
|
||||
def delete_img_lf(dest, id):
|
||||
dest_folder_name = id[0:3]
|
||||
filename = id + '.dat'
|
||||
dest_folder_path = os.path.join(dest, dest_folder_name)
|
||||
dest_path = os.path.join(dest_folder_path, filename)
|
||||
if os.path.exists(dest_path):
|
||||
os.remove(dest_path)
|
||||
|
||||
@profile
|
||||
def unpickle_img_lf(lf_path, id):
|
||||
dest_folder_name = id[0:3]
|
||||
filename = id + '.dat'
|
||||
dest_folder_path = os.path.join(lf_path, dest_folder_name)
|
||||
dest_path = os.path.join(dest_folder_path, filename)
|
||||
kps, des = pickle.load((open(dest_path, "rb")))
|
||||
return kps, des
|
||||
|
||||
|
||||
@profile
|
||||
def loadz_img_lf(lf_path, id):
|
||||
dest_folder_name = id[0:3]
|
||||
filename = id + '.dat.npz'
|
||||
dest_folder_path = os.path.join(lf_path, dest_folder_name)
|
||||
dest_path = os.path.join(dest_folder_path, filename)
|
||||
data = np.load(dest_path, allow_pickle=False)
|
||||
kps = data.f.kps
|
||||
des = data.f.des
|
||||
#kps = data['kps']
|
||||
#des = data['des']
|
||||
#kp, desc = deserialize(data['kps'], data['des'])
|
||||
return kps, des
|
||||
|
||||
|
||||
def savez_img_lf(dest, id, keypoints, descriptors):
|
||||
dest_folder_name = id[0:3]
|
||||
filename = id + '.dat'
|
||||
dest_folder_path = os.path.join(dest, dest_folder_name)
|
||||
if (not os.path.exists(dest_folder_path)):
|
||||
os.mkdir(dest_folder_path)
|
||||
dest_path = os.path.join(dest_folder_path, filename)
|
||||
kps, des = serialize(keypoints, descriptors)
|
||||
#np.savez(dest_path, data)
|
||||
np.savez(dest_path, kps=kps, des=des)
|
||||
|
||||
|
||||
@profile
|
||||
def loadz_img_lf(lf_path, id):
|
||||
dest_folder_name = id[0:3]
|
||||
filename = id + '.dat.npz'
|
||||
dest_folder_path = os.path.join(lf_path, dest_folder_name)
|
||||
dest_path = os.path.join(dest_folder_path, filename)
|
||||
data = np.load(dest_path, allow_pickle=False)
|
||||
kps = data.f.kps
|
||||
des = data.f.des
|
||||
#kps = data['kps']
|
||||
#des = data['des']
|
||||
#kp, desc = deserialize(data['kps'], data['des'])
|
||||
return kps, des
|
||||
|
||||
|
||||
def unpickle_keypoints(array):
|
||||
keypoints = []
|
||||
descriptors = []
|
||||
data_list = array.tolist()
|
||||
for point in array:
|
||||
temp_feature = cv2.KeyPoint(x=point[0][0],y=point[0][1], size=point[1], angle=point[2], response=point[3], octave=point[4], class_id=point[5])
|
||||
temp_descriptor = point[6]
|
||||
keypoints.append(temp_feature)
|
||||
descriptors.append(temp_descriptor)
|
||||
return keypoints, np.array(descriptors)
|
||||
|
||||
|
||||
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import pickle as pickle
|
||||
import os
|
||||
|
||||
|
||||
def resize(max_side, img):
    """Scale img so its longer side equals max_side, preserving aspect ratio.

    Args:
        max_side: target length (pixels) of the longer image side.
        img: image array with shape (height, width, ...).

    Returns:
        The resized image (cv2.INTER_AREA interpolation).
    """
    height, width = img.shape[0], img.shape[1]
    if width > height:
        scale = max_side / width
        new_dim = (max_side, int(height * scale))
    else:
        scale = max_side / height
        new_dim = (int(width * scale), max_side)
    return cv2.resize(img, new_dim, interpolation=cv2.INTER_AREA)
|
||||
|
||||
|
||||
def pickle_img_lf(dest, id, keypoints, descriptors):
    """Pickle local features for image *id* to dest/<id[:3]>/<id>.dat.

    Keypoints are reduced to their (x, y) coordinates as a float32 array
    (cv2.KeyPoint objects are not picklable); descriptors are stored as-is.

    Args:
        dest: root folder of the local-feature store.
        id: image identifier; its first three characters pick the subfolder.
        keypoints: sequence of objects exposing a .pt attribute (cv2.KeyPoint).
        descriptors: descriptor matrix stored alongside the points.
    """
    dest_folder_path = os.path.join(dest, id[0:3])
    # exist_ok avoids the check-then-create race of the original os.mkdir guard
    os.makedirs(dest_folder_path, exist_ok=True)
    dest_path = os.path.join(dest_folder_path, id + '.dat')
    kps = np.float32([kp.pt for kp in keypoints])
    # 'with' guarantees the handle is closed (the original leaked it)
    with open(dest_path, 'wb') as out_file:
        pickle.dump([kps, descriptors], out_file)
|
||||
|
||||
|
||||
def unpickle_img_lf(lf_path, id):
    """Load the pickled local features of image *id*.

    Args:
        lf_path: root folder of the local-feature store.
        id: image identifier; its first three characters pick the subfolder.

    Returns:
        tuple: (kps, des) as stored by pickle_img_lf — an Nx2 float32 array of
        keypoint coordinates and the descriptor matrix.

    Raises:
        FileNotFoundError: if no features were stored for *id*.
    """
    dest_path = os.path.join(lf_path, id[0:3], id + '.dat')
    # context manager closes the handle (the original left it open)
    with open(dest_path, 'rb') as in_file:
        kps, des = pickle.load(in_file)
    return kps, des
|
||||
|
||||
|
||||
def delete_img_lf(dest, id):
    """Delete the pickled local features of image *id*, if present.

    Also removes the three-character bucket folder when it becomes empty.
    Silently does nothing when no features exist for *id*.
    """
    target = os.path.join(dest, id[0:3], id + '.dat')
    if os.path.exists(target):
        os.remove(target)
        bucket = os.path.dirname(target)
        if not os.listdir(bucket):
            os.rmdir(bucket)
|
||||
|
|
|
@ -9,11 +9,11 @@ import ImageRecognitionSettings as settings
|
|||
from BEBLIDRescorer import BEBLIDRescorer
|
||||
import SearcherParameters
|
||||
|
||||
from FAISSSearchEngine import FAISSSearchEngine
|
||||
from FAISSSearchEngine2 import FAISSSearchEngine
|
||||
import FeatureExtractor as fe
|
||||
import BEBLIDExtractorQ as lfQ
|
||||
import BEBLIDExtractorD as lfD
|
||||
from line_profiler_pycharm import profile
|
||||
import logging
|
||||
|
||||
|
||||
class Searcher:
|
||||
|
@ -25,31 +25,34 @@ class Searcher:
|
|||
self.search_engine = FAISSSearchEngine()
|
||||
self.rescorer = BEBLIDRescorer()
|
||||
|
||||
def get_indexed_ids(self):
|
||||
return self.search_engine.get_indexed_ids()
|
||||
|
||||
def get_id(self, idx):
|
||||
return self.search_engine.get_id(idx)
|
||||
|
||||
def add(self, img_file, id):
|
||||
self.save(True)
|
||||
def exists(self, doc_id):
|
||||
return self.search_engine.exists(doc_id)
|
||||
|
||||
def add(self, img_file, doc_id):
|
||||
desc = fe.extract(img_file)
|
||||
self.search_engine.add(desc, id)
|
||||
self.search_engine.add(desc, doc_id)
|
||||
|
||||
lf = lfD.extract(img_file)
|
||||
self.rescorer.add(lf, id)
|
||||
kp, des = lfD.extract(img_file)
|
||||
self.rescorer.add(doc_id, kp, des)
|
||||
|
||||
#orb = lf.extract(img_file)
|
||||
|
||||
self.save()
|
||||
print('added ' + id)
|
||||
|
||||
def remove(self, id):
|
||||
self.save(True)
|
||||
self.search_engine.remove(id)
|
||||
#self.rescorer.remove(idx)
|
||||
self.save()
|
||||
print('removed ' + id)
|
||||
logging.info('added ' + doc_id)
|
||||
|
||||
def search_by_id(self, query_id, k=10, search_deep_level=1):
|
||||
def remove(self, doc_id):
|
||||
self.search_engine.remove(doc_id)
|
||||
self.rescorer.remove(doc_id)
|
||||
self.save(True)
|
||||
logging.info('removed ' + doc_id)
|
||||
|
||||
def search_by_id(self, query_id, k=10, search_threshold=0.25, search_deep_level=1):
|
||||
kq = k
|
||||
if search_deep_level > 0:
|
||||
kq = SearcherParameters.SEARCH_DEEP_K[search_deep_level]
|
||||
|
@ -57,10 +60,10 @@ class Searcher:
|
|||
if search_deep_level > 0:
|
||||
res_lf = self.rescorer.rescore_by_id(query_id, res)
|
||||
res = res_lf if res_lf else res[:k]
|
||||
res = [result for result in res if result[1] >= search_threshold]
|
||||
return res
|
||||
|
||||
@profile
|
||||
def search_by_img(self, query_img, k=10, search_deep_level=1):
|
||||
def search_by_img(self, query_img, k=10, search_threshold=0.25, search_deep_level=1):
|
||||
kq = k
|
||||
if search_deep_level:
|
||||
kq = SearcherParameters.SEARCH_DEEP_K[search_deep_level]
|
||||
|
@ -71,7 +74,7 @@ class Searcher:
|
|||
res_lf = self.rescorer.rescore_by_img(query_lf, res)
|
||||
#res = res_lf if res_lf else res[:k]
|
||||
res = res_lf if res_lf else res[:k]
|
||||
res = [result for result in res if result[1] >= SearcherParameters.GEM_THRESHOLD]
|
||||
res = [result for result in res if result[1] >= search_threshold]
|
||||
return res
|
||||
|
||||
def save(self, is_backup=False):
|
||||
|
|
|
@ -1,2 +1 @@
|
|||
SEARCH_DEEP_K = [0, 1000, 2000, 5000, 10000, 30000, 100000]
|
||||
GEM_THRESHOLD = 0.25
|
||||
|
|
|
@ -1,47 +1,73 @@
|
|||
from flask import Flask, request, redirect, url_for, flash, render_template, send_from_directory
|
||||
from random import randint
|
||||
import cv2
|
||||
import io
|
||||
import numpy as np
|
||||
import json
|
||||
|
||||
import urllib
|
||||
|
||||
|
||||
from BeniCulturaliSearchEngine import BeniCulturaliSearchEngine
|
||||
import ImageRecognitionSettings as settings
|
||||
import uuid
|
||||
import requests
|
||||
import base64
|
||||
|
||||
import os, os.path
|
||||
BASE_URL = 'http://bilioso.isti.cnr.it:8290/bcir/'
|
||||
|
||||
BASE_URL = 'http://bilioso.isti.cnr.it:8190/bcir/'
|
||||
payload = {'id': '54b019e5ed5082b0938b14c4-IMG357781'}
|
||||
# ------Get indexed IDS------
|
||||
payload = {'id': '54b3298aed5082b093939ea1-IMG881380', 'k': 10, 'threshold': 0.25, 'searchDeepLevel': 1}
|
||||
r = requests.get(BASE_URL + 'getIds')
|
||||
print(r.json())
|
||||
|
||||
# ------Check if an ID exists------
|
||||
payload = {'id': '54b3298aed5082b093939ea1-IMG881380'}
|
||||
r = requests.get(BASE_URL + 'exists', params=payload)
|
||||
print(r.json())
|
||||
|
||||
# ------Searching by ID------
|
||||
print('Searching by ID')
|
||||
payload = {'id': '54b3298aed5082b093939ea1-IMG881380', 'k': 10, 'threshold': 0.25, 'searchDeepLevel': 1}
|
||||
r = requests.get(BASE_URL + 'searchById', params=payload)
|
||||
print(r.json())
|
||||
|
||||
files = {'image': ('query', open('/media/Data/data/test/tpc_test/img/data_test/54b019e5ed5082b0938b14c4-IMG357781.jpg', 'rb'))}
|
||||
r = requests.post(BASE_URL + 'searchByImg', files=files)
|
||||
# ------Searching by IMG------
|
||||
print('Searching by IMG')
|
||||
files = {'image': ('query', open('/media/data2/data/swoads/workdir/img/ImmaginiComparazioni/ACC130111[1].jpg', 'rb'))}
|
||||
data = {'k': 10, 'threshold': 0.25, 'searchDeepLevel': 1}
|
||||
r = requests.post(BASE_URL + 'searchByImg', data=data, files=files)
|
||||
print(r.json())
|
||||
|
||||
payload = {'url': 'http://bilioso.isti.cnr.it:8190/bcir/54b019e5ed5082b0938b14c4-IMG357781.jpg'}
|
||||
# ------Searching by IMG Basa64------
|
||||
print('Searching by IMG Base64')
|
||||
with open('/media/data2/data/swoads/workdir/img/ImmaginiComparazioni/ACC130111[1].jpg', "rb") as img_file:
|
||||
b64_encoding = base64.b64encode(img_file.read())
|
||||
data = {'image':b64_encoding, 'k': 10, 'threshold': 0.25, 'searchDeepLevel': 1}
|
||||
r = requests.post(BASE_URL + 'searchByImgB64', data=data, files=files)
|
||||
print(r.json())
|
||||
|
||||
# ------Searching by URL------
|
||||
print('Searching by URL')
|
||||
payload = {'url': 'http://bilioso.isti.cnr.it:8290/bcir/54b3298aed5082b093939ea1-IMG881380.jpg', 'k': 10,
|
||||
'threshold': 0.25, 'searchDeepLevel': 1}
|
||||
r = requests.get(BASE_URL + 'searchByURL', params=payload)
|
||||
print(r.json())
|
||||
|
||||
files = {'image': ('prova', open('/media/Data/data/test/tpc_test/img/data_test/54b019e5ed5082b0938b14c4-IMG357781.jpg', 'rb'))}
|
||||
#files = {'image': ('prova', open('/media/Data/data/beni_culturali/deploy/dataset_ids.bak', 'rb'))}
|
||||
# ------Adding newIMG------
|
||||
print('Adding newIMG')
|
||||
files = {'image': (
|
||||
'newIMG', open('/media/ssd2/data/swoads/workdir/img/ImmaginiComparazioni/ACC97468-149[3].jpg', 'rb'))}
|
||||
r = requests.post(BASE_URL + 'addImg', files=files)
|
||||
s = r.json()
|
||||
print(r.json())
|
||||
|
||||
files = {'image': ('query', open('/media/Data/data/test/tpc_test/img/data_test/54b019e5ed5082b0938b14c4-IMG357781.jpg', 'rb'))}
|
||||
r = requests.post(BASE_URL + 'searchByImg', files=files)
|
||||
# ------Searching by newIMG------
|
||||
print('Searching by newIMG')
|
||||
files = {'image': (
|
||||
'query', open('/media/ssd2/data/swoads/workdir/img/ImmaginiComparazioni/ACC97468-149[3].jpg', 'rb'))}
|
||||
data = {'k': 10, 'threshold': 0.25, 'searchDeepLevel': 1}
|
||||
|
||||
r = requests.post(BASE_URL + 'searchByImg', data=data, files=files)
|
||||
print(r.json())
|
||||
|
||||
payload = {'id': 'prova'}
|
||||
# ------Removing newIMG------
|
||||
print('Removing newIMG')
|
||||
payload = {'id': 'newIMG'}
|
||||
r = requests.get(BASE_URL + 'rmImg', params=payload)
|
||||
print(r.json())
|
||||
|
||||
files = {'image': ('query', open('/media/Data/data/test/tpc_test/img/data_test/54b019e5ed5082b0938b14c4-IMG357781.jpg', 'rb'))}
|
||||
r = requests.post(BASE_URL + 'searchByImg', files=files)
|
||||
# ------Searching by newIMG now removed from the index------
|
||||
print('Searching by newIMG now removed from the index')
|
||||
files = {'image': (
|
||||
'query', open('/media/ssd2/data/swoads/workdir/img/ImmaginiComparazioni/ACC97468-149[3].jpg', 'rb'))}
|
||||
data = {'k': 10, 'threshold': 0.25, 'searchDeepLevel': 1}
|
||||
r = requests.post(BASE_URL + 'searchByImg', data=data, files=files)
|
||||
print(r.json())
|
||||
|
|
Loading…
Reference in New Issue