added FAISS Searcher

This commit is contained in:
Paolo Bolettieri 2022-07-08 18:19:41 +02:00
parent 647a8778ba
commit 2761ccbe95
26 changed files with 772 additions and 466 deletions

Dockerfile

@@ -7,7 +7,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     git \
     wget \
     nano \
-    unzip
+    unzip \
+    sqlite3 \
+    libsqlite3-dev
 RUN pip install numpy tornado flask-restful pillow numpy matplotlib tqdm scikit-learn h5py requests faiss-cpu==1.7.2
 ADD . /workspace
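
Note: sqlite3 and libsqlite3-dev back the new SQLite-based local-feature store (LFDB.py, added below). After this change the image needs a rebuild, e.g. with the tag run.sh expects: docker build -t image-recognition:swoads .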

run.sh

@@ -1 +1 @@
-docker run --net=host -p 8190:8190 -v /media/data2/data/swoads/data:/workspace/data -it image-recognition:swoads python3 /workspace/src/beniculturali.py /workspace/data/conf/img_rec_conf.json
+docker run --net=host -p 8190:8190 -v /media/data2/data/swoads/data:/workspace/data -it image-recognition:swoads python3 /workspace/src/ImageRecognitionService.py /workspace/data/conf/img_rec_conf.json

src/BEBLIDExtractor.py

@@ -8,7 +8,7 @@ import LFUtilities
 import BEBLIDParameters as params
 
 detector = cv2.ORB_create(params.KEYPOINTS)
-descriptor = cv2.xfeatures2d.BEBLID_create(0.75)
+descriptor = cv2.xfeatures2d.BEBLID_create(0.75, 101)
 
 def extract(img_path):
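
Note: the new second argument selects the descriptor size; 101 is the value of BEBLID::SIZE_256_BITS in OpenCV's xfeatures2d module (100 would be SIZE_512_BITS), so the extractor now produces 256-bit descriptors. The same call with the named constant, assuming an opencv-contrib build that exposes it:

    descriptor = cv2.xfeatures2d.BEBLID_create(0.75, cv2.xfeatures2d.BEBLID_SIZE_256_BITS)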

src/BEBLIDParameters.py

@@ -1,5 +1,5 @@
 NN_MATCH_RATIO = 0.8
-MIN_GOOD_MATCHES = 12
-MIN_INLIERS = 10
-KEYPOINTS = 500
+MIN_GOOD_MATCHES = 22
+MIN_INLIERS = 15
+KEYPOINTS = 800
 IMG_SIZE = 500

src/BEBLIDRescorer.py

@@ -3,7 +3,7 @@ import numpy as np
 import LFUtilities
 import BEBLIDParameters
-import beniculturaliSettings as settings
+import ImageRecognitionSettings as settings
 
 class BEBLIDRescorer:
@@ -15,18 +15,20 @@ class BEBLIDRescorer:
         self.bf = cv2.DescriptorMatcher_create(cv2.DescriptorMatcher_BRUTEFORCE_HAMMING)
 
     def rescore_by_id(self, query_id, resultset):
-        query_idx = self.ids.index(query_id)
-        query = LFUtilities.load_img_lf(settings.DATASET_BEBLID, query_id)
+        #query_idx = self.ids.index(query_id)
+        query = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, query_id)
         return self.rescore_by_img(query, resultset)
 
     def rescore_by_img(self, query, resultset):
         max_inliers = -1
         res = []
         counter = 0
+        if len(query[0]) > 0:
             for data_id, _ in resultset:
                 try:
                     data_el = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, data_id)
+                    if len(data_el[1]) > 0:
                         nn_matches = self.bf.knnMatch(query[1], data_el[1], 2)
                         good = [m for m, n in nn_matches if m.distance < BEBLIDParameters.NN_MATCH_RATIO * n.distance]
@@ -34,7 +36,7 @@ class BEBLIDRescorer:
                         src_pts = np.float32([query[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
                         dst_pts = np.float32([data_el[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
-                        M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 1.0)
+                        M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 3.0)
                         matches_mask = mask.ravel().tolist()
                         # print(len(good))
                         inliers = np.count_nonzero(matches_mask)
@@ -42,9 +44,13 @@ class BEBLIDRescorer:
                         if (inliers >= BEBLIDParameters.MIN_INLIERS and inliers > max_inliers):
                             max_inliers = inliers
                             res.append((data_id, round(inliers/len(good), 3)))
+                            print(data_id)
                             print(f'candidate n. {counter}')
-                except:
+                            #to get just the first candidate
+                            break
+                except Exception as e:
                     print('rescore error evaluating ' + data_id)
+                    print(e)
                     pass
                 counter += 1
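
Note: all rescorer variants in this commit (file-based, DB, GPU, V2) share this verification logic and differ only in descriptor storage and RANSAC tolerance. A condensed sketch, with query/candidate standing for the (keypoints, descriptors) pairs loaded via LFUtilities and thresholds from BEBLIDParameters; this is a reading aid, not a drop-in replacement:

    import cv2
    import numpy as np
    import BEBLIDParameters as P

    def verify(bf, query, candidate):
        nn = bf.knnMatch(query[1], candidate[1], 2)             # 2-NN search on binary descriptors
        good = [m for m, n in nn
                if m.distance < P.NN_MATCH_RATIO * n.distance]  # Lowe's ratio test (0.8)
        if len(good) <= P.MIN_GOOD_MATCHES:                     # raised from 12 to 22
            return None
        src = np.float32([query[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
        dst = np.float32([candidate[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
        M, mask = cv2.findHomography(src, dst, cv2.RANSAC, 3.0) # tolerance raised from 1.0 to 3.0 px
        if mask is None:
            return None
        inliers = int(np.count_nonzero(mask))                   # geometrically consistent matches
        if inliers < P.MIN_INLIERS:                             # raised from 10 to 15
            return None
        return round(inliers / len(good), 3)                    # inlier ratio used as the score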

src/BEBLIDRescorerDB.py Normal file

@@ -0,0 +1,81 @@
import cv2
import numpy as np
import LFUtilities
import BEBLIDParameters
import ImageRecognitionSettings as settings
from LFDB import LFDB


class BEBLIDRescorerDB:

    def __init__(self):
        #self.lf = LFUtilities.load(settings.DATASET_BEBLID)
        #self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
        #self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
        self.bf = cv2.DescriptorMatcher_create(cv2.DescriptorMatcher_BRUTEFORCE_HAMMING)
        self.lf_db = LFDB(settings.DB_LF)

    def rescore_by_id(self, query_id, resultset):
        #query_idx = self.ids.index(query_id)
        query = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, query_id)
        return self.rescore_by_img(query, resultset)

    def rescore_by_img(self, query, resultset):
        max_inliers = -1
        res = []
        counter = 0
        if len(query[0]) > 0:
            for data_id, _ in resultset:
                try:
                    blob = self.lf_db.get(data_id)
                    serialized_obj = LFUtilities.deserialize_object(blob)
                    data_el = LFUtilities.unpickle_keypoints(serialized_obj)
                    if len(data_el[1]) > 0:
                        nn_matches = self.bf.knnMatch(query[1], data_el[1], 2)
                        good = [m for m, n in nn_matches if m.distance < BEBLIDParameters.NN_MATCH_RATIO * n.distance]
                        if len(good) > BEBLIDParameters.MIN_GOOD_MATCHES:
                            src_pts = np.float32([query[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
                            dst_pts = np.float32([data_el[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
                            M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 3.0)
                            matches_mask = mask.ravel().tolist()
                            # print(len(good))
                            inliers = np.count_nonzero(matches_mask)
                            # print(inliers)
                            if (inliers >= BEBLIDParameters.MIN_INLIERS and inliers > max_inliers):
                                max_inliers = inliers
                                res.append((data_id, round(inliers/len(good), 3)))
                                print(data_id)
                                print(f'candidate n. {counter}')
                                #to get just the first candidate
                                break
                except Exception as e:
                    print('rescore error evaluating ' + data_id)
                    print(e)
                    pass
                counter += 1
        if res:
            res.sort(key=lambda result: result[1], reverse=True)
        return res

    def add(self, lf):
        self.lf.append(lf)

    def remove(self, idx):
        self.descs = np.delete(self.descs, idx, axis=0)

    def save(self, is_backup=False):
        lf_save_file = settings.DATASET_LF
        ids_file = settings.DATASET_IDS_LF
        if lf_save_file != "None":
            if is_backup:
                lf_save_file += '.bak'
                ids_file += '.bak'
            LFUtilities.save(lf_save_file, self.lf)
            np.savetxt(ids_file, self.ids, fmt='%s')

src/BEBLIDRescorerGPU.py Normal file

@@ -0,0 +1,75 @@
import cv2
import numpy as np
import LFUtilities
import BEBLIDParameters
import ImageRecognitionSettings as settings


class BEBLIDRescorerGPU:

    def __init__(self):
        #self.lf = LFUtilities.load(settings.DATASET_BEBLID)
        #self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
        #self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
        #self.bf = cv2.DescriptorMatcher_create(cv2.DescriptorMatcher_BRUTEFORCE_HAMMING)
        self.bf = cv2.cuda.DescriptorMatcher_createBFMatcher(cv2.DescriptorMatcher_BRUTEFORCE_HAMMING)

    def rescore_by_id(self, query_id, resultset):
        #query_idx = self.ids.index(query_id)
        query = LFUtilities.load_img_lf_GPU(settings.DATASET_LF_FOLDER, query_id)
        return self.rescore_by_img(query, resultset)

    def rescore_by_img(self, query, resultset):
        max_inliers = -1
        res = []
        counter = 0
        for data_id, _ in resultset:
            try:
                data_el = LFUtilities.load_img_lf_GPU(settings.DATASET_LF_FOLDER, data_id)
                nn_matches = self.bf.knnMatch(query[1], data_el[1], 2)
                good = [m for m, n in nn_matches if m.distance < BEBLIDParameters.NN_MATCH_RATIO * n.distance]
                if len(good) > BEBLIDParameters.MIN_GOOD_MATCHES:
                    src_pts = np.float32([query[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
                    dst_pts = np.float32([data_el[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
                    M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 1.0)
                    matches_mask = mask.ravel().tolist()
                    # print(len(good))
                    inliers = np.count_nonzero(matches_mask)
                    # print(inliers)
                    if (inliers >= BEBLIDParameters.MIN_INLIERS and inliers > max_inliers):
                        max_inliers = inliers
                        res.append((data_id, round(inliers/len(good), 3)))
                        print(data_id)
                        print(f'candidate n. {counter}')
                        #to get just the first candidate
                        break
            except Exception as e:
                print('rescore error evaluating ' + data_id)
                print(e)
                pass
            counter += 1
        if res:
            res.sort(key=lambda result: result[1], reverse=True)
        return res

    def add(self, lf):
        self.lf.append(lf)

    def remove(self, idx):
        self.descs = np.delete(self.descs, idx, axis=0)

    def save(self, is_backup=False):
        lf_save_file = settings.DATASET_LF
        ids_file = settings.DATASET_IDS_LF
        if lf_save_file != "None":
            if is_backup:
                lf_save_file += '.bak'
                ids_file += '.bak'
            LFUtilities.save(lf_save_file, self.lf)
            np.savetxt(ids_file, self.ids, fmt='%s')

src/BEBLIDRescorerV2.py Normal file

@@ -0,0 +1,76 @@
import cv2
import numpy as np
import LFUtilities
import BEBLIDParameters
import ImageRecognitionSettings as settings


class BEBLIDRescorer:

    def __init__(self):
        #self.lf = LFUtilities.load(settings.DATASET_BEBLID)
        #self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
        self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
        #self.bf = cv2.DescriptorMatcher_create(cv2.DescriptorMatcher_BRUTEFORCE_HAMMING)

    def rescore_by_id(self, query_id, resultset):
        #query_idx = self.ids.index(query_id)
        query = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, query_id)
        return self.rescore_by_img(query, resultset)

    def rescore_by_img(self, query, resultset):
        max_inliers = -1
        res = []
        counter = 0
        if len(query[0]) > 0:
            for data_id, _ in resultset:
                try:
                    data_el = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, data_id)
                    if len(data_el[1]) > 0:
                        nn_matches = self.bf.knnMatch(query[1], data_el[1], 2)
                        good = [m for m, n in nn_matches if m.distance < BEBLIDParameters.NN_MATCH_RATIO * n.distance]
                        if len(good) > BEBLIDParameters.MIN_GOOD_MATCHES:
                            src_pts = np.float32([query[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
                            dst_pts = np.float32([data_el[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
                            M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
                            matches_mask = mask.ravel().tolist()
                            # print(len(good))
                            inliers = np.count_nonzero(matches_mask)
                            # print(inliers)
                            if (inliers >= BEBLIDParameters.MIN_INLIERS and inliers > max_inliers):
                                max_inliers = inliers
                                res.append((data_id, round(inliers/len(good), 3)))
                                print(data_id)
                                print(f'candidate n. {counter}')
                                #to get just the first candidate
                                break
                except Exception as e:
                    print('rescore error evaluating ' + data_id)
                    print(e)
                    pass
                counter += 1
        if res:
            res.sort(key=lambda result: result[1], reverse=True)
        return res

    def add(self, lf):
        self.lf.append(lf)

    def remove(self, idx):
        self.descs = np.delete(self.descs, idx, axis=0)

    def save(self, is_backup=False):
        lf_save_file = settings.DATASET_LF
        ids_file = settings.DATASET_IDS_LF
        if lf_save_file != "None":
            if is_backup:
                lf_save_file += '.bak'
                ids_file += '.bak'
            LFUtilities.save(lf_save_file, self.lf)
            np.savetxt(ids_file, self.ids, fmt='%s')

src/BeniCulturaliRescorer.py

@@ -1,66 +0,0 @@
import cv2
import numpy as np
import LFUtilities
import beniculturaliSettings as settings


class BeniCulturaliRescorer:

    def __init__(self):
        self.lf = LFUtilities.load(settings.DATASET_LF)
        self.ids = np.loadtxt(settings.DATASET_IDS_LF, dtype=str).tolist()
        self.orb = cv2.ORB_create()
        self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

    def rescore_by_id(self, query_id, resultset):
        query_idx = self.ids.index(query_id)
        return self.rescore_by_img(self.lf[query_idx], resultset)

    def rescore_by_img(self, query, resultset):
        max_inliers = -1
        res = []
        for data_id, _ in resultset:
            data_idx = self.ids.index(data_id)
            try:
                data_el = self.lf[data_idx]
                matches = self.bf.match(query[1], data_el[1])
                good = [m for m in matches if m.distance <= LFUtilities.THRESHOLD]
                if len(good) > LFUtilities.MIN_GOOD_MATCHES:
                    src_pts = np.float32([query[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
                    dst_pts = np.float32([data_el[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
                    M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 1.0)
                    matches_mask = mask.ravel().tolist()
                    # print(len(good))
                    inliers = np.count_nonzero(matches_mask)
                    # print(inliers)
                    if (inliers >= LFUtilities.MIN_INLIERS and inliers > max_inliers):
                        max_inliers = inliers
                        res.append((data_id, inliers))
            except:
                print('rescore error evaluating ' + data_id)
                pass
        if res:
            res.sort(key=lambda result: result[1], reverse=True)
        return res

    def add(self, lf):
        self.lf.append(lf)

    def remove(self, idx):
        self.descs = np.delete(self.descs, idx, axis=0)

    def save(self, is_backup=False):
        lf_save_file = settings.DATASET_LF
        ids_file = settings.DATASET_IDS_LF
        if lf_save_file != "None":
            if is_backup:
                lf_save_file += '.bak'
                ids_file += '.bak'
            LFUtilities.save(lf_save_file, self.lf)
            np.savetxt(ids_file, self.ids, fmt='%s')

src/BeniCulturaliSearchEngine.py

@@ -1,60 +0,0 @@
import numpy as np
import beniculturaliSettings as settings


class BeniCulturaliSearchEngine:

    def __init__(self):
        #self.dataset = h5py.File(settings.dataset_file, 'r')['rmac'][...]
        #np.save('/media/Data/data/beni_culturali/deploy/dataset', self.dataset)
        self.descs = np.load(settings.DATASET)
        #self.desc1 = np.load(settings.DATASET1)
        #self.desc2 = np.load(settings.DATASET2)
        #self.descs = (self.desc1 + self.desc2) / 2
        #self.descs /= np.linalg.norm(self.descs, axis=1, keepdims=True)
        self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()

    def get_id(self, idx):
        return self.ids[idx]

    def add(self, desc, id):
        self.ids.append(id)
        self.descs = np.vstack((self.descs, desc))
        self.save()

    def remove(self, id):
        idx = self.ids.index(id)
        del self.ids[idx]
        self.descs = np.delete(self.descs, idx, axis=0)

    def search_by_id(self, query_id, k=10):
        query_idx = self.ids.index(query_id)
        return self.search_by_img(self.descs[query_idx], k)

    def search_by_img(self, query, k=10):
        print('----------query features-------')
        print(query)
        dot_product = np.dot(self.descs, query)
        idx = dot_product.argsort()[::-1][:k]
        res = []
        for i in idx:
            res.append((self.ids[i], round(float(dot_product[i]), 3)))
        return res

    def save(self, is_backup=False):
        descs_file = settings.DATASET
        ids_file = settings.DATASET_IDS
        if is_backup:
            descs_file += '.bak'
            ids_file += '.bak'
        np.save(descs_file, self.descs)
        np.savetxt(ids_file, self.ids, fmt='%s')

src/BeniCulturaliSearcher.py

@@ -1,68 +0,0 @@
import cv2
import numpy as np
import pickle as pickle
import LFUtilities
import beniculturaliSettings as settings
from BeniCulturaliRescorer import BeniCulturaliRescorer
from BeniCulturaliSearchEngine import BeniCulturaliSearchEngine
import FeatureExtractor as fe
#import ORBExtractor as lf


class BeniCulturaliSearcher:
    K_REORDERING = 15

    def __init__(self):
        # self.dataset = h5py.File(settings.dataset_file, 'r')['rmac'][...]
        # np.save('/media/Data/data/beni_culturali/deploy/dataset', self.dataset)
        self.search_engine = BeniCulturaliSearchEngine()
        #self.rescorer = BeniCulturaliRescorer()

    def get_id(self, idx):
        return self.search_engine.get_id(idx)

    def add(self, img_file, id):
        self.save(True)
        desc = fe.extract(img_file)
        #orb = lf.extract(img_file)
        self.search_engine.add(desc, id)
        #self.rescorer.add(orb)
        self.save()
        print('added ' + id)

    def remove(self, id):
        self.save(True)
        self.search_engine.remove(id)
        #self.rescorer.remove(idx)
        self.save()
        print('removed ' + id)

    def search_by_id(self, query_id, k=10, rescorer=False):
        kq = k
        if rescorer:
            kq = self.K_REORDERING
        res = self.search_engine.search_by_id(query_id, kq)
        # if rescorer:
        #     res_lf = self.rescorer.rescore_by_id(query_id, res)
        #     res = res_lf if res_lf else res[:k]
        return res

    def search_by_img(self, query_img, k=10, rescorer=False):
        kq = k
        if rescorer:
            kq = self.K_REORDERING
        query_desc = fe.extract(query_img)
        res = self.search_engine.search_by_img(query_desc, kq)
        #if rescorer:
        #    query_lf = lf.extract(query_img)
        #    res_lf = self.rescorer.rescore_by_img(query_lf, res)
        #    res = res_lf if res_lf else res[:k]
        return res

    def save(self, is_backup=False):
        self.search_engine.save(is_backup)
        #self.rescorer.save(is_backup)

src/BulkSearch.py Normal file

@@ -0,0 +1,38 @@
import requests
from pathlib import Path
import tqdm
import argparse
import os

IMG_REC_SERVICE = 'http://localhost:8290/bcir/'

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Img Recognition Bulk Analysis')
    parser.add_argument('src', type=str, help='img src folder path')
    parser.add_argument('dest', type=str, help='dest file path')
    args = parser.parse_args()
    src = args.src
    dest = args.dest

    paths = Path(src).rglob('*.*')
    paths_list = list(paths)
    print('Analyzing images...')
    with open(dest, 'w', encoding='UTF8') as f:
        for path in tqdm.tqdm(paths_list):
            try:
                img_file = {'image': ('query', open(os.path.join(path.parent, path.name), 'rb'))}
                r = requests.post(IMG_REC_SERVICE + 'searchByImg', files=img_file)
                res = r.json()
                tmp = ';'.join([str(i) for x in res for i in x])
                row = path.name + ";" + tmp
                f.write(row + '\n')
            except Exception as e:
                print("cannot process '%s'" % path)
                print(e)
                pass
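
Note: each output row is semicolon-separated, query_name;id1;score1;id2;score2;..., since the join flattens every (id, score) pair returned by the service. Invocation sketch with illustrative paths: python3 src/BulkSearch.py /workspace/workdir/images /workspace/workdir/results.csv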

src/FAISSSearchEngine.py

@@ -1,5 +1,5 @@
 import numpy as np
-import beniculturaliSettings as settings
+import ImageRecognitionSettings as settings
 import faiss

src/FeatureExtractor.py

@@ -1,5 +1,5 @@
 import numpy as np
-import beniculturaliSettings as settings
+import ImageRecognitionSettings as settings
 import requests

@@ -0,0 +1,63 @@
import requests
from pathlib import Path
import tqdm
import argparse
import os

IMG_REC_SERVICE = 'http://localhost:8290/bcir/'
groundtruth_file = '/media/ssd2/data/swoads/workdir/data/groundtruth_no_ext.txt'
precision_at = [0] * 10

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Img Recognition Bulk Analysis')
    parser.add_argument('src', type=str, help='img src folder path')
    #parser.add_argument('dest', type=str, help='dest file path')
    args = parser.parse_args()
    src = args.src
    #dest = args.dest

    groundtruth = {}
    with open(groundtruth_file, 'r') as f:
        for line in f:
            line = line.rstrip()  # removes trailing whitespace and '\n' chars
            if "," not in line: continue  # skips blanks and comments w/o =
            if line.startswith("#"): continue  # skips comments which contain =
            k, v = line.split(",", 1)
            groundtruth[k] = v

    paths = Path(src).rglob('*.*')
    paths_list = list(paths)
    print('Analyzing images...')
    for path in tqdm.tqdm(paths_list):
        key = path.name
        exprected_id = groundtruth[key]
        print(exprected_id)
        try:
            img_file = {'image': ('query', open(os.path.join(path.parent, path.name), 'rb'))}
            params = {'rescorer': 'true'}
            r = requests.post(IMG_REC_SERVICE + 'searchByImg', data=params, files=img_file)
            res = r.json()
            for i in range(0, len(res)):
                print(res[i][0])
                if res[i][0] in exprected_id:
                    precision_at[i] = precision_at[i] + 1
        except Exception as e:
            print("cannot process '%s'" % path)
            print(e)
            pass
    print(precision_at)
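
Note: the groundtruth file is parsed as one filename,expected_id pair per line (split on the first comma), e.g. IMG_0042.jpg,obj_127 (illustrative). Also, precision_at[i] counts how often the expected id appears at rank i; if cumulative precision@k is wanted, it can be derived afterwards, e.g. with this hypothetical post-processing on the script's own variables:

    total = len(paths_list)
    precision_at_k = [sum(precision_at[:k + 1]) / total for k in range(len(precision_at))]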

src/ImageRecognitionService.py

@@ -9,8 +9,7 @@ import urllib
 #from BeniCulturaliSearcher import BeniCulturaliSearcher
 from Searcher import Searcher
-from BeniCulturaliSearchEngine import BeniCulturaliSearchEngine
-import beniculturaliSettings as settings
+import ImageRecognitionSettings as settings
 import uuid
 import os, os.path
 import tornado.wsgi
@@ -66,9 +65,9 @@ def get_res(results, query_url=None):
 @app.route('/bcir/searchById')
 def search_by_id():
     id = request.args.get('id')
-    rescorer = False
-    if request.args.get("rescorer") == 'true':
-        rescorer = True
+    rescorer = True
+    if request.args.get("rescorer") == 'false':
+        rescorer = False
     results = searcher.search_by_id(id, settings.k, rescorer)
     query_url = None
     if request.args.get("tohtml") is not None:
@@ -84,9 +83,9 @@ def search_by_img():
     file = request.files['image']
     img_file = post_to_file(file)
-    rescorer = False
-    if request.form.get("rescorer") == 'true':
-        rescorer = True
+    rescorer = True
+    if request.form.get("rescorer") == 'false':
+        rescorer = False
     #dest_file = uuid.uuid4().hex + ".jpg"
     #dest_path = settings.logs + "/" + dest_file
     #file.save(dest_path)
@@ -103,9 +102,9 @@ def search_by_img():
 @app.route('/bcir/searchByURL')
 def search_by_url():
     url = request.args.get('url')
-    rescorer = False
-    if request.args.get("rescorer") == 'true':
-        rescorer = True
+    rescorer = True
+    if request.args.get("rescorer") == 'false':
+        rescorer = False
     img_file = url_to_file(url)
     # query = cv2.imdecode(image, cv2.IMREAD_COLOR)
     # dest_file = uuid.uuid4().hex + ".jpg"
@@ -155,6 +154,17 @@ def download_file(filename):
     return send_from_directory(settings.img_folder, filename, as_attachment=False)
 
+@app.route('/bcir/queries/<path:filename>')
+def queries(filename):
+    print(filename)
+    values = filename.split('/')
+    folder = values[0]
+    name = values[1]
+    print(folder)
+    print(name)
+    return send_from_directory(settings.working_folder + '/' + folder, name, as_attachment=False)
+
 """
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Reading configuration file')
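
Note: rescoring is now opt-out; every endpoint defaults rescorer to True unless the client sends rescorer=false. A minimal client sketch mirroring what the evaluation script above sends (the bulk tools in this commit target port 8290 while run.sh publishes 8190, so match whatever the conf file sets):

    import requests

    with open('query.jpg', 'rb') as f:
        r = requests.post('http://localhost:8290/bcir/searchByImg',
                          data={'rescorer': 'true'},
                          files={'image': ('query', f)})
    print(r.json())  # list of (id, score) pairs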

src/ImageRecognitionSettings.py

@@ -2,7 +2,7 @@ import json
 import os
 
 def load_setting(conf_file):
-    global port, feature_extractor, k, img_folder, logs, working_folder, data_folder, DATASET, DATASET1, DATASET2, DATASET_LF_FOLDER, DATASET_IDS, DATASET_IDS_LF
+    global port, feature_extractor, k, img_folder, logs, working_folder, data_folder, DATASET, DATASET_LF_FOLDER, DATASET_IDS, DB_LF
 
     with open(conf_file) as settings_file:
@@ -20,11 +20,9 @@ def load_setting(conf_file):
         os.mkdir(data_folder)
 
     DATASET = os.path.join(data_folder, 'dataset.npy')
-    #DATASET1 = os.path.join(data_folder, 'dataset_resized.npy')
-    #DATASET2 = os.path.join(data_folder, 'dataset_bw.npy')
     DATASET_LF_FOLDER = os.path.join(data_folder, 'lf')
     DATASET_IDS = os.path.join(data_folder, 'dataset.ids')
-    #DATASET_IDS_LF = os.path.join(data_folder, 'dataset_lf.ids')
+    DB_LF = os.path.join(data_folder, 'sqlite_lf/lf.db')
 
     img_folder = settings['img_folder']
     logs = os.path.join(working_folder, settings['log_folder'])
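
Note: only img_folder and log_folder are visible in this hunk; a conf file consistent with the loader might look as follows, where every other key (port, feature_extractor, k, working_folder, data_folder) is an assumption inferred from the globals and the values are purely illustrative:

    {
      "port": 8190,
      "feature_extractor": "http://localhost:8091/extract",
      "k": 10,
      "working_folder": "/workspace/workdir",
      "data_folder": "data",
      "img_folder": "/workspace/data/img",
      "log_folder": "logs"
    }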

src/LFBulkExtractionToDB.py Normal file

@@ -0,0 +1,40 @@
from pathlib import Path
import tqdm
import LFUtilities
import BEBLIDExtractor as lf
import argparse
import os
from LFDB import LFDB

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='LF bulk extraction')
    parser.add_argument('src', type=str, help='img src folder path')
    parser.add_argument('dest', type=str, help='LF DB file')
    args = parser.parse_args()
    src = args.src
    dest = args.dest

    lf_db = LFDB(dest)
    paths = Path(src).rglob('*.*')
    paths_list = list(paths)
    print('Extracting lf...')
    for path in tqdm.tqdm(paths_list):
        try:
            kp, des = lf.extract(os.path.join(path.parent, path.name))
            features = LFUtilities.pickle_keypoints(kp, des)
            blob = LFUtilities.serialize_object(features)
            filename = os.path.splitext(path.name)[0]
            lf_db.put(filename, blob)
        except Exception as e:
            print("cannot process '%s'" % path)
            print(e)
            pass
    lf_db.commit()
    lf_db.close()
    print('lf extracted.')
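
Note: extract_lf_db.sh (below) invokes this as python3 /workspace/src/LFBulkExtractionToDB.py $IMG_FOLDER/$1 $DB_PATH. LFDB.put only INSERTs into an lf table, so the database file and table must exist before the bulk run; see the schema sketch after the LFDB.py listing below.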

src/LFDB.py Normal file

@@ -0,0 +1,55 @@
import os
import sqlite3
from sqlite3 import Error
from werkzeug.datastructures import FileStorage


class LFDB:

    def __init__(self, db_path):
        # self.lf = LFUtilities.load(settings.DATASET_BEBLID)
        # self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
        # self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
        self.conn = sqlite3.connect(db_path, check_same_thread=False)

    def close(self):
        if self.conn:
            self.conn.close()

    def put(self, docId, features):
        try:
            self.conn.text_factory = str
            #print("[INFO] : Successful connection!")
            cur = self.conn.cursor()
            insert_file = '''INSERT INTO lf(docId, features) VALUES(?, ?)'''
            cur = self.conn.cursor()
            cur.execute(insert_file, (docId, features,))
            #print("[INFO] : The blob for ", docId, " is in the database.")
        except Error as e:
            print(e)

    def commit(self):
        try:
            if self.conn:
                self.conn.commit()
                print("committing...")
        except Error as e:
            print(e)

    def get(self, docId):
        try:
            self.conn.text_factory = str
            cur = self.conn.cursor()
            # print("[INFO] : Connected to SQLite to read_blob_data")
            sql_fetch_blob_query = """SELECT * from lf where docId = ?"""
            cur.execute(sql_fetch_blob_query, (docId,))
            record = cur.fetchall()
            for row in record:
                converted_file_name = row[1]
                blob = row[2]
                # parse out the file name from converted_file_name
            cur.close()
        except sqlite3.Error as error:
            print("[INFO] : Failed to read blob data from sqlite table", error)
        return blob
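
Note: no code in this commit creates the lf table that LFDB reads and writes. A one-time setup sketch; the schema is an assumption inferred from put(), which inserts (docId, features), and get(), which reads row[1] and row[2] and therefore implies a leading id column. Also note that when no row matches, blob in get() is never assigned and the method raises NameError; callers such as BEBLIDRescorerDB absorb this in their try/except:

    import sqlite3

    conn = sqlite3.connect('/workspace/workdir/data/sqlite_lf/lf.db')
    conn.execute('''CREATE TABLE IF NOT EXISTS lf (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        docId TEXT UNIQUE NOT NULL,
                        features BLOB NOT NULL)''')
    conn.commit()
    conn.close()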

src/LFUtilities.py

@@ -3,6 +3,7 @@ import numpy as np
 import pickle as pickle
 import os
 
+
 def resize(max_side, img):
     if img.shape[1] > img.shape[0]:
         r = max_side / img.shape[1]
@@ -27,6 +28,14 @@ def pickle_keypoints(keypoints, descriptors):
     return temp_array
 
+def serialize_object(obj):
+    return pickle.dumps(obj)
+
+
+def deserialize_object(serialized_obj):
+    return pickle.loads(serialized_obj)
+
+
 def unpickle_keypoints(array):
     keypoints = []
     descriptors = []
@@ -74,3 +83,20 @@ def load_img_lf(lf_path, id):
     data = pickle.load(open(dest_path, "rb"))
     kp, desc = unpickle_keypoints(data)
     return (kp, desc)
+
+
+def load_img_lf_GPU(lf_path, id):
+    dest_folder_name = id[0:3]
+    filename = id + '.dat'
+    dest_folder_path = os.path.join(lf_path, dest_folder_name)
+    dest_path = os.path.join(dest_folder_path, filename)
+    data = pickle.load(open(dest_path, "rb"))
+    kp, desc = unpickle_keypoints(data)
+    data_gpu_mat = cv2.cuda_GpuMat(np.zeros((1500,), dtype=int))
+    if len(desc) > 0:
+        data_gpu_mat = cv2.cuda_GpuMat(desc)
+    desc = data_gpu_mat
+    return (kp, desc)
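
Note: serialize_object/deserialize_object are thin pickle wrappers; combined with pickle_keypoints/unpickle_keypoints they define the blob format stored in the LF database. A round-trip sketch (the image path is illustrative; BEBLIDExtractor is imported the same way as in LFBulkExtractionToDB.py):

    import LFUtilities
    import BEBLIDExtractor as lf

    kp, des = lf.extract('some_image.jpg')
    blob = LFUtilities.serialize_object(LFUtilities.pickle_keypoints(kp, des))
    kp2, des2 = LFUtilities.unpickle_keypoints(LFUtilities.deserialize_object(blob))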

src/Searcher.py

@@ -3,8 +3,9 @@ import numpy as np
 import pickle as pickle
 import LFUtilities
-import beniculturaliSettings as settings
-from BEBLIDRescorer import BEBLIDRescorer
+import ImageRecognitionSettings as settings
+from BEBLIDRescorerDB import BEBLIDRescorerDB
+#from BEBLIDRescorerGPU import BEBLIDRescorerGPU
 from FAISSSearchEngine import FAISSSearchEngine
 import FeatureExtractor as fe
 import BEBLIDExtractor as lf
@@ -18,7 +19,7 @@ class Searcher:
         # np.save('/media/Data/data/beni_culturali/deploy/dataset', self.dataset)
         self.search_engine = FAISSSearchEngine()
-        self.rescorer = BEBLIDRescorer()
+        self.rescorer = BEBLIDRescorerDB()
 
     def get_id(self, idx):
         return self.search_engine.get_id(idx)

@@ -9,7 +9,7 @@ import urllib
 from BeniCulturaliSearchEngine import BeniCulturaliSearchEngine
-import beniculturaliSettings as settings
+import ImageRecognitionSettings as settings
 import uuid
 import requests

src/extract_lf.sh Executable file

@@ -0,0 +1,14 @@
#!/bin/bash

IMG_FOLDER=/workspace/workdir
DATA_FOLDER=/workspace/workdir/data/lf

mkdir $DATA_FOLDER

#if [[ $2 = '-o' ]]; then
#    echo "deleting existing features"

python3 /workspace/src/LFBulkExtraction4File.py $IMG_FOLDER/$1 $DATA_FOLDER
chmod 777 $DATA_FOLDER/*

echo "Done"

src/extract_lf_db.sh Executable file

@@ -0,0 +1,12 @@
#!/bin/bash

IMG_FOLDER=/workspace/workdir
DB_PATH=/workspace/workdir/data/sqlite_lf/lf.db

#if [[ $2 = '-o' ]]; then
#    echo "deleting existing features"

python3 /workspace/src/LFBulkExtractionToDB.py $IMG_FOLDER/$1 $DB_PATH
chmod 777 $DB_PATH/*

echo "Done"

@@ -31,7 +31,7 @@
 <td valign="top">
     <input type="hidden" value="" name="" id="objId">
     <input type="hidden" value="true" name="tohtml">
-    <input type="text" value="true" name="rescorer">
+    <input type="hidden" value="true" name="rescorer">
     <input style="display: none;" id="urlToUpload" name="url" type="text" size="49" onclick="" onchange="document.getElementById('queryImage').value=''">
     <input id="imageToUpload" name="image" type="file" size="38" onclick="" onchange="document.getElementById('queryImage').value=''">

@@ -36,6 +36,8 @@
 <td valign="top">
     <input type="hidden" value="" name="" id="objId">
     <input type="hidden" value="true" name="tohtml">
+    <input type="hidden" value="true" name="rescorer">
     <input style="display: none;" id="urlToUpload" name="url" type="text" size="49" onclick="" onchange="document.getElementById('queryImage').value=''">
     <input id="imageToUpload" name="image" type="file" size="38" onclick="" onchange="document.getElementById('queryImage').value=''">