added FAISS Searcher

Paolo Bolettieri 2022-06-30 18:37:10 +02:00
parent ad4a95e000
commit 647a8778ba
14 changed files with 228 additions and 43 deletions

View File

@@ -0,0 +1,33 @@
from pathlib import Path
import tqdm
import LFUtilities
import BEBLIDExtractor as lf
import argparse
import os
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='BEBLID bulk extraction')
    parser.add_argument('src', type=str, help='text file containing a list of img paths')
    parser.add_argument('dest', type=str, help='BEBLID dest file')
    args = parser.parse_args()
    src = args.src
    dest = args.dest

    with open(src, 'r') as src_file:
        dataset = []
        print('Extracting lf...')
        for line in src_file:
            try:
                kp, des = lf.extract(line.strip())
                dataset.append((kp, des))
            except Exception as e:
                print("cannot process '%s': %s" % (line.strip(), e))

    LFUtilities.save(dataset, dest)
    print('lf extracted.')
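
The script expects src to be a plain text file with one image path per line. A hypothetical helper for producing such a list (the folder name and extension are placeholders, not paths from this repository):

from pathlib import Path

# Build the input list the bulk extractor expects: one image path per line.
# 'imgs' and '*.jpg' are placeholder values.
with open('img_list.txt', 'w') as out:
    for p in sorted(Path('imgs').rglob('*.jpg')):
        out.write(str(p.resolve()) + '\n')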

View File

@@ -0,0 +1,33 @@
from pathlib import Path
import tqdm
import LFUtilities
import BEBLIDExtractor as lf
import argparse
import os
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='BEBLID bulk extraction')
    parser.add_argument('src', type=str, help='text file containing a list of img paths')
    parser.add_argument('dest', type=str, help='BEBLID dest file')
    args = parser.parse_args()
    src = args.src
    dest = args.dest

    with open(src, 'r') as src_file:
        dataset = []
        print('Extracting lf...')
        for line in src_file:
            try:
                kp, des = lf.extract(line.strip())
                dataset.append((kp, des))
            except Exception as e:
                print("cannot process '%s': %s" % (line.strip(), e))

    LFUtilities.save(dataset, dest)
    print('lf extracted.')

src/BEBLIDExtractor.py Normal file (+19)
View File

@@ -0,0 +1,19 @@
import cv2
from pathlib import Path
import tqdm
import pickle
import os
import LFUtilities
import BEBLIDParameters as params
detector = cv2.ORB_create(params.KEYPOINTS)
descriptor = cv2.xfeatures2d.BEBLID_create(0.75)


def extract(img_path):
    img = LFUtilities.resize(params.IMG_SIZE, cv2.imread(img_path))
    kp = detector.detect(img, None)
    kp, des = descriptor.compute(img, kp)
    return (kp, des)
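
A usage sketch (image paths are placeholders): BEBLID produces binary descriptors, so they are matched with a Hamming-distance brute-force matcher plus a ratio test, the same pattern BEBLIDRescorer applies below.

import cv2
import BEBLIDExtractor as lf

kp_a, des_a = lf.extract('query.jpg')       # placeholder paths
kp_b, des_b = lf.extract('candidate.jpg')

bf = cv2.DescriptorMatcher_create(cv2.DescriptorMatcher_BRUTEFORCE_HAMMING)
nn_matches = bf.knnMatch(des_a, des_b, 2)
# Lowe-style ratio test; 0.8 mirrors NN_MATCH_RATIO in BEBLIDParameters
good = [m for m, n in nn_matches if m.distance < 0.8 * n.distance]
print('%d good matches' % len(good))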

src/BEBLIDParameters.py Normal file (+5)
View File

@@ -0,0 +1,5 @@
NN_MATCH_RATIO = 0.8
MIN_GOOD_MATCHES = 12
MIN_INLIERS = 10
KEYPOINTS = 500
IMG_SIZE = 500

src/BEBLIDRescorer.py Normal file (+70)
View File

@@ -0,0 +1,70 @@
import cv2
import numpy as np
import LFUtilities
import BEBLIDParameters
import beniculturaliSettings as settings
class BEBLIDRescorer:

    def __init__(self):
        #self.lf = LFUtilities.load(settings.DATASET_BEBLID)
        #self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
        #self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
        self.bf = cv2.DescriptorMatcher_create(cv2.DescriptorMatcher_BRUTEFORCE_HAMMING)

    def rescore_by_id(self, query_id, resultset):
        # load the query's local features from the sharded LF folder
        query = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, query_id)
        return self.rescore_by_img(query, resultset)

    def rescore_by_img(self, query, resultset):
        max_inliers = -1
        res = []
        counter = 0
        for data_id, _ in resultset:
            try:
                data_el = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, data_id)
                nn_matches = self.bf.knnMatch(query[1], data_el[1], 2)
                # Lowe ratio test to keep only discriminative matches
                good = [m for m, n in nn_matches if m.distance < BEBLIDParameters.NN_MATCH_RATIO * n.distance]
                if len(good) > BEBLIDParameters.MIN_GOOD_MATCHES:
                    src_pts = np.float32([query[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
                    dst_pts = np.float32([data_el[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
                    # geometric verification: RANSAC homography, scored by inlier ratio
                    M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 1.0)
                    matches_mask = mask.ravel().tolist()
                    inliers = np.count_nonzero(matches_mask)
                    if inliers >= BEBLIDParameters.MIN_INLIERS and inliers > max_inliers:
                        max_inliers = inliers
                        res.append((data_id, round(inliers / len(good), 3)))
                        print(f'candidate n. {counter}')
            except Exception as e:
                print('rescore error evaluating %s: %s' % (data_id, e))
            counter += 1
        if res:
            res.sort(key=lambda result: result[1], reverse=True)
        return res

    def add(self, lf):
        self.lf.append(lf)

    def remove(self, idx):
        self.descs = np.delete(self.descs, idx, axis=0)

    def save(self, is_backup=False):
        lf_save_file = settings.DATASET_LF
        ids_file = settings.DATASET_IDS_LF
        if lf_save_file != "None":
            if is_backup:
                lf_save_file += '.bak'
                ids_file += '.bak'
            # LFUtilities.save takes (lf_data, lf_path)
            LFUtilities.save(self.lf, lf_save_file)
            np.savetxt(ids_file, self.ids, fmt='%s')
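
A minimal usage sketch, assuming local features have already been extracted into settings.DATASET_LF_FOLDER; the result ids and scores below are made up, standing in for what the FAISS search returns:

import BEBLIDExtractor as lf
from BEBLIDRescorer import BEBLIDRescorer

rescorer = BEBLIDRescorer()
query_lf = lf.extract('query.jpg')                  # (keypoints, descriptors)
resultset = [('000123', 0.91), ('000456', 0.84)]    # hypothetical FAISS output
reranked = rescorer.rescore_by_img(query_lf, resultset)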

View File

@@ -7,4 +7,4 @@ def extract(img_path):
    files = {'image': ('img', open(img_path, 'rb'))}
    data = {'resize': 'true', 'bw': 'true'}
    r = requests.post(settings.feature_extractor, data=data, files=files)
-   return np.array(r.json())
+   return np.array(r.json(), dtype='f')
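
The dtype='f' added here casts the extracted feature vector to float32, which is what a FAISS index downstream expects; a quick check:

import numpy as np

v = np.array([0.1, 0.2, 0.3], dtype='f')    # 'f' is an alias for float32
assert v.dtype == np.float32                # FAISS operates on float32 vectors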

View File

@@ -2,7 +2,7 @@ from pathlib import Path
import tqdm
import LFUtilities
-import ORBExtractor as lf
+import BEBLIDExtractor as lf
import argparse
import os

View File

@@ -0,0 +1,32 @@
from pathlib import Path
import tqdm
import LFUtilities
import BEBLIDExtractor as lf
import argparse
import os

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='LF bulk extraction')
    parser.add_argument('src', type=str, help='img src folder path')
    parser.add_argument('dest', type=str, help='lf dest folder')
    args = parser.parse_args()
    src = args.src
    dest = args.dest

    paths = Path(src).rglob('*.*')
    paths_list = list(paths)

    print('Extracting lf...')
    for path in tqdm.tqdm(paths_list):
        try:
            kp, des = lf.extract(os.path.join(path.parent, path.name))
            filename = os.path.splitext(path.name)[0]
            LFUtilities.save_img_lf(dest, filename, kp, des)
        except Exception as e:
            print("cannot process '%s': %s" % (path, e))
    print('lf extracted.')

View File

@@ -3,14 +3,6 @@ import numpy as np
import pickle as pickle
import os

-THRESHOLD = 35
-MIN_GOOD_MATCHES = 12
-MIN_INLIERS = 6
-KEYPOINTS = 128
-IMG_SIZE = 500

def resize(max_side, img):
    if img.shape[1] > img.shape[0]:
        r = max_side / img.shape[1]
@@ -39,7 +31,7 @@ def unpickle_keypoints(array):
    keypoints = []
    descriptors = []
    for point in array:
-       temp_feature = cv2.KeyPoint(x=point[0][0], y=point[0][1], _size=point[1], _angle=point[2], _response=point[3], _octave=point[4], _class_id=point[5])
+       temp_feature = cv2.KeyPoint(x=point[0][0], y=point[0][1], size=point[1], angle=point[2], response=point[3], octave=point[4], class_id=point[5])
        temp_descriptor = point[6]
        keypoints.append(temp_feature)
        descriptors.append(temp_descriptor)
@@ -47,7 +39,7 @@ def unpickle_keypoints(array):
def load(lf_path):
-   print('loading LF dataset')
+   print('loading LF dataset ' + lf_path)
    ser_dataset = pickle.load(open(lf_path, "rb"))
    lf_dataset = []
    for item in ser_dataset:
@@ -63,5 +55,22 @@ def save(lf_data, lf_path):
    pickle.dump(data, open(lf_path, 'wb'))

def save_img_lf(dest, id, keypoints, descriptors):
    dest_folder_name = id[0:3]
    filename = id + '.dat'
    dest_folder_path = os.path.join(dest, dest_folder_name)
    if not os.path.exists(dest_folder_path):
        os.mkdir(dest_folder_path)
    dest_path = os.path.join(dest_folder_path, filename)
    data = pickle_keypoints(keypoints, descriptors)
    pickle.dump(data, open(dest_path, 'wb'))

def load_img_lf(lf_path, id):
    dest_folder_name = id[0:3]
    filename = id + '.dat'
    dest_folder_path = os.path.join(lf_path, dest_folder_name)
    dest_path = os.path.join(dest_folder_path, filename)
    data = pickle.load(open(dest_path, "rb"))
    kp, desc = unpickle_keypoints(data)
    return (kp, desc)
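
save_img_lf and load_img_lf shard the .dat files into subfolders named after the first three characters of the id, so id '000123' lands in <dest>/000/000123.dat. A round-trip sketch (id and paths are placeholders):

import LFUtilities
import BEBLIDExtractor as lf

kp, des = lf.extract('imgs/000123.jpg')                   # placeholder image
LFUtilities.save_img_lf('data/lf', '000123', kp, des)     # writes data/lf/000/000123.dat
kp2, des2 = LFUtilities.load_img_lf('data/lf', '000123')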

View File

@@ -1,16 +0,0 @@
import cv2
from pathlib import Path
import tqdm
import pickle
import os
import LFUtilities as lf

orb = cv2.ORB.create(lf.KEYPOINTS)

def extract(img_path):
    img = lf.resize(lf.IMG_SIZE, cv2.imread(img_path))
    kp, des = orb.detectAndCompute(img, mask=None)
    return (kp, des)

View File

@@ -4,21 +4,21 @@ import pickle as pickle
import LFUtilities
import beniculturaliSettings as settings
-from BeniCulturaliRescorer import BeniCulturaliRescorer
+from BEBLIDRescorer import BEBLIDRescorer
from FAISSSearchEngine import FAISSSearchEngine
import FeatureExtractor as fe
-import ORBExtractor as lf
+import BEBLIDExtractor as lf

class Searcher:
-   K_REORDERING = 15
+   K_REORDERING = 1000

    def __init__(self):
        # self.dataset = h5py.File(settings.dataset_file, 'r')['rmac'][...]
        # np.save('/media/Data/data/beni_culturali/deploy/dataset', self.dataset)
        self.search_engine = FAISSSearchEngine()
        #self.rescorer = BeniCulturaliRescorer()
        self.rescorer = BEBLIDRescorer()

    def get_id(self, idx):
        return self.search_engine.get_id(idx)
@@ -46,9 +46,9 @@ class Searcher:
        if rescorer:
            kq = self.K_REORDERING
        res = self.search_engine.search_by_id(query_id, kq)
-       # if rescorer:
-       #     res_lf = self.rescorer.rescore_by_id(query_id, res)
-       #     res = res_lf if res_lf else res[:k]
+       if rescorer:
+           res_lf = self.rescorer.rescore_by_id(query_id, res)
+           res = res_lf if res_lf else res[:k]
        return res

    def search_by_img(self, query_img, k=10, rescorer=False):
def search_by_img(self, query_img, k=10, rescorer=False):
@@ -57,10 +57,10 @@ class Searcher:
            kq = self.K_REORDERING
        query_desc = fe.extract(query_img)
        res = self.search_engine.search_by_img(query_desc, kq)
-       #if rescorer:
-       #    query_lf = lf.extract(query_img)
-       #    res_lf = self.rescorer.rescore_by_img(query_lf, res)
-       #    res = res_lf if res_lf else res[:k]
+       if rescorer:
+           query_lf = lf.extract(query_img)
+           res_lf = self.rescorer.rescore_by_img(query_lf, res)
+           res = res_lf if res_lf else res[:k]
        return res

    def save(self, is_backup=False):
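
FAISSSearchEngine is used throughout this file but its source is not part of this diff. A minimal sketch of what such a wrapper might look like, assuming a float32 feature matrix at settings.DATASET, ids at settings.DATASET_IDS, and an exact inner-product index; these are assumptions, not the committed code:

import faiss
import numpy as np
import beniculturaliSettings as settings

class FAISSSearchEngine:
    # Hypothetical sketch of the wrapper referenced above.
    def __init__(self):
        dataset = np.load(settings.DATASET).astype('float32')
        self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
        self.index = faiss.IndexFlatIP(dataset.shape[1])   # exact inner-product search
        self.index.add(dataset)

    def get_id(self, idx):
        return self.ids[idx]

    def search_by_img(self, query_desc, k):
        scores, idxs = self.index.search(query_desc.reshape(1, -1).astype('float32'), k)
        return [(self.ids[i], float(s)) for i, s in zip(idxs[0], scores[0])]

    def search_by_id(self, query_id, k):
        # reconstruct the stored vector for an indexed id and search with it
        return self.search_by_img(self.index.reconstruct(self.ids.index(query_id)), k)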

View File

@@ -26,7 +26,7 @@ def api_root():
    print('index_with_randoms.html')
    random_ids = []
    for i in range(0, 15):
-       random_ids.append(searcher.get_id(randint(0, 30)))
+       random_ids.append(searcher.get_id(randint(0, 600)))
    return render_template('index_with_randoms.html', random_ids=random_ids)

View File

@@ -2,7 +2,7 @@ import json
import os

def load_setting(conf_file):
-   global port, feature_extractor, k, img_folder, logs, working_folder, data_folder, DATASET, DATASET1, DATASET2, DATASET_LF, DATASET_IDS, DATASET_IDS_LF
+   global port, feature_extractor, k, img_folder, logs, working_folder, data_folder, DATASET, DATASET1, DATASET2, DATASET_LF_FOLDER, DATASET_IDS, DATASET_IDS_LF
    with open(conf_file) as settings_file:

@@ -22,7 +22,7 @@ def load_setting(conf_file):
    DATASET = os.path.join(data_folder, 'dataset.npy')
    #DATASET1 = os.path.join(data_folder, 'dataset_resized.npy')
    #DATASET2 = os.path.join(data_folder, 'dataset_bw.npy')
-   DATASET_LF = os.path.join(data_folder, 'dataset_lf.dat')
+   DATASET_LF_FOLDER = os.path.join(data_folder, 'lf')
    DATASET_IDS = os.path.join(data_folder, 'dataset.ids')
    #DATASET_IDS_LF = os.path.join(data_folder, 'dataset_lf.ids')

View File

@@ -31,7 +31,7 @@
<td valign="top">
    <input type="hidden" value="" name="" id="objId">
    <input type="hidden" value="true" name="tohtml">
-   <input type="hidden" value="false" name="rescorer">
+   <input type="text" value="true" name="rescorer">
    <input style="display: none;" id="urlToUpload" name="url" type="text" size="49" onclick="" onchange="document.getElementById('queryImage').value=''">
    <input id="imageToUpload" name="image" type="file" size="38" onclick="" onchange="document.getElementById('queryImage').value=''">