Upload files to 'src'

Paolo Bolettieri 2022-06-22 16:15:47 +02:00
parent 7a465b0406
commit 0491140a94
13 changed files with 751 additions and 0 deletions

src/BeniCulturaliRescorer.py Normal file

@@ -0,0 +1,66 @@
import cv2
import numpy as np

import LFUtilities
import beniculturaliSettings as settings


class BeniCulturaliRescorer:

    def __init__(self):
        self.lf = LFUtilities.load(settings.DATASET_LF)
        self.ids = np.loadtxt(settings.DATASET_IDS_LF, dtype=str).tolist()
        self.orb = cv2.ORB_create()
        self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

    def rescore_by_id(self, query_id, resultset):
        query_idx = self.ids.index(query_id)
        return self.rescore_by_img(self.lf[query_idx], resultset)

    def rescore_by_img(self, query, resultset):
        max_inliers = -1
        res = []
        for data_id, _ in resultset:
            data_idx = self.ids.index(data_id)
            try:
                data_el = self.lf[data_idx]
                matches = self.bf.match(query[1], data_el[1])
                good = [m for m in matches if m.distance <= LFUtilities.THRESHOLD]
                if len(good) > LFUtilities.MIN_GOOD_MATCHES:
                    src_pts = np.float32([query[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
                    dst_pts = np.float32([data_el[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
                    M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 1.0)
                    matches_mask = mask.ravel().tolist()
                    # print(len(good))
                    inliers = np.count_nonzero(matches_mask)
                    # print(inliers)
                    if inliers >= LFUtilities.MIN_INLIERS and inliers > max_inliers:
                        max_inliers = inliers
                        res.append((data_id, inliers))
            except Exception:
                print('rescore error evaluating ' + data_id)

        if res:
            res.sort(key=lambda result: result[1], reverse=True)
        return res

    def add(self, lf):
        self.lf.append(lf)

    def remove(self, idx):
        # Bug fix: the original deleted from self.descs, which this class never
        # defines; self.lf is a plain list, so delete the entry directly.
        del self.lf[idx]

    def save(self, is_backup=False):
        lf_save_file = settings.DATASET_LF
        ids_file = settings.DATASET_IDS_LF
        if lf_save_file != "None":
            if is_backup:
                lf_save_file += '.bak'
                ids_file += '.bak'
            # Bug fix: LFUtilities.save expects (data, path); the original
            # passed the arguments in the opposite order.
            LFUtilities.save(self.lf, lf_save_file)
            np.savetxt(ids_file, self.ids, fmt='%s')
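The rescorer performs geometric verification: ORB matches are filtered by Hamming distance, a RANSAC homography is fitted, and candidates are reranked by inlier count. A minimal wiring sketch, mirroring the calls that are commented out in BeniCulturaliSearcher below (the id, paths, and variable names are illustrative, not part of the commit):

import ORBExtractor

# Assumes dataset_lf.dat / dataset_lf.ids exist and the settings are loaded.
rescorer = BeniCulturaliRescorer()
query_lf = ORBExtractor.extract('/path/to/query.jpg')
candidates = [('some-indexed-image-id', 0.87)]   # output of the global search
reranked = rescorer.rescore_by_img(query_lf, candidates)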

src/BeniCulturaliSearchEngine.py Normal file

@@ -0,0 +1,61 @@
import h5py
import numpy as np

import beniculturaliSettings as settings


class BeniCulturaliSearchEngine:

    def __init__(self):
        #self.dataset = h5py.File(settings.dataset_file, 'r')['rmac'][...]
        #np.save('/media/Data/data/beni_culturali/deploy/dataset', self.dataset)
        self.descs = np.load(settings.DATASET)
        #self.desc1 = np.load(settings.DATASET1)
        #self.desc2 = np.load(settings.DATASET2)
        #self.descs = (self.desc1 + self.desc2) / 2
        #self.descs /= np.linalg.norm(self.descs, axis=1, keepdims=True)
        self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()

    def get_id(self, idx):
        return self.ids[idx]

    def add(self, desc, id):
        self.ids.append(id)
        self.descs = np.vstack((self.descs, desc))
        self.save()

    def remove(self, id):
        idx = self.ids.index(id)
        del self.ids[idx]
        self.descs = np.delete(self.descs, idx, axis=0)

    def search_by_id(self, query_id, k=10):
        query_idx = self.ids.index(query_id)
        return self.search_by_img(self.descs[query_idx], k)

    def search_by_img(self, query, k=10):
        print('----------query features-------')
        print(query)
        # Rank every stored descriptor by dot product with the query.
        dot_product = np.dot(self.descs, query)
        idx = dot_product.argsort()[::-1][:k]
        res = []
        for i in idx:
            res.append((self.ids[i], round(float(dot_product[i]), 3)))
        return res

    def save(self, is_backup=False):
        descs_file = settings.DATASET
        ids_file = settings.DATASET_IDS
        if is_backup:
            descs_file += '.bak'
            ids_file += '.bak'
        np.save(descs_file, self.descs)
        np.savetxt(ids_file, self.ids, fmt='%s')
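Ranking is a plain dot product between the query and every stored descriptor, which coincides with cosine similarity only if all vectors are L2-normalized (the commented-out normalization in __init__ suggests that is the intent). A usage sketch, assuming dataset.npy and dataset.ids exist and beniculturaliSettings has been loaded:

engine = BeniCulturaliSearchEngine()
top5 = engine.search_by_id(engine.get_id(0), k=5)
print(top5)   # [(id, score), ...]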

src/BeniCulturaliSearcher.py Normal file

@@ -0,0 +1,68 @@
import cv2
import numpy as np
import pickle

import LFUtilities
import beniculturaliSettings as settings
from BeniCulturaliRescorer import BeniCulturaliRescorer
from BeniCulturaliSearchEngine import BeniCulturaliSearchEngine
import FeatureExtractor as fe
import ORBExtractor as lf


class BeniCulturaliSearcher:
    K_REORDERING = 15

    def __init__(self):
        # self.dataset = h5py.File(settings.dataset_file, 'r')['rmac'][...]
        # np.save('/media/Data/data/beni_culturali/deploy/dataset', self.dataset)
        self.search_engine = BeniCulturaliSearchEngine()
        #self.rescorer = BeniCulturaliRescorer()

    def get_id(self, idx):
        return self.search_engine.get_id(idx)

    def add(self, img_file, id):
        # Back up the current index before modifying it.
        self.save(True)
        desc = fe.extract(img_file)
        #orb = lf.extract(img_file)
        self.search_engine.add(desc, id)
        #self.rescorer.add(orb)
        self.save()
        print('added ' + id)

    def remove(self, id):
        self.save(True)
        self.search_engine.remove(id)
        #self.rescorer.remove(idx)
        self.save()
        print('removed ' + id)

    def search_by_id(self, query_id, k=10, rescorer=False):
        kq = k
        if rescorer:
            # Retrieve more candidates than requested so the rescorer
            # has something to reorder.
            kq = self.K_REORDERING
        res = self.search_engine.search_by_id(query_id, kq)
        # if rescorer:
        #     res_lf = self.rescorer.rescore_by_id(query_id, res)
        #     res = res_lf if res_lf else res[:k]
        return res

    def search_by_img(self, query_img, k=10, rescorer=False):
        kq = k
        if rescorer:
            kq = self.K_REORDERING
        query_desc = fe.extract(query_img)
        res = self.search_engine.search_by_img(query_desc, kq)
        #if rescorer:
        #    query_lf = lf.extract(query_img)
        #    res_lf = self.rescorer.rescore_by_img(query_lf, res)
        #    res = res_lf if res_lf else res[:k]
        return res

    def save(self, is_backup=False):
        self.search_engine.save(is_backup)
        #self.rescorer.save(is_backup)

src/FeatureExtractor.py Normal file

@@ -0,0 +1,10 @@
import numpy as np
import requests

import beniculturaliSettings as settings


def extract(img_path):
    # Send the image to the external feature-extraction service; the
    # 'resize' and 'bw' flags request server-side preprocessing.
    data = {'resize': 'true', 'bw': 'true'}
    with open(img_path, 'rb') as img:
        files = {'image': ('img', img)}
        r = requests.post(settings.feature_extractor, data=data, files=files)
    return np.array(r.json())
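A hypothetical call, assuming the configured fe_service endpoint is running; the returned JSON is converted to a 1-D NumPy descriptor that BeniCulturaliSearchEngine consumes directly:

desc = extract('/path/to/query.jpg')   # illustrative path
print(desc.shape)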

src/ImageBW.py Normal file

@@ -0,0 +1,44 @@
import os
import argparse
from pathlib import Path

from PIL import Image, ImageOps
import tqdm

max_size = 1050


def resize_img(src, dest):
    if not os.path.isdir(dest):
        os.mkdir(dest)
    paths = Path(src).rglob('*.*')
    paths_list = list(paths)
    for path in tqdm.tqdm(paths_list):
        #print(path.name)
        id, _ = os.path.splitext(path.name)
        outfile = os.path.join(dest, id + ".jpg")
        try:
            im = Image.open(path)
            if im.mode in ("RGBA", "P"):
                im = im.convert("RGB")
            width, height = im.size
            if width > max_size or height > max_size:
                # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
                im.thumbnail((max_size, max_size), Image.LANCZOS)
            im = ImageOps.grayscale(im)
            im.save(outfile, "JPEG")
        except IOError:
            print("cannot create thumbnail for '%s'" % path)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Image resizing')
    parser.add_argument('src', type=str, help='images source folder path')
    parser.add_argument('dest', type=str, help='images dest folder path')
    #args = parser.parse_args()
    #resize_img(args.src, args.dest)
    resize_img('/media/Data/data/test/gem/img/originals/export_Immagini_SitoPubblico', '/media/Data/data/test/gem/img/export_Immagini_SitoPubblico_resized_bw_autocontrast')
    #resize_img('/media/Data/data/test/gem/img/originals/ImmaginiComparazioni', '/media/Data/data/test/gem/img/ImmaginiComparazioni_resized_bw_autocontrast')

src/ImagePreprocessing.py Normal file

@@ -0,0 +1,53 @@
import os
import argparse
from pathlib import Path

from PIL import Image, ImageOps
import tqdm

max_size = 1050


def resize_img(src, dest):
    if not os.path.isdir(dest):
        os.mkdir(dest)
    res_folder = os.path.join(dest, 'resized')
    if not os.path.isdir(res_folder):
        os.mkdir(res_folder)
    bw_folder = os.path.join(dest, 'bw')
    if not os.path.isdir(bw_folder):
        os.mkdir(bw_folder)
    paths = Path(src).rglob('*.*')
    paths_list = list(paths)
    for path in tqdm.tqdm(paths_list):
        #print(path.name)
        id, _ = os.path.splitext(path.name)
        outfile_res = os.path.join(res_folder, id + ".jpg")
        outfile_bw = os.path.join(bw_folder, id + ".jpg")
        try:
            im = Image.open(path)
            if im.mode in ("RGBA", "P"):
                im = im.convert("RGB")
            width, height = im.size
            if width > max_size or height > max_size:
                # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
                im.thumbnail((max_size, max_size), Image.LANCZOS)
            im.save(outfile_res, "JPEG")
            im = ImageOps.grayscale(im)
            im.save(outfile_bw, "JPEG")
        except IOError:
            print("cannot process '%s'" % path)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Image resizing')
    parser.add_argument('src', type=str, help='images source folder path')
    parser.add_argument('dest', type=str, help='images dest folder path')
    args = parser.parse_args()
    resize_img(args.src, args.dest)
    #resize_img('/media/Data/data/test/gem/img/originals/export_Immagini_SitoPubblico', '/media/Data/data/test/gem/img/export_Immagini_SitoPubblico_resized_bw_auto')
    #resize_img('/media/Data/data/test/gem/img/originals/ImmaginiComparazioni', '/media/Data/data/test/gem/img/ImmaginiComparazioni_resized_bw_autocontrast')

src/ImageResize.py Normal file

@@ -0,0 +1,43 @@
import os
import argparse
from pathlib import Path

from PIL import Image
import tqdm

max_size = 1050


def resize_img(src, dest):
    if not os.path.isdir(dest):
        os.mkdir(dest)
    paths = Path(src).rglob('*.*')
    paths_list = list(paths)
    for path in tqdm.tqdm(paths_list):
        #print(path.name)
        id, _ = os.path.splitext(path.name)
        outfile = os.path.join(dest, id + ".jpg")
        try:
            im = Image.open(path)
            if im.mode in ("RGBA", "P"):
                im = im.convert("RGB")
            width, height = im.size
            if width > max_size or height > max_size:
                # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
                im.thumbnail((max_size, max_size), Image.LANCZOS)
            im.save(outfile, "JPEG")
        except IOError:
            print("cannot create thumbnail for '%s'" % path)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Image resizing')
    parser.add_argument('src', type=str, help='images source folder path')
    parser.add_argument('dest', type=str, help='images dest folder path')
    #args = parser.parse_args()
    #resize_img(args.src, args.dest)
    resize_img('/media/Data/data/test/gem/img/originals/export_Immagini_SitoPubblico', '/media/Data/data/test/gem/img/export_Immagini_SitoPubblico_resized')
    #resize_img('/media/Data/data/test/gem/img/originals/ImmaginiComparazioni', '/media/Data/data/test/gem/img/ImmaginiComparazioni_resized')

src/LFBulkExtraction.py Normal file

@@ -0,0 +1,38 @@
from pathlib import Path
import argparse
import os

import tqdm

import LFUtilities
import ORBExtractor as lf

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='LF bulk extraction')
    parser.add_argument('src', type=str, help='img src folder path')
    parser.add_argument('dest', type=str, help='lf dest folder')
    args = parser.parse_args()
    src = args.src
    dest = args.dest

    paths = Path(src).rglob('*.*')
    paths_list = list(paths)

    dataset = []
    ids = []
    print('Extracting lf...')
    for path in tqdm.tqdm(paths_list):
        try:
            kp, des = lf.extract(os.path.join(path.parent, path.name))
            dataset.append((kp, des))
            # Bug fix: record the id only on success so dataset and ids stay
            # aligned (the original wrote an id for every path, including
            # images whose extraction failed).
            id, _ = os.path.splitext(path.name)
            ids.append(id)
        except Exception:
            print("cannot process '%s'" % path)

    LFUtilities.save(dataset, os.path.join(dest, 'dataset_lf.dat'))
    with open(os.path.join(dest, 'dataset_lf.ids'), 'w') as f:
        for id in ids:
            f.write("%s\n" % id)
    print('lf extracted.')

src/LFUtilities.py Normal file

@@ -0,0 +1,67 @@
import cv2
import numpy as np
import pickle

THRESHOLD = 35
MIN_GOOD_MATCHES = 12
MIN_INLIERS = 6
KEYPOINTS = 128
IMG_SIZE = 500


def resize(max_side, img):
    if img.shape[1] > img.shape[0]:
        r = max_side / img.shape[1]
        dim = (max_side, int(img.shape[0] * r))
    else:
        r = max_side / img.shape[0]
        dim = (int(img.shape[1] * r), max_side)
    # perform the actual resizing of the image
    resized = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
    return resized


def pickle_keypoints(keypoints, descriptors):
    # Flatten each keypoint into a plain tuple alongside its descriptor row.
    i = 0
    temp_array = []
    for point in keypoints:
        temp = (point.pt, point.size, point.angle, point.response, point.octave,
                point.class_id, descriptors[i])
        i += 1
        temp_array.append(temp)
    return temp_array


def unpickle_keypoints(array):
    keypoints = []
    descriptors = []
    for point in array:
        # OpenCV 4 dropped the underscore-prefixed constructor arguments
        # (_size, _angle, ...) that this code originally used.
        temp_feature = cv2.KeyPoint(x=point[0][0], y=point[0][1], size=point[1],
                                    angle=point[2], response=point[3],
                                    octave=point[4], class_id=point[5])
        temp_descriptor = point[6]
        keypoints.append(temp_feature)
        descriptors.append(temp_descriptor)
    return keypoints, np.array(descriptors)


def load(lf_path):
    print('loading LF dataset')
    with open(lf_path, "rb") as f:
        ser_dataset = pickle.load(f)
    lf_dataset = []
    for item in ser_dataset:
        kp, desc = unpickle_keypoints(item)
        lf_dataset.append((kp, desc))
    return lf_dataset


def save(lf_data, lf_path):
    data = []
    for lf in lf_data:
        data.append(pickle_keypoints(lf[0], lf[1]))
    with open(lf_path, 'wb') as f:
        pickle.dump(data, f)
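These helpers exist because cv2.KeyPoint objects are not directly picklable: keypoints are flattened to tuples on save and rebuilt on load. A round-trip sketch, run from a separate script with hypothetical paths:

import ORBExtractor
from LFUtilities import save, load

kp, des = ORBExtractor.extract('/path/to/img.jpg')
save([(kp, des)], '/tmp/dataset_lf.dat')
restored_kp, restored_des = load('/tmp/dataset_lf.dat')[0]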

src/ORBExtractor.py Normal file

@@ -0,0 +1,16 @@
import cv2

import LFUtilities as lf

# A single shared ORB detector, capped at lf.KEYPOINTS features per image.
orb = cv2.ORB.create(lf.KEYPOINTS)


def extract(img_path):
    img = lf.resize(lf.IMG_SIZE, cv2.imread(img_path))
    kp, des = orb.detectAndCompute(img, mask=None)
    return (kp, des)

src/TestClient.py Normal file

@@ -0,0 +1,47 @@
import requests

BASE_URL = 'http://bilioso.isti.cnr.it:8190/bcir/'

# Search by the id of an image that is already indexed.
payload = {'id': '54b019e5ed5082b0938b14c4-IMG357781'}
r = requests.get(BASE_URL + 'searchById', params=payload)
print(r.json())

# Search by uploading a query image.
files = {'image': ('query', open('/media/Data/data/test/tpc_test/img/data_test/54b019e5ed5082b0938b14c4-IMG357781.jpg', 'rb'))}
r = requests.post(BASE_URL + 'searchByImg', files=files)
print(r.json())

# Search by image URL.
payload = {'url': 'http://bilioso.isti.cnr.it:8190/bcir/54b019e5ed5082b0938b14c4-IMG357781.jpg'}
r = requests.get(BASE_URL + 'searchByURL', params=payload)
print(r.json())

# Add the same image under the id 'prova', then check it appears in a search.
files = {'image': ('prova', open('/media/Data/data/test/tpc_test/img/data_test/54b019e5ed5082b0938b14c4-IMG357781.jpg', 'rb'))}
#files = {'image': ('prova', open('/media/Data/data/beni_culturali/deploy/dataset_ids.bak', 'rb'))}
r = requests.post(BASE_URL + 'addImg', files=files)
print(r.json())

files = {'image': ('query', open('/media/Data/data/test/tpc_test/img/data_test/54b019e5ed5082b0938b14c4-IMG357781.jpg', 'rb'))}
r = requests.post(BASE_URL + 'searchByImg', files=files)
print(r.json())

# Remove the image added above and search once more.
payload = {'id': 'prova'}
r = requests.get(BASE_URL + 'rmImg', params=payload)
print(r.json())

files = {'image': ('query', open('/media/Data/data/test/tpc_test/img/data_test/54b019e5ed5082b0938b14c4-IMG357781.jpg', 'rb'))}
r = requests.post(BASE_URL + 'searchByImg', files=files)
print(r.json())

src/beniculturali.py Normal file

@@ -0,0 +1,203 @@
from flask import Flask, request, redirect, url_for, flash, render_template, send_from_directory, abort
from random import randint
import cv2
import io
import numpy as np
import json
import urllib.request
import uuid
import requests
import os, os.path
import argparse

from PIL import Image
import tornado.wsgi
import tornado.httpserver
import tornado.ioloop

from BeniCulturaliSearcher import BeniCulturaliSearcher
from BeniCulturaliSearchEngine import BeniCulturaliSearchEngine
import beniculturaliSettings as settings

app = Flask(__name__)


@app.route('/bcir/')
def api_root():
    print('index_with_randoms.html')
    random_ids = []
    for i in range(0, 15):
        random_ids.append(searcher.get_id(randint(0, 3000)))
    return render_template('index_with_randoms.html', random_ids=random_ids)


def url_to_file(url):
    # Download a query image into the log folder, using a browser user agent.
    dest_file = uuid.uuid4().hex + ".png"
    dest_path = settings.logs + "/" + dest_file
    req = urllib.request.Request(
        url,
        data=None,
        headers={
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
        }
    )
    resp = urllib.request.urlopen(req)
    image = np.asarray(bytearray(resp.read()), dtype="uint8")
    decoded = cv2.imdecode(image, cv2.IMREAD_COLOR)
    cv2.imwrite(dest_path, decoded)
    #im = Image.fromarray(image)
    #im.save(dest_path)
    return dest_path


def post_to_file(image):
    dest_file = uuid.uuid4().hex + ".png"
    dest_path = settings.logs + "/" + dest_file
    image.save(dest_path)
    return dest_path


def get_res(results, query_url=None):
    if query_url is not None:
        return render_template('search.html', results=results, query_url=query_url)
    json_res = json.dumps(results)
    return json_res


@app.route('/bcir/searchById')
def search_by_id():
    id = request.args.get('id')
    rescorer = False
    if request.args.get("rescorer") == 'true':
        rescorer = True
    results = searcher.search_by_id(id, settings.k, rescorer)
    query_url = None
    if request.args.get("tohtml") is not None:
        query_url = id + ".jpg"
    return get_res(results, query_url)


@app.route('/bcir/searchByImg', methods=['POST'])
def search_by_img():
    if 'image' not in request.files:
        flash('No file part')
        return redirect(request.url)
    file = request.files['image']
    img_file = post_to_file(file)
    rescorer = False
    if request.form.get("rescorer") == 'true':
        rescorer = True
    #dest_file = uuid.uuid4().hex + ".jpg"
    #dest_path = settings.logs + "/" + dest_file
    #file.save(dest_path)
    #files = {'image': (dest_file, open(dest_path, 'rb'))}
    #r = requests.post(settings.rmac_service, files=files)
    #results = search_engine.search_by_img(np.array(r.json()), settings.k)
    results = searcher.search_by_img(img_file, settings.k, rescorer)
    query_url = None
    if request.form.get("tohtml") is not None:
        query_url = ""
    return get_res(results, query_url)


@app.route('/bcir/searchByURL')
def search_by_url():
    url = request.args.get('url')
    rescorer = False
    if request.args.get("rescorer") == 'true':
        rescorer = True
    img_file = url_to_file(url)
    # query = cv2.imdecode(image, cv2.IMREAD_COLOR)
    # dest_file = uuid.uuid4().hex + ".jpg"
    # dest_path = settings.logs + "/" + dest_file
    # cv2.imwrite(dest_path, query)
    # files = {'image': open(dest_path, 'rb')}
    # r = requests.post(settings.rmac_service, files=files)
    # results = search_engine.search_by_img(np.array(r.json()), settings.k)
    results = searcher.search_by_img(img_file, settings.k, rescorer)
    query_url = None
    if request.args.get("tohtml") is not None:
        query_url = url
    return get_res(results, query_url)


@app.route('/bcir/addImg', methods=['POST'])
def add_img():
    if 'image' not in request.files:
        flash('No file part')
        return redirect(request.url)
    try:
        file = request.files['image']
        id = request.files['image'].filename
        id, _ = os.path.splitext(id)
        img_file = post_to_file(file)
        searcher.add(img_file, id)
        json_res = json.dumps("done")
        return json_res
    except Exception:
        abort(500)


@app.route('/bcir/rmImg')
def remove_img():
    try:
        id = request.args.get('id')
        searcher.remove(id)
        json_res = json.dumps("done")
        return json_res
    except Exception:
        abort(500)


@app.route('/bcir/<path:filename>')
def download_file(filename):
    print(filename)
    values = filename.split('/')
    print(values)
    return send_from_directory(settings.img_folder, filename, as_attachment=False)


"""
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Reading configuration file')
    parser.add_argument('conf', type=str, help='Configuration file path')
    args = parser.parse_args()
    settings.load_setting(args.conf)
    global searcher
    searcher = BeniCulturaliSearcher()
    #app.run(host='0.0.0.0', port=8090, ssl_context='adhoc')
    app.run(host='0.0.0.0', port=settings.port)
"""


def start_tornado(app, port=8190):
    http_server = tornado.httpserver.HTTPServer(tornado.wsgi.WSGIContainer(app))
    http_server.listen(port)
    app.logger.info("Tornado server starting on port {}".format(port))
    tornado.ioloop.IOLoop.instance().start()


def start_from_terminal(app):
    parser = argparse.ArgumentParser(description='Reading configuration file')
    parser.add_argument('conf', type=str, help='Configuration file path')
    args = parser.parse_args()
    settings.load_setting(args.conf)
    global searcher
    searcher = BeniCulturaliSearcher()
    #if args.debug:
    #    app.run(debug=True, host='0.0.0.0', port=settings.port)
    #else:
    #    start_tornado(app, settings.port)
    app.run(debug=False, host='0.0.0.0', port=settings.port)


if __name__ == '__main__':
    start_from_terminal(app)

src/beniculturaliSettings.py Normal file

@@ -0,0 +1,35 @@
import json
import os


def load_setting(conf_file):
    global port, feature_extractor, k, img_folder, logs, working_folder, data_folder, DATASET, DATASET1, DATASET2, DATASET_LF, DATASET_IDS, DATASET_IDS_LF

    with open(conf_file) as settings_file:
        settings = json.load(settings_file)

    port = settings['port']
    feature_extractor = settings['fe_service']
    k = settings['k']

    working_folder = settings['working_folder']
    data_folder = os.path.join(working_folder, settings['data_folder'])
    if not os.path.isdir(data_folder):
        os.mkdir(data_folder)

    DATASET = os.path.join(data_folder, 'dataset.npy')
    #DATASET1 = os.path.join(data_folder, 'dataset_resized.npy')
    #DATASET2 = os.path.join(data_folder, 'dataset_bw.npy')
    DATASET_LF = os.path.join(data_folder, 'dataset_lf.dat')
    DATASET_IDS = os.path.join(data_folder, 'dataset.ids')
    # DATASET_IDS_LF is required by BeniCulturaliRescorer; uncomment it
    # (together with the rescorer) to enable local-feature reranking.
    #DATASET_IDS_LF = os.path.join(data_folder, 'dataset_lf.ids')

    img_folder = settings['img_folder']
    logs = os.path.join(working_folder, settings['log_folder'])
    if not os.path.isdir(logs):
        os.mkdir(logs)
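For reference, a minimal configuration file containing the keys load_setting reads might look like the following (all values are illustrative, not taken from the actual deployment):

{
    "port": 8190,
    "fe_service": "http://localhost:8080/extract",
    "k": 10,
    "working_folder": "/media/Data/data/beni_culturali/deploy",
    "data_folder": "data",
    "img_folder": "/media/Data/data/beni_culturali/img",
    "log_folder": "logs"
}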