# wcag_AI_validation/restserver/routers/routes_wcag_g88.py
from fastapi import APIRouter, Request
from fastapi.responses import JSONResponse
import logging
from pydantic import BaseModel
import json
import aiofiles
import asyncio
from dependences.utils import (
disclaim_bool_string,
prepare_output_folder,
prepare_folder_path,
create_folder,
db_persistence_insert,
)
from dependences.title_content_extractor import PageTitleExtractor
from dependences.mllm_management import MLLMManager, parse_mllm_standard_response
# Canned error messages returned in the JSON error responses of the
# /wcag_g88_validation endpoint (400 and 500 paths respectively).
invalid_json_input_msg = "Invalid JSON format"
unexpected_error_msg = "Unexpected Error: could not end the process"
class WCAG_g88Valuation(BaseModel):
    """Request body for the /wcag_g88_validation endpoint."""

    # URL of the page whose title will be assessed (default is a sample page).
    page_url: str = "https://www.bbc.com"
    # Stringified boolean ("True"/"False"); parsed with disclaim_bool_string
    # to decide whether the elaboration is also written to disk.
    save_elaboration: str = "True"
class WCAG_g88ValuationRoutes:
    """Routes for WCAG technique G88 validation (descriptive page titles).

    Registers a single POST route, ``/wcag_g88_validation``, that extracts a
    page's title/content, has one or two MLLMs assess it against G88,
    persists the outcome to the local DB and optionally saves it to disk.
    """

    def __init__(self, connection_db, mllm_settings):
        """Store dependencies and register the route on a fresh APIRouter.

        :param connection_db: open DB handle consumed by db_persistence_insert.
        :param mllm_settings: dict of MLLM endpoints/keys/model ids. When
            ``mllm_settings["openai_model"] == "Both"`` the nested
            ``*_remote`` / ``*_local`` entries are used; otherwise the flat
            entries are used directly.
        """
        self.connection_db = connection_db
        self.mllm_settings = mllm_settings
        self.router = APIRouter()
        self.router.add_api_route(
            "/wcag_g88_validation",
            self.wcag_g88_validation,
            methods=["POST"],
            tags=["Wcag G88 Validation"],
            description="WCAG validator G88 validation: Providing descriptive titles for web pages",
            name="wcag G88 validation",
            dependencies=[],
        )
        logging.info("wcag g88 routes correctly initialized.")

    def _model_id_for_logging(self):
        """Return the model-id string recorded with results.

        In dual-model mode both ids are joined with '&' so DB rows and output
        folders identify which pair of models produced the assessment.
        """
        if self.mllm_settings["openai_model"] == "Both":
            ids = self.mllm_settings["mllm_model_id"]
            return ids["model_id_remote"] + "&" + ids["model_id_local"]
        return self.mllm_settings["mllm_model_id"]

    def _evaluate_both_models(self, title_content):
        """Run remote and local MLLM evaluations concurrently.

        Returns the combined assessments object keyed by
        ``mllm_g88_assessments`` with one entry per model.
        """
        # Local import kept deliberately: only needed in dual-model mode.
        from concurrent.futures import ThreadPoolExecutor

        def run_model_evaluation(endpoint, api_key, model_id, openai_model, label):
            # Each worker builds its own manager; MLLMManager instances are
            # not shared across threads.
            manager = MLLMManager(endpoint, api_key, model_id)
            logging.info(
                "Using %s model for title evaluation: %s", label, manager.end_point
            )
            logging.info("mllm_end_point:%s", endpoint)
            logging.info("mllm_model_id:%s", model_id)
            responses = manager.make_g88_evaluation(
                title_content, openai_model=openai_model
            )
            return parse_mllm_standard_response(
                responses["mllm_response"], model_id=model_id
            )

        end_points = self.mllm_settings["mllm_end_point"]
        api_keys = self.mllm_settings["mllm_api_key"]
        model_ids = self.mllm_settings["mllm_model_id"]
        with ThreadPoolExecutor(max_workers=2) as executor:
            future_openai = executor.submit(
                run_model_evaluation,
                end_points["model_end_point_remote"],
                api_keys["api_key_remote"],
                model_ids["model_id_remote"],
                True,
                "first remote",
            )
            future_local = executor.submit(
                run_model_evaluation,
                end_points["model_end_point_local"],
                api_keys["api_key_local"],
                model_ids["model_id_local"],
                False,
                "second local",
            )
            mllm_responses_openai = future_openai.result()
            mllm_responses_local = future_local.result()
        return {
            "mllm_g88_assessments": {
                "mllm_g88_assessments_openai": mllm_responses_openai,
                "mllm_g88_assessments_local": mllm_responses_local,
            }
        }

    def _evaluate_single_model(self, title_content):
        """Run the G88 evaluation against the single configured MLLM."""
        mllm_end_point = self.mllm_settings["mllm_end_point"]
        mllm_api_key = self.mllm_settings["mllm_api_key"]
        mllm_model_id = self.mllm_settings["mllm_model_id"]
        logging.info("mllm_end_point:%s", mllm_end_point)
        logging.info("mllm_model_id:%s", mllm_model_id)
        mllm_manager = MLLMManager(mllm_end_point, mllm_api_key, mllm_model_id)
        logging.info(
            "Using single model for g88 evaluation: %s", mllm_manager.end_point
        )
        mllm_responses = mllm_manager.make_g88_evaluation(
            title_content=title_content,
            openai_model=self.mllm_settings["openai_model"],
        )
        parsed_mllm_responses = parse_mllm_standard_response(
            mllm_responses["mllm_response"], model_id=mllm_model_id
        )
        return {"mllm_g88_assessments": parsed_mllm_responses}

    def _persist_result(self, page_url, llm_model, title_content, mllm_responses_object):
        """Best-effort insert into the local results DB.

        Persistence failures are logged and swallowed on purpose so a DB
        outage never fails the HTTP response.
        """
        try:
            json_in_str = json.dumps(title_content, ensure_ascii=False)
            json_out_str = json.dumps(mllm_responses_object, ensure_ascii=False)
            db_persistence_insert(
                connection_db=self.connection_db,
                insert_type="wcag_g88_validation",
                page_url=page_url,
                llm_model=llm_model,
                json_in_str=json_in_str,
                json_out_str=json_out_str,
                table="wcag_validator_results",
            )
        except Exception as e:
            logging.error("error persisting to local db: %s", e)

    async def wcag_g88_validation(
        self, request: Request, data: WCAG_g88Valuation
    ) -> JSONResponse:
        """POST handler: assess WCAG G88 (descriptive page title) for ``data.page_url``.

        Extracts the page title/content, evaluates it with one or two MLLMs
        depending on settings, persists the result, optionally saves it to
        disk, and returns the combined object.

        :returns: 200 with ``{"title_content", "mllm_validations"}`` on
            success, 400 on invalid JSON, 500 on any other failure.
        """
        try:
            logging.info("Received wcag G88 validation request.")
            # FastAPI/pydantic already validated the body; no need for a
            # model_dump_json() -> json.loads() round trip.
            json_content = data.model_dump()
            mllm_model_id_for_logging = self._model_id_for_logging()
            # Parse the save flag once and reuse it for both save points.
            save_elaboration = disclaim_bool_string(json_content["save_elaboration"])
            output_dir = None
            if save_elaboration:
                # NOTE(review): 'tecnhnique_name' typo matches the current
                # prepare_folder_path signature — fix in dependences.utils first.
                url_path, folder_str = prepare_folder_path(
                    json_content, mllm_model_id_for_logging, tecnhnique_name="g88"
                )
                output_dir = prepare_output_folder(url_path, folder_str)
            # Extract title and content from the target page.
            title_content_extractor = PageTitleExtractor(
                json_content["page_url"], threshold=800
            )
            logging.info(
                "Extracting title-content from: %s", json_content["page_url"]
            )
            title_content = await title_content_extractor.extract_page_title()
            logging.info("Extracted title_content: %s", title_content)
            if self.mllm_settings["openai_model"] == "Both":
                mllm_responses_object = self._evaluate_both_models(title_content)
            else:
                mllm_responses_object = self._evaluate_single_model(title_content)
            # Common: object returned in the response body.
            returned_object = {
                "title_content": title_content,
                "mllm_validations": mllm_responses_object,
            }
            self._persist_result(
                json_content["page_url"],
                mllm_model_id_for_logging,
                title_content,
                mllm_responses_object,
            )
            if save_elaboration and output_dir is not None:
                # Async write so the event loop is not blocked by disk I/O.
                async with aiofiles.open(
                    output_dir + "/mllm_assessments.json", "w", encoding="utf-8"
                ) as f:
                    await f.write(
                        json.dumps(returned_object, indent=2, ensure_ascii=False)
                    )
            return JSONResponse(content=returned_object, status_code=200)
        except json.JSONDecodeError:
            logging.error(invalid_json_input_msg)
            return JSONResponse(
                content={"error": invalid_json_input_msg}, status_code=400
            )
        except Exception as e:
            logging.error(unexpected_error_msg + " %s", e)
            return JSONResponse(
                content={"error": unexpected_error_msg}, status_code=500
            )