Remove the fixed seed from the Ollama call options

This commit is contained in:
Nicola Leonardi 2026-02-13 17:32:21 +01:00
parent 1706ed3a06
commit aea0d9fef8
4 changed files with 1880 additions and 1620 deletions

View File

@ -66,7 +66,7 @@ class MLLMManager:
},
],
"options": {
"seed": 123,
#"seed": 123,
"temperature": 0.7,
"num_ctx": 8192, # max input token
"num_predict": 800, # max output tokens

View File

@ -720,7 +720,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"id": "770ab52b",
"metadata": {},
"outputs": [],
@ -815,7 +815,7 @@
" },\n",
" ],\n",
" \"options\": {\n",
" \"seed\": 123,\n",
" #\"seed\": 123,\n",
" \"temperature\": 0.7,\n",
" \"num_ctx\": 8192, # max input token\n",
" \"num_predict\": 800, # max output tokens\n",
@ -7157,7 +7157,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": null,
"id": "8138d916",
"metadata": {},
"outputs": [],
@ -7232,7 +7232,7 @@
" },\n",
" ],\n",
" \"options\": {\n",
" \"seed\": 123,\n",
" #\"seed\": 123,\n",
" \"temperature\": 0.7,\n",
" \"num_ctx\": 8192, # max input token\n",
" \"num_predict\": 800, # max output tokens\n",
@ -8640,7 +8640,7 @@
},
{
"cell_type": "code",
"execution_count": 178,
"execution_count": null,
"id": "b7dccb9a",
"metadata": {},
"outputs": [],
@ -8716,7 +8716,7 @@
" },\n",
" ],\n",
" \"options\": {\n",
" \"seed\": 123,\n",
" #\"seed\": 123,\n",
" \"temperature\": 0.7,\n",
" \"num_ctx\": 8192, # max input token\n",
" \"num_predict\": 800, # max output tokens\n",
@ -9928,7 +9928,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": null,
"id": "6bfecd18",
"metadata": {},
"outputs": [],
@ -9975,7 +9975,7 @@
" },\n",
" ],\n",
" \"options\": {\n",
" \"seed\": 123,\n",
" #\"seed\": 123,\n",
" \"temperature\": 0.7,\n",
" \"num_ctx\": 8192, # max input token\n",
" \"num_predict\": 800, # max output tokens\n",
@ -11679,7 +11679,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": null,
"id": "c3b8fb43",
"metadata": {},
"outputs": [],
@ -11697,7 +11697,7 @@
" \"content\": \"my name is nicola\"}\n",
" ],\n",
" \"options\": {\n",
" \"seed\": 123,\n",
" #\"seed\": 123,\n",
" \"temperature\": 0.7,\n",
" \"num_ctx\": 8192, # max input token\n",
" \"num_predict\": 800, # max output tokens\n",

View File

@ -0,0 +1,403 @@
import pandas as pd
import json
import json
import time
import urllib.request
import urllib.parse
import logging
import os
import requests
import base64
import re
from PIL import Image
import io
def call_API_urlibrequest(
    data=None,
    verbose=False,
    url="",
    headers=None,
    method="post",
    base=2,  # exponential-backoff base, in seconds
    max_tries=3,
):
    """Call an HTTP API via urllib with exponential-backoff retries.

    Args:
        data (dict): Payload; query parameters for GET, JSON body for POST.
        verbose (bool): If True, log the input payload.
        url (str): Endpoint URL.
        headers (list): ``[name, value]`` header pairs.
        method (str): ``"get"`` or ``"post"``; anything else is rejected.
        base (int): Backoff base; wait ``base**i`` seconds after attempt ``i``.
        max_tries (int): Maximum number of attempts.

    Returns:
        dict: Decoded JSON response on success,
        ``{"error": <exception>}`` after all attempts fail, or
        ``{"error_message": "method_not_allowed"}`` for a bad method.
    """
    # Avoid mutable default arguments (shared across calls).
    data = {} if data is None else data
    headers = [] if headers is None else headers
    if verbose:
        logging.info("input_data:%s", data)
    # Allow multiple attempts to call the API in case of downtime;
    # return the last error response after all attempts fail.
    wait_seconds = [base**i for i in range(max_tries)]
    response_json = {"error": "no attempt made"}  # defined even if max_tries == 0
    for num_tries in range(max_tries):
        try:
            if method == "get":
                # Encode the parameters and append them to the URL.
                query_string = urllib.parse.urlencode(data)
                url_with_params = f"{url}?{query_string}"
                request = urllib.request.Request(url_with_params, method="GET")
            elif method == "post":
                # Serialize the payload to a JSON body.
                data_to_send = json.dumps(data).encode("utf-8")
                request = urllib.request.Request(url, data=data_to_send, method="POST")
            else:
                return {"error_message": "method_not_allowed"}
            for name, value in headers:
                request.add_header(name, value)
            # Send the request and decode the JSON response.
            with urllib.request.urlopen(request, timeout=300) as response:
                response_json = json.loads(response.read().decode("utf-8"))
                logging.info("response_json:%s", response_json)
                logging.info("response.status_code:%s", response.getcode())
                return response_json
        except Exception as e:
            logging.error("error message:%s", e)
            response_json = {"error": e}
            logging.info("num_tries:%s", num_tries)
            # Only back off if another attempt remains; sleeping after
            # the final failure just delayed the error return.
            if num_tries < max_tries - 1:
                logging.info(
                    "Waiting %s seconds before automatically trying again.",
                    str(wait_seconds[num_tries]),
                )
                time.sleep(wait_seconds[num_tries])
    logging.info(
        "Tried %s times to make API call to get a valid response object", max_tries
    )
    logging.info("Returning provided response")
    return response_json
def parse_mllm_alt_text_response(mllm_response):
    """Parse an MLLM response string into a structured dict.

    Extracts the JSON object embedded in the model output, whether
    wrapped in ```json ... ``` markdown fences or bare, and maps its
    verbose keys to stable snake_case names.

    Args:
        mllm_response (str): Raw MLLM response text containing JSON data.

    Returns:
        dict: Keys "original_alt_text_assessment", "assessment",
        "evaluation_result" and "new_alt_text"; all values are None
        when the input is empty, non-string, or cannot be parsed.
    """
    # Single definition of the all-None fallback (was duplicated 4x).
    empty = {
        "original_alt_text_assessment": None,
        "assessment": None,
        "evaluation_result": None,
        "new_alt_text": None,
    }
    try:
        # Reject None, empty and non-string inputs (e.g. pandas NaN)
        # up front instead of letting re.search raise TypeError.
        if not isinstance(mllm_response, str) or mllm_response == "":
            return dict(empty)
        # Prefer JSON fenced between ```json and ``` markers.
        json_match = re.search(r'```json\s*(.*?)\s*```', mllm_response, re.DOTALL)
        if not json_match:
            # Fall back to the first {...} span without code fences.
            json_match = re.search(r'\{.*\}', mllm_response, re.DOTALL)
            if not json_match:
                return dict(empty)
        json_str = json_match.group(1) if '```json' in mllm_response else json_match.group(0)
        parsed_data = json.loads(json_str)
        # Map the model's verbose keys to snake_case attributes;
        # missing keys default to "" (distinct from the parse-failure None).
        return {
            "original_alt_text_assessment": parsed_data.get("Original alt-text assessment", ""),
            "assessment": parsed_data.get("Assessment", ""),
            "evaluation_result": parsed_data.get("EvaluationResult", ""),
            "new_alt_text": parsed_data.get("New alt-text", ""),
        }
    except json.JSONDecodeError as e:
        print(f"JSON parsing error: {e}")
        return dict(empty)
    except Exception as e:
        print(f"Error parsing MLLM response: {e}")
        return dict(empty)
def encode_image_from_url(image_url):
    """Download an image and return it as a base64-encoded PNG string.

    Args:
        image_url (str): URL of the image to fetch.

    Returns:
        str: Base64-encoded PNG bytes of the (RGB-converted) image.

    Raises:
        requests.HTTPError: If the download fails with an HTTP error.
    """
    # Bounded timeout + explicit status check so a bad URL fails fast
    # instead of hanging forever or decoding an HTML error page as an image.
    response = requests.get(image_url, timeout=60)
    response.raise_for_status()
    image = Image.open(io.BytesIO(response.content))
    # Normalize to RGB (handles RGBA, palette, grayscale, etc.).
    if image.mode != 'RGB':
        image = image.convert('RGB')
    # Re-encode as PNG into an in-memory buffer.
    buffer = io.BytesIO()
    image.save(buffer, format='PNG')  # or 'JPEG'
    buffer.seek(0)
    # Encode to base64 text for the Ollama "images" payload field.
    return base64.b64encode(buffer.getvalue()).decode("utf-8")
# Exercise dataset; semicolon-separated CSV loaded at import time.
df_esercitazione = pd.read_csv("esercitazione_12_2025/dataset_esercitazione.csv",sep=";")
# Backend selection flags: OpenAI chat / OpenAI reasoning / (default) Ollama.
openai_model=False
openai_model_reasoning=False
# SECURITY NOTE(review): API keys are hard-coded below and were committed to
# version control — move them to environment variables or a secrets store and
# rotate the exposed keys.
if openai_model:
    # Azure OpenAI chat-completions endpoint; commented alternatives are other deployments.
    mllm_end_point = "https://hiis-accessibility-fonderia.cognitiveservices.azure.com/openai/deployments/gpt-4.1/chat/completions?api-version=2025-01-01-preview"#"https://hiis-accessibility-fonderia.cognitiveservices.azure.com/openai/deployments/gpt-4o-mini/chat/completions?api-version=2025-01-01-preview"#"https://hiis-accessibility-fonderia.cognitiveservices.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2025-01-01-preview"
    mllm_api_key = "4lwGUwrx7jsqdxESGBpN9wYYyLNsxzC2s8ZLQlZPCQUayDWuDo3NJQQJ99BKACfhMk5XJ3w3AAAAACOGs2uw"#"4lwGUwrx7jsqdxESGBpN9wYYyLNsxzC2s8ZLQlZPCQUayDWuDo3NJQQJ99BKACfhMk5XJ3w3AAAAACOGs2uw"#"4lwGUwrx7jsqdxESGBpN9wYYyLNsxzC2s8ZLQlZPCQUayDWuDo3NJQQJ99BKACfhMk5XJ3w3AAAAACOGs2uw"
    # NOTE(review): "gtp-4o" looks like a typo for "gpt-4o" — confirm against the
    # deployment name (the endpoint URL targets gpt-4.1).
    mllm_model_id = "gtp-4o"#"gpt-4.1"#"gpt-4o-mini"
elif openai_model_reasoning:
    # Azure OpenAI "responses" API for reasoning models.
    mllm_end_point = "https://hiis-accessibility-fonderia.cognitiveservices.azure.com/openai/responses?api-version=2025-04-01-preview"
    mllm_api_key = "4lwGUwrx7jsqdxESGBpN9wYYyLNsxzC2s8ZLQlZPCQUayDWuDo3NJQQJ99BKACfhMk5XJ3w3AAAAACOGs2uw"
    mllm_model_id = "gpt-5-mini"#"o1"#"gpt-4.1"#"gpt-4o-mini"
else:
    # Self-hosted Ollama-compatible chat endpoint (default path).
    mllm_end_point = "https://vgpu.hiis.cloud.isti.cnr.it/api/chat"
    mllm_api_key = "7122746edd8e53398ce4be0b08a822ef7ab5a4deeab54b5c7aa5e2fcf3766131"
    mllm_model_id = "gemma3:4b-wcag" #"gemma3:4b-q8_0-wcag"#"gemma3:12b"
# System instructions sent to the MLLM on every call: WCAG alt-text evaluation
# rules plus the required JSON output schema (keys: "Original alt-text
# assessment", "Assessment", "EvaluationResult", "New alt-text" — these must
# stay in sync with parse_mllm_alt_text_response).
system_prompt = """You are a web accessibility evaluation tool. Your task is to evaluate if alterative text for
images on webpages are appropriate according to WCAG guidelines. The alt-text should serve the same purpose and present
the same information as the original image content. As a result, it is possible to remove the image content and replace it with the text alternative and no functionality or information would be lost. This text alternative should not necessarily describe the image content.
It should serve the same purpose and convey the same information. This may sometimes result in a text alternative that looks like a description of the image content. But this would only be true if that was the best way to serve the same purpose.
If possible, the short text alternative should completely convey the purpose and information. If it is not possible to do this in a short phrase or sentence, then the short text alternative should provide a brief overview of the information.
The text alternative should be able to substitute for the image content. If the image content were removed from the page and substituted with the text, the page would still provide the same function and information. The text alternative would be brief but as informative as possible.
In deciding what text to include in the alternative, it is often a good idea to consider the following questions:
Why is this image content here?
What information is it presenting?
What purpose does it fulfill?
If I could not use the image content, what words would I use to convey the same function and/or information?
When image content contains words that are important to understanding the content, the alt text should include those words.
Decorative images dont add information to the content of a page. For example, the information provided by the image might already be given using adjacent text, or the image might be included to make the website more visually attractive.
In these cases, a null (empty) alt text should be provided (alt="") so that they can be ignored by assistive technologies, such as screen readers.
Follow these instructions carefully:
1. You will be provided as input with the following:
- The image found on the webpage.
- The associated alternative text. When the alt-text is empty or absent, you will be explicitly informed.
- The surrounding context of the image.
- The page title, headings and the content of the keywords and description <meta> tag, if found.
2. Determine the function and purpose of the image by analyzing these elements. Take into account the purpose and function
of the associated image by considering the page context. Check also if the image is, or is associated with, a link or a button,
and consider this in your judgement. If the image contains text use that as part of the context.
3. Provide a final assessment judgment based on the following:
- 'success' if you can assess with 'sufficient certainty' the alt-text is appropriate in relation to the image purpose,
- 'failure' if you can assess with 'sufficient certainty' that the alt-text is NOT appropriate,
- 'warning' if you cannot determine with 'sufficient certainty'.
where the level of certainty goes from 1 to 100 and 'sufficient certainty' means > 80
4. The original alt-text assessment on a scale from 1 to 5, where 5 is the best score. Use an integer number only.
5. Provide a brief reasoning for your judgment. If the image contains text, write it verbatim.
6. Keep your response within 150 words.
7. Generate the new most appropriate alt-text given the context and the steps before. Keep this within 30 words. Use the same natural language (e.g., English, Spanish, Italian) as the original alt-text.
8. Here is the JSON format the results must have:
{"Original alt-text assessment" : "*your original alt-text assessment*", "Assessment" : "*your assessment judgment*", "EvaluationResult": "*your response*", "New alt-text":"*new alt-text*"}"""
def call_llm_alt_text_assessment(mllm_end_point,original_alt_text,image_url,html_context,page_title,page_description,page_keywords,openai_model,system_prompt):
    """Assess an image's alt-text with an MLLM and return the parsed verdict.

    Builds a backend-specific payload (OpenAI chat, OpenAI reasoning, or
    Ollama — selected via the `openai_model` argument and the module-level
    `openai_model_reasoning` flag), calls the endpoint, then parses the
    JSON verdict out of the model response.

    Args:
        mllm_end_point (str): Chat/completions endpoint URL.
        original_alt_text (str|None): Alt-text found on the page (may be NaN).
        image_url (str): URL of the image under evaluation.
        html_context (str): Surrounding HTML of the image element.
        page_title: Page <title> content.
        page_description: Content of the description <meta> tag.
        page_keywords: Content of the keywords <meta> tag.
        openai_model (bool): True to use the OpenAI chat-completions format.
        system_prompt (str): System instructions for the model.

    Returns:
        dict: Parsed assessment (see parse_mllm_alt_text_response) plus a
        "model_id" key.

    NOTE(review): also reads module globals `openai_model_reasoning`,
    `mllm_model_id` and `mllm_api_key`.
    """
    # Best-effort defaults so a failure while building the context does not
    # leave these names undefined below (previously a latent NameError,
    # because the except branch only printed and execution continued).
    alt_text = ""
    image_URL = image_url
    HTML_context = ""
    page_text = ""
    try:
        if original_alt_text is None or pd.isna(original_alt_text):
            print("original_alt_text was nan:",original_alt_text)
            original_alt_text=''#'No alt-text found'
        alt_text = "Here is the alt-text of the image: " + original_alt_text
        HTML_context = (
            "Here is the surrounding HTML context of the element: "
            + html_context
        )
        page_text = "Here is the content of the page: Title of the page: " + str(
            page_title
        )
        page_text = (
            page_text
            + ", content of the <meta name='description'> tag: "
            + str(page_description)
        )
        page_text = (
            page_text
            + ", content of the <meta name='keywords'> tag: "
            + str(page_keywords)
        )
    except Exception as e:
        print("exception on html context management:",e)
    # Build the user prompt in the shape each backend expects.
    if openai_model:# or openai_model_reasoning:
        user_prompt = [
            {"type": "text", "text": alt_text},
            {"type": "image_url", "image_url": {"url": image_URL}},
            {"type": "text", "text": HTML_context},
            {"type": "text", "text": page_text},
        ]
    elif openai_model_reasoning:
        user_prompt = [
            {"type": "input_text", "text": alt_text},
            {"type": "input_image", "image_url": image_URL},
            {"type": "input_text", "text": HTML_context},
            {"type": "input_text", "text": page_text},
        ]
    else:
        # Ollama expects plain text plus a base64-encoded image.
        user_prompt = {
            "user_prompt": alt_text + " " + HTML_context + " " + page_text,
            "image_base64": encode_image_from_url(image_URL),
        }
    print("user prompt:",user_prompt)
    if openai_model:
        print("Creating OpenAI format payload")
        payload = {
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            "temperature": 0.7,
            "top_p": 0.95,
            "frequency_penalty": 0,
            "presence_penalty": 0,
            "max_tokens": 800,
            "stop": None,
        }
    elif openai_model_reasoning:
        print("Creating OpenAI reasoning format payload")
        payload = {
            "input": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            "model":mllm_model_id,
            "max_output_tokens": 800,
            "reasoning": {
                "effort": "low"}
        }
    else: # ollama format
        model_id=mllm_model_id
        print("Creating alternative LLM format payload")
        payload = {
            "model": model_id,
            "stream": False,
            "messages": [
                {"role": "system", "content": system_prompt},
                {
                    "role": "user",
                    "content": user_prompt["user_prompt"],
                    "images": [user_prompt["image_base64"]],
                },
            ],
            "options": {
                #"seed": 123,
                "temperature": 0.7,
                "num_ctx": 8192, # max input token
                "num_predict": 800, # max output tokens
                "top_p": 0.95,
            },
        }
    headers = [
        ["Content-Type", "application/json"],
        ["Authorization", f"Bearer {mllm_api_key}"],
    ]
    response = call_API_urlibrequest(
        url=mllm_end_point , headers=headers, data=payload
    )
    # Each backend nests the generated text differently in its response.
    try:
        if openai_model:
            model_response = response["choices"][0]["message"]["content"]
        elif openai_model_reasoning:
            model_response = response["output"][1]["content"][0]["text"]
        else:
            model_response = response["message"]["content"]
    except Exception as e:
        print("Error getting model response:", e)
        model_response = {}
    parsed_resp = parse_mllm_alt_text_response(model_response)
    parsed_resp["model_id"]=mllm_model_id
    return parsed_resp
def process_row_safe(row):
    """Run the alt-text assessment for a single dataframe row.

    Never raises: any exception is printed and replaced by a Series of
    all-None result fields so that DataFrame.apply can keep going.
    """
    try:
        assessment = call_llm_alt_text_assessment(
            mllm_end_point=mllm_end_point,
            original_alt_text=row["original_alt_text"],
            image_url=row["image_url"],
            html_context=row["html_context"],
            page_title=row["page_title"],
            page_description=row["page_description"],
            page_keywords=row["page_keywords"],
            openai_model=openai_model,
            system_prompt=system_prompt,
        )
    except Exception as e:
        print(f"Error processing row {row.name}: {e}")
        fallback_fields = (
            'original_alt_text_assessment',
            'assessment',
            'evaluation_result',
            'new_alt_text',
            'model_id',
        )
        return pd.Series({field: None for field in fallback_fields})
    return pd.Series(assessment)
# Apply and assign to new column names with prefix
# NOTE(review): runs the full dataset serially, one API call per row;
# the commented line below was the 2-row smoke-test variant.
#df_esercitazione[['llm_assessment_1', 'llm_judgment_1', 'llm_evaluation_result_1', 'llm_alt_text_1','llm_model_1']] = df_esercitazione.head(2).apply(process_row_safe, axis=1)
df_esercitazione[['llm_assessment_1', 'llm_judgment_1', 'llm_evaluation_result_1', 'llm_alt_text_1','llm_model_1']] = df_esercitazione.apply(process_row_safe, axis=1)
# Persist results; ":" is not filesystem-safe, so it is replaced in the model id.
df_esercitazione.to_csv("dataset_esercitazione_"+mllm_model_id.replace(":","_")+"_ollama.csv",sep=";",index=False)