44author_url: https://github.com/owndev/
55project_url: https://github.com/owndev/Open-WebUI-Functions
66funding_url: https://github.com/sponsors/owndev
7- version: 2.5.1
7+ version: 2.5.2
88license: Apache License 2.0
99description: A pipeline for interacting with Azure AI services, enabling seamless communication with various AI models via configurable headers and robust error handling. This includes support for Azure OpenAI models as well as other Azure AI models by dynamically managing headers and request configurations. Azure AI Search (RAG) integration is only supported with Azure OpenAI endpoints.
1010features:
1818 - Azure AI Search / RAG integration with enhanced citation display (Azure OpenAI only)
1919"""
2020
21- from typing import List , Union , Generator , Iterator , Optional , Dict , Any , AsyncIterator
21+ from typing import (
22+ List ,
23+ Union ,
24+ Generator ,
25+ Iterator ,
26+ Optional ,
27+ Dict ,
28+ Any ,
29+ AsyncIterator ,
30+ Set ,
31+ )
2232from urllib .parse import urlparse
2333from fastapi .responses import StreamingResponse
2434from pydantic import BaseModel , Field , GetCoreSchemaHandler
3040import logging
3141import base64
3242import hashlib
43+ import re
3344from pydantic_core import core_schema
3445
3546
@@ -375,7 +386,7 @@ def enhance_azure_search_response(self, response: Dict[str, Any]) -> Dict[str, A
375386 # Add citation section at the end
376387 if citation_details :
377388 citation_section = self ._format_citation_section (
378- citations , for_streaming = False
389+ citations , content , for_streaming = False
379390 )
380391 enhanced_content += citation_section
381392
@@ -574,6 +585,7 @@ async def stream_processor_with_citations(
574585
575586 try :
576587 full_response_buffer = ""
588+ response_content = "" # Track the actual response content
577589 citations_data = None
578590 citations_added = False
579591 all_chunks = []
@@ -586,6 +598,33 @@ async def stream_processor_with_citations(
586598 # Log chunk for debugging (only first 200 chars to avoid spam)
587599 log .debug (f"Processing chunk: { chunk_str [:200 ]} ..." )
588600
601+ # Extract content from delta messages to build the full response content
602+ try :
603+ lines = chunk_str .split ("\n " )
604+ for line in lines :
605+ if line .startswith ("data: " ) and line .strip () != "data: [DONE]" :
606+ json_str = line [6 :].strip ()
607+ if json_str and json_str != "[DONE]" :
608+ try :
609+ response_data = json .loads (json_str )
610+ if (
611+ isinstance (response_data , dict )
612+ and "choices" in response_data
613+ ):
614+ for choice in response_data ["choices" ]:
615+ if (
616+ "delta" in choice
617+ and "content" in choice ["delta" ]
618+ ):
619+ response_content += choice ["delta" ][
620+ "content"
621+ ]
622+ except json .JSONDecodeError :
623+ # Malformed or incomplete JSON is expected in streamed chunks; safely skip.
624+ pass
625+ except Exception as e :
626+ log .debug (f"Exception while processing chunk: { e } " )
627+
589628 # Look for citations in any part of the response
590629 if "citations" in chunk_str .lower () and not citations_data :
591630 log .debug ("Found 'citations' in chunk, attempting to parse..." )
@@ -674,8 +713,9 @@ async def stream_processor_with_citations(
674713 if citations_data and not citations_added :
675714 log .info ("Adding citation summary at end of stream..." )
676715
716+ # Pass the accumulated response content to filter citations
677717 citation_section = self ._format_citation_section (
678- citations_data , for_streaming = True
718+ citations_data , response_content , for_streaming = True
679719 )
680720 if citation_section :
681721 # Convert escaped newlines to actual newlines for display
@@ -740,14 +780,36 @@ async def stream_processor_with_citations(
740780 # Suppress close-time errors (e.g., SSL shutdown timeouts)
741781 pass
742782
783+ def _extract_referenced_citations (self , content : str ) -> Set [int ]:
784+ """
785+ Extract citation references (e.g., [doc1], [doc2]) from the content.
786+
787+ Args:
788+ content: The response content containing citation references
789+
790+ Returns:
791+ Set of citation indices that are referenced (e.g., {1, 2, 7, 8, 9})
792+ """
793+ # Find all [docN] references in the content
794+ pattern = r"\[doc(\d+)\]"
795+ matches = re .findall (pattern , content )
796+
797+ # Convert to integers and return as a set
798+ return {int (match ) for match in matches }
799+
743800 def _format_citation_section (
744- self , citations : List [Dict [str , Any ]], for_streaming : bool = False
801+ self ,
802+ citations : List [Dict [str , Any ]],
803+ content : str = "" ,
804+ for_streaming : bool = False ,
745805 ) -> str :
746806 """
747807 Creates a formatted citation section using collapsible details elements.
808+ Only includes citations that are actually referenced in the content.
748809
749810 Args:
750811 citations: List of citation objects
812+ content: The response content (used to filter only referenced citations)
751813 for_streaming: If True, format for streaming (with escaping), else for regular response
752814
753815 Returns:
@@ -756,44 +818,74 @@ def _format_citation_section(
756818 if not citations :
757819 return ""
758820
759- # Collect all citation details
821+ # Extract which citations are actually referenced in the content
822+ referenced_indices = self ._extract_referenced_citations (content )
823+
824+ # If we couldn't find any references, include all citations (backward compatibility)
825+ if not referenced_indices :
826+ referenced_indices = set (range (1 , len (citations ) + 1 ))
827+
828+ # Collect only referenced citation details
760829 citation_entries = []
761830
762831 for i , citation in enumerate (citations , 1 ):
832+ # Skip citations that are not referenced in the content
833+ if i not in referenced_indices :
834+ continue
835+
763836 if not isinstance (citation , dict ):
764837 continue
765838
766839 doc_ref = f"[doc{ i } ]"
767- title = citation .get ("title" , "Unknown Document" )
768- content = citation .get ("content" , "" )
769- filepath = citation .get ("filepath" )
770- url = citation .get ("url" )
771- chunk_id = citation .get ("chunk_id" , "0" )
840+
841+ # Get title with fallback to filepath or url
842+ title = citation .get ("title" , "" )
843+ # Check if title is empty (not just None) and use alternatives
844+ if not title or not title .strip ():
845+ # Try filepath first
846+ filepath = citation .get ("filepath" , "" )
847+ if filepath and filepath .strip ():
848+ title = filepath
849+ else :
850+ # Try url next
851+ url = citation .get ("url" , "" )
852+ if url and url .strip ():
853+ title = url
854+ else :
855+ # Final fallback
856+ title = "Unknown Document"
857+
858+ content_text = citation .get ("content" , "" )
859+ filepath = citation .get ("filepath" , "" )
860+ url = citation .get ("url" , "" )
861+ chunk_id = citation .get ("chunk_id" , "" )
772862
773863 # Build individual citation details
774864 citation_info = []
775865
776- if filepath :
866+ # Show filepath if available and not empty
867+ if filepath and filepath .strip ():
777868 citation_info .append (f"📁 **File:** `{ filepath } `" )
778- elif url :
869+ # Show URL if available, not empty, and no filepath was shown
870+ elif url and url .strip ():
779871 citation_info .append (f"🔗 **URL:** { url } " )
780872
781- citation_info .append (f"📄 **Chunk ID:** { chunk_id } " )
873+ # Show chunk_id if available and not empty
874+ if chunk_id is not None and str (chunk_id ).strip ():
875+ citation_info .append (f"📄 **Chunk ID:** { chunk_id } " )
782876
783877 # Add full content if available
784- if content :
878+ if content_text and str ( content_text ). strip () :
785879 try :
786880 # Clean content for display
787- clean_content = str (content ).strip ()
788- # Replace problematic characters for HTML display
789- clean_content = clean_content .replace ("\n " , " " ).replace ("\r " , " " )
881+ clean_content = str (content_text ).strip ()
790882 if for_streaming :
791883 # Additional escaping for streaming
792884 clean_content = clean_content .replace ("\\ " , "\\ \\ " ).replace (
793885 '"' , '\\ "'
794886 )
795887
796- citation_info .append (f "**Content:**" )
888+ citation_info .append ("**Content:**" )
797889 citation_info .append (f"> { clean_content } " )
798890 except Exception :
799891 citation_info .append ("**Content:** [Content unavailable]" )
@@ -809,6 +901,10 @@ def _format_citation_section(
809901
810902 citation_entries .append (citation_entry )
811903
904+ # Only create the section if we have citations to show
905+ if not citation_entries :
906+ return ""
907+
812908 # Combine all citations into main collapsible section
813909 if for_streaming :
814910 all_citations = "\\ n\\ n" .join (citation_entries )
0 commit comments