Fix errors and add option to use Azure OpenAI with an API key

dayesouza · dayesouza · commit a6436fbfa6cd · 2024-07-08T15:11:01.000-03:00
diff --git a/app/pages/Settings.py b/app/pages/Settings.py
@@ -8,6 +8,7 @@
 from util.openai_wrapper import (
     UIOpenAIConfiguration,
     key,
+    openai_azure_auth_type,
     openai_azure_model_key,
     openai_endpoint_key,
     openai_type_key,
@@ -33,7 +34,6 @@ def main():
     st.set_page_config(layout="wide", initial_sidebar_state="collapsed", page_icon="app/myapp.ico", page_title='Intelligence Toolkit | Settings')
     load_multipage_app()
     openai_config = UIOpenAIConfiguration().get_configuration()
-    print('openai_config', openai_config.api_type)
     st.header("Settings")
     secrets_handler = SecretsHandler()
     
@@ -51,6 +51,14 @@ def main():
         st.rerun()
 
     if type_input == "Azure OpenAI":
+        types_az = ["Managed Identity", "Azure Key"]
+        index_az = types_az.index(openai_config.az_auth_type) if openai_config.az_auth_type in types_az else 0
+        type_input_az = st.radio("Azure OpenAI Auth Type", types_az, index=index_az, disabled=is_mode_cloud)
+        if type_input_az != openai_config.az_auth_type:
+            print('type_input_az', type_input_az)
+            print('openai_config.az_auth_type', openai_config.az_auth_type)
+            on_change(secrets_handler, openai_azure_auth_type, type_input_az)()
+            st.rerun()
         col1, col2, col3 = st.columns(3)
         with col1:
             endpoint = st.text_input("Azure OpenAI Endpoint", disabled=is_mode_cloud, type="password", value=openai_config.api_base)
@@ -69,9 +77,9 @@ def main():
             if version != openai_config.api_version:
                 on_change(secrets_handler, openai_version_key, version)()
                 st.rerun()
-    else:
+    if type_input == "OpenAI" or type_input_az != "Managed Identity":
         placeholder = "Enter key here..."
-        secret_input = st.text_input('Enter your OpenAI key', type="password", disabled=is_mode_cloud, placeholder=placeholder, value=secret)
+        secret_input = st.text_input('Enter your key', type="password", disabled=is_mode_cloud, placeholder=placeholder, value=secret)
         
         if secret and len(secret) > 0:
             st.info("Your key is saved securely.")
diff --git a/app/util/openai_wrapper.py b/app/util/openai_wrapper.py
@@ -10,6 +10,7 @@
 openai_version_key = 'openai_version'
 openai_endpoint_key = 'openai_endpoint'
 openai_azure_model_key = 'openai_azure_model'
+openai_azure_auth_type = 'openai_azure_auth_type'
 
 class UIOpenAIConfiguration():
     def __init__(
@@ -23,13 +24,15 @@ def get_configuration(self):
         endpoint = self._secrets.get_secret(openai_endpoint_key) or None
         secret_key = self._secrets.get_secret(key) or None
         model = self._secrets.get_secret(openai_azure_model_key) or None
+        az_auth_type = self._secrets.get_secret(openai_azure_auth_type) or None
         
         config = {
             'api_type': type,
             'api_version': version,
             'api_base': endpoint,
             'api_key': secret_key,
-            'model': model
+            'model': model,
+            'az_auth_type': az_auth_type
         }
         values = {k: v for k, v in config.items() if v is not None}
         return OpenAIConfiguration(values)
diff --git a/app/workflows/question_answering/functions.py b/app/workflows/question_answering/functions.py
@@ -18,9 +18,15 @@
 from python.AI.text_splitter import TextSplitter
 
 sv_home = SessionVariables('home')
-ai_configuration = UIOpenAIConfiguration().get_configuration()
 
-embedder = Embedder(ai_configuration, config.cache_dir)
+
+def embedder():  # Build a fresh Embedder from the current UI OpenAI configuration on every call.
+    try:
+        ai_configuration = UIOpenAIConfiguration().get_configuration()
+        return Embedder(ai_configuration, config.cache_dir)
+    except Exception as e:  # any failure while building the configuration/Embedder lands here
+        st.error(f'Error creating connection: {e}')  # report the failure in the Streamlit UI
+        st.stop()  # NOTE(review): relies on st.stop() halting the script run, so no Embedder is returned on failure
 
 def chunk_files(sv, files):
     pb = st.progress(0, 'Chunking files...')
@@ -52,11 +58,11 @@ def chunk_files(sv, files):
                 for chunk in chunks:
                     file_chunks.append((file, chunk))
                 file.set_text(doc_text)
-
+    functions_embedder = embedder()
     for cx, (file, chunk) in enumerate(file_chunks):
         pb.progress((cx+1) / len(file_chunks), f'Embedding chunk {cx+1} of {len(file_chunks)}...')
         formatted_chunk = chunk.replace("\n", " ")
-        chunk_vec = embedder.embed_store_one(formatted_chunk, sv_home.save_cache.value)
+        chunk_vec = functions_embedder.embed_store_one(formatted_chunk, sv_home.save_cache.value)
         file.add_chunk(chunk, np.array(chunk_vec), cx+1)   
     pb.empty()
 
diff --git a/app/workflows/question_answering/workflow.py b/app/workflows/question_answering/workflow.py
@@ -10,7 +10,6 @@
 import scipy.spatial.distance
 import streamlit as st
 import workflows.question_answering.classes as classes
-import workflows.question_answering.config as config
 import workflows.question_answering.functions as functions
 import workflows.question_answering.prompts as prompts
 from util import ui_components
@@ -110,8 +109,10 @@ def create(sv: SessionVariables, workflow = None):
             iteration = 0
             source_counts = Counter()
             used_chunks = set()
+            functions_embedder = functions.embedder()
+
             while True:
-                qe = np.array(functions.embedder.embed_store_one(question, sv_home.save_cache.value))
+                qe = np.array(functions_embedder.embed_store_one(question, sv_home.save_cache.value))
                 iteration += 1
                 cosine_distances = sorted([(t, c, scipy.spatial.distance.cosine(qe, v)) for (t, c, v) in all_units], key=lambda x:x[2], reverse=False)
                 chunk_index = sv.answering_target_matches.value
@@ -185,15 +186,16 @@ def create(sv: SessionVariables, workflow = None):
                 qas_raw = ui_components.generate_text(messages, callbacks=[on_callback])
                 status_history += qas_raw + '<br/><br/>'
                 try:
+                    functions_embedder = functions.embedder()
                     qas = json.loads(qas_raw)
                     for qa in qas:
                         q = qa['question']
                         a = qa['answer']
                         raw_refs = qa['source']
                         file_page_refs = [tuple([int(x[1:]) for x in r.split(';')]) for r in raw_refs]
                         
-                        q_vec = np.array(functions.embedder.embed_store_one(q, sv_home.save_cache.value))
-                        a_vec = np.array(functions.embedder.embed_store_one(a, sv_home.save_cache.value))
+                        q_vec = np.array(functions_embedder.embed_store_one(q, sv_home.save_cache.value))
+                        a_vec = np.array(functions_embedder.embed_store_one(a, sv_home.save_cache.value))
 
                         qid = sv.answering_next_q_id.value
                         sv.answering_next_q_id.value += 1
@@ -209,7 +211,7 @@ def create(sv: SessionVariables, workflow = None):
                         if t == 'chunk' and c[0].id == f.id and c[1] == cx:
                             all_units.remove((t, c, v))
 
-                    status_history += f'Augmenting user question with partial answers:<br/>'
+                    status_history += 'Augmenting user question with partial answers:<br/>'
                     new_question = functions.update_question(sv, sv.answering_question_history.value, new_questions, lazy_answering_placeholder, status_history)
                     status_history += new_question + '<br/><br/>'
                     sv.answering_question_history.value.append(new_question)
diff --git a/app/workflows/record_matching/functions.py b/app/workflows/record_matching/functions.py
@@ -1,13 +1,20 @@
 # Copyright (c) 2024 Microsoft Corporation. All rights reserved.
 # Licensed under the MIT license. See LICENSE file in the project.
 #
+import streamlit as st
 from util.openai_wrapper import UIOpenAIConfiguration
 from workflows.record_matching import config
 
 from python.AI.embedder import Embedder
 
-ai_configuration = UIOpenAIConfiguration().get_configuration()
-embedder = Embedder(ai_configuration, config.cache_dir)
+
+def embedder():  # Build a fresh Embedder from the current UI OpenAI configuration on every call.
+    try:
+        ai_configuration = UIOpenAIConfiguration().get_configuration()
+        return Embedder(ai_configuration, config.cache_dir)
+    except Exception as e:  # any failure while building the configuration/Embedder lands here
+        st.error(f'Error creating connection: {e}')  # report the failure in the Streamlit UI
+        st.stop()  # NOTE(review): relies on st.stop() halting the script run, so no Embedder is returned on failure
 
 def convert_to_sentences(df, skip):
     sentences = []
diff --git a/app/workflows/record_matching/workflow.py b/app/workflows/record_matching/workflow.py
@@ -194,7 +194,9 @@ def on_embedding_batch_change(current, total):
 
                             callback = classes.BatchEmbeddingCallback()
                             callback.on_embedding_batch_change = on_embedding_batch_change
-                            embeddings = functions.embedder.embed_store_many(all_sentences,[callback], sv_home.save_cache.value)
+                            functions_embedder = functions.embedder()
+
+                            embeddings = functions_embedder.embed_store_many(all_sentences,[callback], sv_home.save_cache.value)
                             pb.empty()
                             
                             nbrs = NearestNeighbors(n_neighbors=50, n_jobs=1, algorithm='auto', leaf_size=20, metric='cosine').fit(embeddings)
diff --git a/app/workflows/risk_networks/functions.py b/app/workflows/risk_networks/functions.py
@@ -6,14 +6,21 @@
 
 import networkx as nx
 import pandas as pd
+import streamlit as st
 import workflows.risk_networks.config as config
 from streamlit_agraph import Config, Edge, Node
 from util.openai_wrapper import UIOpenAIConfiguration
 
 from python.AI.embedder import Embedder
 
-ai_configuration = UIOpenAIConfiguration().get_configuration()
-embedder = Embedder(ai_configuration, config.cache_dir)
+
+def embedder():  # Build a fresh Embedder from the current UI OpenAI configuration on every call.
+    try:
+        ai_configuration = UIOpenAIConfiguration().get_configuration()
+        return Embedder(ai_configuration, config.cache_dir)
+    except Exception as e:  # any failure while building the configuration/Embedder lands here
+        st.error(f'Error creating connection: {e}')  # report the failure in the Streamlit UI
+        st.stop()  # NOTE(review): relies on st.stop() halting the script run, so no Embedder is returned on failure
 
 def hsl_to_hex(h, s, l):
     rgb = colorsys.hls_to_rgb(h / 360, l / 100, s / 100)
diff --git a/app/workflows/risk_networks/workflow.py b/app/workflows/risk_networks/workflow.py
@@ -255,7 +255,8 @@ def on_embedding_batch_change(current, total):
 
                 callback = classes.BatchEmbeddingCallback()
                 callback.on_embedding_batch_change = on_embedding_batch_change
-                embeddings = functions.embedder.embed_store_many(texts,[callback], sv_home.save_cache.value)
+                functions_embedder = functions.embedder()
+                embeddings = functions_embedder.embed_store_many(texts,[callback], sv_home.save_cache.value)
                 pb.empty()
                 
                 vals = [(n, t, e) for (n, t), e in zip(text_types, embeddings)]
diff --git a/python/AI/client.py b/python/AI/client.py
@@ -4,8 +4,9 @@
 import logging
 from typing import List
 
-from openai import AzureOpenAI, OpenAI
 from azure.identity import DefaultAzureCredential, get_bearer_token_provider
+from openai import AzureOpenAI, OpenAI
+
 from .classes import LLMCallback
 from .defaults import API_BASE_REQUIRED_FOR_AZURE, DEFAULT_EMBEDDING_MODEL
 from .openai_configuration import OpenAIConfiguration
@@ -31,16 +32,24 @@ def create_openai_client(self) -> None:
                 api_base,
             )
 
-            token_provider = get_bearer_token_provider(
-                DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
-            )
+            if self.configuration.az_auth_type == 'Managed Identity':
+                token_provider = get_bearer_token_provider(
+                    DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
+                )
 
-            self._client = AzureOpenAI(
-                api_version=self.configuration.api_version,
-                # Azure-Specifics
-                azure_ad_token_provider=token_provider,
-                azure_endpoint=api_base,
-            )
+                self._client = AzureOpenAI(
+                    api_version=self.configuration.api_version,
+                    # Azure-Specifics
+                    azure_ad_token_provider=token_provider,
+                    azure_endpoint=api_base,
+                )
+            else:
+                self._client = AzureOpenAI(
+                    api_version=self.configuration.api_version,
+                    # Azure-Specifics
+                    azure_endpoint=api_base,
+                    api_key=self.configuration.api_key,
+                )
         else:
             log.info("Creating OpenAI client")
             self._client = OpenAI(
diff --git a/python/AI/defaults.py b/python/AI/defaults.py
@@ -7,6 +7,7 @@
 DEFAULT_LLM_MODEL = "gpt-4o"
 DEFAULT_AZURE_LLM_MODEL = "gpt-4o"
 DEFAULT_LLM_MAX_TOKENS = 4000
+DEFAULT_AZ_AUTH_TYPE = "Managed Identity"
 #
 # Text Embedding Parameters
 DEFAULT_EMBEDDING_MODEL = "text-embedding-ada-002"
diff --git a/python/AI/openai_configuration.py b/python/AI/openai_configuration.py
@@ -4,6 +4,7 @@
 import os
 
 from .defaults import (
+    DEFAULT_AZ_AUTH_TYPE,
     DEFAULT_AZURE_LLM_MODEL,
     DEFAULT_LLM_MAX_TOKENS,
     DEFAULT_LLM_MODEL,
@@ -31,6 +32,7 @@ class OpenAIConfiguration():
     _temperature: float | None
     _max_tokens: int | None
     _api_type: str
+    _az_auth_type: str
 
     def __init__(
         self,
@@ -44,6 +46,7 @@ def __init__(
         self._api_version = config.get("api_version", self._get_azure_openai_version())
         self._temperature = config.get("temperature", DEFAULT_TEMPERATURE)
         self._max_tokens = config.get("max_tokens", DEFAULT_LLM_MAX_TOKENS)
+        self._az_auth_type = config.get("az_auth_type", DEFAULT_AZ_AUTH_TYPE)
         self._api_type = config.get("api_type", oai_type)
 
 
@@ -98,3 +101,8 @@ def max_tokens(self) -> int | None:
     def api_type(self) -> str | None:
         """Type of the AI connection."""
         return self._api_type
+    
+    @property
+    def az_auth_type(self) -> str:
+        """Authentication type for the Azure OpenAI connection (defaults to DEFAULT_AZ_AUTH_TYPE)."""
+        return self._az_auth_type

Original file line number	Diff line number	Diff line change
`@@ -7,6 +7,7 @@`
`7`	`7`	`DEFAULT_LLM_MODEL = "gpt-4o"`
`8`	`8`	`DEFAULT_AZURE_LLM_MODEL = "gpt-4o"`
`9`	`9`	`DEFAULT_LLM_MAX_TOKENS = 4000`
	`10`	`+DEFAULT_AZ_AUTH_TYPE = "Managed Identity"`
`10`	`11`	`#`
`11`	`12`	`# Text Embedding Parameters`
`12`	`13`	`DEFAULT_EMBEDDING_MODEL = "text-embedding-ada-002"`