From 6a64d2b08557313bf121eeb231d6fdc4811e6585 Mon Sep 17 00:00:00 2001 From: vishnuszipstack <117254672+vishnuszipstack@users.noreply.github.com> Date: Wed, 13 Nov 2024 10:02:33 +0530 Subject: [PATCH 1/5] added confidace score in metadata (#826) --- tools/structure/src/constants.py | 1 + tools/structure/src/main.py | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/structure/src/constants.py b/tools/structure/src/constants.py index ae5dfb133..0def2f8a1 100644 --- a/tools/structure/src/constants.py +++ b/tools/structure/src/constants.py @@ -72,4 +72,5 @@ class SettingsKeys: METADATA = "metadata" EPILOGUE = "epilogue" HIGHLIGHT_DATA = "highlight_data" + CONFIDENCE_DATA = "confidence_data" EXECUTION_RUN_DATA_FOLDER = "EXECUTION_RUN_DATA_FOLDER" diff --git a/tools/structure/src/main.py b/tools/structure/src/main.py index dfb71345f..58e8fbe2d 100644 --- a/tools/structure/src/main.py +++ b/tools/structure/src/main.py @@ -216,10 +216,16 @@ def run( epilogue = metadata.pop(SettingsKeys.EPILOGUE, None) if epilogue: try: - from helper import transform_dict # type: ignore [attr-defined] + from helper import ( # type: ignore [attr-defined] + get_confidence_data, + transform_dict, + ) highlight_data = transform_dict(epilogue, tool_data_dir) metadata[SettingsKeys.HIGHLIGHT_DATA] = highlight_data + metadata[SettingsKeys.CONFIDENCE_DATA] = get_confidence_data( + epilogue, tool_data_dir + ) except ImportError: self.stream_log( f"Highlight metadata is not added. {PAID_FEATURE_MSG}", From d3628c87c1414e71693f5ded13bdfda06f94bceb Mon Sep 17 00:00:00 2001 From: Tahier Hussain <89440263+tahierhussain@users.noreply.github.com> Date: Thu, 14 Nov 2024 14:21:27 +0530 Subject: [PATCH 2/5] FIX: Handle Public/Protected Routes (#839) Made certain routes public that were previously inside the protected routes wrapper --- frontend/src/routes/Router.jsx | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/frontend/src/routes/Router.jsx b/frontend/src/routes/Router.jsx index b0cf28fc1..e2936ec54 100644 --- a/frontend/src/routes/Router.jsx +++ b/frontend/src/routes/Router.jsx @@ -113,20 +113,20 @@ function Router() { {/* protected routes */} } /> + {SelectProduct && ( + } /> + )} + {TrialRoutes && ( + } /> + )} + {PaymentSuccessful && ( + } /> + )} }> {MainAppRoute} {llmWhispererRouter && ( {llmWhispererRouter()} )} - {TrialRoutes && ( - } /> - )} - {SelectProduct && ( - } /> - )} - {PaymentSuccessful && ( - } /> - )} From 7f92acdb92adbd34978b0ffed6f483aab2f4c72c Mon Sep 17 00:00:00 2001 From: Tahier Hussain <89440263+tahierhussain@users.noreply.github.com> Date: Thu, 14 Nov 2024 14:45:21 +0530 Subject: [PATCH 3/5] FIX: Remove HubSpot signup event request from hook (#841) Remove HubSpot signup event request from hook Co-authored-by: Hari John Kuriakose --- frontend/src/hooks/useSessionValid.js | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/frontend/src/hooks/useSessionValid.js b/frontend/src/hooks/useSessionValid.js index c77310a9e..4d26b422f 100644 --- a/frontend/src/hooks/useSessionValid.js +++ b/frontend/src/hooks/useSessionValid.js @@ -19,15 +19,6 @@ try { // Plugin not available } -// Import useGoogleTagManager hook -let hsSignupEvent; -try { - hsSignupEvent = - require("../plugins/hooks/useGoogleTagManager.js").useGoogleTagManager(); -} catch { - // Ignore if hook not available -} - let selectedProduct; let selectedProductStore; let PRODUCT_NAMES = {}; @@ -120,11 +111,6 @@ function useSessionValid() { } }); - const isNewOrg = setOrgRes?.data?.is_new_org || false; - if (isNewOrg && hsSignupEvent) { - hsSignupEvent(); - } - userAndOrgDetails = setOrgRes?.data?.user; userAndOrgDetails["orgName"] = setOrgRes?.data?.organization?.name; userAndOrgDetails["orgId"] = orgId; From cf9c7d6f25407e9e621d811f800f4adb8bb4911e Mon Sep 17 00:00:00 2001 From: Deepak K <89829542+Deepak-Kesavan@users.noreply.github.com> Date: Fri, 15 Nov 2024 10:42:33 +0530 Subject: [PATCH 4/5] FIX: Missing tool setting enable_highlight (#843) * Added missing changes in v2 Signed-off-by: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com> * Added missing constant * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- backend/prompt_studio/prompt_studio_core_v2/constants.py | 1 + .../prompt_studio_core_v2/prompt_studio_helper.py | 2 ++ .../prompt_studio_registry_helper.py | 6 ++++++ 3 files changed, 9 insertions(+) diff --git a/backend/prompt_studio/prompt_studio_core_v2/constants.py b/backend/prompt_studio/prompt_studio_core_v2/constants.py index 5e6c47884..9838892f2 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/constants.py +++ b/backend/prompt_studio/prompt_studio_core_v2/constants.py @@ -94,6 +94,7 @@ class ToolStudioPromptKeys: SUMMARIZE_AS_SOURCE = "summarize_as_source" VARIABLE_MAP = "variable_map" RECORD = "record" + ENABLE_HIGHLIGHT = "enable_highlight" class FileViewTypes: diff --git a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py index e325488d9..aef8fc012 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py +++ b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py @@ -810,6 +810,7 @@ def _fetch_response( tool_settings[TSPKeys.PREAMBLE] = tool.preamble tool_settings[TSPKeys.POSTAMBLE] = tool.postamble tool_settings[TSPKeys.GRAMMAR] = grammar_list + tool_settings[TSPKeys.ENABLE_HIGHLIGHT] = tool.enable_highlight tool_settings[TSPKeys.PLATFORM_POSTAMBLE] = getattr( settings, TSPKeys.PLATFORM_POSTAMBLE.upper(), "" ) @@ -1068,6 +1069,7 @@ def _fetch_single_pass_response( tool_settings[TSPKeys.CHUNK_SIZE] = default_profile.chunk_size tool_settings[TSPKeys.CHUNK_OVERLAP] = default_profile.chunk_overlap tool_settings[TSPKeys.ENABLE_CHALLENGE] = tool.enable_challenge + tool_settings[TSPKeys.ENABLE_HIGHLIGHT] = tool.enable_highlight tool_settings[TSPKeys.CHALLENGE_LLM] = challenge_llm for prompt in prompts: diff --git a/backend/prompt_studio/prompt_studio_registry_v2/prompt_studio_registry_helper.py b/backend/prompt_studio/prompt_studio_registry_v2/prompt_studio_registry_helper.py index c323f068b..157593cdd 100644 --- a/backend/prompt_studio/prompt_studio_registry_v2/prompt_studio_registry_helper.py +++ b/backend/prompt_studio/prompt_studio_registry_v2/prompt_studio_registry_helper.py @@ -73,6 +73,12 @@ def frame_spec(tool: CustomTool) -> Spec: "default": False, "description": "Enables SinglePass Extraction", }, + "enable_highlight": { + "type": "boolean", + "title": "Enable highlight", + "default": False, + "description": "Enables highlight", + }, } spec = Spec( From 20032898d2faf068561b4aedd6f542016a0cd30b Mon Sep 17 00:00:00 2001 From: Deepak K <89829542+Deepak-Kesavan@users.noreply.github.com> Date: Fri, 15 Nov 2024 11:42:22 +0530 Subject: [PATCH 5/5] [FIX] Changes to display chunk data properly (#821) * Converted context from string to array Signed-off-by: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com> * v2 changes Signed-off-by: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com> * Removed unwanted comment * Update backend/prompt_studio/prompt_studio_output_manager_v2/serializers.py Co-authored-by: Chandrasekharan M <117059509+chandrasekharan-zipstack@users.noreply.github.com> Signed-off-by: Deepak K <89829542+Deepak-Kesavan@users.noreply.github.com> * Minor fix --------- Signed-off-by: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com> Signed-off-by: Deepak K <89829542+Deepak-Kesavan@users.noreply.github.com> Co-authored-by: Chandrasekharan M <117059509+chandrasekharan-zipstack@users.noreply.github.com> Co-authored-by: Gayathri <142381512+gaya3-zipstack@users.noreply.github.com> Co-authored-by: Hari John Kuriakose --- .../output_manager_helper.py | 2 +- .../serializers.py | 7 +++ .../output_manager_helper.py | 2 +- .../serializers.py | 7 +++ .../prompt-card/OutputForIndex.jsx | 14 +----- .../src/unstract/prompt_service/helper.py | 4 +- .../src/unstract/prompt_service/main.py | 43 +++++++------------ 7 files changed, 34 insertions(+), 45 deletions(-) diff --git a/backend/prompt_studio/prompt_studio_output_manager/output_manager_helper.py b/backend/prompt_studio/prompt_studio_output_manager/output_manager_helper.py index f4d1d6624..c65ea94ea 100644 --- a/backend/prompt_studio/prompt_studio_output_manager/output_manager_helper.py +++ b/backend/prompt_studio/prompt_studio_output_manager/output_manager_helper.py @@ -152,7 +152,7 @@ def update_or_create_prompt_output( output=output, eval_metrics=eval_metrics, tool=tool, - context=context, + context=json.dumps(context), challenge_data=challenge_data, ) diff --git a/backend/prompt_studio/prompt_studio_output_manager/serializers.py b/backend/prompt_studio/prompt_studio_output_manager/serializers.py index 0fd7c4060..14840745a 100644 --- a/backend/prompt_studio/prompt_studio_output_manager/serializers.py +++ b/backend/prompt_studio/prompt_studio_output_manager/serializers.py @@ -1,3 +1,4 @@ +import json import logging from usage.helper import UsageHelper @@ -25,4 +26,10 @@ def to_representation(self, instance): ) token_usage = {} data["token_usage"] = token_usage + # Convert string to list + try: + data["context"] = json.loads(data["context"]) + except json.JSONDecodeError: + # Convert the old value of data["context"] to a list + data["context"] = [data["context"]] return data diff --git a/backend/prompt_studio/prompt_studio_output_manager_v2/output_manager_helper.py b/backend/prompt_studio/prompt_studio_output_manager_v2/output_manager_helper.py index b5ee4dae1..a82ed95bf 100644 --- a/backend/prompt_studio/prompt_studio_output_manager_v2/output_manager_helper.py +++ b/backend/prompt_studio/prompt_studio_output_manager_v2/output_manager_helper.py @@ -154,7 +154,7 @@ def update_or_create_prompt_output( output=output, eval_metrics=eval_metrics, tool=tool, - context=context, + context=json.dumps(context), challenge_data=challenge_data, ) diff --git a/backend/prompt_studio/prompt_studio_output_manager_v2/serializers.py b/backend/prompt_studio/prompt_studio_output_manager_v2/serializers.py index f3b769c1b..0d40826ec 100644 --- a/backend/prompt_studio/prompt_studio_output_manager_v2/serializers.py +++ b/backend/prompt_studio/prompt_studio_output_manager_v2/serializers.py @@ -1,3 +1,4 @@ +import json import logging from usage_v2.helper import UsageHelper @@ -25,4 +26,10 @@ def to_representation(self, instance): ) token_usage = {} data["token_usage"] = token_usage + # Convert string to list + try: + data["context"] = json.loads(data["context"]) + except json.JSONDecodeError: + # Convert the old value of data["context"] to a list + data["context"] = [data["context"]] return data diff --git a/frontend/src/components/custom-tools/prompt-card/OutputForIndex.jsx b/frontend/src/components/custom-tools/prompt-card/OutputForIndex.jsx index 506414174..0fa72073a 100644 --- a/frontend/src/components/custom-tools/prompt-card/OutputForIndex.jsx +++ b/frontend/src/components/custom-tools/prompt-card/OutputForIndex.jsx @@ -16,19 +16,7 @@ function OutputForIndex({ chunkData, setIsIndexOpen, isIndexOpen }) { const activeRef = useRef(null); useEffect(() => { - if (!chunkData) { - setChunks([]); - } - // Split chunkData into chunks using \f\n delimiter - const tempChunks = chunkData?.split("\f\n"); - // To remove " at the end - if (tempChunks?.length > 0) { - const lastChunk = tempChunks[tempChunks?.length - 1].trim(); - if (lastChunk === '\\n"' || lastChunk === "") { - tempChunks.pop(); - } - } - setChunks(tempChunks); + setChunks(chunkData || []); }, [chunkData]); // Debounced search handler diff --git a/prompt-service/src/unstract/prompt_service/helper.py b/prompt-service/src/unstract/prompt_service/helper.py index ee730173e..0754b74c7 100644 --- a/prompt-service/src/unstract/prompt_service/helper.py +++ b/prompt-service/src/unstract/prompt_service/helper.py @@ -83,11 +83,11 @@ def plugin_loader(app: Flask) -> None: initialize_plugin_endpoints(app=app) -def get_cleaned_context(context: str) -> str: +def get_cleaned_context(context: set[str]) -> list[str]: clean_context_plugin: dict[str, Any] = plugins.get(PSKeys.CLEAN_CONTEXT, {}) if clean_context_plugin: return clean_context_plugin["entrypoint_cls"].run(context=context) - return context + return list(context) def initialize_plugin_endpoints(app: Flask) -> None: diff --git a/prompt-service/src/unstract/prompt_service/main.py b/prompt-service/src/unstract/prompt_service/main.py index 15a46c8fd..8ba54d4ef 100644 --- a/prompt-service/src/unstract/prompt_service/main.py +++ b/prompt-service/src/unstract/prompt_service/main.py @@ -251,10 +251,10 @@ def prompt_processor() -> Any: raise api_error try: - context = "" + context: set[str] = set() if output[PSKeys.CHUNK_SIZE] == 0: # We can do this only for chunkless indexes - context: Optional[str] = index.query_index( + retrieved_context: Optional[str] = index.query_index( embedding_instance_id=output[PSKeys.EMBEDDING], vector_db_instance_id=output[PSKeys.VECTOR_DB], doc_id=doc_id, @@ -270,13 +270,13 @@ def prompt_processor() -> Any: # inconsistent, and not reproducible easily, # this is just a safety net. time.sleep(2) - context: Optional[str] = index.query_index( + retrieved_context: Optional[str] = index.query_index( embedding_instance_id=output[PSKeys.EMBEDDING], vector_db_instance_id=output[PSKeys.VECTOR_DB], doc_id=doc_id, usage_kwargs=usage_kwargs, ) - if context is None: + if retrieved_context is None: # TODO: Obtain user set name for vector DB msg = NO_CONTEXT_ERROR app.logger.error( @@ -294,6 +294,7 @@ def prompt_processor() -> Any: msg, ) raise APIError(message=msg) + context.add(retrieved_context) # TODO: Use vectorDB name when available publish_log( log_events_id, @@ -323,7 +324,7 @@ def prompt_processor() -> Any: tool_settings=tool_settings, output=output, llm=llm, - context=context, + context="\n".join(context), prompt="promptx", metadata=metadata, ) @@ -537,7 +538,7 @@ def prompt_processor() -> Any: llm=llm, challenge_llm=challenge_llm, run_id=run_id, - context=context, + context="\n".join(context), tool_settings=tool_settings, output=output, structured_output=structured_output, @@ -593,7 +594,7 @@ def prompt_processor() -> Any: try: evaluator = eval_plugin["entrypoint_cls"]( "", - context, + "\n".join(context), "", "", output, @@ -680,7 +681,7 @@ def run_retrieval( # type:ignore retrieval_type: str, metadata: dict[str, Any], ) -> tuple[str, str]: - context: str = "" + context: set[str] = set() prompt = output[PSKeys.PROMPTX] if retrieval_type == PSKeys.SUBQUESTION: subq_prompt: str = ( @@ -713,19 +714,11 @@ def run_retrieval( # type:ignore prompt=subq_prompt, ) subquestion_list = subquestions.split(",") - raw_retrieved_context = "" for each_subq in subquestion_list: retrieved_context = _retrieve_context( output, doc_id, vector_index, each_subq ) - # Not adding the potential for pinecode serverless - # inconsistency issue owing to risk of infinte loop - # and inablity to diffrentiate genuine cases of - # empty context. - raw_retrieved_context = "\f\n".join( - [raw_retrieved_context, retrieved_context] - ) - context = _remove_duplicate_nodes(raw_retrieved_context) + context.update(retrieved_context) if retrieval_type == PSKeys.SIMPLE: @@ -746,7 +739,7 @@ def run_retrieval( # type:ignore tool_settings=tool_settings, output=output, llm=llm, - context=context, + context="\n".join(context), prompt="promptx", metadata=metadata, ) @@ -754,13 +747,7 @@ def run_retrieval( # type:ignore return (answer, context) -def _remove_duplicate_nodes(retrieved_context: str) -> str: - context_set: set[str] = set(retrieved_context.split("\f\n")) - fomatted_context = "\f\n".join(context_set) - return fomatted_context - - -def _retrieve_context(output, doc_id, vector_index, answer) -> str: +def _retrieve_context(output, doc_id, vector_index, answer) -> set[str]: retriever = vector_index.as_retriever( similarity_top_k=output[PSKeys.SIMILARITY_TOP_K], filters=MetadataFilters( @@ -773,18 +760,18 @@ def _retrieve_context(output, doc_id, vector_index, answer) -> str: ), ) nodes = retriever.retrieve(answer) - text = "" + context: set[str] = set() for node in nodes: # ToDo: May have to fine-tune this value for node score or keep it # configurable at the adapter level if node.score > 0: - text += node.get_content() + "\f\n" + context.add(node.get_content()) else: app.logger.info( "Node score is less than 0. " f"Ignored: {node.node_id} with score {node.score}" ) - return text + return context def log_exceptions(e: HTTPException):