From 6a64d2b08557313bf121eeb231d6fdc4811e6585 Mon Sep 17 00:00:00 2001
From: vishnuszipstack <117254672+vishnuszipstack@users.noreply.github.com>
Date: Wed, 13 Nov 2024 10:02:33 +0530
Subject: [PATCH 1/5] added confidence score in metadata (#826)

---
 tools/structure/src/constants.py | 1 +
 tools/structure/src/main.py      | 8 +++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/tools/structure/src/constants.py b/tools/structure/src/constants.py
index ae5dfb133..0def2f8a1 100644
--- a/tools/structure/src/constants.py
+++ b/tools/structure/src/constants.py
@@ -72,4 +72,5 @@ class SettingsKeys:
     METADATA = "metadata"
     EPILOGUE = "epilogue"
     HIGHLIGHT_DATA = "highlight_data"
+    CONFIDENCE_DATA = "confidence_data"
     EXECUTION_RUN_DATA_FOLDER = "EXECUTION_RUN_DATA_FOLDER"
diff --git a/tools/structure/src/main.py b/tools/structure/src/main.py
index dfb71345f..58e8fbe2d 100644
--- a/tools/structure/src/main.py
+++ b/tools/structure/src/main.py
@@ -216,10 +216,16 @@ def run(
             epilogue = metadata.pop(SettingsKeys.EPILOGUE, None)
             if epilogue:
                 try:
-                    from helper import transform_dict  # type: ignore [attr-defined]
+                    from helper import (  # type: ignore [attr-defined]
+                        get_confidence_data,
+                        transform_dict,
+                    )
 
                     highlight_data = transform_dict(epilogue, tool_data_dir)
                     metadata[SettingsKeys.HIGHLIGHT_DATA] = highlight_data
+                    metadata[SettingsKeys.CONFIDENCE_DATA] = get_confidence_data(
+                        epilogue, tool_data_dir
+                    )
                 except ImportError:
                     self.stream_log(
                         f"Highlight metadata is not added. {PAID_FEATURE_MSG}",

From d3628c87c1414e71693f5ded13bdfda06f94bceb Mon Sep 17 00:00:00 2001
From: Tahier Hussain <89440263+tahierhussain@users.noreply.github.com>
Date: Thu, 14 Nov 2024 14:21:27 +0530
Subject: [PATCH 2/5] FIX: Handle Public/Protected Routes (#839)

Made certain routes public that were previously inside the protected routes wrapper
---
 frontend/src/routes/Router.jsx | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/frontend/src/routes/Router.jsx b/frontend/src/routes/Router.jsx
index b0cf28fc1..e2936ec54 100644
--- a/frontend/src/routes/Router.jsx
+++ b/frontend/src/routes/Router.jsx
@@ -113,20 +113,20 @@ function Router() {
 
         {/* protected routes */}
         <Route path="setOrg" element={<SetOrgPage />} />
+        {SelectProduct && (
+          <Route path="selectProduct" element={<SelectProduct />} />
+        )}
+        {TrialRoutes && (
+          <Route path="/trial-expired" element={<TrialRoutes />} />
+        )}
+        {PaymentSuccessful && (
+          <Route path="/payment/success" element={<PaymentSuccessful />} />
+        )}
         <Route path="" element={<RequireAuth />}>
           <Route path="">{MainAppRoute}</Route>
           {llmWhispererRouter && (
             <Route path="llm-whisperer">{llmWhispererRouter()}</Route>
           )}
-          {TrialRoutes && (
-            <Route path="/trial-expired" element={<TrialRoutes />} />
-          )}
-          {SelectProduct && (
-            <Route path="selectProduct" element={<SelectProduct />} />
-          )}
-          {PaymentSuccessful && (
-            <Route path="/payment/success" element={<PaymentSuccessful />} />
-          )}
         </Route>
       </Route>
 

From 7f92acdb92adbd34978b0ffed6f483aab2f4c72c Mon Sep 17 00:00:00 2001
From: Tahier Hussain <89440263+tahierhussain@users.noreply.github.com>
Date: Thu, 14 Nov 2024 14:45:21 +0530
Subject: [PATCH 3/5] FIX: Remove HubSpot signup event request from hook (#841)

Remove HubSpot signup event request from the useSessionValid hook

Co-authored-by: Hari John Kuriakose <hari@zipstack.com>
---
 frontend/src/hooks/useSessionValid.js | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/frontend/src/hooks/useSessionValid.js b/frontend/src/hooks/useSessionValid.js
index c77310a9e..4d26b422f 100644
--- a/frontend/src/hooks/useSessionValid.js
+++ b/frontend/src/hooks/useSessionValid.js
@@ -19,15 +19,6 @@ try {
   // Plugin not available
 }
 
-// Import useGoogleTagManager hook
-let hsSignupEvent;
-try {
-  hsSignupEvent =
-    require("../plugins/hooks/useGoogleTagManager.js").useGoogleTagManager();
-} catch {
-  // Ignore if hook not available
-}
-
 let selectedProduct;
 let selectedProductStore;
 let PRODUCT_NAMES = {};
@@ -120,11 +111,6 @@ function useSessionValid() {
         }
       });
 
-      const isNewOrg = setOrgRes?.data?.is_new_org || false;
-      if (isNewOrg && hsSignupEvent) {
-        hsSignupEvent();
-      }
-
       userAndOrgDetails = setOrgRes?.data?.user;
       userAndOrgDetails["orgName"] = setOrgRes?.data?.organization?.name;
       userAndOrgDetails["orgId"] = orgId;

From cf9c7d6f25407e9e621d811f800f4adb8bb4911e Mon Sep 17 00:00:00 2001
From: Deepak K <89829542+Deepak-Kesavan@users.noreply.github.com>
Date: Fri, 15 Nov 2024 10:42:33 +0530
Subject: [PATCH 4/5] FIX: Missing tool setting enable_highlight (#843)

* Added missing changes in v2

Signed-off-by: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com>

* Added missing constant

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 backend/prompt_studio/prompt_studio_core_v2/constants.py    | 1 +
 .../prompt_studio_core_v2/prompt_studio_helper.py           | 2 ++
 .../prompt_studio_registry_helper.py                        | 6 ++++++
 3 files changed, 9 insertions(+)

diff --git a/backend/prompt_studio/prompt_studio_core_v2/constants.py b/backend/prompt_studio/prompt_studio_core_v2/constants.py
index 5e6c47884..9838892f2 100644
--- a/backend/prompt_studio/prompt_studio_core_v2/constants.py
+++ b/backend/prompt_studio/prompt_studio_core_v2/constants.py
@@ -94,6 +94,7 @@ class ToolStudioPromptKeys:
     SUMMARIZE_AS_SOURCE = "summarize_as_source"
     VARIABLE_MAP = "variable_map"
     RECORD = "record"
+    ENABLE_HIGHLIGHT = "enable_highlight"
 
 
 class FileViewTypes:
diff --git a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py
index e325488d9..aef8fc012 100644
--- a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py
+++ b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py
@@ -810,6 +810,7 @@ def _fetch_response(
         tool_settings[TSPKeys.PREAMBLE] = tool.preamble
         tool_settings[TSPKeys.POSTAMBLE] = tool.postamble
         tool_settings[TSPKeys.GRAMMAR] = grammar_list
+        tool_settings[TSPKeys.ENABLE_HIGHLIGHT] = tool.enable_highlight
         tool_settings[TSPKeys.PLATFORM_POSTAMBLE] = getattr(
             settings, TSPKeys.PLATFORM_POSTAMBLE.upper(), ""
         )
@@ -1068,6 +1069,7 @@ def _fetch_single_pass_response(
         tool_settings[TSPKeys.CHUNK_SIZE] = default_profile.chunk_size
         tool_settings[TSPKeys.CHUNK_OVERLAP] = default_profile.chunk_overlap
         tool_settings[TSPKeys.ENABLE_CHALLENGE] = tool.enable_challenge
+        tool_settings[TSPKeys.ENABLE_HIGHLIGHT] = tool.enable_highlight
         tool_settings[TSPKeys.CHALLENGE_LLM] = challenge_llm
 
         for prompt in prompts:
diff --git a/backend/prompt_studio/prompt_studio_registry_v2/prompt_studio_registry_helper.py b/backend/prompt_studio/prompt_studio_registry_v2/prompt_studio_registry_helper.py
index c323f068b..157593cdd 100644
--- a/backend/prompt_studio/prompt_studio_registry_v2/prompt_studio_registry_helper.py
+++ b/backend/prompt_studio/prompt_studio_registry_v2/prompt_studio_registry_helper.py
@@ -73,6 +73,12 @@ def frame_spec(tool: CustomTool) -> Spec:
                 "default": False,
                 "description": "Enables SinglePass Extraction",
             },
+            "enable_highlight": {
+                "type": "boolean",
+                "title": "Enable highlight",
+                "default": False,
+                "description": "Enables highlight",
+            },
         }
 
         spec = Spec(

From 20032898d2faf068561b4aedd6f542016a0cd30b Mon Sep 17 00:00:00 2001
From: Deepak K <89829542+Deepak-Kesavan@users.noreply.github.com>
Date: Fri, 15 Nov 2024 11:42:22 +0530
Subject: [PATCH 5/5] [FIX] Changes to display chunk data properly (#821)

* Converted context from string to array

Signed-off-by: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com>

* v2 changes

Signed-off-by: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com>

* Removed unwanted comment

* Update backend/prompt_studio/prompt_studio_output_manager_v2/serializers.py

Co-authored-by: Chandrasekharan M <117059509+chandrasekharan-zipstack@users.noreply.github.com>
Signed-off-by: Deepak K <89829542+Deepak-Kesavan@users.noreply.github.com>

* Minor fix

---------

Signed-off-by: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com>
Signed-off-by: Deepak K <89829542+Deepak-Kesavan@users.noreply.github.com>
Co-authored-by: Chandrasekharan M <117059509+chandrasekharan-zipstack@users.noreply.github.com>
Co-authored-by: Gayathri <142381512+gaya3-zipstack@users.noreply.github.com>
Co-authored-by: Hari John Kuriakose <hari@zipstack.com>
---
 .../output_manager_helper.py                  |  2 +-
 .../serializers.py                            |  7 +++
 .../output_manager_helper.py                  |  2 +-
 .../serializers.py                            |  7 +++
 .../prompt-card/OutputForIndex.jsx            | 14 +-----
 .../src/unstract/prompt_service/helper.py     |  4 +-
 .../src/unstract/prompt_service/main.py       | 43 +++++++------------
 7 files changed, 34 insertions(+), 45 deletions(-)

diff --git a/backend/prompt_studio/prompt_studio_output_manager/output_manager_helper.py b/backend/prompt_studio/prompt_studio_output_manager/output_manager_helper.py
index f4d1d6624..c65ea94ea 100644
--- a/backend/prompt_studio/prompt_studio_output_manager/output_manager_helper.py
+++ b/backend/prompt_studio/prompt_studio_output_manager/output_manager_helper.py
@@ -152,7 +152,7 @@ def update_or_create_prompt_output(
                 output=output,
                 eval_metrics=eval_metrics,
                 tool=tool,
-                context=context,
+                context=json.dumps(context),
                 challenge_data=challenge_data,
             )
 
diff --git a/backend/prompt_studio/prompt_studio_output_manager/serializers.py b/backend/prompt_studio/prompt_studio_output_manager/serializers.py
index 0fd7c4060..14840745a 100644
--- a/backend/prompt_studio/prompt_studio_output_manager/serializers.py
+++ b/backend/prompt_studio/prompt_studio_output_manager/serializers.py
@@ -1,3 +1,4 @@
+import json
 import logging
 
 from usage.helper import UsageHelper
@@ -25,4 +26,10 @@ def to_representation(self, instance):
             )
             token_usage = {}
         data["token_usage"] = token_usage
+        # Convert string to list
+        try:
+            data["context"] = json.loads(data["context"])
+        except json.JSONDecodeError:
+            # Convert the old value of data["context"] to a list
+            data["context"] = [data["context"]]
         return data
diff --git a/backend/prompt_studio/prompt_studio_output_manager_v2/output_manager_helper.py b/backend/prompt_studio/prompt_studio_output_manager_v2/output_manager_helper.py
index b5ee4dae1..a82ed95bf 100644
--- a/backend/prompt_studio/prompt_studio_output_manager_v2/output_manager_helper.py
+++ b/backend/prompt_studio/prompt_studio_output_manager_v2/output_manager_helper.py
@@ -154,7 +154,7 @@ def update_or_create_prompt_output(
                 output=output,
                 eval_metrics=eval_metrics,
                 tool=tool,
-                context=context,
+                context=json.dumps(context),
                 challenge_data=challenge_data,
             )
 
diff --git a/backend/prompt_studio/prompt_studio_output_manager_v2/serializers.py b/backend/prompt_studio/prompt_studio_output_manager_v2/serializers.py
index f3b769c1b..0d40826ec 100644
--- a/backend/prompt_studio/prompt_studio_output_manager_v2/serializers.py
+++ b/backend/prompt_studio/prompt_studio_output_manager_v2/serializers.py
@@ -1,3 +1,4 @@
+import json
 import logging
 
 from usage_v2.helper import UsageHelper
@@ -25,4 +26,10 @@ def to_representation(self, instance):
             )
             token_usage = {}
         data["token_usage"] = token_usage
+        # Convert string to list
+        try:
+            data["context"] = json.loads(data["context"])
+        except json.JSONDecodeError:
+            # Convert the old value of data["context"] to a list
+            data["context"] = [data["context"]]
         return data
diff --git a/frontend/src/components/custom-tools/prompt-card/OutputForIndex.jsx b/frontend/src/components/custom-tools/prompt-card/OutputForIndex.jsx
index 506414174..0fa72073a 100644
--- a/frontend/src/components/custom-tools/prompt-card/OutputForIndex.jsx
+++ b/frontend/src/components/custom-tools/prompt-card/OutputForIndex.jsx
@@ -16,19 +16,7 @@ function OutputForIndex({ chunkData, setIsIndexOpen, isIndexOpen }) {
   const activeRef = useRef(null);
 
   useEffect(() => {
-    if (!chunkData) {
-      setChunks([]);
-    }
-    // Split chunkData into chunks using \f\n delimiter
-    const tempChunks = chunkData?.split("\f\n");
-    // To remove " at the end
-    if (tempChunks?.length > 0) {
-      const lastChunk = tempChunks[tempChunks?.length - 1].trim();
-      if (lastChunk === '\\n"' || lastChunk === "") {
-        tempChunks.pop();
-      }
-    }
-    setChunks(tempChunks);
+    setChunks(chunkData || []);
   }, [chunkData]);
 
   // Debounced search handler
diff --git a/prompt-service/src/unstract/prompt_service/helper.py b/prompt-service/src/unstract/prompt_service/helper.py
index ee730173e..0754b74c7 100644
--- a/prompt-service/src/unstract/prompt_service/helper.py
+++ b/prompt-service/src/unstract/prompt_service/helper.py
@@ -83,11 +83,11 @@ def plugin_loader(app: Flask) -> None:
     initialize_plugin_endpoints(app=app)
 
 
-def get_cleaned_context(context: str) -> str:
+def get_cleaned_context(context: set[str]) -> list[str]:
     clean_context_plugin: dict[str, Any] = plugins.get(PSKeys.CLEAN_CONTEXT, {})
     if clean_context_plugin:
         return clean_context_plugin["entrypoint_cls"].run(context=context)
-    return context
+    return list(context)
 
 
 def initialize_plugin_endpoints(app: Flask) -> None:
diff --git a/prompt-service/src/unstract/prompt_service/main.py b/prompt-service/src/unstract/prompt_service/main.py
index 15a46c8fd..8ba54d4ef 100644
--- a/prompt-service/src/unstract/prompt_service/main.py
+++ b/prompt-service/src/unstract/prompt_service/main.py
@@ -251,10 +251,10 @@ def prompt_processor() -> Any:
                 raise api_error
 
         try:
-            context = ""
+            context: set[str] = set()
             if output[PSKeys.CHUNK_SIZE] == 0:
                 # We can do this only for chunkless indexes
-                context: Optional[str] = index.query_index(
+                retrieved_context: Optional[str] = index.query_index(
                     embedding_instance_id=output[PSKeys.EMBEDDING],
                     vector_db_instance_id=output[PSKeys.VECTOR_DB],
                     doc_id=doc_id,
@@ -270,13 +270,13 @@ def prompt_processor() -> Any:
                     # inconsistent, and not reproducible easily,
                     # this is just a safety net.
                     time.sleep(2)
-                    context: Optional[str] = index.query_index(
+                    retrieved_context: Optional[str] = index.query_index(
                         embedding_instance_id=output[PSKeys.EMBEDDING],
                         vector_db_instance_id=output[PSKeys.VECTOR_DB],
                         doc_id=doc_id,
                         usage_kwargs=usage_kwargs,
                     )
-                    if context is None:
+                    if retrieved_context is None:
                         # TODO: Obtain user set name for vector DB
                         msg = NO_CONTEXT_ERROR
                         app.logger.error(
@@ -294,6 +294,7 @@ def prompt_processor() -> Any:
                             msg,
                         )
                         raise APIError(message=msg)
+                context.add(retrieved_context)
                 # TODO: Use vectorDB name when available
                 publish_log(
                     log_events_id,
@@ -323,7 +324,7 @@ def prompt_processor() -> Any:
                     tool_settings=tool_settings,
                     output=output,
                     llm=llm,
-                    context=context,
+                    context="\n".join(context),
                     prompt="promptx",
                     metadata=metadata,
                 )
@@ -537,7 +538,7 @@ def prompt_processor() -> Any:
                             llm=llm,
                             challenge_llm=challenge_llm,
                             run_id=run_id,
-                            context=context,
+                            context="\n".join(context),
                             tool_settings=tool_settings,
                             output=output,
                             structured_output=structured_output,
@@ -593,7 +594,7 @@ def prompt_processor() -> Any:
                     try:
                         evaluator = eval_plugin["entrypoint_cls"](
                             "",
-                            context,
+                            "\n".join(context),
                             "",
                             "",
                             output,
@@ -680,7 +681,7 @@ def run_retrieval(  # type:ignore
     retrieval_type: str,
     metadata: dict[str, Any],
 ) -> tuple[str, str]:
-    context: str = ""
+    context: set[str] = set()
     prompt = output[PSKeys.PROMPTX]
     if retrieval_type == PSKeys.SUBQUESTION:
         subq_prompt: str = (
@@ -713,19 +714,11 @@ def run_retrieval(  # type:ignore
             prompt=subq_prompt,
         )
         subquestion_list = subquestions.split(",")
-        raw_retrieved_context = ""
         for each_subq in subquestion_list:
             retrieved_context = _retrieve_context(
                 output, doc_id, vector_index, each_subq
             )
-            # Not adding the potential for pinecode serverless
-            # inconsistency issue owing to risk of infinte loop
-            # and inablity to diffrentiate genuine cases of
-            # empty context.
-            raw_retrieved_context = "\f\n".join(
-                [raw_retrieved_context, retrieved_context]
-            )
-        context = _remove_duplicate_nodes(raw_retrieved_context)
+            context.update(retrieved_context)
 
     if retrieval_type == PSKeys.SIMPLE:
 
@@ -746,7 +739,7 @@ def run_retrieval(  # type:ignore
         tool_settings=tool_settings,
         output=output,
         llm=llm,
-        context=context,
+        context="\n".join(context),
         prompt="promptx",
         metadata=metadata,
     )
@@ -754,13 +747,7 @@ def run_retrieval(  # type:ignore
     return (answer, context)
 
 
-def _remove_duplicate_nodes(retrieved_context: str) -> str:
-    context_set: set[str] = set(retrieved_context.split("\f\n"))
-    fomatted_context = "\f\n".join(context_set)
-    return fomatted_context
-
-
-def _retrieve_context(output, doc_id, vector_index, answer) -> str:
+def _retrieve_context(output, doc_id, vector_index, answer) -> set[str]:
     retriever = vector_index.as_retriever(
         similarity_top_k=output[PSKeys.SIMILARITY_TOP_K],
         filters=MetadataFilters(
@@ -773,18 +760,18 @@ def _retrieve_context(output, doc_id, vector_index, answer) -> str:
         ),
     )
     nodes = retriever.retrieve(answer)
-    text = ""
+    context: set[str] = set()
     for node in nodes:
         # ToDo: May have to fine-tune this value for node score or keep it
         # configurable at the adapter level
         if node.score > 0:
-            text += node.get_content() + "\f\n"
+            context.add(node.get_content())
         else:
             app.logger.info(
                 "Node score is less than 0. "
                 f"Ignored: {node.node_id} with score {node.score}"
             )
-    return text
+    return context
 
 
 def log_exceptions(e: HTTPException):