From 6a64d2b08557313bf121eeb231d6fdc4811e6585 Mon Sep 17 00:00:00 2001
From: vishnuszipstack <117254672+vishnuszipstack@users.noreply.github.com>
Date: Wed, 13 Nov 2024 10:02:33 +0530
Subject: [PATCH 1/5] added confidence score in metadata (#826)
---
tools/structure/src/constants.py | 1 +
tools/structure/src/main.py | 8 +++++++-
2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/tools/structure/src/constants.py b/tools/structure/src/constants.py
index ae5dfb133..0def2f8a1 100644
--- a/tools/structure/src/constants.py
+++ b/tools/structure/src/constants.py
@@ -72,4 +72,5 @@ class SettingsKeys:
METADATA = "metadata"
EPILOGUE = "epilogue"
HIGHLIGHT_DATA = "highlight_data"
+ CONFIDENCE_DATA = "confidence_data"
EXECUTION_RUN_DATA_FOLDER = "EXECUTION_RUN_DATA_FOLDER"
diff --git a/tools/structure/src/main.py b/tools/structure/src/main.py
index dfb71345f..58e8fbe2d 100644
--- a/tools/structure/src/main.py
+++ b/tools/structure/src/main.py
@@ -216,10 +216,16 @@ def run(
epilogue = metadata.pop(SettingsKeys.EPILOGUE, None)
if epilogue:
try:
- from helper import transform_dict # type: ignore [attr-defined]
+ from helper import ( # type: ignore [attr-defined]
+ get_confidence_data,
+ transform_dict,
+ )
highlight_data = transform_dict(epilogue, tool_data_dir)
metadata[SettingsKeys.HIGHLIGHT_DATA] = highlight_data
+ metadata[SettingsKeys.CONFIDENCE_DATA] = get_confidence_data(
+ epilogue, tool_data_dir
+ )
except ImportError:
self.stream_log(
f"Highlight metadata is not added. {PAID_FEATURE_MSG}",
From d3628c87c1414e71693f5ded13bdfda06f94bceb Mon Sep 17 00:00:00 2001
From: Tahier Hussain <89440263+tahierhussain@users.noreply.github.com>
Date: Thu, 14 Nov 2024 14:21:27 +0530
Subject: [PATCH 2/5] FIX: Handle Public/Protected Routes (#839)
Made certain routes public that were previously inside the protected routes wrapper
---
frontend/src/routes/Router.jsx | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/frontend/src/routes/Router.jsx b/frontend/src/routes/Router.jsx
index b0cf28fc1..e2936ec54 100644
--- a/frontend/src/routes/Router.jsx
+++ b/frontend/src/routes/Router.jsx
@@ -113,20 +113,20 @@ function Router() {
{/* protected routes */}
} />
+ {SelectProduct && (
+ } />
+ )}
+ {TrialRoutes && (
+ } />
+ )}
+ {PaymentSuccessful && (
+ } />
+ )}
}>
{MainAppRoute}
{llmWhispererRouter && (
{llmWhispererRouter()}
)}
- {TrialRoutes && (
- } />
- )}
- {SelectProduct && (
- } />
- )}
- {PaymentSuccessful && (
- } />
- )}
From 7f92acdb92adbd34978b0ffed6f483aab2f4c72c Mon Sep 17 00:00:00 2001
From: Tahier Hussain <89440263+tahierhussain@users.noreply.github.com>
Date: Thu, 14 Nov 2024 14:45:21 +0530
Subject: [PATCH 3/5] FIX: Remove HubSpot signup event request from hook (#841)
Remove HubSpot signup event request from hook
Co-authored-by: Hari John Kuriakose
---
frontend/src/hooks/useSessionValid.js | 14 --------------
1 file changed, 14 deletions(-)
diff --git a/frontend/src/hooks/useSessionValid.js b/frontend/src/hooks/useSessionValid.js
index c77310a9e..4d26b422f 100644
--- a/frontend/src/hooks/useSessionValid.js
+++ b/frontend/src/hooks/useSessionValid.js
@@ -19,15 +19,6 @@ try {
// Plugin not available
}
-// Import useGoogleTagManager hook
-let hsSignupEvent;
-try {
- hsSignupEvent =
- require("../plugins/hooks/useGoogleTagManager.js").useGoogleTagManager();
-} catch {
- // Ignore if hook not available
-}
-
let selectedProduct;
let selectedProductStore;
let PRODUCT_NAMES = {};
@@ -120,11 +111,6 @@ function useSessionValid() {
}
});
- const isNewOrg = setOrgRes?.data?.is_new_org || false;
- if (isNewOrg && hsSignupEvent) {
- hsSignupEvent();
- }
-
userAndOrgDetails = setOrgRes?.data?.user;
userAndOrgDetails["orgName"] = setOrgRes?.data?.organization?.name;
userAndOrgDetails["orgId"] = orgId;
From cf9c7d6f25407e9e621d811f800f4adb8bb4911e Mon Sep 17 00:00:00 2001
From: Deepak K <89829542+Deepak-Kesavan@users.noreply.github.com>
Date: Fri, 15 Nov 2024 10:42:33 +0530
Subject: [PATCH 4/5] FIX: Missing tool setting enable_highlight (#843)
* Added missing changes in v2
Signed-off-by: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com>
* Added missing constant
* [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
---------
Signed-off-by: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
backend/prompt_studio/prompt_studio_core_v2/constants.py | 1 +
.../prompt_studio_core_v2/prompt_studio_helper.py | 2 ++
.../prompt_studio_registry_helper.py | 6 ++++++
3 files changed, 9 insertions(+)
diff --git a/backend/prompt_studio/prompt_studio_core_v2/constants.py b/backend/prompt_studio/prompt_studio_core_v2/constants.py
index 5e6c47884..9838892f2 100644
--- a/backend/prompt_studio/prompt_studio_core_v2/constants.py
+++ b/backend/prompt_studio/prompt_studio_core_v2/constants.py
@@ -94,6 +94,7 @@ class ToolStudioPromptKeys:
SUMMARIZE_AS_SOURCE = "summarize_as_source"
VARIABLE_MAP = "variable_map"
RECORD = "record"
+ ENABLE_HIGHLIGHT = "enable_highlight"
class FileViewTypes:
diff --git a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py
index e325488d9..aef8fc012 100644
--- a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py
+++ b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py
@@ -810,6 +810,7 @@ def _fetch_response(
tool_settings[TSPKeys.PREAMBLE] = tool.preamble
tool_settings[TSPKeys.POSTAMBLE] = tool.postamble
tool_settings[TSPKeys.GRAMMAR] = grammar_list
+ tool_settings[TSPKeys.ENABLE_HIGHLIGHT] = tool.enable_highlight
tool_settings[TSPKeys.PLATFORM_POSTAMBLE] = getattr(
settings, TSPKeys.PLATFORM_POSTAMBLE.upper(), ""
)
@@ -1068,6 +1069,7 @@ def _fetch_single_pass_response(
tool_settings[TSPKeys.CHUNK_SIZE] = default_profile.chunk_size
tool_settings[TSPKeys.CHUNK_OVERLAP] = default_profile.chunk_overlap
tool_settings[TSPKeys.ENABLE_CHALLENGE] = tool.enable_challenge
+ tool_settings[TSPKeys.ENABLE_HIGHLIGHT] = tool.enable_highlight
tool_settings[TSPKeys.CHALLENGE_LLM] = challenge_llm
for prompt in prompts:
diff --git a/backend/prompt_studio/prompt_studio_registry_v2/prompt_studio_registry_helper.py b/backend/prompt_studio/prompt_studio_registry_v2/prompt_studio_registry_helper.py
index c323f068b..157593cdd 100644
--- a/backend/prompt_studio/prompt_studio_registry_v2/prompt_studio_registry_helper.py
+++ b/backend/prompt_studio/prompt_studio_registry_v2/prompt_studio_registry_helper.py
@@ -73,6 +73,12 @@ def frame_spec(tool: CustomTool) -> Spec:
"default": False,
"description": "Enables SinglePass Extraction",
},
+ "enable_highlight": {
+ "type": "boolean",
+ "title": "Enable highlight",
+ "default": False,
+ "description": "Enables highlight",
+ },
}
spec = Spec(
From 20032898d2faf068561b4aedd6f542016a0cd30b Mon Sep 17 00:00:00 2001
From: Deepak K <89829542+Deepak-Kesavan@users.noreply.github.com>
Date: Fri, 15 Nov 2024 11:42:22 +0530
Subject: [PATCH 5/5] [FIX] Changes to display chunk data properly (#821)
* Converted context from string to array
Signed-off-by: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com>
* v2 changes
Signed-off-by: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com>
* Removed unwanted comment
* Update backend/prompt_studio/prompt_studio_output_manager_v2/serializers.py
Co-authored-by: Chandrasekharan M <117059509+chandrasekharan-zipstack@users.noreply.github.com>
Signed-off-by: Deepak K <89829542+Deepak-Kesavan@users.noreply.github.com>
* Minor fix
---------
Signed-off-by: Deepak <89829542+Deepak-Kesavan@users.noreply.github.com>
Signed-off-by: Deepak K <89829542+Deepak-Kesavan@users.noreply.github.com>
Co-authored-by: Chandrasekharan M <117059509+chandrasekharan-zipstack@users.noreply.github.com>
Co-authored-by: Gayathri <142381512+gaya3-zipstack@users.noreply.github.com>
Co-authored-by: Hari John Kuriakose
---
.../output_manager_helper.py | 2 +-
.../serializers.py | 7 +++
.../output_manager_helper.py | 2 +-
.../serializers.py | 7 +++
.../prompt-card/OutputForIndex.jsx | 14 +-----
.../src/unstract/prompt_service/helper.py | 4 +-
.../src/unstract/prompt_service/main.py | 43 +++++++------------
7 files changed, 34 insertions(+), 45 deletions(-)
diff --git a/backend/prompt_studio/prompt_studio_output_manager/output_manager_helper.py b/backend/prompt_studio/prompt_studio_output_manager/output_manager_helper.py
index f4d1d6624..c65ea94ea 100644
--- a/backend/prompt_studio/prompt_studio_output_manager/output_manager_helper.py
+++ b/backend/prompt_studio/prompt_studio_output_manager/output_manager_helper.py
@@ -152,7 +152,7 @@ def update_or_create_prompt_output(
output=output,
eval_metrics=eval_metrics,
tool=tool,
- context=context,
+ context=json.dumps(context),
challenge_data=challenge_data,
)
diff --git a/backend/prompt_studio/prompt_studio_output_manager/serializers.py b/backend/prompt_studio/prompt_studio_output_manager/serializers.py
index 0fd7c4060..14840745a 100644
--- a/backend/prompt_studio/prompt_studio_output_manager/serializers.py
+++ b/backend/prompt_studio/prompt_studio_output_manager/serializers.py
@@ -1,3 +1,4 @@
+import json
import logging
from usage.helper import UsageHelper
@@ -25,4 +26,10 @@ def to_representation(self, instance):
)
token_usage = {}
data["token_usage"] = token_usage
+ # Convert string to list
+ try:
+ data["context"] = json.loads(data["context"])
+ except json.JSONDecodeError:
+ # Convert the old value of data["context"] to a list
+ data["context"] = [data["context"]]
return data
diff --git a/backend/prompt_studio/prompt_studio_output_manager_v2/output_manager_helper.py b/backend/prompt_studio/prompt_studio_output_manager_v2/output_manager_helper.py
index b5ee4dae1..a82ed95bf 100644
--- a/backend/prompt_studio/prompt_studio_output_manager_v2/output_manager_helper.py
+++ b/backend/prompt_studio/prompt_studio_output_manager_v2/output_manager_helper.py
@@ -154,7 +154,7 @@ def update_or_create_prompt_output(
output=output,
eval_metrics=eval_metrics,
tool=tool,
- context=context,
+ context=json.dumps(context),
challenge_data=challenge_data,
)
diff --git a/backend/prompt_studio/prompt_studio_output_manager_v2/serializers.py b/backend/prompt_studio/prompt_studio_output_manager_v2/serializers.py
index f3b769c1b..0d40826ec 100644
--- a/backend/prompt_studio/prompt_studio_output_manager_v2/serializers.py
+++ b/backend/prompt_studio/prompt_studio_output_manager_v2/serializers.py
@@ -1,3 +1,4 @@
+import json
import logging
from usage_v2.helper import UsageHelper
@@ -25,4 +26,10 @@ def to_representation(self, instance):
)
token_usage = {}
data["token_usage"] = token_usage
+ # Convert string to list
+ try:
+ data["context"] = json.loads(data["context"])
+ except json.JSONDecodeError:
+ # Convert the old value of data["context"] to a list
+ data["context"] = [data["context"]]
return data
diff --git a/frontend/src/components/custom-tools/prompt-card/OutputForIndex.jsx b/frontend/src/components/custom-tools/prompt-card/OutputForIndex.jsx
index 506414174..0fa72073a 100644
--- a/frontend/src/components/custom-tools/prompt-card/OutputForIndex.jsx
+++ b/frontend/src/components/custom-tools/prompt-card/OutputForIndex.jsx
@@ -16,19 +16,7 @@ function OutputForIndex({ chunkData, setIsIndexOpen, isIndexOpen }) {
const activeRef = useRef(null);
useEffect(() => {
- if (!chunkData) {
- setChunks([]);
- }
- // Split chunkData into chunks using \f\n delimiter
- const tempChunks = chunkData?.split("\f\n");
- // To remove " at the end
- if (tempChunks?.length > 0) {
- const lastChunk = tempChunks[tempChunks?.length - 1].trim();
- if (lastChunk === '\\n"' || lastChunk === "") {
- tempChunks.pop();
- }
- }
- setChunks(tempChunks);
+ setChunks(chunkData || []);
}, [chunkData]);
// Debounced search handler
diff --git a/prompt-service/src/unstract/prompt_service/helper.py b/prompt-service/src/unstract/prompt_service/helper.py
index ee730173e..0754b74c7 100644
--- a/prompt-service/src/unstract/prompt_service/helper.py
+++ b/prompt-service/src/unstract/prompt_service/helper.py
@@ -83,11 +83,11 @@ def plugin_loader(app: Flask) -> None:
initialize_plugin_endpoints(app=app)
-def get_cleaned_context(context: str) -> str:
+def get_cleaned_context(context: set[str]) -> list[str]:
clean_context_plugin: dict[str, Any] = plugins.get(PSKeys.CLEAN_CONTEXT, {})
if clean_context_plugin:
return clean_context_plugin["entrypoint_cls"].run(context=context)
- return context
+ return list(context)
def initialize_plugin_endpoints(app: Flask) -> None:
diff --git a/prompt-service/src/unstract/prompt_service/main.py b/prompt-service/src/unstract/prompt_service/main.py
index 15a46c8fd..8ba54d4ef 100644
--- a/prompt-service/src/unstract/prompt_service/main.py
+++ b/prompt-service/src/unstract/prompt_service/main.py
@@ -251,10 +251,10 @@ def prompt_processor() -> Any:
raise api_error
try:
- context = ""
+ context: set[str] = set()
if output[PSKeys.CHUNK_SIZE] == 0:
# We can do this only for chunkless indexes
- context: Optional[str] = index.query_index(
+ retrieved_context: Optional[str] = index.query_index(
embedding_instance_id=output[PSKeys.EMBEDDING],
vector_db_instance_id=output[PSKeys.VECTOR_DB],
doc_id=doc_id,
@@ -270,13 +270,13 @@ def prompt_processor() -> Any:
# inconsistent, and not reproducible easily,
# this is just a safety net.
time.sleep(2)
- context: Optional[str] = index.query_index(
+ retrieved_context: Optional[str] = index.query_index(
embedding_instance_id=output[PSKeys.EMBEDDING],
vector_db_instance_id=output[PSKeys.VECTOR_DB],
doc_id=doc_id,
usage_kwargs=usage_kwargs,
)
- if context is None:
+ if retrieved_context is None:
# TODO: Obtain user set name for vector DB
msg = NO_CONTEXT_ERROR
app.logger.error(
@@ -294,6 +294,7 @@ def prompt_processor() -> Any:
msg,
)
raise APIError(message=msg)
+ context.add(retrieved_context)
# TODO: Use vectorDB name when available
publish_log(
log_events_id,
@@ -323,7 +324,7 @@ def prompt_processor() -> Any:
tool_settings=tool_settings,
output=output,
llm=llm,
- context=context,
+ context="\n".join(context),
prompt="promptx",
metadata=metadata,
)
@@ -537,7 +538,7 @@ def prompt_processor() -> Any:
llm=llm,
challenge_llm=challenge_llm,
run_id=run_id,
- context=context,
+ context="\n".join(context),
tool_settings=tool_settings,
output=output,
structured_output=structured_output,
@@ -593,7 +594,7 @@ def prompt_processor() -> Any:
try:
evaluator = eval_plugin["entrypoint_cls"](
"",
- context,
+ "\n".join(context),
"",
"",
output,
@@ -680,7 +681,7 @@ def run_retrieval( # type:ignore
retrieval_type: str,
metadata: dict[str, Any],
) -> tuple[str, str]:
- context: str = ""
+ context: set[str] = set()
prompt = output[PSKeys.PROMPTX]
if retrieval_type == PSKeys.SUBQUESTION:
subq_prompt: str = (
@@ -713,19 +714,11 @@ def run_retrieval( # type:ignore
prompt=subq_prompt,
)
subquestion_list = subquestions.split(",")
- raw_retrieved_context = ""
for each_subq in subquestion_list:
retrieved_context = _retrieve_context(
output, doc_id, vector_index, each_subq
)
- # Not adding the potential for pinecode serverless
- # inconsistency issue owing to risk of infinte loop
- # and inablity to diffrentiate genuine cases of
- # empty context.
- raw_retrieved_context = "\f\n".join(
- [raw_retrieved_context, retrieved_context]
- )
- context = _remove_duplicate_nodes(raw_retrieved_context)
+ context.update(retrieved_context)
if retrieval_type == PSKeys.SIMPLE:
@@ -746,7 +739,7 @@ def run_retrieval( # type:ignore
tool_settings=tool_settings,
output=output,
llm=llm,
- context=context,
+ context="\n".join(context),
prompt="promptx",
metadata=metadata,
)
@@ -754,13 +747,7 @@ def run_retrieval( # type:ignore
return (answer, context)
-def _remove_duplicate_nodes(retrieved_context: str) -> str:
- context_set: set[str] = set(retrieved_context.split("\f\n"))
- fomatted_context = "\f\n".join(context_set)
- return fomatted_context
-
-
-def _retrieve_context(output, doc_id, vector_index, answer) -> str:
+def _retrieve_context(output, doc_id, vector_index, answer) -> set[str]:
retriever = vector_index.as_retriever(
similarity_top_k=output[PSKeys.SIMILARITY_TOP_K],
filters=MetadataFilters(
@@ -773,18 +760,18 @@ def _retrieve_context(output, doc_id, vector_index, answer) -> str:
),
)
nodes = retriever.retrieve(answer)
- text = ""
+ context: set[str] = set()
for node in nodes:
# ToDo: May have to fine-tune this value for node score or keep it
# configurable at the adapter level
if node.score > 0:
- text += node.get_content() + "\f\n"
+ context.add(node.get_content())
else:
app.logger.info(
"Node score is less than 0. "
f"Ignored: {node.node_id} with score {node.score}"
)
- return text
+ return context
def log_exceptions(e: HTTPException):