From f217c5f2c31e68b814644e310c22d92a58429e3e Mon Sep 17 00:00:00 2001 From: Daniel Swiecki Date: Thu, 15 Jun 2023 14:09:08 -0400 Subject: [PATCH 1/7] UPDATED: Added support for Azure and API key - Added properties `azureProxy`, `azureEndpoint`, `azureApiKey`, and `openAIEndpoint` to the `VectorStorage` class - Modified the constructor to accept these new options - Updated `embedTexts` function to use the Azure endpoint and API key if provided, or fall back to the OpenAI API key - Adjusted the code to handle the different scenarios for API key and endpoint - Updated the `IVSOptions` interface to include the new options --- README.md | 8 +++- src/VectorStorage.ts | 94 ++++++++++++++++++++++++++++------------- src/types/IVSOptions.ts | 4 ++ 3 files changed, 75 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 5126d2b..8e29668 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,13 @@ Here is a basic example of how to use the VectorStorage class: import { VectorStorage } from "vector-storage"; // Create an instance of VectorStorage -const vectorStore = new VectorStorage({ openAIApiKey: "your-openai-api-key" }); +const vectorStore = new VectorStorage({ + azureProxy: "your-azure-endpoint-that-manages-your-api", + azureEndpoint: "your-azure-endpoint", + azureApiKey: "your-azure-api-key", + openAIEndpoint: "https://api.openai.com/v1/embeddings", + openAIApiKey: "your-openai-api-key" + }); // Add a text document to the store await vectorStore.addText("The quick brown fox jumps over the lazy dog.", { diff --git a/src/VectorStorage.ts b/src/VectorStorage.ts index 69c9cae..c7b6bb9 100644 --- a/src/VectorStorage.ts +++ b/src/VectorStorage.ts @@ -12,22 +12,30 @@ export class VectorStorage { private readonly maxSizeInMB: number; private readonly debounceTime: number; private readonly openaiModel: string; + private readonly openaiEndpoint?: string; private readonly openaiApiKey?: string; + private readonly azureEndpoint?: string; + private readonly azureApiKey?:
string; + private readonly azureProxy?: string; private readonly embedTextsFn: (texts: string[]) => Promise; - + constructor(options: IVSOptions = {}) { this.maxSizeInMB = options.maxSizeInMB ?? constants.DEFAULT_MAX_SIZE_IN_MB; this.debounceTime = options.debounceTime ?? constants.DEFAULT_DEBOUNCE_TIME; this.openaiModel = options.openaiModel ?? constants.DEFAULT_OPENAI_MODEL; this.embedTextsFn = options.embedTextsFn ?? this.embedTexts; // Use the custom function if provided, else use the default one + this.openaiEndpoint = options.openAIEndpoint; this.openaiApiKey = options.openAIApiKey; - if (!this.openaiApiKey && !options.embedTextsFn) { - console.error('VectorStorage: pass as an option either an OpenAI API key or a custom embedTextsFn function.'); + this.azureEndpoint = options.azureEndpoint; + this.azureApiKey = options.azureApiKey; + this.azureProxy = options.azureProxy; + if (!this.azureProxy && !this.azureApiKey && !this.openaiApiKey && !options.embedTextsFn) { + console.error('VectorStorage: pass as an option either an api key or a custom embedTextsFn function.'); } else { this.loadFromIndexDbStorage(); } } - + public async addText(text: string, metadata: T): Promise> { // Create a document from the text and metadata const doc: IVSDocument = { @@ -40,7 +48,7 @@ export class VectorStorage { const docs = await this.addDocuments([doc]); return docs[0]; } - + public async addTexts(texts: string[], metadatas: T[]): Promise>> { if (texts.length !== metadatas.length) { throw new Error('The lengths of texts and metadata arrays must match.'); @@ -54,7 +62,7 @@ export class VectorStorage { })); return await this.addDocuments(docs); } - + public async similaritySearch(params: IVSSimilaritySearchParams): Promise<{ similarItems: Array>; query: { text: string; embedding: number[] }; @@ -82,7 +90,7 @@ export class VectorStorage { similarItems: results, }; } - + private async initDB(): Promise> { return await openDB('VectorStorageDatabase', undefined, { upgrade(db) { @@ 
-99,7 +107,7 @@ export class VectorStorage { }, }); } - + private async addDocuments(documents: Array>): Promise>> { // filter out already existing documents const newDocuments = documents.filter((doc) => !this.documents.some((d) => d.text === doc.text)); @@ -120,37 +128,63 @@ export class VectorStorage { await this.saveToIndexDbStorage(); return newDocuments; } - + private async embedTexts(texts: string[]): Promise { - const response = await fetch(constants.OPENAI_API_URL, { - body: JSON.stringify({ - input: texts, - model: this.openaiModel, - }), - headers: { - Authorization: `Bearer ${this.openaiApiKey}`, - 'Content-Type': 'application/json', - }, - method: 'POST', - }); - + let response; + if (this.azureProxy) { + response = await fetch(this.azureProxy, { + body: JSON.stringify({ + model: this.openaiModel, + texts, + }), + headers: { + 'Content-Type': 'application/json', + }, + method: 'POST', + }); + } else if (this.azureEndpoint && this.azureApiKey) { + response = await fetch(this.azureEndpoint, { + body: JSON.stringify({ + input: texts, + model: this.openaiModel, + }), + headers: { + 'Content-Type': 'application/json', + 'api-key': this.azureApiKey, + }, + method: 'POST', + }); + } else { + response = await fetch(this.openaiEndpoint ?? constants.OPENAI_API_URL, { + body: JSON.stringify({ + input: texts, + model: this.openaiModel, + }), + headers: { + Authorization: `Bearer ${this.openaiApiKey}`, + 'Content-Type': 'application/json', + }, + method: 'POST', + }); + } + if (!response.ok) { throw new Error(`HTTP error! 
status: ${response.status}`); } - + const responseData = (await response.json()) as ICreateEmbeddingResponse; return responseData.data.map((data) => data.embedding); } - + private async embedText(query: string): Promise { return (await this.embedTextsFn([query]))[0]; } - + private calculateMagnitude(embedding: number[]): number { const queryMagnitude = Math.sqrt(embedding.reduce((sum, val) => sum + val * val, 0)); return queryMagnitude; } - + private calculateSimilarityScores(filteredDocuments: Array>, queryVector: number[], queryMagnitude: number): Array<[IVSDocument, number]> { return filteredDocuments.map((doc) => { const dotProduct = doc.vector!.reduce((sum, val, i) => sum + val * queryVector[i], 0); @@ -159,13 +193,13 @@ export class VectorStorage { return [doc, score]; }); } - + private updateHitCounters(results: Array>): void { results.forEach((doc) => { doc.hits = (doc.hits ?? 0) + 1; // Update hit counter }); } - + private async loadFromIndexDbStorage(): Promise { if (!this.db) { this.db = await this.initDB(); @@ -173,7 +207,7 @@ export class VectorStorage { this.documents = await this.db.getAll('documents'); this.removeDocsLRU(); } - + private async saveToIndexDbStorage(): Promise { if (!this.db) { this.db = await this.initDB(); @@ -190,12 +224,12 @@ export class VectorStorage { console.error('Failed to save to IndexedDB:', error.message); } } - + private removeDocsLRU(): void { if (getObjectSizeInMB(this.documents) > this.maxSizeInMB) { // Sort documents by hit counter (ascending) and then by timestamp (ascending) this.documents.sort((a, b) => (a.hits ?? 0) - (b.hits ?? 
0) || a.timestamp - b.timestamp); - + // Remove documents until the size is below the limit while (getObjectSizeInMB(this.documents) > this.maxSizeInMB) { this.documents.shift(); diff --git a/src/types/IVSOptions.ts b/src/types/IVSOptions.ts index 1f4222b..76516cf 100644 --- a/src/types/IVSOptions.ts +++ b/src/types/IVSOptions.ts @@ -1,4 +1,8 @@ export interface IVSOptions { + azureProxy?: string; // Azure endpoint which manages your api key + azureEndpoint?: string; // Azure endpoint + azureApiKey?: string; // Azure api key + openAIEndpoint?: string; // The OpenAI embeddings endpoint URL. Falls back to constants.OPENAI_API_URL when omitted. openAIApiKey?: string; // The OpenAI API key used for generating embeddings. maxSizeInMB?: number; // The maximum size of the storage in megabytes. Defaults to 4.8. Cannot exceed 5. debounceTime?: number; // The debounce time in milliseconds for saving to local storage. Defaults to 0. From 9d25279e45cf76b796898c3a69e0827079eb8171 Mon Sep 17 00:00:00 2001 From: Daniel Swiecki Date: Fri, 16 Jun 2023 07:13:54 -0400 Subject: [PATCH 2/7] added prepare so it builds in my project --- package.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/package.json b/package.json index 1f63bca..0531afe 100644 --- a/package.json +++ b/package.json @@ -29,7 +29,8 @@ "build": "tsc", "bump": "npm version patch", "package": "npm run lint && npm run build && npm run bump && npm publish", - "lint": "eslint --ext .ts src" + "lint": "eslint --ext .ts src", + "prepare": "npm run build" }, "author": "Nitai Aharoni", "license": "MIT", From ec49de245ec1c6673ff365a2d6426b9c2bf43251 Mon Sep 17 00:00:00 2001 From: Daniel Swiecki Date: Fri, 16 Jun 2023 16:17:32 -0400 Subject: [PATCH 3/7] UPDATED: Added clearAll method to VectorStorage class. - The clearAll method clears all documents in the storage. - After clearing the documents, the updated storage is saved to IndexedDB.
--- src/VectorStorage.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/VectorStorage.ts b/src/VectorStorage.ts index c7b6bb9..8e2c65c 100644 --- a/src/VectorStorage.ts +++ b/src/VectorStorage.ts @@ -90,6 +90,14 @@ export class VectorStorage { similarItems: results, }; } + + private async clearAll(): Promise { + while (this.documents.length > 0) { + this.documents.shift(); + } + await this.saveToIndexDbStorage(); + } + private async initDB(): Promise> { return await openDB('VectorStorageDatabase', undefined, { From 8d4c6db7874c6129a9d452f94417c6abac369e52 Mon Sep 17 00:00:00 2001 From: Daniel Swiecki Date: Fri, 16 Jun 2023 16:20:23 -0400 Subject: [PATCH 4/7] UPDATED: Made the clearAll method of the VectorStorage class public. - The clearAll method clears all documents in the storage. - After clearing the documents, the updated storage is saved to IndexedDB. --- src/VectorStorage.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/VectorStorage.ts b/src/VectorStorage.ts index 8e2c65c..7818363 100644 --- a/src/VectorStorage.ts +++ b/src/VectorStorage.ts @@ -91,7 +91,7 @@ export class VectorStorage { }; } - private async clearAll(): Promise { + public async clearAll(): Promise { while (this.documents.length > 0) { this.documents.shift(); } From baed280071a979cec2f245b06a9e6aef7767f613 Mon Sep 17 00:00:00 2001 From: Daniel Swiecki Date: Fri, 16 Jun 2023 16:33:53 -0400 Subject: [PATCH 5/7] UPDATED: Added clearMatching to VectorStorage - Implemented clearMatching method in VectorStorage class, which removes matching documents based on filter options --- src/VectorStorage.ts | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/VectorStorage.ts b/src/VectorStorage.ts index 7818363..74d430a 100644 --- a/src/VectorStorage.ts +++ b/src/VectorStorage.ts @@ -1,6 +1,7 @@ import { ICreateEmbeddingResponse } from './types/ICreateEmbeddingResponse'; import { IDBPDatabase, openDB } from 'idb'; import { IVSDocument, IVSSimilaritySearchItem } from './types/IVSDocument'; +import { IVSFilterOptions } from
'./types/IVSFilterOptions'; import { IVSOptions } from './types/IVSOptions'; import { IVSSimilaritySearchParams } from './types/IVSSimilaritySearchParams'; import { constants } from './common/constants'; @@ -98,6 +99,17 @@ export class VectorStorage { await this.saveToIndexDbStorage(); } + public async clearMatching(filterOptions: IVSFilterOptions): Promise { + const filteredDocuments = filterDocuments(this.documents, filterOptions); + filteredDocuments.forEach((doc) => { + const index = this.documents.findIndex((d) => d.text === doc.text); + if (index !== -1) { + this.documents.splice(index, 1); + } + }); + await this.saveToIndexDbStorage(); + } + private async initDB(): Promise> { return await openDB('VectorStorageDatabase', undefined, { From c1c71a5906cdd8099a161ecfe80a89caf3372591 Mon Sep 17 00:00:00 2001 From: Daniel Swiecki Date: Sat, 17 Jun 2023 05:58:50 -0400 Subject: [PATCH 6/7] UPDATED: VectorStorage class now has a method to retain matching documents. - Added new method `retainMatching` to the VectorStorage class. - This method filters the documents based on the given filter options. - The filtered documents are stored in the `documents` property. - After filtering, documents are saved to the index DB storage.
--- src/VectorStorage.ts | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/VectorStorage.ts b/src/VectorStorage.ts index 74d430a..a11c343 100644 --- a/src/VectorStorage.ts +++ b/src/VectorStorage.ts @@ -99,17 +99,11 @@ export class VectorStorage { await this.saveToIndexDbStorage(); } - public async clearMatching(filterOptions: IVSFilterOptions): Promise { + public async retainMatching(filterOptions: IVSFilterOptions): Promise { const filteredDocuments = filterDocuments(this.documents, filterOptions); - filteredDocuments.forEach((doc) => { - const index = this.documents.findIndex((d) => d.text === doc.text); - if (index !== -1) { - this.documents.splice(index, 1); - } - }); + this.documents = filteredDocuments; await this.saveToIndexDbStorage(); } - private async initDB(): Promise> { return await openDB('VectorStorageDatabase', undefined, { From 63b30126f1c6fc3155bf850f76a161844701de05 Mon Sep 17 00:00:00 2001 From: Daniel Swiecki Date: Sat, 17 Jun 2023 20:54:37 -0400 Subject: [PATCH 7/7] UPDATED: Changes to filterDocuments function in helpers.ts - Modified the matchesCriteria function to handle arrays in the criteria metadata - If the criteria metadata value is an array, it checks if the document metadata value is included in the array; otherwise, it checks for strict equality - This allows for more flexible filtering options for documents based on metadata --- src/common/helpers.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/common/helpers.ts b/src/common/helpers.ts index 3f64376..de88ff7 100644 --- a/src/common/helpers.ts +++ b/src/common/helpers.ts @@ -17,9 +17,11 @@ export function filterDocuments(documents: Array>, filterOption function matchesCriteria(document: IVSDocument, criteria: IVSFilterCriteria): boolean { if (criteria.metadata) { for (const key in criteria.metadata) { - if (document.metadata[key] !== criteria.metadata[key]) { - return false; - } + if
(Array.isArray(criteria.metadata[key])) { + if (!criteria.metadata[key].includes(document.metadata[key])) { + return false; + } + } else if (document.metadata[key] !== criteria.metadata[key]) return false; } } if (criteria.text) {