diff --git a/README.md b/README.md index 4704eacb4..15bdd5519 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ The following collection shows Milvus versions and recommended PyMilvus versions | 2.1.\* | 2.1.3 | | 2.2.\* | 2.2.15 | | 2.3.\* | 2.3.7 | -| 2.4.\* | 2.4.0 | +| 2.4.\* | 2.4.9 | ## Installation @@ -43,7 +43,7 @@ $ pip3 install pymilvus[bulk_writer] # for bulk_writer You can install a specific version of PyMilvus by: ```shell -$ pip3 install pymilvus==2.3.7 +$ pip3 install pymilvus==2.4.9 ``` You can upgrade PyMilvus to the latest version by: diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 000000000..df635b4e6 --- /dev/null +++ b/examples/README.md @@ -0,0 +1 @@ +# Examples diff --git a/examples/milvus_client/alias.py b/examples/alias.py similarity index 100% rename from examples/milvus_client/alias.py rename to examples/alias.py diff --git a/examples/example_bulkinsert_json.py b/examples/bulk_import/example_bulkinsert_json.py similarity index 100% rename from examples/example_bulkinsert_json.py rename to examples/bulk_import/example_bulkinsert_json.py diff --git a/examples/example_bulkinsert_numpy.py b/examples/bulk_import/example_bulkinsert_numpy.py similarity index 100% rename from examples/example_bulkinsert_numpy.py rename to examples/bulk_import/example_bulkinsert_numpy.py diff --git a/examples/example_bulkwriter.py b/examples/bulk_import/example_bulkwriter.py similarity index 100% rename from examples/example_bulkwriter.py rename to examples/bulk_import/example_bulkwriter.py diff --git a/examples/data/train_embeddings.csv b/examples/bulk_import/train_embeddings.csv similarity index 100% rename from examples/data/train_embeddings.csv rename to examples/bulk_import/train_embeddings.csv diff --git a/examples/example_tls1.py b/examples/cert/example_tls1.py similarity index 100% rename from examples/example_tls1.py rename to examples/cert/example_tls1.py diff --git a/examples/example_tls2.py b/examples/cert/example_tls2.py similarity index 100% rename from examples/example_tls2.py rename to examples/cert/example_tls2.py diff --git a/examples/compact.py b/examples/compact.py new file mode 100644 index 000000000..5aaa73f2d --- /dev/null +++ b/examples/compact.py @@ -0,0 +1,83 @@ +import time +import numpy as np +from pymilvus import ( + MilvusClient, +) + +fmt = "\n=== {:30} ===\n" +dim = 8 +collection_name = "hello_milvus" +milvus_client = MilvusClient("http://localhost:19530") + +has_collection = milvus_client.has_collection(collection_name, timeout=5) +if has_collection: + milvus_client.drop_collection(collection_name) +milvus_client.create_collection(collection_name, dim, consistency_level="Strong", metric_type="L2") + +rng = np.random.default_rng(seed=19530) +rows = [ + {"id": 1, "vector": rng.random((1, dim))[0], "a": 100}, + {"id": 2, "vector": rng.random((1, dim))[0], "b": 200}, + {"id": 3, "vector": rng.random((1, dim))[0], "c": 300}, + {"id": 4, "vector": rng.random((1, dim))[0], "d": 400}, + {"id": 5, "vector": rng.random((1, dim))[0], "e": 500}, + {"id": 6, "vector": rng.random((1, dim))[0], "f": 600}, +] + +print(fmt.format("Start inserting entities")) +insert_result = milvus_client.insert(collection_name, rows) +print(fmt.format("Inserting entities done")) +print(insert_result) + +upsert_ret = milvus_client.upsert(collection_name, {"id": 2 , "vector": rng.random((1, dim))[0], "g": 100}) +print(upsert_ret) + +print(fmt.format("Start flush")) +milvus_client.flush(collection_name) +print(fmt.format("flush done")) + +result = milvus_client.query(collection_name, "", output_fields = ["count(*)"]) +print(f"final entities in {collection_name} is {result[0]['count(*)']}") + +rows = [ + {"id": 7, "vector": rng.random((1, dim))[0], "g": 700}, + {"id": 8, "vector": rng.random((1, dim))[0], "h": 800}, + {"id": 9, "vector": rng.random((1, dim))[0], "i": 900}, + {"id": 10, "vector": rng.random((1, dim))[0], "j": 1000}, + {"id": 11, "vector": rng.random((1, dim))[0], "k": 1100}, + {"id": 12, "vector": rng.random((1, dim))[0], "l": 1200}, +] + +print(fmt.format("Start inserting entities")) +insert_result = milvus_client.insert(collection_name, rows) +print(fmt.format("Inserting entities done")) +print(insert_result) + +print(fmt.format("Start flush")) +milvus_client.flush(collection_name) +print(fmt.format("flush done")) + +result = milvus_client.query(collection_name, "", output_fields = ["count(*)"]) +print(f"final entities in {collection_name} is {result[0]['count(*)']}") + +print(fmt.format("Start compact")) +job_id = milvus_client.compact(collection_name) +print(f"job_id:{job_id}") + +cnt = 0 +state = milvus_client.get_compaction_state(job_id) +while (state != "Completed" and cnt < 10): + time.sleep(1.0) + state = milvus_client.get_compaction_state(job_id) + print(f"compaction state: {state}") + cnt += 1 + +if state == "Completed": + print(fmt.format("compact done")) +else: + print(fmt.format("compact timeout")) + +result = milvus_client.query(collection_name, "", output_fields = ["count(*)"]) +print(f"final entities in {collection_name} is {result[0]['count(*)']}") + +milvus_client.drop_collection(collection_name) diff --git a/examples/multithreading_hello_milvus.py b/examples/concurrency/multithreading_hello_milvus.py similarity index 100% rename from examples/multithreading_hello_milvus.py rename to examples/concurrency/multithreading_hello_milvus.py diff --git a/examples/milvus_client/customize_schema.py b/examples/customize_schema.py similarity index 100% rename from examples/milvus_client/customize_schema.py rename to examples/customize_schema.py diff --git a/examples/milvus_client/customize_schema_auto_id.py b/examples/customize_schema_auto_id.py similarity index 100% rename from examples/milvus_client/customize_schema_auto_id.py rename to examples/customize_schema_auto_id.py diff --git a/examples/bfloat16_example.py b/examples/datatypes/bfloat16_example.py similarity index 100% rename from examples/bfloat16_example.py rename to examples/datatypes/bfloat16_example.py diff --git a/examples/binary_example.py b/examples/datatypes/binary_example.py similarity index 100% rename from examples/binary_example.py rename to examples/datatypes/binary_example.py diff --git a/examples/dynamic_field.py b/examples/datatypes/dynamic_field.py similarity index 100% rename from examples/dynamic_field.py rename to examples/datatypes/dynamic_field.py diff --git a/examples/example_str.py b/examples/datatypes/example_str.py similarity index 100% rename from examples/example_str.py rename to examples/datatypes/example_str.py diff --git a/examples/float16_example.py b/examples/datatypes/float16_example.py similarity index 100% rename from examples/float16_example.py rename to examples/datatypes/float16_example.py diff --git a/examples/fuzzy_match.py b/examples/datatypes/fuzzy_match.py similarity index 100% rename from examples/fuzzy_match.py rename to examples/datatypes/fuzzy_match.py diff --git a/examples/hello_milvus_array.py b/examples/datatypes/hello_milvus_array.py similarity index 100% rename from examples/hello_milvus_array.py rename to examples/datatypes/hello_milvus_array.py diff --git a/examples/hello_sparse.py b/examples/datatypes/hello_sparse.py similarity index 100% rename from examples/hello_sparse.py rename to examples/datatypes/hello_sparse.py diff --git a/examples/flush.py b/examples/flush.py new file mode 100644 index 000000000..c192a6812 --- /dev/null +++ b/examples/flush.py @@ -0,0 +1,57 @@ +import time +import numpy as np +from pymilvus import ( + MilvusClient, +) + +fmt = "\n=== {:30} ===\n" +dim = 8 +collection_name = "hello_milvus" +milvus_client = MilvusClient("http://localhost:19530") + +has_collection = milvus_client.has_collection(collection_name, timeout=5) +if has_collection: + milvus_client.drop_collection(collection_name) +milvus_client.create_collection(collection_name, dim, consistency_level="Strong", metric_type="L2") + +rng = np.random.default_rng(seed=19530) +rows = [ + {"id": 1, "vector": rng.random((1, dim))[0], "a": 100}, + {"id": 2, "vector": rng.random((1, dim))[0], "b": 200}, + {"id": 3, "vector": rng.random((1, dim))[0], "c": 300}, + {"id": 4, "vector": rng.random((1, dim))[0], "d": 400}, + {"id": 5, "vector": rng.random((1, dim))[0], "e": 500}, + {"id": 6, "vector": rng.random((1, dim))[0], "f": 600}, +] + +print(fmt.format("Start inserting entities")) +insert_result = milvus_client.insert(collection_name, rows) +print(fmt.format("Inserting entities done")) +print(insert_result) + +upsert_ret = milvus_client.upsert(collection_name, {"id": 2 , "vector": rng.random((1, dim))[0], "g": 100}) +print(upsert_ret) + +print(fmt.format("Start flush")) +milvus_client.flush(collection_name) +print(fmt.format("flush done")) + + +result = milvus_client.query(collection_name, "", output_fields = ["count(*)"]) +print(f"final entities in {collection_name} is {result[0]['count(*)']}") + + +print(f"start to delete by specifying filter in collection {collection_name}") +delete_result = milvus_client.delete(collection_name, ids=[6]) +print(delete_result) + + +print(fmt.format("Start flush")) +milvus_client.flush(collection_name) +print(fmt.format("flush done")) + + +result = milvus_client.query(collection_name, "", output_fields = ["count(*)"]) +print(f"final entities in {collection_name} is {result[0]['count(*)']}") + +milvus_client.drop_collection(collection_name) diff --git a/examples/get_server_version.py b/examples/get_server_version.py new file mode 100644 index 000000000..16b8bc708 --- /dev/null +++ b/examples/get_server_version.py @@ -0,0 +1,8 @@ +from pymilvus import ( + MilvusClient, +) + +milvus_client = MilvusClient("http://localhost:19530") + +version = milvus_client.get_server_version() +print(f"server version: {version}") diff --git a/examples/example_gpu_brute_force.py b/examples/gpu_indx/example_gpu_brute_force.py similarity index 100% rename from examples/example_gpu_brute_force.py rename to examples/gpu_indx/example_gpu_brute_force.py diff --git a/examples/example_gpu_cagra.py b/examples/gpu_indx/example_gpu_cagra.py similarity index 100% rename from examples/example_gpu_cagra.py rename to examples/gpu_indx/example_gpu_cagra.py diff --git a/examples/hybrid_search.py b/examples/hybrid_search.py index 6a13045f0..28ae0b309 100644 --- a/examples/hybrid_search.py +++ b/examples/hybrid_search.py @@ -1,9 +1,7 @@ import numpy as np from pymilvus import ( - connections, - utility, - FieldSchema, CollectionSchema, DataType, - Collection, + MilvusClient, + DataType, AnnSearchRequest, RRFRanker, WeightedRanker, ) @@ -11,25 +9,26 @@ search_latency_fmt = "search latency = {:.4f}s" num_entities, dim = 3000, 8 -print(fmt.format("start connecting to Milvus")) -connections.connect("default", host="localhost", port="19530") +collection_name = "hello_milvus" +milvus_client = MilvusClient("http://localhost:19530") -has = utility.has_collection("hello_milvus") -print(f"Does collection hello_milvus exist in Milvus: {has}") -if has: - utility.drop_collection("hello_milvus") +has_collection = milvus_client.has_collection(collection_name, timeout=5) +if has_collection: + milvus_client.drop_collection(collection_name) -fields = [ - FieldSchema(name="pk", dtype=DataType.VARCHAR, is_primary=True, auto_id=False, max_length=100), - FieldSchema(name="random", dtype=DataType.DOUBLE), - FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=dim), - FieldSchema(name="embeddings2", dtype=DataType.FLOAT_VECTOR, dim=dim) -] +schema = milvus_client.create_schema(auto_id=False, description="hello_milvus is the simplest demo to introduce the APIs") +schema.add_field("pk", DataType.VARCHAR, is_primary=True, max_length=100) +schema.add_field("random", DataType.DOUBLE) +schema.add_field("embeddings", DataType.FLOAT_VECTOR, dim=dim) +schema.add_field("embeddings2", DataType.FLOAT_VECTOR, dim=dim) -schema = CollectionSchema(fields, "hello_milvus is the simplest demo to introduce the APIs") +index_params = milvus_client.prepare_index_params() +index_params.add_index(field_name = "embeddings", index_type = "IVF_FLAT", metric_type="L2", nlist=128) +index_params.add_index(field_name = "embeddings2",index_type = "IVF_FLAT", metric_type="L2", nlist=128) print(fmt.format("Create collection `hello_milvus`")) -hello_milvus = Collection("hello_milvus", schema, consistency_level="Strong", num_shards = 4) + +milvus_client.create_collection(collection_name, schema=schema, index_params=index_params, consistency_level="Strong") print(fmt.format("Start inserting entities")) rng = np.random.default_rng(seed=19530) @@ -41,29 +40,19 @@ rng.random((num_entities, dim)), # field embeddings2, supports numpy.ndarray and list ] -insert_result = hello_milvus.insert(entities) +rows = [ {"pk": entities[0][i], "random": entities[1][i], "embeddings": entities[2][i], "embeddings2": entities[3][i]} for i in range (num_entities)] -hello_milvus.flush() -print(f"Number of entities in Milvus: {hello_milvus.num_entities}") # check the num_entities +insert_result = milvus_client.insert(collection_name, rows) -print(fmt.format("Start Creating index IVF_FLAT")) -index = { - "index_type": "IVF_FLAT", - "metric_type": "L2", - "params": {"nlist": 128}, -} - -hello_milvus.create_index("embeddings", index) -hello_milvus.create_index("embeddings2", index) print(fmt.format("Start loading")) -hello_milvus.load() +milvus_client.load_collection(collection_name) field_names = ["embeddings", "embeddings2"] +field_names = ["embeddings"] req_list = [] nq = 1 -weights = [0.2, 0.3] default_limit = 5 vectors_to_search = [] @@ -79,15 +68,8 @@ req = AnnSearchRequest(**search_param) req_list.append(req) -hybrid_res = hello_milvus.hybrid_search(req_list, WeightedRanker(*weights), default_limit, output_fields=["random"]) - -print("rank by WightedRanker") -for hits in hybrid_res: - for hit in hits: - print(f" hybrid search hit: {hit}") - print("rank by RRFRanker") -hybrid_res = hello_milvus.hybrid_search(req_list, RRFRanker(), default_limit, output_fields=["random"]) +hybrid_res = milvus_client.hybrid_search(collection_name, req_list, RRFRanker(), default_limit, output_fields=["random"]) for hits in hybrid_res: for hit in hits: print(f" hybrid search hit: {hit}") diff --git a/examples/hello_hybrid_sparse_dense.py b/examples/hybrid_search/hello_hybrid_sparse_dense.py similarity index 100% rename from examples/hello_hybrid_sparse_dense.py rename to examples/hybrid_search/hello_hybrid_sparse_dense.py diff --git a/examples/hybrid_search/hybrid_search.py b/examples/hybrid_search/hybrid_search.py new file mode 100644 index 000000000..6a13045f0 --- /dev/null +++ b/examples/hybrid_search/hybrid_search.py @@ -0,0 +1,93 @@ +import numpy as np +from pymilvus import ( + connections, + utility, + FieldSchema, CollectionSchema, DataType, + Collection, + AnnSearchRequest, RRFRanker, WeightedRanker, +) + +fmt = "\n=== {:30} ===\n" +search_latency_fmt = "search latency = {:.4f}s" +num_entities, dim = 3000, 8 + +print(fmt.format("start connecting to Milvus")) +connections.connect("default", host="localhost", port="19530") + +has = utility.has_collection("hello_milvus") +print(f"Does collection hello_milvus exist in Milvus: {has}") +if has: + utility.drop_collection("hello_milvus") + +fields = [ + FieldSchema(name="pk", dtype=DataType.VARCHAR, is_primary=True, auto_id=False, max_length=100), + FieldSchema(name="random", dtype=DataType.DOUBLE), + FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=dim), + FieldSchema(name="embeddings2", dtype=DataType.FLOAT_VECTOR, dim=dim) +] + +schema = CollectionSchema(fields, "hello_milvus is the simplest demo to introduce the APIs") + +print(fmt.format("Create collection `hello_milvus`")) +hello_milvus = Collection("hello_milvus", schema, consistency_level="Strong", num_shards = 4) + +print(fmt.format("Start inserting entities")) +rng = np.random.default_rng(seed=19530) +entities = [ + # provide the pk field because `auto_id` is set to False + [str(i) for i in range(num_entities)], + rng.random(num_entities).tolist(), # field random, only supports list + rng.random((num_entities, dim)), # field embeddings, supports numpy.ndarray and list + rng.random((num_entities, dim)), # field embeddings2, supports numpy.ndarray and list +] + +insert_result = hello_milvus.insert(entities) + +hello_milvus.flush() +print(f"Number of entities in Milvus: {hello_milvus.num_entities}") # check the num_entities + +print(fmt.format("Start Creating index IVF_FLAT")) +index = { + "index_type": "IVF_FLAT", + "metric_type": "L2", + "params": {"nlist": 128}, +} + +hello_milvus.create_index("embeddings", index) +hello_milvus.create_index("embeddings2", index) + +print(fmt.format("Start loading")) +hello_milvus.load() + +field_names = ["embeddings", "embeddings2"] + +req_list = [] +nq = 1 +weights = [0.2, 0.3] +default_limit = 5 +vectors_to_search = [] + +for i in range(len(field_names)): + # 4. generate search data + vectors_to_search = rng.random((nq, dim)) + search_param = { + "data": vectors_to_search, + "anns_field": field_names[i], + "param": {"metric_type": "L2"}, + "limit": default_limit, + "expr": "random > 0.5"} + req = AnnSearchRequest(**search_param) + req_list.append(req) + +hybrid_res = hello_milvus.hybrid_search(req_list, WeightedRanker(*weights), default_limit, output_fields=["random"]) + +print("rank by WightedRanker") +for hits in hybrid_res: + for hit in hits: + print(f" hybrid search hit: {hit}") + +print("rank by RRFRanker") +hybrid_res = hello_milvus.hybrid_search(req_list, RRFRanker(), default_limit, output_fields=["random"]) +for hits in hybrid_res: + for hit in hits: + print(f" hybrid search hit: {hit}") diff --git a/examples/milvus_client/index.py b/examples/index.py similarity index 100% rename from examples/milvus_client/index.py rename to examples/index.py diff --git a/examples/milvus_client/index_params.py b/examples/index_params.py similarity index 100% rename from examples/milvus_client/index_params.py rename to examples/index_params.py diff --git a/examples/milvus_client/hybrid_search.py b/examples/milvus_client/hybrid_search.py deleted file mode 100644 index 28ae0b309..000000000 --- a/examples/milvus_client/hybrid_search.py +++ /dev/null @@ -1,75 +0,0 @@ -import numpy as np -from pymilvus import ( - MilvusClient, - DataType, - AnnSearchRequest, RRFRanker, WeightedRanker, -) - -fmt = "\n=== {:30} ===\n" -search_latency_fmt = "search latency = {:.4f}s" -num_entities, dim = 3000, 8 - -collection_name = "hello_milvus" -milvus_client = MilvusClient("http://localhost:19530") - -has_collection = milvus_client.has_collection(collection_name, timeout=5) -if has_collection: - milvus_client.drop_collection(collection_name) - -schema = milvus_client.create_schema(auto_id=False, description="hello_milvus is the simplest demo to introduce the APIs") -schema.add_field("pk", DataType.VARCHAR, is_primary=True, max_length=100) -schema.add_field("random", DataType.DOUBLE) -schema.add_field("embeddings", DataType.FLOAT_VECTOR, dim=dim) -schema.add_field("embeddings2", DataType.FLOAT_VECTOR, dim=dim) - -index_params = milvus_client.prepare_index_params() -index_params.add_index(field_name = "embeddings", index_type = "IVF_FLAT", metric_type="L2", nlist=128) -index_params.add_index(field_name = "embeddings2",index_type = "IVF_FLAT", metric_type="L2", nlist=128) - -print(fmt.format("Create collection `hello_milvus`")) - -milvus_client.create_collection(collection_name, schema=schema, index_params=index_params, consistency_level="Strong") - -print(fmt.format("Start inserting entities")) -rng = np.random.default_rng(seed=19530) -entities = [ - # provide the pk field because `auto_id` is set to False - [str(i) for i in range(num_entities)], - rng.random(num_entities).tolist(), # field random, only supports list - rng.random((num_entities, dim)), # field embeddings, supports numpy.ndarray and list - rng.random((num_entities, dim)), # field embeddings2, supports numpy.ndarray and list -] - -rows = [ {"pk": entities[0][i], "random": entities[1][i], "embeddings": entities[2][i], "embeddings2": entities[3][i]} for i in range (num_entities)] - -insert_result = milvus_client.insert(collection_name, rows) - - -print(fmt.format("Start loading")) -milvus_client.load_collection(collection_name) - -field_names = ["embeddings", "embeddings2"] -field_names = ["embeddings"] - -req_list = [] -nq = 1 -default_limit = 5 -vectors_to_search = [] - -for i in range(len(field_names)): - # 4. generate search data - vectors_to_search = rng.random((nq, dim)) - search_param = { - "data": vectors_to_search, - "anns_field": field_names[i], - "param": {"metric_type": "L2"}, - "limit": default_limit, - "expr": "random > 0.5"} - req = AnnSearchRequest(**search_param) - req_list.append(req) - -print("rank by RRFRanker") -hybrid_res = milvus_client.hybrid_search(collection_name, req_list, RRFRanker(), default_limit, output_fields=["random"]) -for hits in hybrid_res: - for hit in hits: - print(f" hybrid search hit: {hit}") diff --git a/examples/milvus_client/partition.py b/examples/milvus_client/partition.py deleted file mode 100644 index 7466c034a..000000000 --- a/examples/milvus_client/partition.py +++ /dev/null @@ -1,85 +0,0 @@ -import time -import numpy as np -from pymilvus import ( - MilvusClient, -) - -fmt = "\n=== {:30} ===\n" -dim = 8 -collection_name = "hello_milvus" -milvus_client = MilvusClient("http://localhost:19530") - -has_collection = milvus_client.has_collection(collection_name, timeout=5) -if has_collection: - milvus_client.drop_collection(collection_name) -milvus_client.create_collection(collection_name, dim, consistency_level="Strong", metric_type="L2") - -print(fmt.format(" all collections ")) -print(milvus_client.list_collections()) - -print(fmt.format(f"schema of collection {collection_name}")) -print(milvus_client.describe_collection(collection_name)) - -rng = np.random.default_rng(seed=19530) - -milvus_client.create_partition(collection_name, partition_name = "p1") -milvus_client.insert(collection_name, {"id": 1, "vector": rng.random((1, dim))[0], "a": 100}, partition_name = "p1") -milvus_client.insert(collection_name, {"id": 2, "vector": rng.random((1, dim))[0], "b": 200}, partition_name = "p1") -milvus_client.insert(collection_name, {"id": 3, "vector": rng.random((1, dim))[0], "c": 300}, partition_name = "p1") - -milvus_client.create_partition(collection_name, partition_name = "p2") -milvus_client.insert(collection_name, {"id": 4, "vector": rng.random((1, dim))[0], "e": 400}, partition_name = "p2") -milvus_client.insert(collection_name, {"id": 5, "vector": rng.random((1, dim))[0], "f": 500}, partition_name = "p2") -milvus_client.insert(collection_name, {"id": 6, "vector": rng.random((1, dim))[0], "g": 600}, partition_name = "p2") - -has_p1 = milvus_client.has_partition(collection_name, "p1") -print("has partition p1", has_p1) - -has_p3 = milvus_client.has_partition(collection_name, "p3") -print("has partition p3", has_p3) - -partitions = milvus_client.list_partitions(collection_name) -print("partitions:", partitions) - -milvus_client.release_collection(collection_name) -milvus_client.load_partitions(collection_name, partition_names =["p1", "p2"]) - -print(fmt.format("Start search in partiton p1")) -vectors_to_search = rng.random((1, dim)) -result = milvus_client.search(collection_name, vectors_to_search, limit=3, output_fields=["pk", "a", "b"], partition_names = ["p1"]) -for hits in result: - for hit in hits: - print(f"hit: {hit}") - -milvus_client.release_partitions(collection_name, partition_names = ["p1"]) -milvus_client.drop_partition(collection_name, partition_name = "p1", timeout = 2.0) -print("successfully drop partition p1") - -try: - milvus_client.drop_partition(collection_name, partition_name = "p2", timeout = 2.0) -except Exception as e: - print(f"cacthed {e}") - -has_p1 = milvus_client.has_partition(collection_name, "p1") -print("has partition of p1:", has_p1) - -print(fmt.format("Start query by specifying primary keys")) -query_results = milvus_client.query(collection_name, ids=[2]) -assert len(query_results) == 0 - -print(fmt.format("Start query by specifying primary keys")) -query_results = milvus_client.query(collection_name, ids=[4]) -print(query_results[0]) - -print(fmt.format("Start query by specifying filtering expression")) -query_results = milvus_client.query(collection_name, filter= "f == 500") -for ret in query_results: - print(ret) - -print(fmt.format(f"Start search with retrieve serveral fields.")) -result = milvus_client.search(collection_name, vectors_to_search, limit=3, output_fields=["pk", "a", "b"]) -for hits in result: - for hit in hits: - print(f"hit: {hit}") - -milvus_client.drop_collection(collection_name) diff --git a/examples/hello_model.py b/examples/milvus_model/hello_model.py similarity index 100% rename from examples/hello_model.py rename to examples/milvus_model/hello_model.py diff --git a/examples/milvus_client/non_ascii_encode.py b/examples/non_ascii_encode.py similarity index 100% rename from examples/milvus_client/non_ascii_encode.py rename to examples/non_ascii_encode.py diff --git a/examples/old_style_example.py b/examples/old_style_example.py deleted file mode 100644 index 205bde80c..000000000 --- a/examples/old_style_example.py +++ /dev/null @@ -1,152 +0,0 @@ -import random - -from pymilvus import Milvus, DataType - -# This example shows how to: -# 1. connect to Milvus server -# 2. create a collection -# 3. insert entities -# 4. create index -# 5. search - -_HOST = '127.0.0.1' -_PORT = '19530' - -# Const names -_COLLECTION_NAME = 'demo' -_ID_FIELD_NAME = 'id_field' -_VECTOR_FIELD_NAME = 'float_vector_field' - -# Vector parameters -_DIM = 128 -_INDEX_FILE_SIZE = 32 # max file size of stored index - -# Index parameters -_METRIC_TYPE = 'L2' -_INDEX_TYPE = 'IVF_FLAT' -_NLIST = 1024 -_NPROBE = 16 -_TOPK = 10 - -# Create milvus client instance -milvus = Milvus(_HOST, _PORT) - - -def create_collection(name): - id_field = { - "name": _ID_FIELD_NAME, - "type": DataType.INT64, - "auto_id": True, - "is_primary": True, - } - vector_field = { - "name": _VECTOR_FIELD_NAME, - "type": DataType.FLOAT_VECTOR, - "params": {"dim": _DIM}, - } - fields = {"fields": [id_field, vector_field]} - - milvus.create_collection(collection_name=name, fields=fields) - print("collection created:", name) - - -def drop_collection(name): - if milvus.has_collection(name): - milvus.drop_collection(name) - print("collection dropped:", name) - - -def list_collections(): - collections = milvus.list_collections() - print("list collection:") - print(collections) - - -def get_collection_stats(name): - stats = milvus.get_collection_stats(name) - print("collection stats:") - print(stats) - - -def insert(name, num, dim): - vectors = [[random.random() for _ in range(dim)] for _ in range(num)] - entities = [ - {"name": _VECTOR_FIELD_NAME, "type": DataType.FLOAT_VECTOR, "values": vectors}, - ] - ids = milvus.insert(name, entities) - return ids, vectors - - -def flush(name): - milvus.flush([name]) - - -def create_index(name, field_name): - index_param = { - "metric_type": _METRIC_TYPE, - "index_type": _INDEX_TYPE, - "params": {"nlist": _NLIST} - } - milvus.create_index(name, field_name, index_param) - print("Create index: {}".format(index_param)) - - -def drop_index(name, field_name): - milvus.drop_index(name, field_name) - print("Drop index:", field_name) - - -def load_collection(name): - milvus.load_collection(name) - - -def release_collection(name): - milvus.release_collection(name) - - -def search(name, vector_field, search_vectors, ids): - nq = len(search_vectors) - search_params = {"metric_type": _METRIC_TYPE, "params": {"nprobe": _NPROBE}} - results = milvus.search(name, search_vectors, vector_field, param=search_params, limit=_TOPK) - for i in range(nq): - if results[i][0].distance == 0.0 or results[i][0].id == ids[0]: - print("OK! search results: ", results[i][0].entity) - else: - print("FAIL! search results: ", results[i][0].entity) - - -def main(): - name = _COLLECTION_NAME - vector_field = _VECTOR_FIELD_NAME - - drop_collection(name) - create_collection(name) - - # show collections - list_collections() - - # generate 10000 vectors with 128 dimension - ids, vectors = insert(name, 10000, _DIM) - - # flush - flush(name) - - # show row_count - get_collection_stats(name) - - # create index - create_index(name, vector_field) - - # load - load_collection(name) - - # search - search(name, vector_field, vectors[:10], ids) - - drop_index(name, vector_field) - release_collection(name) - drop_collection(name) - - -if __name__ == '__main__': - main() diff --git a/examples/old_style_example_index.py b/examples/old_style_example_index.py deleted file mode 100644 index 3ca3487c6..000000000 --- a/examples/old_style_example_index.py +++ /dev/null @@ -1,155 +0,0 @@ -""" -This is an example of creating index - -We will be using films.csv file, You can obtain it from here -(https://raw.githubusercontent.com/milvus-io/pymilvus/0.3.0/examples/films.csv) -There are 4 coloumns in films.csv, they are `id`, `title`, `release_year` and `embedding`, all -the data are from MovieLens `ml-latest-small` data except id and embedding, those two columns are fake values. - -We will be using `films.csv` to demenstrate how can we build index and search by index on Milvus. -We assuming you have read `example.py` and have a basic conception about how to communicate with Milvus using -pymilvus - -This example is runable for Milvus(0.11.x) and pymilvus(0.3.x). -""" -import csv -import random -from pprint import pprint - -from pymilvus import Milvus, DataType - -_HOST = '127.0.0.1' -_PORT = '19530' -client = Milvus(_HOST, _PORT) - -collection_name = 'demo_index' -if collection_name in client.list_collections(): - client.drop_collection(collection_name) - -collection_param = { - "fields": [ - {"name": "id", "type": DataType.INT64, "is_primary": True}, - {"name": "release_year", "type": DataType.INT64}, - {"name": "embedding", "type": DataType.FLOAT_VECTOR, "params": {"dim": 8}}, - ], -} - -client.create_collection(collection_name, collection_param) - -# ------ -# Basic create index: -# Now that we have a collection in Milvus with `segment_row_limit` 4096, we can create index or -# insert entities. -# -# We can execute `create_index` BEFORE we insert any entites or AFTER. However Milvus won't actually -# start build index task if the segment row count is smaller than `segment_row_limit`. So if we want -# to make Milvus build index right away, we need to insert number of entities larger than -# `segment_row_limit` -# -# We are going to use data in `films.csv` so you can checkout the structure. And we need to group -# data with same fields together, so here is a example of how we obtain the data in files and transfer -# them into what we need. -# ------ - -ids = [] # ids -titles = [] # titles -release_years = [] # release year -embeddings = [] # embeddings -films = [] -with open('films.csv', 'r') as csvfile: - reader = csv.reader(csvfile) - films = [film for film in reader] - -for film in films: - ids.append(int(film[0])) - titles.append(film[1]) - release_years.append(int(film[2])) - embeddings.append(list(map(float, film[3][1:][:-1].split(',')))) - -hybrid_entities = [ - {"name": "id", "values": ids, "type": DataType.INT64}, - {"name": "release_year", "values": release_years, "type": DataType.INT64}, - {"name": "embedding", "values": embeddings, "type": DataType.FLOAT_VECTOR}, -] - -# ------ -# Basic insert: -# After preparing the data, we are going to insert them into our collection. -# The number of films inserted should be 8657. -# ------ -ids = client.insert(collection_name, hybrid_entities) - -client.flush([collection_name]) -after_flush_counts = client.get_collection_stats(collection_name) -print(" > There are {} films in collection `{}` after flush".format(after_flush_counts, collection_name)) - -# ------ -# Basic create index: -# Now that we have insert all the films into Milvus, we are going to build index with these datas. -# -# While build index, we have to indicate which `field` to build index for, the `index_type`, -# `metric_type` and params for the specific index type. In our case, we want to build a `IVF_FLAT` -# index, so the specific params are "nlist". See pymilvus documentation -# (https://milvus-io.github.io/milvus-sdk-python/pythondoc/v0.3.0/index.html) for `index_type` we -# support and the params accordingly -# -# If there are already index for a collection and you run `create_index` with different params the -# older index will be replaced by new one. -# ------ -client.create_index(collection_name, "embedding", - {"index_type": "IVF_FLAT", "metric_type": "L2", "params": {"nlist": 100}}) - -# ------ -# Basic create index: -# We can get the detail of the index by `describe_collection` -# ------ -info = client.describe_collection(collection_name) -pprint(info) - -# ------ -# Basic load collection: -# Before search, we need to load collection data into memory. -# ------ -client.load_collection(collection_name) - -# ------ -# Basic search with expressions: -# If we want to use index, the specific index params need to be provided, in our case, the "params" -# should be "nprobe", if no "params" given, Milvus will complain about it and raise a exception. -# ------ -embedding2search = [[random.random() for _ in range(8)] for _ in range(1)] -search_param = { - "data": embedding2search, - "anns_field": "embedding", - "param": {"metric_type": "L2", "params": {"nprobe": 8}}, - "limit": 3, - "output_fields": ["release_year"], - "expression": "release_year in [1995, 2002]", -} - -# ------ -# Basic hybrid search entities -# ------ -results = client.search(collection_name, **search_param) -for entities in results: - for topk_film in entities: - current_entity = topk_film.entity - print("==") - print(f"- id: {topk_film.id}") - print(f"- title: {titles[topk_film.id]}") - print(f"- distance: {topk_film.distance}") - print(f"- release_year: {current_entity.release_year}") - -# ------ -# Basic delete index: -# You can drop index we create -# ------ -client.drop_index(collection_name, "embedding") - -if collection_name in client.list_collections(): - client.drop_collection(collection_name) - -# ------ -# Summary: -# Now we've went through some basic build index operations, hope it's helpful! -# ------ diff --git a/examples/old_style_example_str.py b/examples/old_style_example_str.py deleted file mode 100644 index 83ec354ae..000000000 --- a/examples/old_style_example_str.py +++ /dev/null @@ -1,163 +0,0 @@ -import random - -from pymilvus import Milvus, DataType - -# This example shows how to: -# 1. connect to Milvus server -# 2. create a collection -# 3. insert entities -# 4. create index -# 5. search - -_HOST = '127.0.0.1' -_PORT = '19530' - -# Const names -_COLLECTION_NAME = 'demo' -_ID_FIELD_NAME = 'id_field' -_VECTOR_FIELD_NAME = 'float_vector_field' - -# Vector parameters -_DIM = 128 -_INDEX_FILE_SIZE = 32 # max file size of stored index - -# Index parameters -_METRIC_TYPE = 'L2' -_INDEX_TYPE = 'IVF_FLAT' -_NLIST = 1024 -_NPROBE = 16 -_TOPK = 10 - -# string -_STR_FIELD_NAME = "str" -_MAX_LEN = 1024 - -# Create milvus client instance -milvus = Milvus(_HOST, _PORT) - - -def create_collection(name): - id_field = { - "name": _ID_FIELD_NAME, - "type": DataType.INT64, - "auto_id": True, - "is_primary": True, - } - vector_field = { - "name": _VECTOR_FIELD_NAME, - "type": DataType.FLOAT_VECTOR, - "params": {"dim": _DIM}, - } - str_field = { - "name": _STR_FIELD_NAME, - "type": DataType.VARCHAR, - "params": {"max_length": _MAX_LEN}, - } - fields = {"fields": [id_field, vector_field, str_field]} - - milvus.create_collection(collection_name=name, fields=fields) - print("collection created:", name) - - -def drop_collection(name): - if milvus.has_collection(name): - milvus.drop_collection(name) - print("collection dropped:", name) - - -def list_collections(): - collections = milvus.list_collections() - print("list collection:") - print(collections) - - -def get_collection_stats(name): - stats = milvus.get_collection_stats(name) - print("collection stats:") - print(stats) - - -def insert(name, num, dim): - vectors = [[random.random() for _ in range(dim)] for _ in range(num)] - strs = [str(random.random()) for _ in range(num)] - entities = [ - {"name": _VECTOR_FIELD_NAME, "type": DataType.FLOAT_VECTOR, "values": vectors}, - {"name": _STR_FIELD_NAME, "type": DataType.VARCHAR, "values": strs}, - ] - ids = milvus.insert(name, entities) - return ids, vectors - - -def flush(name): - milvus.flush([name]) - - -def create_index(name, field_name): - index_param = { - "metric_type": _METRIC_TYPE, - "index_type": _INDEX_TYPE, - "params": {"nlist": _NLIST} - } - milvus.create_index(name, field_name, index_param) - print("Create index: {}".format(index_param)) - - -def drop_index(name, field_name): - milvus.drop_index(name, field_name) - print("Drop index:", field_name) - - -def load_collection(name): - milvus.load_collection(name) - - -def release_collection(name): - milvus.release_collection(name) - - -def search(name, vector_field, search_vectors, ids): - nq = len(search_vectors) - search_params = {"metric_type": _METRIC_TYPE, "params": {"nprobe": _NPROBE}} - results = milvus.search(name, search_vectors, vector_field, param=search_params, limit=_TOPK) - for i in range(nq): - if results[i][0].distance == 0.0 or results[i][0].id == ids[0]: - print("OK! search results: ", results[i][0].entity) - else: - print("FAIL! search results: ", results[i][0].entity) - - -def main(): - name = _COLLECTION_NAME - vector_field = _VECTOR_FIELD_NAME - - drop_collection(name) - create_collection(name) - - # show collections - list_collections() - - # generate 10000 vectors with 128 dimension - ids, vectors = insert(name, 10000, _DIM) - - # flush - flush(name) - - # show row_count - get_collection_stats(name) - - # create index - create_index(name, vector_field) - - # load - load_collection(name) - - # search - search(name, vector_field, vectors[:10], ids) - - drop_index(name, vector_field) - release_collection(name) - drop_collection(name) - - -if __name__ == '__main__': - main() diff --git a/examples/old_style_query.py b/examples/old_style_query.py deleted file mode 100644 index 0a031cd9f..000000000 --- a/examples/old_style_query.py +++ /dev/null @@ -1,62 +0,0 @@ -import random - -from pymilvus import Milvus, DataType - -if __name__ == "__main__": - c = Milvus("localhost", "19530") - - collection_name = f"test_{random.randint(10000, 99999)}" - - c.create_collection(collection_name, {"fields": [ - { - "name": "f1", - "type": DataType.FLOAT_VECTOR, - "metric_type": "L2", - "params": {"dim": 4}, - "indexes": [{"metric_type": "L2"}] - }, - { - "name": "age", - "type": DataType.FLOAT, - }, - { - "name": "id", - "type": DataType.INT64, - "auto_id": True, - "is_primary": True, - } - ], - }, orm=True) - - assert c.has_collection(collection_name) - - ids = c.insert(collection_name, [ - {"name": "f1", "type": DataType.FLOAT_VECTOR, "values": [[1.1, 2.2, 3.3, 4.4], [5.5, 6.6, 7.7, 8.8]]}, - {"name": "age", "type": DataType.FLOAT, "values": [3.45, 8.9]} - ], orm=True) - - c.flush([collection_name]) - - c.load_collection(collection_name) - - ############################################################# - search_params = {"metric_type": "L2", "params": {"nprobe": 1}} - - results = c.search(collection_name, [[1.1, 2.2, 3.3, 4.4]], - "f1", param=search_params, limit=2, output_fields=["id"]) - - print("search results: ", results[0][0].entity, " + ", results[0][1].entity) - # - # print("Test entity.get: ", results[0][0].entity.get("age")) - # print("Test entity.value_of_field: ", results[0][0].entity.value_of_field("age")) - # print("Test entity.fields: ", results[0][0].entity.fields) - ############################################################# - - ids_expr = ",".join(str(x) for x in ids.primary_keys) - - expr = "id in [ " + ids_expr + " ] " - - print(expr) - - res = c.query(collection_name, expr) - print(res) diff --git a/examples/collection.py b/examples/orm/collection.py similarity index 100% rename from examples/collection.py rename to examples/orm/collection.py diff --git a/examples/database.py b/examples/orm/database.py similarity index 100% rename from examples/database.py rename to examples/orm/database.py diff --git a/examples/example.py b/examples/orm/example.py similarity index 100% rename from examples/example.py rename to examples/orm/example.py diff --git a/examples/example_group_by.py b/examples/orm/example_group_by.py similarity index 100% rename from examples/example_group_by.py rename to examples/orm/example_group_by.py diff --git a/examples/example_index.py b/examples/orm/example_index.py similarity index 100% rename from examples/example_index.py rename to examples/orm/example_index.py diff --git a/examples/hello_cost.py b/examples/orm/hello_cost.py similarity index 100% rename from examples/hello_cost.py rename to examples/orm/hello_cost.py diff --git a/examples/hello_milvus.ipynb b/examples/orm/hello_milvus.ipynb similarity index 100% rename from examples/hello_milvus.ipynb rename to examples/orm/hello_milvus.ipynb diff --git a/examples/hello_milvus.py b/examples/orm/hello_milvus.py similarity index 100% rename from examples/hello_milvus.py rename to examples/orm/hello_milvus.py diff --git a/examples/hello_milvus_delete.py b/examples/orm/hello_milvus_delete.py similarity index 100% rename from examples/hello_milvus_delete.py rename to examples/orm/hello_milvus_delete.py diff --git a/examples/inverted_index_example.py b/examples/orm/inverted_index_example.py similarity index 100% rename from examples/inverted_index_example.py rename to examples/orm/inverted_index_example.py diff --git a/examples/iterator.py b/examples/orm/iterator.py similarity index 100% rename from examples/iterator.py rename to examples/orm/iterator.py diff --git a/examples/orm/partition.py b/examples/orm/partition.py new file mode 100644 index 000000000..3c6bb30ce --- /dev/null +++ b/examples/orm/partition.py @@ -0,0 +1,148 @@ +# Copyright (C) 2019-2020 Zilliz. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under the License. + +from pymilvus import ( + connections, list_collections, has_partition, + FieldSchema, CollectionSchema, DataType, + Collection, Partition +) + +import random +import string + +default_dim = 128 +default_nb = 3000 +default_float_vec_field_name = "float_vector" +default_segment_row_limit = 1000 + + +all_index_types = [ + "FLAT", + "IVF_FLAT", + "IVF_SQ8", + # "IVF_SQ8_HYBRID", + "IVF_PQ", + "HNSW", + # "NSG", + "ANNOY", + "RHNSW_FLAT", + "RHNSW_PQ", + "RHNSW_SQ", + "BIN_FLAT", + "BIN_IVF_FLAT" +] + +default_index_params = [ + {"nlist": 128}, + {"nlist": 128}, + {"nlist": 128}, + # {"nlist": 128}, + {"nlist": 128, "m": 16, "nbits": 8}, + {"M": 48, "efConstruction": 500}, + # {"search_length": 50, "out_degree": 40, "candidate_pool_size": 100, "knng": 50}, + {"n_trees": 50}, + {"M": 48, "efConstruction": 500}, + {"M": 48, "efConstruction": 500, "PQM": 64}, + {"M": 48, "efConstruction": 500}, + {"nlist": 128}, + {"nlist": 128} +] + + +default_index = {"index_type": "IVF_FLAT", "params": {"nlist": 128}, "metric_type": "L2"} + + +def gen_default_fields(auto_id=True): + default_fields = [ + FieldSchema(name="count", dtype=DataType.INT64, is_primary=True), + FieldSchema(name="float", dtype=DataType.FLOAT), + FieldSchema(name=default_float_vec_field_name, dtype=DataType.FLOAT_VECTOR, dim=default_dim) + ] + default_schema = CollectionSchema(fields=default_fields, description="test collection", + segment_row_limit=default_segment_row_limit, auto_id=False) + return default_schema + + +def gen_data(nb): + entities = [ + [i for i in range(nb)], + [float(i) for i in range(nb)], + [[random.random() for _ in range(dim)] for _ in range(num)], + ] + return entities + + +def gen_unique_str(str_value=None): + prefix = "".join(random.choice(string.ascii_letters + string.digits) for _ in range(8)) + return "collection_" + prefix if str_value is None else str_value + "_" + prefix + + +def binary_support(): + return ["BIN_FLAT", "BIN_IVF_FLAT"] + + +def gen_simple_index(): + index_params = [] + for i in range(len(all_index_types)): + if all_index_types[i] in binary_support(): + continue + dic = {"index_type": all_index_types[i], "metric_type": "L2"} + dic.update({"params": default_index_params[i]}) + index_params.append(dic) + return index_params + + +def test_partition(): + connections.connect(alias="default") + print("create collection") + collection = Collection(name=gen_unique_str(), schema=gen_default_fields()) + print("create partition") + partition = Partition(collection, name=gen_unique_str()) + print(list_collections()) + assert has_partition(collection.name, partition.name) is True + + data = gen_data(default_nb) + print("insert data to partition") + res = partition.insert(data) + collection.flush() + print(res.insert_count) + assert partition.is_empty is False + assert partition.num_entities == default_nb + + print("start to create index") + index = { + "index_type": "IVF_FLAT", + "metric_type": "L2", + "params": {"nlist": 128}, + } + collection.create_index(default_float_vec_field_name, index) + + print("load partition") + partition.load() + topK = 5 + round_decimal = 3 + search_params = {"metric_type": "L2", "params": {"nprobe": 10}} + print("search partition") + res = partition.search(data[2][-2:], "float_vector", search_params, topK, "count > 100", round_decimal=round_decimal) + for hits in res: + for hit in hits: + print(hit) + + print("release partition") + partition.release() + print("drop partition") + partition.drop() + print("drop collection") + collection.drop() + + +if __name__ == "__main__": + test_partition() diff --git a/examples/resource_group.py b/examples/orm/resource_group.py similarity index 100% rename from examples/resource_group.py rename to examples/orm/resource_group.py diff --git a/examples/resource_group_declarative_api.py b/examples/orm/resource_group_declarative_api.py similarity index 100% rename from examples/resource_group_declarative_api.py rename to examples/orm/resource_group_declarative_api.py diff --git a/examples/role_and_privilege.py b/examples/orm/role_and_privilege.py similarity index 100% rename from examples/role_and_privilege.py rename to examples/orm/role_and_privilege.py diff --git a/examples/search_with_template_expression.py b/examples/orm/search_with_template_expression.py similarity index 100% rename from examples/search_with_template_expression.py rename to examples/orm/search_with_template_expression.py diff --git a/examples/user.py b/examples/orm/user.py similarity index 100% rename from examples/user.py rename to examples/orm/user.py diff --git a/examples/partition.py b/examples/partition.py index 3c6bb30ce..7466c034a 100644 --- a/examples/partition.py +++ b/examples/partition.py @@ -1,148 +1,85 @@ -# Copyright (C) 2019-2020 Zilliz. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under the License. - +import time +import numpy as np from pymilvus import ( - connections, list_collections, has_partition, - FieldSchema, CollectionSchema, DataType, - Collection, Partition + MilvusClient, ) -import random -import string - -default_dim = 128 -default_nb = 3000 -default_float_vec_field_name = "float_vector" -default_segment_row_limit = 1000 - - -all_index_types = [ - "FLAT", - "IVF_FLAT", - "IVF_SQ8", - # "IVF_SQ8_HYBRID", - "IVF_PQ", - "HNSW", - # "NSG", - "ANNOY", - "RHNSW_FLAT", - "RHNSW_PQ", - "RHNSW_SQ", - "BIN_FLAT", - "BIN_IVF_FLAT" -] - -default_index_params = [ - {"nlist": 128}, - {"nlist": 128}, - {"nlist": 128}, - # {"nlist": 128}, - {"nlist": 128, "m": 16, "nbits": 8}, - {"M": 48, "efConstruction": 500}, - # {"search_length": 50, "out_degree": 40, "candidate_pool_size": 100, "knng": 50}, - {"n_trees": 50}, - {"M": 48, "efConstruction": 500}, - {"M": 48, "efConstruction": 500, "PQM": 64}, - {"M": 48, "efConstruction": 500}, - {"nlist": 128}, - {"nlist": 128} -] - - -default_index = {"index_type": "IVF_FLAT", "params": {"nlist": 128}, "metric_type": "L2"} - - -def gen_default_fields(auto_id=True): - default_fields = [ - FieldSchema(name="count", dtype=DataType.INT64, is_primary=True), - FieldSchema(name="float", dtype=DataType.FLOAT), - FieldSchema(name=default_float_vec_field_name, dtype=DataType.FLOAT_VECTOR, dim=default_dim) - ] - default_schema = CollectionSchema(fields=default_fields, description="test collection", - segment_row_limit=default_segment_row_limit, auto_id=False) - return default_schema - - -def gen_data(nb): - entities = [ - [i for i in range(nb)], - [float(i) for i in range(nb)], - [[random.random() for _ in range(dim)] for _ in range(num)], - ] - return entities - - -def gen_unique_str(str_value=None): - prefix = "".join(random.choice(string.ascii_letters + string.digits) for _ in range(8)) - return "collection_" + prefix if str_value is None else str_value + "_" + prefix - - -def binary_support(): - return ["BIN_FLAT", "BIN_IVF_FLAT"] - - -def gen_simple_index(): - index_params = [] - for i in range(len(all_index_types)): - if all_index_types[i] in binary_support(): - continue - dic = {"index_type": all_index_types[i], "metric_type": "L2"} - dic.update({"params": default_index_params[i]}) - index_params.append(dic) - return index_params - - -def test_partition(): - connections.connect(alias="default") - print("create collection") - collection = Collection(name=gen_unique_str(), schema=gen_default_fields()) - print("create partition") - partition = Partition(collection, name=gen_unique_str()) - print(list_collections()) - assert has_partition(collection.name, partition.name) is True - - data = gen_data(default_nb) - print("insert data to partition") - res = partition.insert(data) - collection.flush() - print(res.insert_count) - assert partition.is_empty is False - assert partition.num_entities == default_nb - - print("start to create index") - index = { - "index_type": "IVF_FLAT", - "metric_type": "L2", - "params": {"nlist": 128}, - } - collection.create_index(default_float_vec_field_name, index) - - print("load partition") - partition.load() - topK = 5 - round_decimal = 3 - search_params = {"metric_type": "L2", "params": {"nprobe": 10}} - print("search partition") - res = partition.search(data[2][-2:], "float_vector", search_params, topK, "count > 100", round_decimal=round_decimal) - for hits in res: - for hit in hits: - print(hit) - - print("release partition") - partition.release() - print("drop partition") - partition.drop() - print("drop collection") - collection.drop() - - -if __name__ == "__main__": - test_partition() +fmt = "\n=== {:30} ===\n" +dim = 8 +collection_name = "hello_milvus" +milvus_client = MilvusClient("http://localhost:19530") + +has_collection = milvus_client.has_collection(collection_name, timeout=5) +if has_collection: + milvus_client.drop_collection(collection_name) +milvus_client.create_collection(collection_name, dim, consistency_level="Strong", metric_type="L2") + +print(fmt.format(" all collections ")) +print(milvus_client.list_collections()) + +print(fmt.format(f"schema of collection {collection_name}")) +print(milvus_client.describe_collection(collection_name)) + +rng = np.random.default_rng(seed=19530) + +milvus_client.create_partition(collection_name, partition_name = "p1") +milvus_client.insert(collection_name, {"id": 1, "vector": rng.random((1, dim))[0], "a": 100}, partition_name = "p1") +milvus_client.insert(collection_name, {"id": 2, "vector": rng.random((1, dim))[0], "b": 200}, partition_name = "p1") +milvus_client.insert(collection_name, {"id": 3, "vector": rng.random((1, dim))[0], "c": 300}, partition_name = "p1") + +milvus_client.create_partition(collection_name, partition_name = "p2") +milvus_client.insert(collection_name, {"id": 4, "vector": rng.random((1, dim))[0], "e": 400}, partition_name = "p2") +milvus_client.insert(collection_name, {"id": 5, "vector": rng.random((1, dim))[0], "f": 500}, partition_name = "p2") +milvus_client.insert(collection_name, {"id": 6, "vector": rng.random((1, dim))[0], "g": 600}, partition_name = "p2") + +has_p1 = milvus_client.has_partition(collection_name, "p1") +print("has partition p1", has_p1) + +has_p3 = milvus_client.has_partition(collection_name, "p3") +print("has partition p3", has_p3) + +partitions = milvus_client.list_partitions(collection_name) +print("partitions:", partitions) + +milvus_client.release_collection(collection_name) +milvus_client.load_partitions(collection_name, partition_names =["p1", "p2"]) + +print(fmt.format("Start search in partiton p1")) +vectors_to_search = rng.random((1, dim)) +result = milvus_client.search(collection_name, vectors_to_search, limit=3, output_fields=["pk", "a", "b"], partition_names = ["p1"]) +for hits in result: + for hit in hits: + print(f"hit: {hit}") + +milvus_client.release_partitions(collection_name, partition_names = ["p1"]) +milvus_client.drop_partition(collection_name, partition_name = "p1", timeout = 2.0) +print("successfully drop partition p1") + +try: + milvus_client.drop_partition(collection_name, partition_name = "p2", timeout = 2.0) +except Exception as e: + print(f"cacthed {e}") + +has_p1 = milvus_client.has_partition(collection_name, "p1") +print("has partition of p1:", has_p1) + +print(fmt.format("Start query by specifying primary keys")) +query_results = milvus_client.query(collection_name, ids=[2]) +assert len(query_results) == 0 + +print(fmt.format("Start query by specifying primary keys")) +query_results = milvus_client.query(collection_name, ids=[4]) +print(query_results[0]) + +print(fmt.format("Start query by specifying filtering expression")) +query_results = milvus_client.query(collection_name, filter= "f == 500") +for ret in query_results: + print(ret) + +print(fmt.format(f"Start search with retrieve serveral fields.")) +result = milvus_client.search(collection_name, vectors_to_search, limit=3, output_fields=["pk", "a", "b"]) +for hits in result: + for hit in hits: + print(f"hit: {hit}") + +milvus_client.drop_collection(collection_name) diff --git a/examples/milvus_client/rbac.py b/examples/rbac.py similarity index 100% rename from examples/milvus_client/rbac.py rename to examples/rbac.py diff --git a/examples/milvus_client/simple.py b/examples/simple.py similarity index 100% rename from examples/milvus_client/simple.py rename to examples/simple.py diff --git a/examples/milvus_client/simple_auto_id.py b/examples/simple_auto_id.py similarity index 100% rename from examples/milvus_client/simple_auto_id.py rename to examples/simple_auto_id.py diff --git a/examples/milvus_client/simple_cost.py b/examples/simple_cost.py similarity index 100% rename from examples/milvus_client/simple_cost.py rename to examples/simple_cost.py diff --git a/examples/milvus_client/sparse.py b/examples/sparse.py similarity index 100% rename from examples/milvus_client/sparse.py rename to examples/sparse.py diff --git a/pymilvus/client/entity_helper.py b/pymilvus/client/entity_helper.py index 0fbdd295e..baa0c0713 100644 --- a/pymilvus/client/entity_helper.py +++ b/pymilvus/client/entity_helper.py @@ -228,7 +228,7 @@ def convert_to_array(obj: List[Any], field_info: Any): field_data.string_data.data.extend(obj) return field_data raise ParamError( - message=f"UnSupported element type: {element_type} for Array field: {field_info.get('name')}" + message=f"Unsupported element type: {element_type} for Array field: {field_info.get('name')}" ) @@ -394,7 +394,7 @@ def pack_field_value_to_field_data( % (field_name, "array", type(field_value)) ) from e else: - raise ParamError(message=f"UnSupported data type: {field_type}") + raise ParamError(message=f"Unsupported data type: {field_type}") # TODO: refactor here. @@ -520,7 +520,7 @@ def entity_to_field_data(entity: Any, field_info: Any): % (field_name, "sparse_float_vector", type(entity.get("values")[0])) ) from e else: - raise ParamError(message=f"UnSupported data type: {entity_type}") + raise ParamError(message=f"Unsupported data type: {entity_type}") return field_data diff --git a/pymilvus/client/grpc_handler.py b/pymilvus/client/grpc_handler.py index 6889c7562..821f9c325 100644 --- a/pymilvus/client/grpc_handler.py +++ b/pymilvus/client/grpc_handler.py @@ -535,7 +535,7 @@ def _prepare_batch_insert_request( if param and not isinstance(param, milvus_types.InsertRequest): raise ParamError(message="The value of key 'insert_param' is invalid") if not isinstance(entities, list): - raise ParamError(message="None entities, please provide valid entities.") + raise ParamError(message="'entities' must be a list, please provide valid entity data.") schema = kwargs.get("schema") if not schema: @@ -633,7 +633,7 @@ def _prepare_batch_upsert_request( if param and not isinstance(param, milvus_types.UpsertRequest): raise ParamError(message="The value of key 'upsert_param' is invalid") if not isinstance(entities, list): - raise ParamError(message="None entities, please provide valid entities.") + raise ParamError(message="'entities' must be a list, please provide valid entity data.") schema = kwargs.get("schema") if not schema: @@ -690,7 +690,7 @@ def _prepare_row_upsert_request( **kwargs, ): if not isinstance(rows, list): - raise ParamError(message="None rows, please provide valid row data.") + raise ParamError(message="'rows' must be a list, please provide valid row data.") fields_info, enable_dynamic = self._get_info(collection_name, timeout, **kwargs) return Prepare.row_upsert_param( diff --git a/pymilvus/client/types.py b/pymilvus/client/types.py index e3d2d53e3..68f07e569 100644 --- a/pymilvus/client/types.py +++ b/pymilvus/client/types.py @@ -243,6 +243,10 @@ def __init__( self.in_timeout = in_timeout self.completed = completed + @property + def state_name(self): + return self.state.name + def __repr__(self) -> str: return f""" CompactionState diff --git a/pymilvus/milvus_client/milvus_client.py b/pymilvus/milvus_client/milvus_client.py index a25d5c620..3be7a62d0 100644 --- a/pymilvus/milvus_client/milvus_client.py +++ b/pymilvus/milvus_client/milvus_client.py @@ -33,7 +33,7 @@ class MilvusClient: """The Milvus Client""" - # pylint: disable=logging-too-many-args, too-many-instance-attributes, import-outside-toplevel + # pylint: disable=logging-too-many-args, too-many-instance-attributes def __init__( self, @@ -951,9 +951,7 @@ def drop_role(self, role_name: str, timeout: Optional[float] = None, **kwargs): conn = self._get_connection() conn.drop_role(role_name, timeout=timeout, **kwargs) - def describe_role( - self, role_name: str, timeout: Optional[float] = None, **kwargs - ) -> List[Dict]: + def describe_role(self, role_name: str, timeout: Optional[float] = None, **kwargs) -> Dict: conn = self._get_connection() db_name = kwargs.pop("db_name", "") try: @@ -1047,6 +1045,94 @@ def list_databases(self, **kwargs) -> List[str]: conn = self._get_connection() return conn.list_database(**kwargs) + def flush( + self, + collection_name: str, + timeout: Optional[float] = None, + **kwargs, + ): + """Seal all segments in the collection. Inserts after flushing will be written into + new segments. + + Args: + collection_name(``string``): The name of collection. + timeout (float): an optional duration of time in seconds to allow for the RPCs. + If timeout is not set, the client keeps waiting until the server + responds or an error occurs. + + Raises: + MilvusException: If anything goes wrong. + """ + conn = self._get_connection() + conn.flush([collection_name], timeout=timeout, **kwargs) + + def compact( + self, + collection_name: str, + is_clustering: Optional[bool] = False, + timeout: Optional[float] = None, + **kwargs, + ) -> int: + """Compact merge the small segments in a collection + + Args: + timeout (``float``, optional): An optional duration of time in seconds to allow + for the RPC. When timeout is set to None, client waits until server response + or error occur. + is_clustering (``bool``, optional): Option to trigger clustering compaction. + + Raises: + MilvusException: If anything goes wrong. + + Returns: + int: An integer represents the server's compaction job. You can use this job ID + for subsequent state inquiries. + """ + conn = self._get_connection() + return conn.compact(collection_name, is_clustering=is_clustering, timeout=timeout, **kwargs) + + def get_compaction_state( + self, + job_id: int, + timeout: Optional[float] = None, + **kwargs, + ) -> str: + """Get the state of compaction job + + Args: + job_id (``int``): The ID of the compaction job. + timeout (``float``, optional): An optional duration of time in seconds to allow + for the RPC. When timeout is set to None, client waits until server response + or error occur. + Returns: + str: the state of this compaction job. Possible values are "UndefiedState", "Executing" + and "Completed". + """ + conn = self._get_connection() + result = conn.get_compaction_state(job_id, timeout=timeout, **kwargs) + return result.state_name + + def get_server_version( + self, + timeout: Optional[float] = None, + **kwargs, + ) -> str: + """Get the running server's version + + Args: + timeout (``float``, optional): A duration of time in seconds to allow for the RPC. + If timeout is set to None, the client keeps waiting until the server + responds or an error occurs. + + Returns: + str: A string represent the server's version. + + Raises: + MilvusException: If anything goes wrong + """ + conn = self._get_connection() + return conn.get_server_version(timeout=timeout, **kwargs) + def create_privilege_group( self, group_name: str, @@ -1060,7 +1146,6 @@ def create_privilege_group( timeout (``float``, optional): An optional duration of time in seconds to allow for the RPC. When timeout is set to None, client waits until server response or error occur. - Raises: MilvusException: If anything goes wrong. """ @@ -1077,10 +1162,6 @@ def drop_privilege_group( Args: group_name (``str``): The name of the privilege group. - timeout (``float``, optional): An optional duration of time in seconds to allow - for the RPC. When timeout is set to None, client waits until server response - or error occur. - Raises: MilvusException: If anything goes wrong. """ diff --git a/pymilvus/orm/connections.py b/pymilvus/orm/connections.py index 29a6c5511..2d0f1952d 100644 --- a/pymilvus/orm/connections.py +++ b/pymilvus/orm/connections.py @@ -363,6 +363,7 @@ def connect( "http", "https", "tcp", + "grpc", ]: # start and connect milvuslite if not kwargs["uri"].endswith(".db"): diff --git a/pymilvus/settings.py b/pymilvus/settings.py index 841df6e03..c4e745073 100644 --- a/pymilvus/settings.py +++ b/pymilvus/settings.py @@ -1,21 +1,18 @@ -import contextlib import logging.config +import os -import environs +from dotenv import load_dotenv -env = environs.Env() - -with contextlib.suppress(Exception): - env.read_env(".env") +load_dotenv() class Config: # legacy env MILVUS_DEFAULT_CONNECTION, not recommended - LEGACY_URI = env.str("MILVUS_DEFAULT_CONNECTION", "") - MILVUS_URI = env.str("MILVUS_URI", LEGACY_URI) + LEGACY_URI = str(os.getenv("MILVUS_DEFAULT_CONNECTION", "")) + MILVUS_URI = str(os.getenv("MILVUS_URI", LEGACY_URI)) - MILVUS_CONN_ALIAS = env.str("MILVUS_CONN_ALIAS", "default") - MILVUS_CONN_TIMEOUT = env.float("MILVUS_CONN_TIMEOUT", 10) + MILVUS_CONN_ALIAS = str(os.getenv("MILVUS_CONN_ALIAS", "default")) + MILVUS_CONN_TIMEOUT = float(os.getenv("MILVUS_CONN_TIMEOUT", "10.0")) # legacy configs: DEFAULT_USING = MILVUS_CONN_ALIAS diff --git a/pyproject.toml b/pyproject.toml index f99a10b05..291167985 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ dependencies=[ "setuptools<70.1;python_version<='3.8'", "grpcio>=1.49.1", "protobuf>=3.20.0", - "environs<=9.5.0", + "python-dotenv>=1.0.1, <2.0.0", "ujson>=2.0.0", "pandas>=1.2.4", "numpy<1.25.0;python_version<='3.8'", diff --git a/requirements.txt b/requirements.txt index f6277f28d..840ece254 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ build==0.4.0 -certifi==2023.7.22 +certifi==2024.7.4 chardet==4.0.0 -environs==9.5.0 +python-dotenv>=1.0.1, <2.0.0 grpcio==1.62.2 grpcio-testing==1.62.2 grpcio-tools==1.62.2 @@ -13,7 +13,7 @@ pyparsing==2.4.7 six==1.16.0 toml==0.10.2 ujson>=2.0.0 -urllib3==1.26.18 +urllib3==1.26.19 m2r==0.3.1 Sphinx==4.0.0 sphinx-copybutton