diff --git a/results/20240710/clean_results_clustering.json b/results/20240710/clean_results_clustering.json new file mode 100644 index 0000000..583893a --- /dev/null +++ b/results/20240710/clean_results_clustering.json @@ -0,0 +1,250 @@ +[ + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "tie", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718332422.6822 + }, + { + "model_a": "intfloat/multilingual-e5-small", + "model_b": "sentence-transformers/all-MiniLM-L6-v2", + "winner": "model_a", + "judge": "arena_user_", + "anony": false, + "tstamp": 1718333412.8882 + }, + { + "model_a": "intfloat/multilingual-e5-small", + "model_b": "sentence-transformers/all-MiniLM-L6-v2", + "winner": "model_b", + "judge": "arena_user_", + "anony": false, + "tstamp": 1718333421.926 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_a", + "judge": "arena_user_", + "anony": false, + "tstamp": 1718333506.3524 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_b", + "judge": "arena_user_", + "anony": false, + "tstamp": 1718333516.9516 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_b", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718380450.2734 + }, + { + "model_a": "intfloat/multilingual-e5-small", + "model_b": "sentence-transformers/all-MiniLM-L6-v2", + "winner": "model_a", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718380457.7335 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_b", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718380484.2097 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_a", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718398616.9362 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_a", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718398905.8906 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_a", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718399121.936 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_a", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718399477.4472 + }, + { + "model_a": "intfloat/multilingual-e5-small", + "model_b": "sentence-transformers/all-MiniLM-L6-v2", + "winner": "model_b", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718399580.2151 + }, + { + "model_a": "intfloat/multilingual-e5-small", + "model_b": "sentence-transformers/all-MiniLM-L6-v2", + "winner": "model_b", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718400198.2998 + }, + { + "model_a": "intfloat/multilingual-e5-small", + "model_b": "sentence-transformers/all-MiniLM-L6-v2", + "winner": "model_b", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718433375.9982 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_a", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718433458.6481 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_b", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718491384.3589 + }, + { + "model_a": "intfloat/multilingual-e5-small", + "model_b": "sentence-transformers/all-MiniLM-L6-v2", + "winner": "tie (bothbad)", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718509578.4313 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_b", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718509885.7196 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_b", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718509914.1989 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_a", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718573717.9521 + }, + { + "model_a": "intfloat/multilingual-e5-small", + "model_b": "sentence-transformers/all-MiniLM-L6-v2", + "winner": "model_a", + "judge": "arena_user_", + "anony": false, + "tstamp": 1718573914.4381 + }, + { + "model_a": "intfloat/multilingual-e5-small", + "model_b": "sentence-transformers/all-MiniLM-L6-v2", + "winner": "tie", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718575366.9167 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "tie (bothbad)", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718637515.9208 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "tie", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718639811.3453 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_a", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718660760.7331 + }, + { + "model_a": "intfloat/multilingual-e5-small", + "model_b": "sentence-transformers/all-MiniLM-L6-v2", + "winner": "model_b", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718901033.9403 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_a", + "judge": "arena_user_", + "anony": true, + "tstamp": 1719274013.9183 + }, + { + "model_a": "intfloat/multilingual-e5-small", + "model_b": "sentence-transformers/all-MiniLM-L6-v2", + "winner": "model_b", + "judge": "arena_user_", + "anony": true, + "tstamp": 1720120597.967 + }, + { + "model_a": "intfloat/e5-mistral-7b-instruct", + "model_b": "GritLM/GritLM-7B", + "winner": "model_b", + "judge": "arena_user_", + "anony": true, + "tstamp": 1720599005.0941 + }, + { + "model_a": "intfloat/e5-mistral-7b-instruct", + "model_b": "Salesforce/SFR-Embedding-2_R", + "winner": "model_b", + "judge": "arena_user_", + "anony": true, + "tstamp": 1720599059.1815 + } +] \ No newline at end of file diff --git a/results/20240710/clean_results_sts.json b/results/20240710/clean_results_sts.json new file mode 100644 index 0000000..bd27b11 --- /dev/null +++ b/results/20240710/clean_results_sts.json @@ -0,0 +1,210 @@ +[ + { + "model_a": "intfloat/multilingual-e5-small", + "model_b": "sentence-transformers/all-MiniLM-L6-v2", + "winner": "tie", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718332446.2263 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_a", + "judge": "arena_user_", + "anony": false, + "tstamp": 1718333443.4238 + }, + { + "model_a": "intfloat/multilingual-e5-small", + "model_b": "sentence-transformers/all-MiniLM-L6-v2", + "winner": "model_b", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718380491.8835 + }, + { + "model_a": "intfloat/multilingual-e5-small", + "model_b": "sentence-transformers/all-MiniLM-L6-v2", + "winner": "model_a", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718380498.8377 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_b", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718381397.9454 + }, + { + "model_a": "intfloat/multilingual-e5-small", + "model_b": "sentence-transformers/all-MiniLM-L6-v2", + "winner": "model_b", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718381406.661 + }, + { + "model_a": "intfloat/multilingual-e5-small", + "model_b": "sentence-transformers/all-MiniLM-L6-v2", + "winner": "model_a", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718381414.6276 + }, + { + "model_a": "intfloat/multilingual-e5-small", + "model_b": "sentence-transformers/all-MiniLM-L6-v2", + "winner": "model_a", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718381427.7404 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_a", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718381435.1426 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "tie", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718552020.2614 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "tie (bothbad)", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718585974.6677 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_b", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718587113.0162 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_a", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718587294.6318 + }, + { + "model_a": "intfloat/multilingual-e5-small", + "model_b": "sentence-transformers/all-MiniLM-L6-v2", + "winner": "model_b", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718587390.2657 + }, + { + "model_a": "intfloat/multilingual-e5-small", + "model_b": "sentence-transformers/all-MiniLM-L6-v2", + "winner": "model_a", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718588294.7339 + }, + { + "model_a": "intfloat/multilingual-e5-small", + "model_b": "sentence-transformers/all-MiniLM-L6-v2", + "winner": "model_a", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718588421.4318 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_a", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718588476.9698 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_b", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718588626.7569 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_b", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718588670.9896 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_b", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718660865.6009 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_b", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718667230.1398 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "tie", + "judge": "arena_user_", + "anony": false, + "tstamp": 1718667293.6909 + }, + { + "model_a": "intfloat/multilingual-e5-small", + "model_b": "sentence-transformers/all-MiniLM-L6-v2", + "winner": "tie (bothbad)", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718667775.5946 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_b", + "judge": "arena_user_", + "anony": true, + "tstamp": 1718901145.8347 + }, + { + "model_a": "sentence-transformers/all-MiniLM-L6-v2", + "model_b": "intfloat/multilingual-e5-small", + "winner": "model_b", + "judge": "arena_user_", + "anony": true, + "tstamp": 1720120638.7783 + }, + { + "model_a": "nomic-ai/nomic-embed-text-v1.5", + "model_b": "sentence-transformers/all-MiniLM-L6-v2", + "winner": "model_a", + "judge": "arena_user_", + "anony": true, + "tstamp": 1720599219.1566 + } +] \ No newline at end of file diff --git a/results/latest/clean_battle_clustering.json b/results/latest/clean_battle_clustering.json index 22079fa..583893a 100644 --- a/results/latest/clean_battle_clustering.json +++ b/results/latest/clean_battle_clustering.json @@ -230,5 +230,21 @@ "judge": "arena_user_", "anony": true, "tstamp": 1720120597.967 + }, + { + "model_a": "intfloat/e5-mistral-7b-instruct", + "model_b": "GritLM/GritLM-7B", + "winner": "model_b", + "judge": "arena_user_", + "anony": true, + "tstamp": 1720599005.0941 + }, + { + "model_a": "intfloat/e5-mistral-7b-instruct", + "model_b": "Salesforce/SFR-Embedding-2_R", + "winner": "model_b", + "judge": "arena_user_", + "anony": true, + "tstamp": 1720599059.1815 } ] \ No newline at end of file diff --git a/results/latest/clean_battle_sts.json b/results/latest/clean_battle_sts.json index b5d4f38..bd27b11 100644 --- a/results/latest/clean_battle_sts.json +++ b/results/latest/clean_battle_sts.json @@ -198,5 +198,13 @@ "judge": "arena_user_", "anony": true, "tstamp": 1720120638.7783 + }, + { + "model_a": "nomic-ai/nomic-embed-text-v1.5", + "model_b": "sentence-transformers/all-MiniLM-L6-v2", + "winner": "model_a", + "judge": "arena_user_", + "anony": true, + "tstamp": 1720599219.1566 } ] \ No newline at end of file