Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automate users scraping #124

Merged
merged 9 commits into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions sarc/ldap/acquire.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,16 @@ def run(
cache_policy=cache_policy,
)

LD_users = fetch_mymila(
cfg,
LD_users,
cache_policy=cache_policy,
)
# The MyMila scraping "NotImplementedError" is temporarily ignored until we have a working fetching implementation,
# or a working workaround using the CSV cache.
with using_trace(
"sarc.ldap.acquire", "fetch_mymila", exception_types=(NotImplementedError,)
Copy link
Collaborator Author

@nurbal nurbal Jun 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Je l'ai fait comme ça, de manière à gérer les deux cas:

  • en l'absence de fichier CSV de mymila (dans ce cas la lecture du cache échoue), fetch_mymila est appelée, une exception "Not Implemented" est lancée et gracieusement catchée, et MonMila est donc simplement ignoré
  • si on a le CSV, il est manuellement placé dans le répertoire de cache et directement lu.

Pour l'heure le traitement de ce fichier est l'objet d'une autre PR ( #120 ) mais pas encore Ok. Donc on ne place simplement pas le CSV en prod et le code fonctionne en l'état.

) as span:
LD_users = fetch_mymila(
cfg,
LD_users,
cache_policy=cache_policy,
)

# For each supervisor or co-supervisor, look for a mila_email_username
# matching the display name. If None has been found, the previous value remains
Expand Down
10 changes: 9 additions & 1 deletion sarc/ldap/revision.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,15 @@ def is_date_missing(date):
return date is None or date == DEFAULT_DATE


def has_changed(user_db, user_latest, excluded=("_id",)):
def has_changed(
user_db,
user_latest,
excluded=(
"_id",
"record_start",
"record_end",
),
):
keys = set(list(user_db.keys()) + list(user_latest.keys()))

for k in keys:
Expand Down
3 changes: 3 additions & 0 deletions scripts/systemd/scrapers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,7 @@
SCRIPT=$(readlink -f "$0")
SCRIPTPATH=$(dirname "$SCRIPT")
cd $SCRIPTPATH/../../
# scraping jobs
sudo -u sarc SARC_MODE=scraping SARC_CONFIG=$SCRIPTPATH/../../config/sarc-prod.json ../.local/bin/poetry run sarc acquire jobs -c narval cedar beluga graham mila -d auto
# scraping users
sudo -u sarc SARC_MODE=scraping SARC_CONFIG=$SCRIPTPATH/../../config/sarc-prod.json ../.local/bin/poetry run sarc acquire users
40 changes: 25 additions & 15 deletions tests/functional/cli/acquire/test_acquire_users.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,17 +124,22 @@ def test_acquire_users(cli_main, patch_return_values, mock_file, captrace):
# as everything goes well without corner cases.
# We will test logging in test_acquire_users_prompt below.
spans = captrace.get_finished_spans()
assert len(spans) == 1
assert spans[0].name == "match_drac_to_mila_accounts"
assert len(spans) == 2

assert spans[0].name == "fetch_mymila"
assert spans[0].status.status_code == StatusCode.OK
assert len(spans[0].events) == 4
assert len(spans[0].events) == 0

assert spans[1].name == "match_drac_to_mila_accounts"
assert spans[1].status.status_code == StatusCode.OK
assert len(spans[1].events) == 4
assert (
spans[0].events[0].name
spans[1].events[0].name
== "Loading mila_ldap, drac_roles and drac_members from files ..."
)
assert spans[0].events[1].name == "Loading matching config from file ..."
assert spans[0].events[2].name == "Matching DRAC/CC to mila accounts ..."
assert spans[0].events[3].name == "Committing matches to database ..."
assert spans[1].events[1].name == "Loading matching config from file ..."
assert spans[1].events[2].name == "Matching DRAC/CC to mila accounts ..."
assert spans[1].events[3].name == "Committing matches to database ..."


@pytest.mark.parametrize(
Expand Down Expand Up @@ -398,15 +403,20 @@ def test_acquire_users_prompt(

# Check traces
spans = captrace.get_finished_spans()
assert len(spans) == 1
assert spans[0].name == "match_drac_to_mila_accounts"
assert len(spans) == 2

assert spans[0].name == "fetch_mymila"
assert spans[0].status.status_code == StatusCode.OK
assert len(spans[0].events) == 5
assert len(spans[0].events) == 0

assert spans[1].name == "match_drac_to_mila_accounts"
assert spans[1].status.status_code == StatusCode.OK
assert len(spans[1].events) == 5
assert (
spans[0].events[0].name
spans[1].events[0].name
== "Loading mila_ldap, drac_roles and drac_members from files ..."
)
assert spans[0].events[1].name == "Loading matching config from file ..."
assert spans[0].events[2].name == "Matching DRAC/CC to mila accounts ..."
assert spans[0].events[3].name == "Committing matches to database ..."
assert spans[0].events[4].name == "Saving 1 manual matches ..."
assert spans[1].events[1].name == "Loading matching config from file ..."
assert spans[1].events[2].name == "Matching DRAC/CC to mila accounts ..."
assert spans[1].events[3].name == "Committing matches to database ..."
assert spans[1].events[4].name == "Saving 1 manual matches ..."
71 changes: 70 additions & 1 deletion tests/functional/ldap/test_acquire_ldap.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import sarc.account_matching.make_matches
import sarc.ldap.acquire
from sarc.ldap.api import get_user
from sarc.ldap.api import get_user, get_users


@pytest.mark.usefixtures("empty_read_write_db")
Expand Down Expand Up @@ -68,6 +68,75 @@ def test_acquire_ldap(patch_return_values, mock_file):
assert js_user is None


@pytest.mark.usefixtures("empty_read_write_db")
def test_acquire_ldap_revision_change(patch_return_values, mock_file):
    """
    Check how the number of stored user records evolves over four acquisitions.

    Scenario:
      1. first acquisition: one record per fake LDAP user is expected;
      2. same data acquired again: the record count must not move;
      3. one user's supervisor is changed in the LDAP data: exactly one
         additional record is expected;
      4. the changed data is acquired again: the record count must not move.
    """
    user_count = 3

    def acquire_and_count_records():
        # Run one acquisition with `open()` patched, then report how many
        # records `get_users(latest=False)` returns.
        with patch("builtins.open", side_effect=mock_file):
            sarc.ldap.acquire.run()
        return len(get_users(latest=False))

    initial_sources = {
        "sarc.ldap.read_mila_ldap.query_ldap": fake_raw_ldap_data(user_count),
        "sarc.ldap.mymila.query_mymila_csv": [],
    }
    patch_return_values(initial_sources)

    # 1. First acquisition: as many records as there are users.
    baseline = acquire_and_count_records()
    assert baseline == user_count

    # 2. Identical data: the count stays the same.
    assert acquire_and_count_records() == baseline

    # 3. Change the fake data for a single user.
    supervisor_override = {
        2: {  # The first user who is not a prof is the one with index 2
            "supervisor": "[email protected]"
        }
    }
    modified_ldap = fake_raw_ldap_data(
        user_count,
        hardcoded_values_by_user=supervisor_override,
    )
    patch_return_values({"sarc.ldap.read_mila_ldap.query_ldap": modified_ldap})

    # The count should be incremented by 1.
    assert acquire_and_count_records() == baseline + 1

    # 4. Same (modified) data again: the count stays the same.
    assert acquire_and_count_records() == baseline + 1


@pytest.mark.usefixtures("empty_read_write_db")
def test_merge_ldap_and_mymila(patch_return_values, mock_file):
nbr_users = 10
Expand Down
Loading