-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add local authority data utility functions
functions for: * matching council name to local authority code * adding region and county based on local authority code * adding gss code based on local authority code * adding type, mapit code etc These all take a pandas dataframe as an argument and return the same.
- Loading branch information
Showing
1 changed file
with
115 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
from os.path import join | ||
import pandas as pd | ||
|
||
from ..dataset import get_dataset_df | ||
|
||
def fix_council_name(council: str) -> str: | ||
return ( | ||
council.replace("council", "") | ||
.replace(" - unitary", "") | ||
.replace("(unitary)", "") | ||
.strip() | ||
) | ||
|
||
def add_local_authority_code(df: pd.DataFrame) -> pd.DataFrame: | ||
""" | ||
Add the local-authority-code to the dataframe | ||
""" | ||
|
||
name_to_code = get_dataset_df( | ||
repo="uk_local_authority_names_and_codes", | ||
package="uk_la_past_current", | ||
version="1", | ||
file="lookup_name_to_registry.csv", | ||
) | ||
df["council_lower"] = df["council"].str.lower().apply(fix_council_name) | ||
name_to_code["council_lower"] = ( | ||
name_to_code["la-name"].str.lower().apply(fix_council_name) | ||
) | ||
df = df.merge(name_to_code, on="council_lower", how="left") | ||
|
||
# local-authority-code is in last position, move it to the start of the dataframe | ||
cols = list(df.columns) | ||
cols.insert(0, cols.pop(-1)) | ||
df = df[cols] | ||
df = df.drop(columns=["council_lower", "la-name"]) | ||
return df | ||
|
||
def add_region_and_county(df: pd.DataFrame) -> pd.DataFrame: | ||
name_to_code = get_dataset_df( | ||
repo="uk_local_authority_names_and_codes", | ||
package="uk_la_past_current", | ||
version="1", | ||
file="uk_local_authorities_current.csv", | ||
) | ||
|
||
rows = len(df["council"]) | ||
df["region"] = pd.Series([None] * rows, index=df.index) | ||
df["county"] = pd.Series([None] * rows, index=df.index) | ||
|
||
for index, row in df.iterrows(): | ||
authority_code = row["local-authority-code"] | ||
if not pd.isnull(authority_code): | ||
authority_match = name_to_code[ | ||
name_to_code["local-authority-code"] == authority_code | ||
] | ||
df.at[index, "region"] = authority_match["region"].values[0] | ||
df.at[index, "county"] = authority_match["county-la"].values[0] | ||
|
||
return df | ||
|
||
|
||
def add_gss_codes(df: pd.DataFrame) -> pd.DataFrame: | ||
name_to_code = get_dataset_df( | ||
repo="uk_local_authority_names_and_codes", | ||
package="uk_la_past_current", | ||
version="1", | ||
file="uk_local_authorities_current.csv", | ||
) | ||
|
||
rows = len(df["council"]) | ||
df["gss_code"] = pd.Series([None] * rows, index=df.index) | ||
|
||
for index, row in df.iterrows(): | ||
authority_code = row["local-authority-code"] | ||
if not pd.isnull(authority_code): | ||
authority_match = name_to_code[ | ||
name_to_code["local-authority-code"] == authority_code | ||
] | ||
df.at[index, "gss_code"] = name_to_code["gss-code"].values[0] | ||
|
||
return df | ||
|
||
|
||
def add_extra_authority_info(df: pd.DataFrame) -> pd.DataFrame: | ||
name_to_code = get_dataset_df( | ||
repo="uk_local_authority_names_and_codes", | ||
package="uk_la_past_current", | ||
version="1", | ||
file="uk_local_authorities_current.csv", | ||
) | ||
|
||
extra_df = name_to_code[ | ||
[ | ||
"local-authority-code", | ||
"local-authority-type", | ||
"wdtk-id", | ||
"mapit-area-code", | ||
"nation", | ||
"gss-code", | ||
] | ||
] | ||
|
||
# the info sheet may contain updated version of columns previously | ||
# loaded to sheet, need to drop them before the merge | ||
# ignore errors in case columns are not present | ||
columns_to_drop = [x for x in extra_df.columns if x != "local-authority-code"] | ||
df = df.drop(columns=columns_to_drop, errors="ignore") | ||
|
||
# merge two dataframes using the authority_code as the common reference | ||
extra_df = extra_df.merge(df, on="local-authority-code", how="left") | ||
|
||
is_non_english = extra_df["nation"].isin(["Wales", "Scotland", "Northern Ireland"]) | ||
extra_df.loc[is_non_english, "local-authority-type"] = "UA" | ||
|
||
return extra_df |