diff --git a/cluster_conformers/cluster_monomers.py b/cluster_conformers/cluster_monomers.py index a18e600..7606e53 100644 --- a/cluster_conformers/cluster_monomers.py +++ b/cluster_conformers/cluster_monomers.py @@ -222,6 +222,7 @@ def _generate_ca_matx(self, pdbe_chain_id: str) -> "tuple(dict)": if not path_save_this_ca_matx.with_suffix(".npz").exists() or self.force: pdbe_chain_id_split = pdbe_chain_id.split("_") + # Extract x, y, z, and UNP index info from mmCIF/chain mmcif = io_utils.load_mmcif(self.mmcif_paths[pdbe_chain_id_split[0]]) xyz_unp_dict = parsing_utils.parse_mmcif(mmcif, pdbe_chain_id_split[1]) @@ -278,7 +279,7 @@ def ca_distance(self, path_save: PosixPath = None) -> None: # Dir to save the raw UniProt residue IDs as 1D np.array()s self.path_save_unps = path_save.joinpath("unp_residue_ids") - self.path_save_unps.mkdir(exist_ok=True) + self.path_save_unps.mkdir(parents=True, exist_ok=True) self.path_save_base_ca = path_save @@ -464,6 +465,10 @@ def cluster( """ logger.info("Generating distance difference matrices...") + if not path_save_dd_matx: + raise ValueError("Path to save distance difference matrices not specified.") + + path_save_dd_matx.mkdir(parents=True, exist_ok=True) self.score_matx, self.label_matx = self.build_clustering_inputs( path_save_dd_matx ) @@ -518,6 +523,8 @@ def cluster( # Write out clustering results if path specified if path_save_cluster_results: + path_save_cluster_results.mkdir(parents=True, exist_ok=True) + # Save clustering results path_save_all_conf = path_save_cluster_results.joinpath( f"{self.unp}_sum_based_clustering_results.csv" @@ -710,7 +717,7 @@ def render_dendrogram( fig, ax = plt.subplots(1, 1) - logger.info("Rendering dendogram") + logger.info("Rendering dendrogram") cluster_chains.plot_dendrogram( unp, ax, diff --git a/examples/run_O34926.sh b/examples/run_O34926.sh index 216dafa..22bdb76 100755 --- a/examples/run_O34926.sh +++ b/examples/run_O34926.sh @@ -1,27 +1,29 @@ #!/bin/sh -rm benchmark_data/examples/O34926/O34926_ca_distances/* -rm benchmark_data/examples/O34926/O34926_ca_distances/unp_residue_ids/* -rm benchmark_data/examples/O34926/O34926_distance_differences/* -rm benchmark_data/examples/O34926/O34926_cluster_results/* +path_mmcifs="benchmark_data/examples/O34926/O34926_updated_mmcif" +path_ca_distances="benchmark_data/examples/O34926/O34926_ca_distances" +path_distance_differences="benchmark_data/examples/O34926/O34926_distance_differences" +path_cluster_results="benchmark_data/examples/O34926/O34926_cluster_results" -# mprof run --python +rm $path_ca_distances/* +rm $path_ca_distances/unp_residue_ids/* +rm $path_distance_differences/* +rm $path_cluster_results/* -python3 find_conformers.py -u "O34926" \ - -m benchmark_data/examples/O34926/O34926_updated_mmcif/3nc7_updated.cif A B \ - -m benchmark_data/examples/O34926/O34926_updated_mmcif/3nc5_updated.cif A B \ - -m benchmark_data/examples/O34926/O34926_updated_mmcif/3nc3_updated.cif A B \ - -c benchmark_data/examples/O34926/O34926_ca_distances/ \ - -s benchmark_data/examples/O34926/O34926_cluster_results/ \ - -d benchmark_data/examples/O34926/O34926_distance_differences/ \ - -m benchmark_data/examples/O34926/O34926_updated_mmcif/3nc6_updated.cif A B \ - -i 3nc6 \ - -f \ - -g benchmark_data/examples/O34926/O34926_cluster_results/ png svg \ +python3 find_conformers.py \ + -u "O34926" \ + -m $path_mmcifs/3nc7_updated.cif A B \ + -m $path_mmcifs/3nc5_updated.cif A B \ + -m $path_mmcifs/3nc3_updated.cif A B \ + -m $path_mmcifs/3nc6_updated.cif A B \ + -c $path_ca_distances \ + -d $path_distance_differences \ + -s $path_cluster_results \ + -g $path_cluster_results png svg \ -0 1 \ - -1 405 + -1 405 \ + -f \ # -v \ - # -a benchmark_data/examples/O34926/O34926_alpha_fold_mmcifs - -# -o benchmark_data/examples/O34926/O34926_distance_difference_maps/ \ -# -w benchmark_data/examples/O34926/O34926_cluster_results/ png svg \ + # -a benchmark_data/examples/O34926/O34926_alpha_fold_mmcifs \ + # -o benchmark_data/examples/O34926/O34926_distance_difference_maps/ \ + # -w benchmark_data/examples/O34926/O34926_cluster_results/ png svg \ diff --git a/examples/run_P15291.sh b/examples/run_P15291.sh index 58b1c73..d9b3a87 100755 --- a/examples/run_P15291.sh +++ b/examples/run_P15291.sh @@ -1,5 +1,7 @@ #!/bin/sh + + python3 find_conformers.py -u "P15291" \ -m benchmark_data/examples/P15291/P15291_updated_mmcif/2fyb_updated.cif A \ -m benchmark_data/examples/P15291/P15291_updated_mmcif/2fyd_updated.cif B D \ diff --git a/run_uniref50.sh b/run_uniref50.sh new file mode 100755 index 0000000..33ac513 --- /dev/null +++ b/run_uniref50.sh @@ -0,0 +1,62 @@ +#!/bin/sh + +path_base="/media/jellaway/FlashData/EMBL-EBI/funclan_work/clustering_w_uniref" +path_cluster_results="$path_base/cluster_results_uniref50" + + +path_mmcifs="$path_base/reindexed_mmcifs_combined/uniref50_mmcifs" +path_ca_distances="$path_cluster_results/ca_distances" +path_distance_differences="$path_cluster_results/distance_differences" +path_cluster_results="$path_cluster_results/cluster_results" + +rm $path_ca_distances/* +rm $path_ca_distances/unp_residue_ids/* +rm $path_distance_differences/* +rm $path_cluster_results/* + +python3 find_conformers.py \ + -u "uniref50" \ + -m $path_mmcifs/6mkh_updated.cif A \ + -m $path_mmcifs/6bsr_updated.cif A \ + -m $path_mmcifs/8f3v_updated.cif A \ + -m $path_mmcifs/6g88_updated.cif A B C \ + -m $path_mmcifs/6mki_updated.cif A \ + -m $path_mmcifs/5dvy_updated.cif A \ + -m $path_mmcifs/8f3t_updated.cif A \ + -m $path_mmcifs/8f3x_updated.cif A \ + -m $path_mmcifs/8f3f_updated.cif A \ + -m $path_mmcifs/8u55_updated.cif A \ + -m $path_mmcifs/8f3l_updated.cif A \ + -m $path_mmcifs/8f67_updated.cif A B C \ + -m $path_mmcifs/8f3s_updated.cif A \ + -m $path_mmcifs/6g0k_updated.cif A B C \ + -m $path_mmcifs/8f3n_updated.cif A \ + -m $path_mmcifs/8f3i_updated.cif A \ + -m $path_mmcifs/8f3o_updated.cif A \ + -m $path_mmcifs/8f3y_updated.cif A \ + -m $path_mmcifs/8f3p_updated.cif A \ + -m $path_mmcifs/8f3q_updated.cif A \ + -m $path_mmcifs/8f3m_updated.cif A \ + -m $path_mmcifs/6mkf_updated.cif A \ + -m $path_mmcifs/8f3z_updated.cif A \ + -m $path_mmcifs/6bsq_updated.cif A \ + -m $path_mmcifs/6mkj_updated.cif A \ + -m $path_mmcifs/6mkg_updated.cif A \ + -m $path_mmcifs/8f3g_updated.cif A \ + -m $path_mmcifs/8f3h_updated.cif A \ + -m $path_mmcifs/8f3r_updated.cif A \ + -m $path_mmcifs/8f3w_updated.cif A \ + -m $path_mmcifs/6mka_updated.cif A \ + -m $path_mmcifs/8f3j_updated.cif A \ + -m $path_mmcifs/8f3u_updated.cif A \ + -m $path_mmcifs/5e31_updated.cif A \ + -c $path_ca_distances \ + -d $path_distance_differences \ + -s $path_cluster_results \ + -g $path_cluster_results png svg \ + -f \ + -n 8 \ + # -v \ + # -a benchmark_data/examples/O34926/O34926_alpha_fold_mmcifs \ + # -o benchmark_data/examples/O34926/O34926_distance_difference_maps/ \ + # -w benchmark_data/examples/O34926/O34926_cluster_results/ png svg \