Skip to content

Commit

Permalink
Testing at lower seqID
Browse files Browse the repository at this point in the history
  • Loading branch information
Joseph-Ellaway committed Jun 11, 2024
1 parent 09cf897 commit 7d619ce
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 23 deletions.
11 changes: 9 additions & 2 deletions cluster_conformers/cluster_monomers.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ def _generate_ca_matx(self, pdbe_chain_id: str) -> "tuple(dict)":
if not path_save_this_ca_matx.with_suffix(".npz").exists() or self.force:

pdbe_chain_id_split = pdbe_chain_id.split("_")

# Extract x, y, z, and UNP index info from mmCIF/chain
mmcif = io_utils.load_mmcif(self.mmcif_paths[pdbe_chain_id_split[0]])
xyz_unp_dict = parsing_utils.parse_mmcif(mmcif, pdbe_chain_id_split[1])
Expand Down Expand Up @@ -278,7 +279,7 @@ def ca_distance(self, path_save: PosixPath = None) -> None:

# Dir to save the raw UniProt residue IDs as 1D np.array()s
self.path_save_unps = path_save.joinpath("unp_residue_ids")
self.path_save_unps.mkdir(exist_ok=True)
self.path_save_unps.mkdir(parents=True, exist_ok=True)

self.path_save_base_ca = path_save

Expand Down Expand Up @@ -464,6 +465,10 @@ def cluster(
"""

logger.info("Generating distance difference matrices...")
if not path_save_dd_matx:
raise ValueError("Path to save distance difference matrices not specified.")

path_save_dd_matx.mkdir(parents=True, exist_ok=True)
self.score_matx, self.label_matx = self.build_clustering_inputs(
path_save_dd_matx
)
Expand Down Expand Up @@ -518,6 +523,8 @@ def cluster(
# Write out clustering results if path specified
if path_save_cluster_results:

path_save_cluster_results.mkdir(parents=True, exist_ok=True)

# Save clustering results
path_save_all_conf = path_save_cluster_results.joinpath(
f"{self.unp}_sum_based_clustering_results.csv"
Expand Down Expand Up @@ -710,7 +717,7 @@ def render_dendrogram(

fig, ax = plt.subplots(1, 1)

logger.info("Rendering dendogram")
logger.info("Rendering dendrogram")
cluster_chains.plot_dendrogram(
unp,
ax,
Expand Down
44 changes: 23 additions & 21 deletions examples/run_O34926.sh
Original file line number Diff line number Diff line change
@@ -1,27 +1,29 @@
#!/bin/sh

rm benchmark_data/examples/O34926/O34926_ca_distances/*
rm benchmark_data/examples/O34926/O34926_ca_distances/unp_residue_ids/*
rm benchmark_data/examples/O34926/O34926_distance_differences/*
rm benchmark_data/examples/O34926/O34926_cluster_results/*
path_mmcifs="benchmark_data/examples/O34926/O34926_updated_mmcif"
path_ca_distances="benchmark_data/examples/O34926/O34926_ca_distances"
path_distance_differences="benchmark_data/examples/O34926/O34926_distance_differences"
path_cluster_results="benchmark_data/examples/O34926/O34926_cluster_results"

# mprof run --python
rm $path_ca_distances/*
rm $path_ca_distances/unp_residue_ids/*
rm $path_distance_differences/*
rm $path_cluster_results/*

python3 find_conformers.py -u "O34926" \
-m benchmark_data/examples/O34926/O34926_updated_mmcif/3nc7_updated.cif A B \
-m benchmark_data/examples/O34926/O34926_updated_mmcif/3nc5_updated.cif A B \
-m benchmark_data/examples/O34926/O34926_updated_mmcif/3nc3_updated.cif A B \
-c benchmark_data/examples/O34926/O34926_ca_distances/ \
-s benchmark_data/examples/O34926/O34926_cluster_results/ \
-d benchmark_data/examples/O34926/O34926_distance_differences/ \
-m benchmark_data/examples/O34926/O34926_updated_mmcif/3nc6_updated.cif A B \
-i 3nc6 \
-f \
-g benchmark_data/examples/O34926/O34926_cluster_results/ png svg \
python3 find_conformers.py \
-u "O34926" \
-m $path_mmcifs/3nc7_updated.cif A B \
-m $path_mmcifs/3nc5_updated.cif A B \
-m $path_mmcifs/3nc3_updated.cif A B \
-m $path_mmcifs/3nc6_updated.cif A B \
-c $path_ca_distances \
-d $path_distance_differences \
-s $path_cluster_results \
-g $path_cluster_results png svg \
-0 1 \
-1 405
-1 405 \
-f \
# -v \
# -a benchmark_data/examples/O34926/O34926_alpha_fold_mmcifs

# -o benchmark_data/examples/O34926/O34926_distance_difference_maps/ \
# -w benchmark_data/examples/O34926/O34926_cluster_results/ png svg \
# -a benchmark_data/examples/O34926/O34926_alpha_fold_mmcifs \
# -o benchmark_data/examples/O34926/O34926_distance_difference_maps/ \
# -w benchmark_data/examples/O34926/O34926_cluster_results/ png svg \
2 changes: 2 additions & 0 deletions examples/run_P15291.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/bin/sh



python3 find_conformers.py -u "P15291" \
-m benchmark_data/examples/P15291/P15291_updated_mmcif/2fyb_updated.cif A \
-m benchmark_data/examples/P15291/P15291_updated_mmcif/2fyd_updated.cif B D \
Expand Down
62 changes: 62 additions & 0 deletions run_uniref50.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/bin/sh
#
# Run find_conformers.py over the UniRef50 example chains.
#
# Files left behind by a previous run are removed first; the mmCIF inputs and
# the output directories are then handed to find_conformers.py through the
# -m, -c, -d, -s and -g options.
#
# The data root defaults to the original hard-coded location and can be
# redirected without editing this file:
#
#     UNIREF_PATH_BASE=/some/other/root sh run_uniref50.sh

path_base="${UNIREF_PATH_BASE:-/media/jellaway/FlashData/EMBL-EBI/funclan_work/clustering_w_uniref}"

# Root of everything this run writes. Kept in its own variable so that
# path_cluster_results has a single meaning (the clustering-results directory).
path_run_root="$path_base/cluster_results_uniref50"

path_mmcifs="$path_base/reindexed_mmcifs_combined/uniref50_mmcifs"
path_ca_distances="$path_run_root/ca_distances"
path_distance_differences="$path_run_root/distance_differences"
path_cluster_results="$path_run_root/cluster_results"

# Delete the files (and symlinks) directly inside directory $1.
# Sub-directories are skipped and a missing or empty directory is not an
# error, so a first run and the nested unp_residue_ids directory do not make
# rm complain.
clear_files() {
    for stale in "$1"/*; do
        if [ -f "$stale" ] || [ -L "$stale" ]; then
            rm -f -- "$stale"
        fi
    done
}

clear_files "$path_ca_distances"
clear_files "$path_ca_distances/unp_residue_ids"
clear_files "$path_distance_differences"
clear_files "$path_cluster_results"

# Options left disabled in the original script. Their paths were copied from
# the O34926 example and must be adapted before re-enabling any of them:
#   -v
#   -a benchmark_data/examples/O34926/O34926_alpha_fold_mmcifs
#   -o benchmark_data/examples/O34926/O34926_distance_difference_maps/
#   -w benchmark_data/examples/O34926/O34926_cluster_results/ png svg

python3 find_conformers.py \
    -u "uniref50" \
    -m "$path_mmcifs/6mkh_updated.cif" A \
    -m "$path_mmcifs/6bsr_updated.cif" A \
    -m "$path_mmcifs/8f3v_updated.cif" A \
    -m "$path_mmcifs/6g88_updated.cif" A B C \
    -m "$path_mmcifs/6mki_updated.cif" A \
    -m "$path_mmcifs/5dvy_updated.cif" A \
    -m "$path_mmcifs/8f3t_updated.cif" A \
    -m "$path_mmcifs/8f3x_updated.cif" A \
    -m "$path_mmcifs/8f3f_updated.cif" A \
    -m "$path_mmcifs/8u55_updated.cif" A \
    -m "$path_mmcifs/8f3l_updated.cif" A \
    -m "$path_mmcifs/8f67_updated.cif" A B C \
    -m "$path_mmcifs/8f3s_updated.cif" A \
    -m "$path_mmcifs/6g0k_updated.cif" A B C \
    -m "$path_mmcifs/8f3n_updated.cif" A \
    -m "$path_mmcifs/8f3i_updated.cif" A \
    -m "$path_mmcifs/8f3o_updated.cif" A \
    -m "$path_mmcifs/8f3y_updated.cif" A \
    -m "$path_mmcifs/8f3p_updated.cif" A \
    -m "$path_mmcifs/8f3q_updated.cif" A \
    -m "$path_mmcifs/8f3m_updated.cif" A \
    -m "$path_mmcifs/6mkf_updated.cif" A \
    -m "$path_mmcifs/8f3z_updated.cif" A \
    -m "$path_mmcifs/6bsq_updated.cif" A \
    -m "$path_mmcifs/6mkj_updated.cif" A \
    -m "$path_mmcifs/6mkg_updated.cif" A \
    -m "$path_mmcifs/8f3g_updated.cif" A \
    -m "$path_mmcifs/8f3h_updated.cif" A \
    -m "$path_mmcifs/8f3r_updated.cif" A \
    -m "$path_mmcifs/8f3w_updated.cif" A \
    -m "$path_mmcifs/6mka_updated.cif" A \
    -m "$path_mmcifs/8f3j_updated.cif" A \
    -m "$path_mmcifs/8f3u_updated.cif" A \
    -m "$path_mmcifs/5e31_updated.cif" A \
    -c "$path_ca_distances" \
    -d "$path_distance_differences" \
    -s "$path_cluster_results" \
    -g "$path_cluster_results" png svg \
    -f \
    -n 8

0 comments on commit 7d619ce

Please sign in to comment.