From 933187c6876b2c5b934c6e6300cf356508704caf Mon Sep 17 00:00:00 2001
From: Joe Harvey
Date: Tue, 1 Oct 2019 11:41:22 +0100
Subject: [PATCH] Taiyaki 5.0

---
 CHANGELOG.md             | 15 ++++++++++++++-
 Makefile                 |  8 +++-----
 README.md                | 22 ++++++++++++----------
 bin/basecall.py          | 10 +++++-----
 bin/train_abinitio.py    |  4 +++-
 bin/train_flipflop.py    |  5 +++--
 bin/train_squiggle.py    |  2 +-
 requirements.txt         |  2 +-
 setup.py                 |  2 +-
 taiyaki/__init__.py      |  4 ++--
 taiyaki/layers.py        | 23 ++++-------------------
 test/unit/test_layers.py |  2 +-
 12 files changed, 50 insertions(+), 49 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 470aea3..426cef8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,20 @@ Version numbers: major.minor.patch
 * Minor version bump indicates a change in functionality that may affect users.
 * Patch version bump indicates bug-fixes or minor improvements not expected to affect users.
 
+## v5.0.0
+* Based on pytorch version 1.2
+* Improved training stability: gradient capping and warm-up
+* Merged mod-base and canonical entry points
+    * Custom model definitions should now take an
+      `alphabet_info` argument rather than `outsize`
+* Improved RNA support: tools can reverse references and basecalls
+* Basecaller changes:
+    * chunk size argument now matches guppy
+    * CPU calling enabled
+    * lower memory usage
+* Multi-GPU training enabled
+* Bug fixes
+
 ## v4.1.0
 * Ab initio ("bootstrap") training of models
@@ -16,7 +30,6 @@ Version numbers: major.minor.patch
 * Training walk-through
 * Tweaks to optimisation parameters
-
 
 ## v3.0.2
 * Improved training parameters
 * Use orthonormal initialisation of starting weights
diff --git a/Makefile b/Makefile
index 5cabcbf..e41ac56 100644
--- a/Makefile
+++ b/Makefile
@@ -11,19 +11,17 @@ CUDA ?= $(shell (which nvcc && nvcc --version) | grep -oP "(?<=release )[0-9.]+"
 
 # Determine correct torch package to install
-TORCH_CUDA_8.0 = cu80
-TORCH_CUDA_9.0 = cu90
+TORCH_CUDA_9.2 = cu92
 TORCH_CUDA_10.0 = cu100
 TORCH_PLATFORM ?= $(if $(TORCH_CUDA_$(CUDA)),$(TORCH_CUDA_$(CUDA)),cpu)
 PY3_MINOR = $(shell $(PYTHON) -c "import sys; print(sys.version_info.minor)")
-TORCH_Linux = http://download.pytorch.org/whl/${TORCH_PLATFORM}/torch-1.0.0-cp3${PY3_MINOR}-cp3${PY3_MINOR}m-linux_x86_64.whl
+TORCH_Linux = http://download.pytorch.org/whl/${TORCH_PLATFORM}/torch-1.2.0-cp3${PY3_MINOR}-cp3${PY3_MINOR}m-manylinux1_x86_64.whl
 TORCH_Darwin = torch
 TORCH ?= $(TORCH_$(shell uname -s))
 
 # determine correct cupy package to install
-CUPY_8.0 = cupy-cuda80
-CUPY_9.0 = cupy-cuda90
+CUPY_9.2 = cupy-cuda92
 CUPY_10.0 = cupy-cuda100
 CUPY ?= $(CUPY_$(CUDA))
diff --git a/README.md b/README.md
index e7d286e..a7d3062 100644
--- a/README.md
+++ b/README.md
@@ -31,9 +31,9 @@ expect to get your hands dirty.
 
 # Contents
 1. [Installing system prerequisites](#installing-system-prerequisites)
-2. [Installing Taiyaki](#installation)
+2. [Installing Taiyaki](#installing-taiyaki)
 3. [Tests](#tests)
-4. [Walk through](#walk-through)
+4. [Walk through](#walk-throughs-and-further-documentation)
 5. [Workflows](#workflows)
    * [Using the workflow Makefile](#using-the-workflow-makefile)
    * [Steps from fast5 files to basecalling](#steps-from-fast5-files-to-basecalling)
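A note on the changelog entry above: with the mod-base and canonical entry points merged, a custom model definition now receives an `alphabet_info` object rather than an integer `outsize`, so the model can size its output layer for modified-base alphabets. A minimal sketch of what a migrated model file might look like — the layer names come from `taiyaki.layers`, but the `network()` signature and the `nbase` attribute on `alphabet_info` are assumptions, not confirmed API:

```python
# Hypothetical custom model definition following the v5.0.0 convention:
# `alphabet_info` replaces the old integer `outsize` argument.
from taiyaki.layers import (Convolution, GlobalNormFlipFlop, GruMod, Reverse,
                            Serial)


def network(insize=1, size=256, winlen=19, stride=5, alphabet_info=None):
    # `nbase` is an assumed attribute: the number of (possibly modified) bases.
    nbase = 4 if alphabet_info is None else alphabet_info.nbase
    return Serial([
        Convolution(insize, size, winlen, stride=stride),
        GruMod(size, size),
        Reverse(GruMod(size, size)),
        GlobalNormFlipFlop(size, nbase),  # output layer sized from the alphabet
    ])
```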
@@ -86,7 +86,7 @@ Windows is not supported.
 If you intend to use Taiyaki with a GPU, make sure you have installed and set up [CUDA](#cuda) before proceeding.
 
 ---
-## Install Taiyaki in a new virtual environment
+## Install Taiyaki in a new virtual environment (RECOMMENDED)
 
 We recommend installing Taiyaki in a self-contained [virtual environment](https://docs.python.org/3/tutorial/venv.html).
 
@@ -99,6 +99,9 @@ You will need to run `source venv/bin/activate` at the start of each session whe
 
 ## Install Taiyaki system-wide or into activated Python environment
 
+This is not the recommended installation method: we recommend that you install taiyaki in its
+[own virtual environment](#install-taiyaki-in-a-new-virtual-environment) if possible.
+
 Taiyaki can be installed from source using either:
 
     python3 setup.py install
@@ -111,14 +114,13 @@ Alternatively, you can use pip with either:
 
 # Tests
 
-Tests can be run as follows:
-
-    make workflow           #runs scripts which carry out the workflow for basecall-network training and for squiggle-predictor training
-    make acctest            #runs acceptance tests
-    make unittest           #runs unit tests
-    make multiGPU_test      #runs multi-GPU test (GPUs 0 and 1 must be available, and CUDA must be installed - see below)
+Tests can be run as follows, provided that the recommended `make install` installation method was used:
 
-If Taiyaki has been installed in a virtual environment, it will have to activated before running tests: `source venv/bin/activate`. To deactivate, run `deactivate`.
+    source venv/bin/activate   # activates taiyaki virtual environment (do this first)
+    make workflow              # runs scripts which carry out the workflow for basecall-network training and for squiggle-predictor training
+    make acctest               # runs acceptance tests
+    make unittest              # runs unit tests
+    make multiGPU_test         # runs multi-GPU test (GPUs 0 and 1 must be available, and CUDA must be installed - see below)
 
 # Walk throughs and further documentation
 For a walk-through of Taiyaki model training, including how to obtain sample training data, see [docs/walkthrough.rst](docs/walkthrough.rst).
diff --git a/bin/basecall.py b/bin/basecall.py
index 6dd8cfd..610d96c 100755
--- a/bin/basecall.py
+++ b/bin/basecall.py
@@ -30,21 +30,21 @@
 add_common_command_args(parser, 'alphabet device input_folder input_strand_list limit output quiet recursive version'.split())
 
-parser.add_argument("--chunk_size", type=Positive(int),
+parser.add_argument("--chunk_size", type=Positive(int), metavar="blocks",
                     default=basecall_helpers._DEFAULT_CHUNK_SIZE,
-                    help="Size of signal chunks sent to GPU")
+                    help="Size of signal chunks sent to GPU is chunk_size * model stride")
 parser.add_argument("--max_concurrent_chunks", type=Positive(int),
                     default=128, help="Maximum number of chunks to call at "
                     "once. Lower values will consume less (GPU) RAM.")
-parser.add_argument("--modified_base_output", action=FileAbsent, default=None,
+parser.add_argument("--modified_base_output", action=FileAbsent, default=None, metavar="mod_basecalls.hdf5",
                     help="Output filename for modified base output.")
-parser.add_argument("--overlap", type=NonNegative(int),
+parser.add_argument("--overlap", type=NonNegative(int), metavar="blocks",
                     default=basecall_helpers._DEFAULT_OVERLAP,
                     help="Overlap between signal chunks sent to GPU")
 parser.add_argument('--reverse', default=False, action=AutoBool,
                     help='Reverse sequences in output')
 parser.add_argument('--scaling', action=FileExists, default=None,
-                    help='Per-read scaling params')
+                    help='Path to TSV containing per-read scaling params')
 parser.add_argument("model", action=FileExists,
                     help="Model checkpoint file to use for basecalling")
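The new `--chunk_size` help text above changes the unit: a chunk is now measured in model output blocks (matching guppy) rather than raw signal samples, so the amount of signal sent to the device depends on the model stride. A toy illustration of the relationship — all numbers here are invented:

```python
# --chunk_size is now in output blocks; the signal sent to the device is
# chunk_size * stride raw samples.  Illustrative values only.
chunk_size = 1000         # value passed to --chunk_size (blocks)
model_stride = 5          # stride of the model's convolution, model-dependent
samples_per_chunk = chunk_size * model_stride
print(samples_per_chunk)  # 5000 raw signal samples per chunk
```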
diff --git a/bin/train_abinitio.py b/bin/train_abinitio.py
index 9b4f143..b5447cf 100755
--- a/bin/train_abinitio.py
+++ b/bin/train_abinitio.py
@@ -138,7 +138,6 @@ def save_model(network, outdir, index=None):
 
 for i in range(args.niteration):
-    lr_scheduler.step()
 
     idx = np.random.randint(len(chunks), size=args.batch_size)
     indata = chunks[idx].transpose(1, 0)
@@ -186,4 +185,7 @@ def save_model(network, outdir, index=None):
         total_samples = 0
         t0 = tn
 
+    lr_scheduler.step()
+
+
 save_model(network, args.outdir)
diff --git a/bin/train_flipflop.py b/bin/train_flipflop.py
index be4a17c..1d4f6bd 100755
--- a/bin/train_flipflop.py
+++ b/bin/train_flipflop.py
@@ -419,8 +419,6 @@ def main():
 
     for i in range(args.niteration):
-        lr_scheduler.step()
-
         # Chunk length is chosen randomly in the range given but forced to
         # be a multiple of the stride
         batch_chunk_len = (np.random.randint(
@@ -520,6 +518,9 @@ def main():
             #    log.write("* GPU{} params:".format(args.local_rank))
             #    log.write("{}...{}\n".format(v,u))
 
+        lr_scheduler.step()
+
+
     if is_lead_process:
         helpers.save_model(network, args.outdir,
                            model_skeleton=network_save_skeleton)
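Both training scripts above move `lr_scheduler.step()` from the top of the iteration to the bottom (and `train_squiggle.py` below gets the same reordering). This follows the convention required since PyTorch 1.1: the scheduler must be stepped after the optimizer, otherwise the first learning-rate value of the schedule is skipped and recent PyTorch versions emit a warning. A minimal loop showing the required ordering — the model and loss are placeholders:

```python
import torch

model = torch.nn.Linear(10, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)

for _ in range(100):
    optimizer.zero_grad()
    loss = model(torch.randn(8, 10)).pow(2).mean()  # stand-in loss
    loss.backward()
    optimizer.step()
    scheduler.step()  # after optimizer.step(), as in the diffs above
```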
diff --git a/bin/train_squiggle.py b/bin/train_squiggle.py
index f2235b7..450c1d7 100755
--- a/bin/train_squiggle.py
+++ b/bin/train_squiggle.py
@@ -129,7 +129,6 @@ def main():
     total_chunks = 0
 
     for i in range(args.niteration):
-        lr_scheduler.step()
         # If the logging threshold is 0 then we log all chunks, including those rejected, so pass the log
         # object into assemble_batch
         # chunk_batch is a list of dicts.
@@ -194,6 +193,7 @@ def main():
             log.write(" {:.1%} chunks filtered".format(n_fail / n_tot))
             log.write("\n")
 
+        lr_scheduler.step()
 
     helpers.save_model(conv_net, args.outdir)
diff --git a/requirements.txt b/requirements.txt
index d342149..62e54e5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,4 +7,4 @@ ont_fast5_api == 1.2.0
 pysam >= 0.15.0
 matplotlib >= 2.0.0
 scipy >= 1
-torch >= 1, < 1.1
+torch == 1.2
diff --git a/setup.py b/setup.py
index 4b8a0cc..3b201ad 100644
--- a/setup.py
+++ b/setup.py
@@ -20,7 +20,7 @@
     "matplotlib >= 2.0.0",
     "pysam >= 0.15.0",
     "scipy >= 1",
-    "torch >= 1, < 1.1",
+    "torch == 1.2"
 ]
diff --git a/taiyaki/__init__.py b/taiyaki/__init__.py
index 8eb964f..7ccd5b9 100644
--- a/taiyaki/__init__.py
+++ b/taiyaki/__init__.py
@@ -1,7 +1,7 @@
 """Custard owns my heart!"""
 __version_info__ = {
-    'major': 4,
-    'minor': 1,
+    'major': 5,
+    'minor': 0,
     'revision': 0,
 }
 __version__ = "{major}.{minor}.{revision}".format(**__version_info__)
diff --git a/taiyaki/layers.py b/taiyaki/layers.py
index 14855e6..9c63c0e 100755
--- a/taiyaki/layers.py
+++ b/taiyaki/layers.py
@@ -19,7 +19,8 @@
 def init_(param, value):
     """Set parameter value (inplace) from tensor, numpy array, list or tuple"""
     value_as_tensor = torch.tensor(value, dtype=param.data.dtype)
-    param.data.detach_().set_(value_as_tensor)
+    with torch.no_grad():
+        param.set_(value_as_tensor)
 
 
 def random_orthonormal(n, m=None):
@@ -595,24 +596,8 @@ def birnn(forward, backward):
 
 
 @torch.jit.script
-def logaddexp_fwdbwd(x, y):
-    z = torch.max(x, y) + torch.log1p(torch.exp(-torch.abs(x - y)))
-    return z, (x-z).exp(), (y-z).exp()
-
-
-class LogAddExp(torch.autograd.Function):
-    @staticmethod
-    def forward(ctx, x, y):
-        z, xmz, ymz = logaddexp_fwdbwd(x, y)
-        ctx.save_for_backward(xmz, ymz)
-        return z
-
-    @staticmethod
-    def backward(ctx, outgrad):
-        xmz, ymz = ctx.saved_tensors
-        return outgrad * xmz, outgrad * ymz
-
-logaddexp = LogAddExp.apply
+def logaddexp(x, y):
+    return torch.max(x, y) + torch.log1p(torch.exp(-torch.abs(x - y)))
 
 
 @torch.jit.script
diff --git a/test/unit/test_layers.py b/test/unit/test_layers.py
index c92254e..954a56a 100644
--- a/test/unit/test_layers.py
+++ b/test/unit/test_layers.py
@@ -326,7 +326,7 @@ def test_cupy_and_non_cupy_same(self):
         # rtol before softmax = atol after softmax. Therefore I've replaced
         # the atol with the default value for rtol.
         print((abs(x1.grad - x2.grad)).max())
-        self.assertTrue(torch.allclose(x1.grad, x2.grad, atol=1e-05))
+        self.assertTrue(torch.allclose(x1.grad, x2.grad, atol=1e-04))
 
 
 class UpSampleTest(LayerTest, unittest.TestCase):
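The `taiyaki/layers.py` change above drops the hand-written `LogAddExp` autograd function: autograd can differentiate the scripted ops (`max`, `abs`, `exp`, `log1p`) directly, so the custom backward pass is redundant. A small self-contained sanity check of the simplified form — `gradcheck` wants double precision, and this assumes a PyTorch recent enough to script and differentiate the function:

```python
import torch


@torch.jit.script
def logaddexp(x, y):
    # Numerically stable log(exp(x) + exp(y)), as in taiyaki/layers.py.
    return torch.max(x, y) + torch.log1p(torch.exp(-torch.abs(x - y)))


x = torch.randn(5, dtype=torch.double, requires_grad=True)
y = torch.randn(5, dtype=torch.double, requires_grad=True)
assert torch.autograd.gradcheck(logaddexp, (x, y))
```

The relaxed `atol` in `test/unit/test_layers.py` is plausibly related, allowing slightly more numerical slack between the cupy and non-cupy gradient paths.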