diff --git a/README.md b/README.md index a72a144..9f6d48d 100644 --- a/README.md +++ b/README.md @@ -4,15 +4,9 @@ Elixir bindings for [Hugging Face Tokenizers](https://github.com/huggingface/tokenizers). -## Getting started +## Installation -In order to use `Tokenizers`, you will need Elixir installed. Then create an Elixir project via the `mix` build tool: - -``` -$ mix new my_app -``` - -Then you can add `Tokenizers` as dependency in your `mix.exs`. +You can add `:tokenizers` as dependency in your `mix.exs`: ```elixir def deps do @@ -30,26 +24,27 @@ Mix.install([ ]) ``` -## Quick example +## Example + +You can use any pre-trained tokenizer from any model repo on Hugging Face Hub, such as [bert-base-cased](https://huggingface.co/bert-base-cased). ```elixir -# Go get a tokenizer -- any from the Hugging Face models repo will do {:ok, tokenizer} = Tokenizers.Tokenizer.from_pretrained("bert-base-cased") {:ok, encoding} = Tokenizers.Tokenizer.encode(tokenizer, "Hello there!") Tokenizers.Encoding.get_tokens(encoding) -# ["Hello", "there", "!"] +#=> ["Hello", "there", "!"] Tokenizers.Encoding.get_ids(encoding) -# [8667, 1175, 106] +#=> [8667, 1175, 106] ``` The [notebooks](./notebooks) directory has [an introductory Livebook](./notebooks/pretrained.livemd) to give you a feel for the API. ## Contributing -Tokenizers uses Rust to call functionality from the Hugging Face Tokenizers library. While -Rust is not necessary to use Tokenizers as a package, you need Rust tooling installed on -your machine if you want to compile from source, which is the case when contributing to -Tokenizers. In particular, you will need Rust Stable, which can be installed with +Tokenizers uses Rust to call functionality from the Hugging Face Tokenizers library. While +Rust is not necessary to use Tokenizers as a package, you need Rust tooling installed on +your machine if you want to compile from source, which is the case when contributing to +Tokenizers. In particular, you will need Rust Stable, which can be installed with [Rustup](https://rust-lang.github.io/rustup/installation/index.html). ## License diff --git a/lib/tokenizers.ex b/lib/tokenizers.ex index 8865f76..542f302 100644 --- a/lib/tokenizers.ex +++ b/lib/tokenizers.ex @@ -4,16 +4,19 @@ defmodule Tokenizers do Hugging Face describes the Tokenizers library as: - > Fast State-of-the-art tokenizers, optimized for both research and production + > Fast State-of-the-art tokenizers, optimized for both research and + > production > - > 🤗 Tokenizers provides an implementation of today’s most used tokenizers, with a focus on performance and versatility. These tokenizers are also used in 🤗 Transformers. + > 🤗 Tokenizers provides an implementation of today’s most used + > tokenizers, with a focus on performance and versatility. These + > tokenizers are also used in 🤗 Transformers. - This library has bindings to use pretrained tokenizers. Support for building and training - a tokenizer from scratch is forthcoming. + A tokenizer is effectively a pipeline of transformations that take + a text input and return an encoded version of that text (`t:Tokenizers.Encoding.t/0`). - A tokenizer is effectively a pipeline of transforms to take some input text and return a - `Tokenizers.Encoding.t()`. The main entrypoint to this library is the `Tokenizers.Tokenizer` - module, which holds the `Tokenizers.Tokenizer.t()` struct, a container holding the constituent - parts of the pipeline. Most functionality is there. 
+ The main entrypoint to this library is the `Tokenizers.Tokenizer` + module, which defines the `t:Tokenizers.Tokenizer.t/0` struct, a + container holding the constituent parts of the pipeline. Most + functionality is in that module. """ end diff --git a/lib/tokenizers/added_token.ex b/lib/tokenizers/added_token.ex index a9aa430..a531833 100644 --- a/lib/tokenizers/added_token.ex +++ b/lib/tokenizers/added_token.ex @@ -1,53 +1,51 @@ defmodule Tokenizers.AddedToken do @moduledoc """ - This struct represents AddedTokens + This struct represents a token added to tokenizer vocabulary. """ @type t() :: %__MODULE__{resource: reference()} defstruct [:resource] - @typedoc """ - Options for added token initialisation. All options can be ommited. - """ - @type opts() :: [ - special: boolean(), - single_word: boolean(), - lstrip: boolean(), - rstrip: boolean(), - normalized: boolean() - ] - @doc """ - Create a new AddedToken. + Builds a new added token. + + ## Options + + * `:special` - defines whether this token is a special token. + Defaults to `false` - * `:special` (default `false`) - defines whether this token is a special token. + * `:single_word` - defines whether this token should only match + single words. If `true`, this token will never match inside of a + word. For example the token `ing` would match on `tokenizing` if + this option is `false`. The notion of ”inside of a word” is + defined by the word boundaries pattern in regular expressions + (i.e. the token should start and end with word boundaries). + Defaults to `false` - * `:single_word` (default `false`) - defines whether this token should only match single words. - If `true`, this token will never match inside of a word. For example the token `ing` would - match on `tokenizing` if this option is `false`, but not if it is `true`. - The notion of ”inside of a word” is defined by the word boundaries pattern - in regular expressions (i.e. the token should start and end with word boundaries). + * `:lstrip` - defines whether this token should strip all potential + whitespace on its left side. If `true`, this token will greedily + match any whitespace on its left. For example if we try to match + the token `[MASK]` with `lstrip=true`, in the text `"I saw a [MASK]"`, + we would match on `" [MASK]"`. (Note the space on the left). + Defaults to `false` - * `:lstrip` (default `false`) - defines whether this token should strip all potential - whitespaces on its left side. - If `true`, this token will greedily match any whitespace on its left. - For example if we try to match the token `[MASK]` with `lstrip=true`, - in the text `"I saw a [MASK]"`, we would match on `" [MASK]"`. (Note the space on the left). + * `:rstrip` - defines whether this token should strip all potential + whitespaces on its right side. If `true`, this token will greedily + match any whitespace on its right. It works just like `:lstrip`, + but on the right. Defaults to `false` - * `:rstrip` (default `false`) - defines whether this token should strip all potential - whitespaces on its right side. - If `true`, this token will greedily match any whitespace on its right. - It works just like `lstrip` but on the right. + * `:normalized` - defines whether this token should match against + the normalized version of the input text. For example, with the + added token `"yesterday"`, and a normalizer in charge of + lowercasing the text, the token could be extract from the input + `"I saw a lion Yesterday"`. 
If `true`, the token will be extracted + from the normalized input `"i saw a lion yesterday"`. If `false`, + the token will be extracted from the original input + `"I saw a lion Yesterday"`. Defaults to `false` for special tokens + and `true` otherwise - * `:normalized` (default `true` for not special tokens, `false` for special tokens) - - defines whether this token should match against the normalized version of the input text. - For example, with the added token `"yesterday"`, - and a normalizer in charge of lowercasing the text, - the token could be extract from the input `"I saw a lion Yesterday"`. - If `true`, the token will be extracted from the normalized input `"i saw a lion yesterday"`. - If `false`, the token will be extracted from the original input `"I saw a lion Yesterday"`. """ - @spec new(token :: String.t(), opts :: opts()) :: t() + @spec new(token :: String.t(), keyword()) :: t() defdelegate new(token, opts \\ []), to: Tokenizers.Native, as: :added_token_new @doc """ diff --git a/lib/tokenizers/decoder.ex b/lib/tokenizers/decoder.ex index e46a25b..b3f546e 100644 --- a/lib/tokenizers/decoder.ex +++ b/lib/tokenizers/decoder.ex @@ -1,10 +1,16 @@ defmodule Tokenizers.Decoder do @moduledoc """ - The Decoder knows how to go from the IDs used by the Tokenizer, back to a readable piece of text. - Some Normalizer and PreTokenizer use special characters or identifiers that need to be reverted. + Decoders and decoding functions. + + Decoder transforms a sequence of token ids back to a readable piece + of text. + + Some normalizers and pre-tokenizers use special characters or + identifiers that need special logic to be reverted. """ defstruct [:resource] + @type t() :: %__MODULE__{resource: reference()} @doc """ @@ -13,113 +19,104 @@ defmodule Tokenizers.Decoder do @spec decode(t(), [String.t()]) :: {:ok, String.t()} | {:error, any()} defdelegate decode(decoder, tokens), to: Tokenizers.Native, as: :decoders_decode - @typedoc """ - Options for BPE decoder initialization. All options can be ommited. + @doc """ + Creates a BPE decoder. - * `suffix` - The suffix to add to the end of each word, defaults to `` - """ - @type bpe_options :: [suffix: String.t()] + ## Options + + * `suffix` - the suffix to add to the end of each word. Defaults + to `` - @doc """ - Creates new BPE decoder """ - @spec bpe(bpe_options :: bpe_options()) :: t() - defdelegate bpe(options \\ []), to: Tokenizers.Native, as: :decoders_bpe + @spec bpe(keyword()) :: t() + defdelegate bpe(opts \\ []), to: Tokenizers.Native, as: :decoders_bpe @doc """ - Creates new ByteFallback decoder + Creates a ByteFallback decoder. """ @spec byte_fallback() :: t() defdelegate byte_fallback(), to: Tokenizers.Native, as: :decoders_byte_fallback @doc """ - Creates new ByteLevel decoder + Creates a ByteLevel decoder. """ @spec byte_level() :: t() defdelegate byte_level(), to: Tokenizers.Native, as: :decoders_byte_level - @typedoc """ - Options for CTC decoder initialization. All options can be ommited. + @doc """ + Creates a CTC decoder. - * `pad_token` - The token used for padding, defaults to `` - * `word_delimiter_token` - The token used for word delimiter, defaults to `|` - * `cleanup` - Whether to cleanup tokenization artifacts, defaults to `true` - """ - @type ctc_options :: [ - pad_token: String.t(), - word_delimiter_token: String.t(), - cleanup: boolean() - ] + ## Options + + * `pad_token` - the token used for padding. Defaults to `` + + * `word_delimiter_token` - the token used for word delimiter. 
+ Defaults to `|` + + * `cleanup` - whether to cleanup tokenization artifacts, defaults + to `true` - @doc """ - Creates new CTC decoder """ - @spec ctc(ctc_options :: ctc_options()) :: t() - defdelegate ctc(options \\ []), to: Tokenizers.Native, as: :decoders_ctc + @spec ctc(keyword()) :: t() + defdelegate ctc(opts \\ []), to: Tokenizers.Native, as: :decoders_ctc @doc """ - Creates new Fuse decoder + Creates a Fuse decoder. """ @spec fuse :: t() defdelegate fuse(), to: Tokenizers.Native, as: :decoders_fuse - @typedoc """ - Options for Metaspace decoder initialization. All options can be ommited. + @doc """ + Creates a Metaspace decoder. + + ## Options - * `replacement` - The replacement character, defaults to `▁` (as char) - * `add_prefix_space` - Whether to add a space to the first word, defaults to `true` - """ + * `replacement` - the replacement character. Defaults to `▁` + (as char) - @type metaspace_options :: [ - replacement: char(), - add_prefix_space: boolean() - ] + * `add_prefix_space` - whether to add a space to the first word. + Defaults to `true` - @doc """ - Creates new Metaspace decoder """ - @spec metaspace(metaspace_options :: metaspace_options()) :: t() - defdelegate metaspace(options \\ []), + @spec metaspace(keyword()) :: t() + defdelegate metaspace(opts \\ []), to: Tokenizers.Native, as: :decoders_metaspace @doc """ - Creates new Replace decoder + Creates a Replace decoder. """ - @spec replace(pattern :: String.t(), content :: String.t()) :: t() + @spec replace(String.t(), String.t()) :: t() defdelegate replace(pattern, content), to: Tokenizers.Native, as: :decoders_replace @doc """ - Creates new Sequence decoder + Combines a list of decoders into a single sequential decoder. """ @spec sequence(decoders :: [t()]) :: t() defdelegate sequence(decoders), to: Tokenizers.Native, as: :decoders_sequence @doc """ - Creates new Strip decoder. + Creates a Strip decoder. It expects a character and the number of times to strip the character on `left` and `right` sides. """ - @spec strip(content :: char(), left :: non_neg_integer(), right :: non_neg_integer()) :: t() + @spec strip(char(), non_neg_integer(), non_neg_integer()) :: t() defdelegate strip(content, left, right), to: Tokenizers.Native, as: :decoders_strip - @typedoc """ - Options for WordPiece decoder initialization. All options can be ommited. + @doc """ + Creates a WordPiece decoder. - * `prefix` - The prefix to use for subwords, defaults to `##` - * `cleanup` - Whether to cleanup tokenization artifacts, defaults to `true` - """ - @type word_piece_options :: [ - prefix: String.t(), - cleanup: boolean() - ] + ## Options + + * `prefix` - The prefix to use for subwords. Defaults to `##` + + * `cleanup` - Whether to cleanup tokenization artifacts. Defaults + to `true` - @doc """ - Creates new WordPiece decoder """ - @spec word_piece(word_piece_options :: word_piece_options()) :: t() - defdelegate word_piece(options \\ []), + @spec word_piece(keyword()) :: t() + defdelegate word_piece(opts \\ []), to: Tokenizers.Native, as: :decoders_wordpiece end diff --git a/lib/tokenizers/encoding.ex b/lib/tokenizers/encoding.ex index 5a5ca20..e50b42b 100644 --- a/lib/tokenizers/encoding.ex +++ b/lib/tokenizers/encoding.ex @@ -1,33 +1,39 @@ defmodule Tokenizers.Encoding do @moduledoc """ - The struct and associated functions for an encoding, the output of a tokenizer. + Encoding is the result of passing a text through tokenization pipeline. 
- Use these functions to retrieve the inputs needed for a natural language processing machine learning model. + This function defines a struct and a number of functions to retrieve + information about the encoded text. + + For further machine learning processing you most likely want to + access the encoded token ids via `get_ids/1`. If you want to convert + the ids to a tensor, use `get_u32_ids/1` to get a zero-copy binary. """ - @type t :: %__MODULE__{resource: reference()} defstruct resource: nil + @type t :: %__MODULE__{resource: reference()} + @doc """ - Get the number of tokens in an encoding. + Returns the number of tokens in `encoding`. """ @spec get_length(t()) :: non_neg_integer() defdelegate get_length(encoding), to: Tokenizers.Native, as: :encoding_get_length @doc """ - Return the number of sequences combined in this Encoding + Returns the number of sequences combined in `encoding`. """ @spec get_n_sequences(t()) :: non_neg_integer() defdelegate get_n_sequences(encoding), to: Tokenizers.Native, as: :encoding_get_n_sequences @doc """ - Set the given sequence id for the whole range of tokens contained in this Encoding. + Sets the given sequence id for all tokens contained in `encoding`. """ @spec set_sequence_id(t(), non_neg_integer()) :: t() defdelegate set_sequence_id(encoding, id), to: Tokenizers.Native, as: :encoding_set_sequence_id @doc """ - Get the ids from an encoding. + Returns the ids from `encoding`. """ @spec get_ids(t()) :: [integer()] defdelegate get_ids(encoding), to: Tokenizers.Native, as: :encoding_get_ids @@ -39,7 +45,7 @@ defmodule Tokenizers.Encoding do defdelegate get_u32_ids(encoding), to: Tokenizers.Native, as: :encoding_get_u32_ids @doc """ - Get token type ids from an encoding. + Returns token type ids from `encoding`. """ @spec get_type_ids(t()) :: [integer()] defdelegate get_type_ids(encoding), to: Tokenizers.Native, as: :encoding_get_type_ids @@ -51,7 +57,7 @@ defmodule Tokenizers.Encoding do defdelegate get_u32_type_ids(encoding), to: Tokenizers.Native, as: :encoding_get_u32_type_ids @doc """ - Get the attention mask from an encoding. + Returns the attention mask from `encoding`. """ @spec get_attention_mask(t()) :: [integer()] defdelegate get_attention_mask(encoding), @@ -67,7 +73,7 @@ defmodule Tokenizers.Encoding do as: :encoding_get_u32_attention_mask @doc """ - Get the special tokens mask from an encoding. + Returns the special tokens mask from `encoding`. """ @spec get_special_tokens_mask(t()) :: [integer()] defdelegate get_special_tokens_mask(encoding), @@ -83,25 +89,25 @@ defmodule Tokenizers.Encoding do as: :encoding_get_u32_special_tokens_mask @doc """ - Get the tokens from an encoding. + Returns the tokens from `encoding`. """ @spec get_tokens(t()) :: [binary()] defdelegate get_tokens(encoding), to: Tokenizers.Native, as: :encoding_get_tokens @doc """ - Get word ids from an encoding. + Returns word ids from `encoding`. """ @spec get_word_ids(t()) :: [non_neg_integer() | nil] defdelegate get_word_ids(encoding), to: Tokenizers.Native, as: :encoding_get_word_ids @doc """ - Get sequence ids from an encoding. + Returns sequence ids from `encoding`. """ @spec get_sequence_ids(t()) :: [non_neg_integer() | nil] defdelegate get_sequence_ids(encoding), to: Tokenizers.Native, as: :encoding_get_sequence_ids @doc """ - Get offsets from an encoding. + Returns offsets from `encoding`. The offsets are expressed in terms of UTF-8 bytes. 
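+
+  For example (an illustrative sketch), assuming `encoding` was produced
+  by a BERT-style tokenizer from the text `"Hello there!"`, the offsets
+  could look like:
+
+      Tokenizers.Encoding.get_offsets(encoding)
+      #=> [{0, 5}, {6, 11}, {11, 12}]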
""" @@ -109,14 +115,14 @@ defmodule Tokenizers.Encoding do defdelegate get_offsets(encoding), to: Tokenizers.Native, as: :encoding_get_offsets @doc """ - Get the overflow from an encoding. + Returns the overflow from `encoding`. """ @spec get_overflowing(t()) :: [t()] defdelegate get_overflowing(encoding), to: Tokenizers.Native, as: :encoding_get_overflowing @doc """ - Get the encoded tokens corresponding to the word at the given index in the input sequence, - with the form (start_token, end_token + 1) + Returns the encoded tokens corresponding to the word at the given + index in the input sequence, with the form `{start_token, end_token + 1}`. """ @spec word_to_tokens(t(), non_neg_integer(), non_neg_integer()) :: {non_neg_integer(), non_neg_integer()} | nil @@ -125,7 +131,8 @@ defmodule Tokenizers.Encoding do as: :encoding_word_to_tokens @doc """ - Get the offsets of the word at the given index in the input sequence. + Returns the offsets of the word at the given index in the input + sequence. """ @spec word_to_chars(t(), non_neg_integer(), non_neg_integer()) :: {non_neg_integer(), non_neg_integer()} | nil @@ -134,7 +141,7 @@ defmodule Tokenizers.Encoding do as: :encoding_word_to_chars @doc """ - Returns the index of the sequence containing the given token + Returns the index of the sequence containing the given token. """ @spec token_to_sequence(t(), non_neg_integer()) :: non_neg_integer() | nil defdelegate token_to_sequence(encoding, token), @@ -142,21 +149,21 @@ defmodule Tokenizers.Encoding do as: :encoding_token_to_sequence @doc """ - Get the offsets of the token at the given index. + Returns the offsets of the token at the given index. """ @spec token_to_chars(t(), non_neg_integer()) :: {non_neg_integer(), {non_neg_integer(), non_neg_integer()}} | nil defdelegate token_to_chars(encoding, token), to: Tokenizers.Native, as: :encoding_token_to_chars @doc """ - Get the word that contains the token at the given index. + Returns the word that contains the token at the given index. """ @spec token_to_word(t(), non_neg_integer()) :: {non_neg_integer(), non_neg_integer()} | nil defdelegate token_to_word(encoding, token), to: Tokenizers.Native, as: :encoding_token_to_word @doc """ - Get the token that contains the given char. + Returns the token that contains the given char. """ @spec char_to_token(t(), non_neg_integer(), non_neg_integer()) :: non_neg_integer() | nil @@ -165,7 +172,7 @@ defmodule Tokenizers.Encoding do as: :encoding_char_to_token @doc """ - Get the word that contains the given char. + Returns the word that contains the given char. """ @spec char_to_word(t(), non_neg_integer(), non_neg_integer()) :: non_neg_integer() | nil @@ -173,48 +180,52 @@ defmodule Tokenizers.Encoding do to: Tokenizers.Native, as: :encoding_char_to_word - @typedoc """ - Options for padding. All options can be ommited. + @doc """ + Pad the encoding to the given length. + + ## Options - * `direction` (default `:right`) - The padding direction. - * `pad_id` (default `0`) - The id corresponding to the padding token. - * `pad_type_id` (default `0`) - The type ID corresponding to the padding token. - * `pad_token` (default `[PAD]`) - The padding token to use. 
+ * `direction` (default `:right`) - the padding direction - """ - @type padding_opts :: [ - pad_id: non_neg_integer(), - pad_type_id: non_neg_integer(), - pad_token: String.t(), - direction: :left | :right - ] + * `pad_id` (default `0`) - the id corresponding to the padding + token + + * `pad_type_id` (default `0`) - the type ID corresponding to the + padding token + + * `pad_token` (default `[PAD]`) - the padding token to use - @doc """ - Pad the encoding to the given length. """ - @spec pad(t(), non_neg_integer(), padding_opts()) :: t() + @spec pad(t(), non_neg_integer(), opts) :: t() + when opts: [ + pad_id: non_neg_integer(), + pad_type_id: non_neg_integer(), + pad_token: String.t(), + direction: :left | :right + ] defdelegate pad(encoding, target_length, opts \\ []), to: Tokenizers.Native, as: :encoding_pad - @typedoc """ - Options for truncation. All options can be ommited. - - * `stride` (default `0`) - The length of previous content to be included in each overflowing piece. - * `direction` (default `:right`) - The truncation direction. - """ - @type truncation_opts :: [stride: non_neg_integer(), direction: :left | :right] - @doc """ Truncate the encoding to the given length. + + ## Options + + * `stride` (default `0`) - the length of previous content to be + included in each overflowing piece + + * `direction` (default `:right`) - the truncation direction + """ - @spec truncate(t(), non_neg_integer(), truncation_opts()) :: t() + @spec truncate(t(), non_neg_integer(), opts) :: t() + when opts: [stride: non_neg_integer(), direction: :left | :right] defdelegate truncate(encoding, max_length, opts \\ []), to: Tokenizers.Native, as: :encoding_truncate @doc """ - Returns the number of tokens in an `t()`. + Returns the number of tokens in `encoding`. """ @spec n_tokens(encoding :: t()) :: non_neg_integer() defdelegate n_tokens(encoding), to: Tokenizers.Native, as: :encoding_get_length diff --git a/lib/tokenizers/http_client.ex b/lib/tokenizers/http_client.ex index a319384..cdb9f00 100644 --- a/lib/tokenizers/http_client.ex +++ b/lib/tokenizers/http_client.ex @@ -15,7 +15,7 @@ defmodule Tokenizers.HTTPClient do * `:method` - An HTTP method. By default it uses the `:get` method. - * `:base_url` - The base URL to make requests. By default is #{inspect(@base_url)}. + * `:base_url` - The base URL to make requests. By default is #{inspect(@base_url)}. * `:url` - A path to a resource. By default is "". diff --git a/lib/tokenizers/model.ex b/lib/tokenizers/model.ex index c674e56..f310c79 100644 --- a/lib/tokenizers/model.ex +++ b/lib/tokenizers/model.ex @@ -3,34 +3,36 @@ defmodule Tokenizers.Model do The struct and associated functions for the tokenizer model. """ + defstruct [:resource] + @typedoc """ Represents different kind of models that can be used across the library. """ @type t() :: %__MODULE__{resource: reference()} - defstruct [:resource] @doc """ Retrieves information about the model. Information retrieved differs per model but all include `model_type`. """ - @spec info(model :: t()) :: map() + @spec info(t()) :: map() defdelegate info(model), to: Tokenizers.Native, as: :models_info - @typedoc """ - Options to save the model. All options can be ommited. + @doc """ + Saves the given model in the given directory. - * `:prefix` (default `""`) - The prefix to use for all the files that will get created. - """ - @type save_opts() :: [prefix: String.t()] + This function generates a couple files with predefined names, you + can specify `:prefix` to scope them. 
Existing files with the same + names in this directory will be overridden. + + ## Options + + * `:prefix` - the prefix to use for all the files that will get + created. Defaults to `""` - @doc """ - Save the current model in the given folder, using the given name for the various files that will get created. - Any file with the same name that already exist in this folder will be overwritten. """ - @spec save(model :: t(), folder :: String.t(), opts :: save_opts()) :: - {:ok, file_paths :: [String.t()]} | {:error, any()} - defdelegate save(model, folder, opts \\ []), to: Tokenizers.Native, as: :models_save + @spec save(t(), String.t(), keyword()) :: {:ok, file_paths :: [String.t()]} | {:error, any()} + defdelegate save(model, directory, opts \\ []), to: Tokenizers.Native, as: :models_save end defimpl Inspect, for: Tokenizers.Model do diff --git a/lib/tokenizers/model/bpe.ex b/lib/tokenizers/model/bpe.ex index 95fe0ed..761c178 100644 --- a/lib/tokenizers/model/bpe.ex +++ b/lib/tokenizers/model/bpe.ex @@ -1,14 +1,23 @@ defmodule Tokenizers.Model.BPE do @typedoc """ - Options for model initialisation. All options can be ommited. - - * `:cache_capacity` (default `10_000`) - The number of words that the BPE cache can contain. - The cache allows to speed-up the process by keeping - the result of the merge operations for a number of words. - * `:dropout` - The BPE dropout to use. Must be an float between 0 and 1 - * `:unk_token` - The unknown token to be used by the model - * `:continuing_subword_prefix` - The prefix to attach to subword units that don't represent a beginning of word - * `:end_of_word_suffix` - The suffix to attach to subword units that represent an end of word + Options for model initialisation. + + * `:cache_capacity` - the number of words that the BPE cache can + contain. The cache allows to speed-up the process by keeping + the result of the merge operations for a number of words. + Defaults to `10_000` + + * `:dropout` - The BPE dropout to use. Must be an float between + 0 and 1 + + * `:unk_token` - The unknown token to be used by the model + + * `:continuing_subword_prefix` - The prefix to attach to subword + units that don't represent a beginning of word + + * `:end_of_word_suffix` - The suffix to attach to subword units + that represent an end of word + """ @type options() :: [ cache_capacity: number(), @@ -21,30 +30,26 @@ defmodule Tokenizers.Model.BPE do ] @doc """ - Instantiate a BPE model from the given vocab and merges + Instantiate a BPE model from the given vocab and merges. """ @spec init( - vocab :: %{String.t() => integer()}, - merges :: [{String.t(), String.t()}], - options :: options() + %{String.t() => integer()}, + [{String.t(), String.t()}], + options() ) :: {:ok, Tokenizers.Model.t()} defdelegate init(vocab, merges, options \\ []), to: Tokenizers.Native, as: :models_bpe_init @doc """ - Instantiate an empty BPE Model + Instantiate an empty BPE model. """ @spec empty() :: {:ok, Tokenizers.Model.t()} defdelegate empty(), to: Tokenizers.Native, as: :models_bpe_empty @doc """ - Instantiate a BPE model from the given vocab and merges files + Instantiate a BPE model from the given vocab and merges files. 
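+
+  For example (the file paths here are hypothetical):
+
+      {:ok, model} =
+        Tokenizers.Model.BPE.from_file("vocab.json", "merges.txt")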
""" - @spec from_file( - vocab :: String.t(), - merges :: String.t(), - options :: options() - ) :: {:ok, Tokenizers.Model.t()} - defdelegate from_file(vocab, merges, options \\ []), + @spec from_file(String.t(), String.t(), options()) :: {:ok, Tokenizers.Model.t()} + defdelegate from_file(vocab_path, merges_path, options \\ []), to: Tokenizers.Native, as: :models_bpe_from_file end diff --git a/lib/tokenizers/model/unigram.ex b/lib/tokenizers/model/unigram.ex index fc70da0..fe91976 100644 --- a/lib/tokenizers/model/unigram.ex +++ b/lib/tokenizers/model/unigram.ex @@ -1,20 +1,18 @@ defmodule Tokenizers.Model.Unigram do @typedoc """ - Options for model initialisation. All options can be ommited. + Options for model initialisation. + + * `:unk_id`- the unknown token id to be used by the model - * `:unk_id`- The unknown token id to be used by the model. """ @type options() :: [ unk_id: float() ] @doc """ - Instantiate a Unigram model from the given vocab + Instantiate a Unigram model from the given vocab. """ - @spec init( - vocab :: [{String.t(), number()}], - options :: options() - ) :: {:ok, Tokenizers.Model.t()} + @spec init([{String.t(), number()}], options()) :: {:ok, Tokenizers.Model.t()} defdelegate init(vocab, options \\ []), to: Tokenizers.Native, as: :models_unigram_init diff --git a/lib/tokenizers/model/wordlevel.ex b/lib/tokenizers/model/wordlevel.ex index bd58d92..082ce50 100644 --- a/lib/tokenizers/model/wordlevel.ex +++ b/lib/tokenizers/model/wordlevel.ex @@ -1,15 +1,17 @@ defmodule Tokenizers.Model.WordLevel do @typedoc """ - Options for model initialisation. All options can be ommited. + Options for model initialisation. + + * `:unk_token` - the unknown token to be used by the model. Defaults + to "[UNK]" - * `:unk_token` (default `"[UNK]"`) - The unknown token to be used by the model. """ @type options() :: [ unk_token: String.t() ] @doc """ - Instantiate a WordLevel model from the given vocab + Instantiate a WordLevel model from the given vocab. """ @spec init( vocab :: %{String.t() => integer()}, @@ -20,19 +22,16 @@ defmodule Tokenizers.Model.WordLevel do as: :models_wordlevel_init @doc """ - Instantiate an empty WordLevel model + Instantiate an empty WordLevel model. """ @spec empty() :: {:ok, Tokenizers.Model.t()} defdelegate empty(), to: Tokenizers.Native, as: :models_wordlevel_empty @doc """ - Instantiate a WordLevel model from the given vocab file + Instantiate a WordLevel model from the given vocab file. """ - @spec from_file( - vocab :: String.t(), - options :: options() - ) :: {:ok, Tokenizers.Model.t()} - defdelegate from_file(vocab, options \\ []), + @spec from_file(String.t(), options()) :: {:ok, Tokenizers.Model.t()} + defdelegate from_file(vocab_path, options \\ []), to: Tokenizers.Native, as: :models_wordlevel_from_file end diff --git a/lib/tokenizers/model/wordpiece.ex b/lib/tokenizers/model/wordpiece.ex index a678d0c..fd8f21e 100644 --- a/lib/tokenizers/model/wordpiece.ex +++ b/lib/tokenizers/model/wordpiece.ex @@ -1,10 +1,16 @@ defmodule Tokenizers.Model.WordPiece do @typedoc """ - Options for model initialisation. All options can be ommited. + Options for model initialisation. + + * `:unk_token` - the unknown token to be used by the model. + Defaults to `"[UNK]"` + + * `:max_input_chars_per_word` - the maximum number of characters + to authorize in a single word. 
Defaults to `100` + + * `:continuing_subword_prefix` - the prefix to attach to subword + units that don't represent a beginning of word Defaults to `"##"` - * `:unk_token` (default `"[UNK]"`) - The unknown token to be used by the model. - * `:max_input_chars_per_word` (default `100`) - The maximum number of characters to authorize in a single word. - * `:continuing_subword_prefix` (default `"##"`) - The prefix to attach to subword units that don't represent a beginning of word """ @type options() :: [ unk_token: String.t(), @@ -13,30 +19,24 @@ defmodule Tokenizers.Model.WordPiece do ] @doc """ - Instantiate a WordPiece model from the given vocab + Instantiate a WordPiece model from the given vocab. """ - @spec init( - vocab :: %{String.t() => integer()}, - options :: options() - ) :: {:ok, Tokenizers.Model.t()} + @spec init(%{String.t() => integer()}, options()) :: {:ok, Tokenizers.Model.t()} defdelegate init(vocab, options \\ []), to: Tokenizers.Native, as: :models_wordpiece_init @doc """ - Instantiate an empty WordPiece model + Instantiate an empty WordPiece model. """ @spec empty() :: {:ok, Tokenizers.Model.t()} defdelegate empty(), to: Tokenizers.Native, as: :models_wordpiece_empty @doc """ - Instantiate a WordPiece model from the given vocab file + Instantiate a WordPiece model from the given vocab file. """ - @spec from_file( - vocab :: String.t(), - options :: options() - ) :: {:ok, Tokenizers.Model.t()} - defdelegate from_file(vocab, options \\ []), + @spec from_file(String.t(), options()) :: {:ok, Tokenizers.Model.t()} + defdelegate from_file(vocab_path, options \\ []), to: Tokenizers.Native, as: :models_wordpiece_from_file end diff --git a/lib/tokenizers/native.ex b/lib/tokenizers/native.ex index d87ef31..850dc41 100644 --- a/lib/tokenizers/native.ex +++ b/lib/tokenizers/native.ex @@ -1,4 +1,6 @@ defmodule Tokenizers.Native do + @moduledoc false + mix_config = Mix.Project.config() version = mix_config[:version] github_url = mix_config[:package][:links]["GitHub"] diff --git a/lib/tokenizers/normalizer.ex b/lib/tokenizers/normalizer.ex index 1b3691f..ffd603a 100644 --- a/lib/tokenizers/normalizer.ex +++ b/lib/tokenizers/normalizer.ex @@ -1,95 +1,105 @@ defmodule Tokenizers.Normalizer do @moduledoc """ - A Normalizer is in charge of pre-processing the input string - in order to normalize it as relevant for a given use case. + Normalizers and normalization functions. - Some common examples of normalization are the Unicode normalization algorithms - (NFD, NFKD, NFC & NFKC), lowercasing etc... - The specificity of tokenizers is that we keep track of the alignment while normalizing. - This is essential to allow mapping from the generated tokens back to the input text. + A normalizer is in charge of pre-processing the input string in + order to normalize it as relevant for the given use case. - The Normalizer is optional. + Some common examples of normalization are the Unicode normalization + algorithms (NFD, NFKD, NFC & NFKC) or lowercasing. The specificity + of tokenizers is that we keep track of the alignment while + normalizing. This is essential to allow mapping from the generated + tokens back to the input text. """ - @type t() :: %__MODULE__{resource: reference()} defstruct [:resource] + @type t() :: %__MODULE__{resource: reference()} + @doc """ - Normalizes the input presented as string into new string + Normalizes the given text input. 
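+
+  For example, a quick sketch using the lowercase normalizer defined
+  below:
+
+      normalizer = Tokenizers.Normalizer.lowercase()
+      Tokenizers.Normalizer.normalize(normalizer, "HELLO")
+      #=> {:ok, "hello"}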
""" - @spec normalize(normalizer :: t(), input :: String.t()) :: {:ok, String.t()} + @spec normalize(t(), String.t()) :: {:ok, String.t()} defdelegate normalize(normalizer, input), to: Tokenizers.Native, as: :normalizers_normalize - @typedoc """ - Options for BERT normalizer initialisation. All values are optional. - - * `:clean_text` (default `true`) - Whether to clean the text, by removing any control characters and replacing all whitespaces by the classic one. - * `:handle_chinese_chars` (default `true`) - Whether to handle chinese chars by putting spaces around them. - * `:strip_accents` - Whether to strip all accents. If this option is not specified, then it will be determined by the value for lowercase (as in the original Bert). - * `:lowercase` (default `true`) - Whether to lowercase. - """ - @type bert_opts() :: [ - clean_text: boolean(), - handle_chinese_chars: boolean(), - strip_accents: boolean(), - lowercase: boolean() - ] @doc """ - Takes care of normalizing raw text before giving it to a Bert model. This includes cleaning the text, handling accents, chinese chars and lowercasing. + Takes care of normalizing raw text before giving it to a Bert model. + + This includes cleaning the text, handling accents, chinese chars and + lowercasing. + + ## Options + + * `:clean_text` - whether to clean the text, by removing any + control characters and replacing all whitespaces by the classic + one. Defaults to `true` + + * `:handle_chinese_chars` - whether to handle chinese chars by + putting spaces around them. Default `true` + + * `:strip_accents` - whether to strip all accents. If this option + is not specified, then it will be determined by the value for + lowercase (as in the original Bert) + + * `:lowercase` - whether to lowercase. Default `true` + """ - @spec bert_normalizer(opts :: bert_opts()) :: t() + @spec bert_normalizer(keyword()) :: t() defdelegate bert_normalizer(opts \\ []), to: Tokenizers.Native, as: :normalizers_bert_normalizer @doc """ - NFD Unicode Normalizer, + Creates a NFD Unicode normalizer. """ @spec nfd :: t() defdelegate nfd(), to: Tokenizers.Native, as: :normalizers_nfd @doc """ - NFKD Unicode Normalizer + Creates a NFKD Unicode normalizer. """ @spec nfkd :: t() defdelegate nfkd(), to: Tokenizers.Native, as: :normalizers_nfkd @doc """ - NFC Unicode Normalizer + Creates a NFC Unicode normalizer. """ @spec nfc :: t() defdelegate nfc(), to: Tokenizers.Native, as: :normalizers_nfc @doc """ - NFKC Unicode Normalizer + Creates a NFKC Unicode normalizer. """ @spec nfkc :: t() defdelegate nfkc(), to: Tokenizers.Native, as: :normalizers_nfkc - @typedoc """ - Options for Strip normalizer initialisation. All values are optional. - - * `:left` (default `true`) - Whether to strip left side. - * `:right` (default `true`) - Whether to strip right side. - """ - @type strip_opts() :: [ - left: boolean(), - right: boolean() - ] @doc """ - Strip normalizer. Removes all whitespace characters on the specified sides (left, right or both) of the input + Creates a Strip normalizer. + + Removes all whitespace characters on the specified sides (left, + right or both) of the input + + ## Options + + * `:left` - whether to strip left side. Defaults to `true` + + * `:right` - whether to strip right side. Defaults to `true + """ - @spec strip(opts :: strip_opts()) :: t() + @spec strip(keyword()) :: t() defdelegate strip(opts \\ []), to: Tokenizers.Native, as: :normalizers_strip @doc """ - Prepend normalizer. + Creates a Prepend normalizer. 
""" @spec prepend(prepend :: String.t()) :: t() defdelegate prepend(prepend), to: Tokenizers.Native, as: :normalizers_prepend @doc """ - Strip Accent normalizer. Removes all accent symbols in unicode (to be used with NFD for consistency). + Creates a Strip Accent normalizer. + + Removes all accent symbols in unicode (to be used with NFD for + consistency). """ @spec strip_accents :: t() defdelegate strip_accents(), to: Tokenizers.Native, as: :normalizers_strip_accents @@ -97,7 +107,7 @@ defmodule Tokenizers.Normalizer do @doc """ Composes multiple normalizers that will run in the provided order. """ - @spec sequence(normalizers :: [t()]) :: t() + @spec sequence([t()]) :: t() defdelegate sequence(normalizers), to: Tokenizers.Native, as: :normalizers_sequence @doc """ @@ -107,24 +117,25 @@ defmodule Tokenizers.Normalizer do defdelegate lowercase(), to: Tokenizers.Native, as: :normalizers_lowercase @doc """ - Replaces a custom string or regexp and changes it with given content + Replaces a custom string or regexp and changes it with given content. """ - @spec replace(pattern :: String.t(), content :: String.t()) :: - t() + @spec replace(String.t(), String.t()) :: t() defdelegate replace(pattern, content), to: Tokenizers.Native, as: :normalizers_replace @doc """ - Nmt normalizer + Creates a Nmt normalizer. """ @spec nmt :: t() defdelegate nmt(), to: Tokenizers.Native, as: :normalizers_nmt @doc """ - Precompiled normalizer. Don’t use manually it is used for compatiblity for SentencePiece. + Precompiled normalizer. + + Don’t use manually it is used for compatibility with SentencePiece. """ - @spec precompiled(data :: binary()) :: {:ok, t()} | {:error, any()} + @spec precompiled(binary()) :: {:ok, t()} | {:error, any()} defdelegate precompiled(data), to: Tokenizers.Native, as: :normalizers_precompiled end diff --git a/lib/tokenizers/post_processor.ex b/lib/tokenizers/post_processor.ex index df8280d..435018f 100644 --- a/lib/tokenizers/post_processor.ex +++ b/lib/tokenizers/post_processor.ex @@ -1,82 +1,58 @@ defmodule Tokenizers.PostProcessor do @moduledoc """ - After the whole pipeline, we sometimes want to insert some special tokens - before feed a tokenized string into a model like ”[CLS] My horse is amazing [SEP]”. - The PostProcessor is the component doing just that. + Post-processors. + + After the whole pipeline, we sometimes want to insert some special + tokens before we feed the encoded text into a model like + ”[CLS] My horse is amazing [SEP]”, we can do that with a post-processor. """ - @type t() :: %__MODULE__{resource: reference()} defstruct [:resource] - @doc """ - Instantiate a new BertProcessing with the given tokens + @type t() :: %__MODULE__{resource: reference()} - Params are tuple with the string representation of the token, and its id + @doc """ + Creates a Bert post-processor with the given tokens. """ - @spec bert( - sep :: {String.t(), integer()}, - cls :: {String.t(), integer()} - ) :: t() + @spec bert({String.t(), integer()}, {String.t(), integer()}) :: t() defdelegate bert(sep, cls), to: Tokenizers.Native, as: :post_processors_bert - @typedoc """ - Options for Roberta post-processor. All values are optional. + @doc """ + Creates a Roberta post-processor. - * `:trim_offest` (default `true`) - Whether to trim the whitespaces in the produced offsets - * `:add_prefix_space` (default `true`) - Whether add_prefix_space was ON during the pre-tokenization. 
- """ - @type roberta_opts() :: [ - trim_offsets: boolean(), - add_prefix_space: boolean() - ] + ## Options - @doc """ - Creates Roberta post-processor. - """ - @spec roberta( - sep :: {String.t(), integer()}, - cls :: {String.t(), integer()}, - opts :: roberta_opts() - ) :: t() - defdelegate roberta(sep, cls, opts \\ []), to: Tokenizers.Native, as: :post_processors_roberta + * `:trim_offest` - whether to trim the whitespaces in the produced + offsets. Defaults to `true` - @typedoc """ - Options for ByteLevel post-processor. All values are optional. + * `:add_prefix_space` - whether add_prefix_space was ON during the + pre-tokenization. Defaults to `true` - * `:trim_offsets` (default `true`) - Whether to trim the whitespaces in the produced offsets """ - @type byte_level_opts() :: [ - trim_offsets: boolean() - ] + @spec roberta({String.t(), integer()}, {String.t(), integer()}, keyword()) :: t() + defdelegate roberta(sep, cls, opts \\ []), to: Tokenizers.Native, as: :post_processors_roberta @doc """ - Creates ByteLevel post-processor. - """ - @spec byte_level(opts :: byte_level_opts()) :: t() - defdelegate byte_level(opts \\ []), to: Tokenizers.Native, as: :post_processors_byte_level + Creates a ByteLevel post-processor. - @typedoc """ - Options for Template post-processor. + ## Options + + * `:trim_offsets` - whether to trim the whitespaces in the produced + offsets. Defaults to `true` - * `:single` - A string describing the template for a single sequence. - * `:pair` - A string describing the template for a pair of sequences. - * `:special_tokens` - A list of special tokens to use in the template. """ - @type template_opts() :: [ - single: String.t(), - pair: String.t(), - special_tokens: [{String.t(), integer()}] - ] + @spec byte_level(keyword()) :: t() + defdelegate byte_level(opts \\ []), to: Tokenizers.Native, as: :post_processors_byte_level @doc """ - Creates Template post-processor. + Creates a Template post-processor. - Let’s you easily template the post processing, adding special tokens, - and specifying the type_id for each sequence/special token. - The template is given two strings representing the single sequence and the pair of sequences, - as well as a set of special tokens to use. + Let’s you easily template the post processing, adding special tokens + and specifying the type id for each sequence/special token. The + template is given two strings representing the single sequence and + the pair of sequences, as well as a set of special tokens to use. - Example, when specifying a template with these values: + For example, when specifying a template with these values: * single: `"[CLS] $A [SEP]"` * pair: `"[CLS] $A [SEP] $B [SEP]"` @@ -86,8 +62,20 @@ defmodule Tokenizers.PostProcessor do > Input: `("I like this", "but not this")` > Output: `"[CLS] I like this [SEP] but not this [SEP]"` + + ## Options + + * `:single` - a string describing the template for a single + sequence + + * `:pair` - a string describing the template for a pair of + sequences + + * `:special_tokens` - a list of special tokens to use in the + template. 
Must be a list of `{token, token_id}` tuples + """ - @spec template(opts :: template_opts()) :: t() + @spec template(keyword()) :: t() defdelegate template(opts \\ []), to: Tokenizers.Native, as: :post_processors_template @doc """ diff --git a/lib/tokenizers/pre_tokenizer.ex b/lib/tokenizers/pre_tokenizer.ex index 3b931d6..b9f4540 100644 --- a/lib/tokenizers/pre_tokenizer.ex +++ b/lib/tokenizers/pre_tokenizer.ex @@ -1,55 +1,63 @@ defmodule Tokenizers.PreTokenizer do @moduledoc """ - The `PreTokenizer` takes care of splitting the input according to a set of rules. - This pre-processing lets you ensure that the underlying `Model` - does not build tokens across multiple “splits”. - For example if you don’t want to have whitespaces inside a token, - then you can have a `PreTokenizer` that splits on these whitespaces. - - You can easily combine multiple `PreTokenizer` together using a `Sequence` (see below). - The `PreTokenizer` is also allowed to modify the string, just like a `Normalizer` does. - This is necessary to allow some complicated algorithms - that require to split before normalizing (e.g. the ByteLevel) + Pre-tokenizers. + + A pre-tokenizer takes care of splitting the input according to a set + of rules. This pre-processing lets you ensure that the underlying + model does not build tokens across multiple “splits”. For example + if you don’t want to have whitespaces inside a token, then you can + have a pre-tokenizer that splits on these whitespaces. + + You can easily combine multiple pre-tokenizers together using + `sequence/1`. + + A pre-tokenizer is also allowed to modify the string, just like a + normalizer does. This is necessary to allow some complicated + algorithms that require to split before normalizing (e.g. ByteLevel). """ - @type t() :: %__MODULE__{resource: reference()} defstruct [:resource] + @type t() :: %__MODULE__{resource: reference()} + @doc """ Converts a string into a sequence of pre-tokens. """ - @spec pre_tokenize(pre_tokenizer :: t(), sequence :: String.t()) :: - {:ok, [{String.t(), {integer(), integer()}}]} - defdelegate pre_tokenize(normalizer, input), + @spec pre_tokenize(t(), String.t()) :: {:ok, [{String.t(), {integer(), integer()}}]} + defdelegate pre_tokenize(pre_tokenizer, input), to: Tokenizers.Native, as: :pre_tokenizers_pre_tokenize - @typedoc """ - Options for ByteLevel pre-tokenizer. All values are optional. + @doc """ + Creates a ByteLevel pre-tokenizer. - * `:add_prefix_space` (default `true`) - Whether to add a space to the first word if there isn’t already one. This lets us treat hello exactly like say hello. - * `:use_regex` (default `true`) - Set this to False to prevent this pre_tokenizer from using the GPT2 specific regexp for spliting on whitespace. - """ - @type byte_level_opts() :: [ - add_prefix_space: boolean(), - use_regex: boolean() - ] + Splits on whitespaces while remapping all the bytes to a set of + visible characters. This technique has been introduced by OpenAI + with GPT-2 and has some more or less nice properties: + + * Since it maps on bytes, a tokenizer using this only requires + 256 characters as initial alphabet (the number of values a byte + can have), as opposed to the 130,000+ Unicode characters. + + * A consequence of the previous point is that it is absolutely + unnecessary to have an unknown token using this since we can + represent anything with 256 tokens (Youhou!! 🎉🎉) + + * For non ascii characters, it gets completely unreadable, but it + works nonetheless! 
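+
+  A rough sketch of the output shape (the exact result depends on the
+  options; `Ġ` is the visible remapping of the space byte):
+
+      pre_tokenizer = Tokenizers.PreTokenizer.byte_level()
+      Tokenizers.PreTokenizer.pre_tokenize(pre_tokenizer, "Hello world")
+      #=> {:ok, [{"ĠHello", {0, 5}}, {"Ġworld", {5, 11}}]}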
+ + ## Options + + * `:add_prefix_space` - whether to add a space to the first word + if there isn’t already one. This lets us treat hello exactly + like say hello. Defaults to `true` + + * `:use_regex` - set this to `false` to prevent this pre-tokenizer + from using the GPT2 specific regexp for splitting on whitespace. + Defaults to `true` - @doc """ - Creates ByteLevel PreTokenizer. - - Splits on whitespaces while remapping all the bytes to a set of visible characters. - This technique as been introduced by OpenAI with GPT-2 and has some more or less nice properties: - - * Since it maps on bytes, a tokenizer using this only requires 256 characters - as initial alphabet (the number of values a byte can have), - as opposed to the 130,000+ Unicode characters. - * A consequence of the previous point is that it is absolutely unnecessary - to have an unknown token using this since we can represent anything - with 256 tokens (Youhou!! 🎉🎉) - * For non ascii characters, it gets completely unreadable, but it works nonetheless! """ - @spec byte_level(opts :: byte_level_opts()) :: t() + @spec byte_level(keyword()) :: t() defdelegate byte_level(opts \\ []), to: Tokenizers.Native, as: :pre_tokenizers_byte_level @doc """ @@ -61,67 +69,60 @@ defmodule Tokenizers.PreTokenizer do as: :pre_tokenizers_byte_level_alphabet @doc """ - Creates Whitespace pre-tokenizer. + Creates a Whitespace pre-tokenizer. - Splits on word boundaries (using the following regular expression: `\w+|[^\w\s]+` + Splits on word boundaries. Uses the following regular expression: + `\w+|[^\w\s]+`. """ @spec whitespace() :: t() defdelegate whitespace(), to: Tokenizers.Native, as: :pre_tokenizers_whitespace @doc """ - Creates WhitespaceSplit pre-tokenizer. + Creates a WhitespaceSplit pre-tokenizer. - Splits on any whitespace character + Splits on any whitespace character. """ @spec whitespace_split() :: t() defdelegate whitespace_split(), to: Tokenizers.Native, as: :pre_tokenizers_whitespace_split @doc """ - Creates BertPreTokenizer pre-tokenizer. + Creates a BertPreTokenizer pre-tokenizer. Splits for use in Bert models. """ @spec bert_pre_tokenizer() :: t() defdelegate bert_pre_tokenizer(), to: Tokenizers.Native, as: :pre_tokenizers_bert - @typedoc """ - Options for Metaspace pre-tokenizer. All values are optional. - - * `:replacement` (default `"▁"`) - The replacement character to use. - * `:add_prefix_space` (default `true`) - Whether to add a space to the first word if there isn’t already one. This lets us treat hello exactly like say hello. - """ - @type metaspace_opts() :: [ - replacement: char(), - add_prefix_space: boolean() - ] - @doc """ Creates Metaspace pre-tokenizer. - Splits on whitespaces and replaces them with a special char “▁” (U+2581) + Splits on whitespaces and replaces them with a special char “▁” + (U+2581). + + ## Options + + * `:replacement` - the replacement character to use. Defaults to `"▁"` + + * `:add_prefix_space` - whether to add a space to the first word + if there isn’t already one. This lets us treat hello exactly + like say hello. Defaults to `true` + """ - @spec metaspace(opts :: metaspace_opts()) :: t() + @spec metaspace(keyword()) :: t() defdelegate metaspace(opts \\ []), to: Tokenizers.Native, as: :pre_tokenizers_metaspace @doc """ - Creates CharDelimiterSplit pre-tokenizer. + Creates a CharDelimiterSplit pre-tokenizer. - This pre-tokenizer simply splits on the provided delimiter. 
Works almost like the `.split(delimiter)`
-  function, except that it accounts for multiple consecutive spaces
+  This pre-tokenizer simply splits on the provided delimiter. Works
+  almost like a simple split function, except that it accounts for
+  multiple consecutive spaces.
   """
-
-  @spec char_delimiter_split(delimiter :: char()) :: t()
+  @spec char_delimiter_split(char()) :: t()
   defdelegate char_delimiter_split(delimiter),
     to: Tokenizers.Native,
     as: :pre_tokenizers_char_delimiter_split

-  @typedoc """
-  Options for Split pre-tokenizer. All values are optional.
-
-  * `:invert` (default `false`) - Whether to invert the split or not.
-  """
-  @type spit_opts() :: [invert: boolean()]
-
   @typedoc """
   Specifies how delimiter should behave for several pretokenizers.
   """
@@ -133,58 +134,50 @@ defmodule Tokenizers.PreTokenizer do
           | :contiguous

   @doc """
-  Creates Split pre-tokenizer.
+  Creates a Split pre-tokenizer.
+
+  Versatile pre-tokenizer that splits on the provided pattern according
+  to the provided behavior. The pattern can be inverted if necessary.

-  Versatile pre-tokenizer that splits on provided pattern and according to provided behavior.
-  The pattern can be inverted if necessary.
+  ## Options

-  * pattern should be either a custom string or regexp.
-  * behavior should be one of:
+    * `:invert` - whether to invert the split or not. Defaults to `false`

-    * `:removed`
-    * `:isolated`
-    * `:merged_with_previous`
-    * `:merged_with_next`
-    * `:contiguous`
   """
-  @spec split(
-          pattern :: String.t(),
-          behavior :: split_delimiter_behaviour(),
-          opts :: spit_opts()
-        ) :: t()
+  @spec split(String.t(), split_delimiter_behaviour(), keyword()) :: t()
   defdelegate split(pattern, behavior, opts \\ []),
     to: Tokenizers.Native,
     as: :pre_tokenizers_split

   @doc """
-  Creates Punctuation pre-tokenizer.
+  Creates a Punctuation pre-tokenizer.

   Will isolate all punctuation characters.
   """
-  @spec punctuation(behavor :: split_delimiter_behaviour()) :: t()
-  defdelegate punctuation(behavor), to: Tokenizers.Native, as: :pre_tokenizers_punctuation
+  @spec punctuation(split_delimiter_behaviour()) :: t()
+  defdelegate punctuation(behaviour), to: Tokenizers.Native, as: :pre_tokenizers_punctuation

   @doc """
-  Creates Sequence pre-tokenizer.
+  Creates a Sequence pre-tokenizer.

-  Lets you compose multiple `PreTokenizer` that will be run in the given order
+  Lets you compose multiple pre-tokenizers that will be run in the
+  given order.
   """
-  @spec sequence(pre_tokenizers :: [t()]) :: t()
+  @spec sequence([t()]) :: t()
   defdelegate sequence(pre_tokenizers), to: Tokenizers.Native, as: :pre_tokenizers_sequence

-  @typedoc """
-  Options for Digits pre-tokenizer. All values are optional.
-
-  * `:individual_digits` (default `false`) - Whether to split individual digits or not.
-  """
-  @type digits_opts() :: [individual_digits: boolean()]
-
   @doc """
-  Creates Digits pre-tokenizer.
+  Creates a Digits pre-tokenizer.

   Splits the numbers from any other characters.
+
+  ## Options
+
+    * `:individual_digits` - whether to split individual digits or not.
+      Defaults to `false`
+
   """
-  @spec digits(opts :: digits_opts()) :: t()
+  @spec digits(keyword()) :: t()
   defdelegate digits(opts \\ []),
     to: Tokenizers.Native,
     as: :pre_tokenizers_digits
diff --git a/lib/tokenizers/shared.ex b/lib/tokenizers/shared.ex
index 1b5f2c2..2d54553 100644
--- a/lib/tokenizers/shared.ex
+++ b/lib/tokenizers/shared.ex
@@ -1,8 +1,6 @@
 defmodule Tokenizers.Shared do
   @moduledoc false

-  # __Private__ shared internal functions.
-
   def unwrap({:ok, value}), do: value
   def unwrap({:error, reason}), do: raise(reason)
 end
diff --git a/lib/tokenizers/tokenizer.ex b/lib/tokenizers/tokenizer.ex
index f6d215c..e0756cb 100644
--- a/lib/tokenizers/tokenizer.ex
+++ b/lib/tokenizers/tokenizer.ex
@@ -1,21 +1,22 @@
 defmodule Tokenizers.Tokenizer do
   @moduledoc """
-  The struct and associated functions for a tokenizer.
+  Functions to load, apply and train tokenizers.

-  A `Tokenizers.t()` is a container that holds the constituent parts of the tokenization pipeline.
+  The `t:Tokenizers.Tokenizer.t/0` struct represents the tokenization
+  pipeline. When you call `Tokenizers.Tokenizer.encode/3`, the input
+  text goes through the following steps:

-  When you call `Tokenizers.Tokenizer.encode/3`, the input text goes through the following pipeline:
+    * normalization
+    * pre-tokenization
+    * model
+    * post-processing

-  - normalization
-  - pre-tokenization
-  - model
-  - post-processing
-
-  This returns a `Tokenizers.Encoding.t()`, which can then give you the token ids for each token in the input text.
-  These token ids are usually used as the input for natural language processing machine learning models.
+  This pipeline returns a `t:Tokenizers.Encoding.t/0`, which can then
+  give you the token ids representing the input text. These token ids
+  are usually used as the input for natural language processing (NLP)
+  machine learning models.
   """
-  @type t :: %__MODULE__{resource: reference()}
   defstruct [:resource]

   alias Tokenizers.Model
@@ -25,6 +26,8 @@ defmodule Tokenizers.Tokenizer do
   alias Tokenizers.Normalizer
   alias Tokenizers.Decoder

+  @type t :: %__MODULE__{resource: reference()}
+
   @typedoc """
   An input being a subject to tokenization.

@@ -33,40 +36,40 @@ defmodule Tokenizers.Tokenizer do
   @type encode_input :: String.t() | {String.t(), String.t()}

   @doc """
-  Instantiate a new tokenizer from an existing models.
-  """
-  @spec init(model :: Model.t()) :: {:ok, t()} | {:error, any()}
-  defdelegate init(model), to: Tokenizers.Native, as: :tokenizer_init
-
-  @doc """
-  Instantiate a new tokenizer from an existing file on the Hugging Face Hub.
+  Loads a new tokenizer from a repository on Hugging Face Hub.

-  This is going to download a tokenizer file, save it to disk and load that file.
+  This is going to download a tokenizer file, save it to disk and load
+  that file.

   ## Options

-    * `:http_client` - A tuple with a module and options. This module should implement
-      the `request/1` function, accepting a keyword list with the options for a request.
-      This is inspired by `Req.request/1`: https://hexdocs.pm/req/Req.html#request/1
+    * `:http_client` - a tuple with a module and options. This module
+      should implement the `request/1` function, accepting a keyword
+      list with the options for a request. This is inspired by
+      `Req.request/1`: https://hexdocs.pm/req/Req.html#request/1

       The default HTTP client config is: `{Tokenizers.HTTPClient, []}`.
-      Since it's inspired by `Req`, it's possible to use that client without any adjustments.
+      Since it's inspired by `Req`, it's possible to use that client
+      without any adjustments.

-      When making request, the options `:url` and `:method` are going to be overridden.
-      `:headers` contains the "user-agent" set by default.
+      When making a request, the options `:url` and `:method` are going
+      to be overridden. `:headers` contains the "user-agent" set by
+      default.

-    * `:revision` - The revision name that should be used for fetching the tokenizers
-      from Hugging Face.
+    * `:revision` - the revision name that should be used for fetching
+      the tokenizer from the Hugging Face repository

-    * `:use_cache` - Tells if it should read from cache when the file already exists.
-      Defaults to `true`.
+    * `:use_cache` - whether to read from cache when the file already
+      exists. Defaults to `true`

-    * `:cache_dir` - The directory where cache is saved. Files are written to cache
-      even if `:use_cache` is false. By default it uses `:filename.basedir/3` to get
-      a cache dir based in the "tokenizers_elixir" application name.
+    * `:cache_dir` - the directory where cache is saved. Files are
+      written to cache even if `:use_cache` is `false`. By default
+      it uses `:filename.basedir/3` to get a cache dir based on the
+      "tokenizers_elixir" application name

   """
   @spec from_pretrained(String.t(), Keyword.t()) :: {:ok, t()} | {:error, term()}
+  @doc type: :loading
   def from_pretrained(identifier, opts \\ []) do
     opts =
       Keyword.validate!(
@@ -173,288 +176,327 @@ defmodule Tokenizers.Tokenizer do
   @doc """
   Instantiate a new tokenizer from the file at the given path.
   """
+  @doc type: :loading
   @spec from_file(path :: String.t(), keyword()) :: {:ok, t()} | {:error, term()}
-  def from_file(path, options \\ []) do
-    if Keyword.has_key?(options, :additional_special_tokens) do
+  def from_file(path, opts \\ []) do
+    if Keyword.has_key?(opts, :additional_special_tokens) do
       IO.warn(
         "passing :additional_special_tokens as an option is deprecated. Use add_special_tokens/2 instead"
       )
     end

-    Tokenizers.Native.tokenizer_from_file(path, options)
+    Tokenizers.Native.tokenizer_from_file(path, opts)
   end

   @doc """
   Instantiate a new tokenizer from the buffer.
   """
+  @doc type: :loading
   @spec from_buffer(data :: String.t(), keyword()) :: {:ok, t()} | {:error, term()}
-  def from_buffer(data, options \\ []) do
-    if Keyword.has_key?(options, :additional_special_tokens) do
+  def from_buffer(data, opts \\ []) do
+    if Keyword.has_key?(opts, :additional_special_tokens) do
       IO.warn(
         "passing :additional_special_tokens as an option is deprecated. Use add_special_tokens/2 instead"
       )
     end

-    Tokenizers.Native.tokenizer_from_buffer(data, options)
+    Tokenizers.Native.tokenizer_from_buffer(data, opts)
   end

   @doc """
-  Save the tokenizer to the provided path. Options:
+  Save the tokenizer to the provided path.
+
+  ## Options
+
+    * `:pretty` - whether to pretty print the JSON file. Defaults to `true`

-  * `:pretty` - Whether to pretty print the JSON file. Defaults to `true`.
   """
-  @spec save(t(), pretty: boolean()) :: {:ok, String.t()} | {:error, term()}
-  defdelegate save(tokenizer, path, options \\ []), to: Tokenizers.Native, as: :tokenizer_save
+  @doc type: :loading
+  @spec save(t(), String.t(), keyword()) :: {:ok, String.t()} | {:error, term()}
+  defdelegate save(tokenizer, path, opts \\ []), to: Tokenizers.Native, as: :tokenizer_save

-  ##############################################################################
-  # Setup
-  ##############################################################################
+  @doc """
+  Instantiate a new tokenizer from an existing model.
+  """
+  @doc type: :configuration
+  @spec init(Model.t()) :: {:ok, t()} | {:error, any()}
+  defdelegate init(model), to: Tokenizers.Native, as: :tokenizer_init

   @doc """
-  Get the `Tokenizer`'s `Model`.
+  Returns the model currently used by `tokenizer`.
   """
+  @doc type: :configuration
   @spec get_model(t()) :: Model.t()
   defdelegate get_model(tokenizer), to: Tokenizers.Native, as: :tokenizer_get_model

   @doc """
-  Set the `Tokenizer`'s `Model`.
+  Sets `tokenizer`'s model.
   """
+  @doc type: :configuration
   @spec set_model(t(), Model.t()) :: t()
   defdelegate set_model(tokenizer, model), to: Tokenizers.Native, as: :tokenizer_set_model

   @doc """
-  Get the `Tokenizer`'s `Normalizer`.
+  Returns the normalizer currently used by `tokenizer`.
   """
+  @doc type: :configuration
   @spec get_normalizer(t()) :: Normalizer.t() | nil
   defdelegate get_normalizer(tokenizer), to: Tokenizers.Native, as: :tokenizer_get_normalizer

   @doc """
-  Set the `Tokenizer`'s `Normalizer`.
+  Sets `tokenizer`'s normalizer.
   """
+  @doc type: :configuration
   @spec set_normalizer(t(), Normalizer.t()) :: t()
   defdelegate set_normalizer(tokenizer, normalizer),
     to: Tokenizers.Native,
     as: :tokenizer_set_normalizer

   @doc """
-  Get the `Tokenizer`'s `PreTokenizer`.
+  Returns the pre-tokenizer currently used by `tokenizer`.
   """
-  alias Tokenizers.PreTokenizer
+  @doc type: :configuration
   @spec get_pre_tokenizer(t()) :: PreTokenizer.t() | nil
   defdelegate get_pre_tokenizer(tokenizer),
     to: Tokenizers.Native,
     as: :tokenizer_get_pre_tokenizer

   @doc """
-  Set the `Tokenizer`'s `PreTokenizer`.
+  Sets `tokenizer`'s pre-tokenizer.
   """
+  @doc type: :configuration
   @spec set_pre_tokenizer(t(), PreTokenizer.t()) :: t()
   defdelegate set_pre_tokenizer(tokenizer, pre_tokenizer),
     to: Tokenizers.Native,
     as: :tokenizer_set_pre_tokenizer

   @doc """
-  Get the `Tokenizer`'s `PostProcessor`.
+  Returns the post-processor currently used by `tokenizer`.
   """
+  @doc type: :configuration
   @spec get_post_processor(t()) :: PostProcessor.t() | nil
   defdelegate get_post_processor(tokenizer),
     to: Tokenizers.Native,
     as: :tokenizer_get_post_processor

   @doc """
-  Set the `Tokenizer`'s `PostProcessor`.
+  Sets `tokenizer`'s post-processor.
   """
+  @doc type: :configuration
   @spec set_post_processor(t(), PostProcessor.t()) :: t()
   defdelegate set_post_processor(tokenizer, post_processor),
     to: Tokenizers.Native,
     as: :tokenizer_set_post_processor

   @doc """
-  Get the `Tokenizer`'s `Decoder`.
+  Returns the decoder currently used by `tokenizer`.
   """
+  @doc type: :configuration
   @spec get_decoder(t()) :: Decoder.t() | nil
   defdelegate get_decoder(tokenizer), to: Tokenizers.Native, as: :tokenizer_get_decoder

   @doc """
-  Set the `Tokenizer`'s `Decoder`.
+  Sets `tokenizer`'s decoder.
   """
+  @doc type: :configuration
   @spec set_decoder(t(), Decoder.t()) :: t()
   defdelegate set_decoder(tokenizer, decoder), to: Tokenizers.Native, as: :tokenizer_set_decoder

   @doc """
   Get the tokenizer's vocabulary as a map of token to id.
+
+  ## Options
+
+    * `:with_added_tokens` - whether to include the tokens explicitly
+      added to the tokenizer. Defaults to `true`
+
   """
-  @spec get_vocab(tokenizer :: t(), with_additional_tokens :: boolean()) :: %{
-          String.t() => integer()
-        }
-  defdelegate get_vocab(tokenizer, with_additional_tokens \\ true),
-    to: Tokenizers.Native,
-    as: :tokenizer_get_vocab
+  @spec get_vocab(t(), keyword()) :: %{String.t() => integer()}
+  @doc type: :configuration
+  def get_vocab(tokenizer, opts \\ []) do
+    opts = Keyword.validate!(opts, with_added_tokens: true)
+    Tokenizers.Native.tokenizer_get_vocab(tokenizer, opts[:with_added_tokens])
+  end

   @doc """
   Get the number of tokens in the vocabulary.
+
+  ## Options
+
+    * `:with_added_tokens` - whether to include the tokens explicitly
+      added to the tokenizer. Defaults to `true`
+
   """
-  @spec get_vocab_size(tokenizer :: t(), with_additional_tokens :: boolean()) ::
-          non_neg_integer()
-  defdelegate get_vocab_size(tokenizer, with_additional_tokens \\ true),
-    to: Tokenizers.Native,
-    as: :tokenizer_get_vocab_size
+  @spec get_vocab_size(t(), keyword()) :: non_neg_integer()
+  @doc type: :configuration
+  def get_vocab_size(tokenizer, opts \\ []) do
+    opts = Keyword.validate!(opts, with_added_tokens: true)
+    Tokenizers.Native.tokenizer_get_vocab_size(tokenizer, opts[:with_added_tokens])
+  end

   @doc """
-  Adds tokens to the vocabulary.
-  These tokens **are not special**. To add special tokens
-  use `add_special_tokens/2`.
+  Adds tokens to `tokenizer`'s vocabulary.
+
+  These tokens **are not special**. To add special tokens use
+  `add_special_tokens/2`.
   """
+  @doc type: :configuration
   @spec add_tokens(tokenizer :: t(), tokens :: [String.t()]) :: non_neg_integer()
   defdelegate add_tokens(tokenizer, tokens), to: Tokenizers.Native, as: :tokenizer_add_tokens

   @doc """
-  Adds special tokens to the vocabulary.
-  These tokens **are special**. To add regular tokens
-  use `add_tokens/2`.
+  Adds special tokens to `tokenizer`'s vocabulary.
+
+  These tokens **are special**. To add regular tokens use `add_tokens/2`.
   """
+  @doc type: :configuration
   @spec add_special_tokens(tokenizer :: t(), tokens :: [String.t()]) :: non_neg_integer()
   defdelegate add_special_tokens(tokenizer, tokens),
     to: Tokenizers.Native,
     as: :tokenizer_add_special_tokens

-  @typedoc """
-  Truncation options. All options can be ommited.
-
-  * `:max_length` (default: `512`) - the maximum length to truncate the model's input to.
-  * `:stride` (default: `0`) - the stride to use when overflowing the model's input.
-  * `:strategy` (default: `:longest_first) - the strategy to use when overflowing the model's input.
-  * `:direction` (default: `:right`) - the direction to use when overflowing the model's input.
-  """
-  @type truncation_options() :: [
-          max_length: non_neg_integer(),
-          stride: non_neg_integer(),
-          strategy: :longest_first | :only_first | :only_second,
-          direction: :left | :right
-        ]
-
   @doc """
-  Set truncation for the tokenizer.
+  Configures `tokenizer` with truncation.
+
+  To disable truncation use `disable_truncation/1`.
+
+  ## Options
+
+    * `:max_length` (default: `512`) - the maximum length to truncate
+      the model's input to
+
+    * `:stride` (default: `0`) - the stride to use when overflowing
+      the model's input
+
+    * `:strategy` (default: `:longest_first`) - the strategy to use
+      when overflowing the model's input
+
+    * `:direction` (default: `:right`) - the direction to use when
+      overflowing the model's input
+
   """
-  @spec set_truncation(
-          tokenizer :: t(),
-          opts :: truncation_options()
-        ) :: t()
+  @doc type: :configuration
+  @spec set_truncation(t(), opts) :: t()
+        when opts: [
+               max_length: non_neg_integer(),
+               stride: non_neg_integer(),
+               strategy: :longest_first | :only_first | :only_second,
+               direction: :left | :right
+             ]
   defdelegate set_truncation(tokenizer, opts \\ []),
     to: Tokenizers.Native,
     as: :tokenizer_set_truncation

   @doc """
-  Disable truncation for the tokenizer.
+  Disables truncation on `tokenizer`.
   """
-  @spec disable_truncation(tokenizer :: t()) :: t()
+  @doc type: :configuration
+  @spec disable_truncation(t()) :: t()
   defdelegate disable_truncation(tokenizer),
     to: Tokenizers.Native,
     as: :tokenizer_disable_truncation

-  @typedoc """
-  Padding options. All options can be ommited.
-
-  * `:strategy` (default: `:batch_longest`) - the strategy to use when padding.
-  * `:direction` (default: `:right`) - the direction to use when padding.
-  * `:pad_to_multiple_of` (default: `0`) - the multiple to pad to.
-  * `:pad_id` (default: `0`) - the id of the token to use for padding.
-  * `:pad_type_id` (default: `0`) - the id of the token type to use for padding.
-  * `:pad_token` (default: `""`) - the token to use for padding.
-  """
-  @type padding_options() :: [
-          strategy: :batch_longest | {:fixed, non_neg_integer()},
-          direction: :left | :right,
-          pad_to_multiple_of: non_neg_integer(),
-          pad_id: non_neg_integer(),
-          pad_type_id: non_neg_integer(),
-          pad_token: String.t()
-        ]
-
   @doc """
-  Set padding for the tokenizer.
+  Configures `tokenizer` with padding.
+
+  To disable padding use `disable_padding/1`.
+
+  ## Options
+
+    * `:strategy` (default: `:batch_longest`) - the strategy to use
+      when padding
+
+    * `:direction` (default: `:right`) - the direction to use when
+      padding
+
+    * `:pad_to_multiple_of` (default: `0`) - the multiple to pad to
+
+    * `:pad_id` (default: `0`) - the id of the token to use for padding
+
+    * `:pad_type_id` (default: `0`) - the id of the token type to use
+      for padding
+
+    * `:pad_token` (default: `"[PAD]"`) - the token to use for padding
+
   """
-  @spec set_padding(tokenizer :: t(), opts :: padding_options()) :: t()
+  @doc type: :configuration
+  @spec set_padding(tokenizer :: t(), opts) :: t()
+        when opts: [
+               strategy: :batch_longest | {:fixed, non_neg_integer()},
+               direction: :left | :right,
+               pad_to_multiple_of: non_neg_integer(),
+               pad_id: non_neg_integer(),
+               pad_type_id: non_neg_integer(),
+               pad_token: String.t()
+             ]
   defdelegate set_padding(tokenizer, opts), to: Tokenizers.Native, as: :tokenizer_set_padding

   @doc """
-  Disable padding for the tokenizer.
+  Disables padding on `tokenizer`.
   """
+  @doc type: :configuration
   @spec disable_padding(tokenizer :: t()) :: t()
   defdelegate disable_padding(tokenizer), to: Tokenizers.Native, as: :tokenizer_disable_padding

-  ##############################################################################
-  # Infering
-  ##############################################################################
-
   @doc """
   Encode the given sequence to a `Tokenizers.Encoding.t()`.

-  Options:
-  * `:add_special_tokens` (default: `true`) - whether to add special tokens to the sequence.
+  ## Options
+
+    * `:add_special_tokens` - whether to add special tokens to the
+      sequence. Defaults to `true`
+
   """
-  @spec encode(
-          tokenizer :: t(),
-          input :: encode_input(),
-          options :: [add_special_tokens: boolean()]
-        ) ::
-          {:ok, Encoding.t()} | {:error, term()}
-  defdelegate encode(tokenizer, input, options \\ []),
+  @doc type: :inference
+  @spec encode(t(), encode_input(), keyword()) :: {:ok, Encoding.t()} | {:error, term()}
+  defdelegate encode(tokenizer, input, opts \\ []),
     to: Tokenizers.Native,
     as: :tokenizer_encode

   @doc """
-  Encode the given batch of sequences to a `Tokenizers.Encoding.t()`.
-
-  For options check `encode/3`.
+  Batched version of `encode/3`.
   """
-  @spec encode_batch(
-          tokenizer :: t(),
-          input :: [encode_input()],
-          options :: [add_special_tokens: boolean()]
-        ) ::
-          {:ok, [Encoding.t()]} | {:error, term()}
-  defdelegate encode_batch(tokenizer, input, options \\ []),
+  @doc type: :inference
+  @spec encode_batch(t(), [encode_input()], keyword()) :: {:ok, [Encoding.t()]} | {:error, term()}
+  defdelegate encode_batch(tokenizer, input, opts \\ []),
     to: Tokenizers.Native,
     as: :tokenizer_encode_batch

   @doc """
   Decodes the given list of ids back to a string.

-  Options:
+  ## Options
+
+    * `:skip_special_tokens` - whether to exclude special tokens from
+      the decoded string. Defaults to `true`

-  * `:skip_special_tokens` (default: `true`) - whether to remove special tokens from the decoded string.
   """
-  @spec decode(
-          tokenizer :: t(),
-          ids :: [non_neg_integer()],
-          options :: [skip_special_tokens: boolean()]
-        ) ::
-          {:ok, String.t()} | {:error, term()}
-  defdelegate decode(tokenizer, ids, options \\ []),
+  @doc type: :inference
+  @spec decode(t(), [non_neg_integer()], keyword()) :: {:ok, String.t()} | {:error, term()}
  defdelegate decode(tokenizer, ids, opts \\ []),
     to: Tokenizers.Native,
     as: :tokenizer_decode

   @doc """
-  Decode the given list of ids or list of lists of ids back to strings.
+  Batched version of `decode/3`.
   """
-  @spec decode_batch(
-          tokenizer :: t(),
-          sentences :: [[non_neg_integer()]],
-          options :: [skip_special_tokens: boolean()]
-        ) ::
+  @doc type: :inference
+  @spec decode_batch(t(), [[non_neg_integer()]], keyword()) ::
           {:ok, [String.t()]} | {:error, term()}
-  defdelegate decode_batch(tokenizer, sentences, options \\ []),
+  defdelegate decode_batch(tokenizer, sentences, opts \\ []),
     to: Tokenizers.Native,
     as: :tokenizer_decode_batch

   @doc """
   Convert a given id to its token.
   """
+  @doc type: :inference
   @spec id_to_token(t(), integer()) :: String.t() | nil
   defdelegate id_to_token(tokenizer, id),
     to: Tokenizers.Native,
@@ -463,27 +505,27 @@ defmodule Tokenizers.Tokenizer do
   @doc """
   Convert a given token to its id.
   """
+  @doc type: :inference
   @spec token_to_id(t(), String.t()) :: non_neg_integer() | nil
   defdelegate token_to_id(tokenizer, token),
     to: Tokenizers.Native,
     as: :tokenizer_token_to_id

-  ##############################################################################
-  # Training
-  ##############################################################################
-
   @doc """
   Train the tokenizer on the given files.
+
+  ## Options
+
+    * `:trainer` - the trainer to use. Defaults to the default trainer
+      corresponding to the `tokenizer`'s model
+
   """
-  @spec train_from_files(
-          tokenizer :: t(),
-          files :: [String.t()],
-          trainer :: Tokenizers.Trainer.t() | nil
-        ) ::
-          {:ok, t()} | {:error, term()}
-  defdelegate train_from_files(tokenizer, files, trainer \\ nil),
-    to: Tokenizers.Native,
-    as: :tokenizer_train_from_files
+  @doc type: :training
+  @spec train_from_files(t(), [String.t()], keyword()) :: {:ok, t()} | {:error, term()}
+  def train_from_files(tokenizer, paths, opts \\ []) do
+    opts = Keyword.validate!(opts, trainer: nil)
+    Tokenizers.Native.tokenizer_train_from_files(tokenizer, paths, opts[:trainer])
+  end
 end

 defimpl Inspect, for: Tokenizers.Tokenizer do
diff --git a/mix.exs b/mix.exs
index aa70d50..fb2099a 100644
--- a/mix.exs
+++ b/mix.exs
@@ -41,7 +41,38 @@ defmodule Tokenizers.MixProject do
       main: "Tokenizers",
       source_ref: "v#{@version}",
       source_url: @source_url,
-      extras: ["LICENSE", "notebooks/quicktour.livemd", "notebooks/pretrained.livemd"]
+      extras: ["notebooks/pretrained.livemd", "notebooks/training.livemd", "LICENSE"],
+      groups_for_modules: [
+        Tokenization: [
+          Tokenizers.Tokenizer,
+          Tokenizers.Encoding,
+          Tokenizers.Decoder
+        ],
+        Pipeline: [
+          Tokenizers.Normalizer,
+          Tokenizers.PreTokenizer,
+          Tokenizers.PostProcessor
+        ],
+        Training: [
+          Tokenizers.Model,
+          Tokenizers.Model.BPE,
+          Tokenizers.Model.Unigram,
+          Tokenizers.Model.WordLevel,
+          Tokenizers.Model.WordPiece,
+          Tokenizers.Trainer,
+          Tokenizers.AddedToken
+        ],
+        Other: [
+          Tokenizers.HTTPClient
+        ]
+      ],
+      groups_for_functions: [
+        # Tokenizers.Tokenizer
+        Loading: &(&1[:type] == :loading),
+        Inference: &(&1[:type] == :inference),
+        Configuration: &(&1[:type] == :configuration),
+        Training: &(&1[:type] == :training)
+      ]
     ]
   end
diff --git a/native/ex_tokenizers/src/tokenizer.rs b/native/ex_tokenizers/src/tokenizer.rs
index c0b133a..f214919 100644
--- a/native/ex_tokenizers/src/tokenizer.rs
+++ b/native/ex_tokenizers/src/tokenizer.rs
@@ -591,7 +591,7 @@ pub fn tokenizer_train_from_files(
     new_tokenizer.with_model(new_model);
     match trainer {
         Some(trainer) => {
-            // TODO: call clone on trainer wrapper once available,
+            // TODO: call clone on trainer wrapper once available (tokenizers > 0.13.3)
             // see https://github.com/huggingface/tokenizers/pull/1317
             let trainer = match trainer.resource.0.read().unwrap().deref() {
                 TrainerWrapper::BpeTrainer(trainer) => {
diff --git a/notebooks/pretrained.livemd b/notebooks/pretrained.livemd
index a050a0a..f275edd 100644
--- a/notebooks/pretrained.livemd
+++ b/notebooks/pretrained.livemd
@@ -1,4 +1,4 @@
-# Pretrained Tokenizers
+# Pretrained tokenizers

 ```elixir
 Mix.install([
diff --git a/notebooks/quicktour.livemd b/notebooks/training.livemd
similarity index 98%
rename from notebooks/quicktour.livemd
rename to notebooks/training.livemd
index dc5059d..7cd7208 100644
--- a/notebooks/quicktour.livemd
+++ b/notebooks/training.livemd
@@ -1,4 +1,4 @@
-# Quicktour
+# Training a custom tokenizer

 ```elixir
 Mix.install([
@@ -88,7 +88,7 @@ Now, we can just call the `Tokenizer.train_from_files/3` function with the list
   "wikitext-103-raw/wiki.valid.raw"
 ]
 |> Enum.map(&Path.join(__DIR__, &1))
-|> then(&Tokenizer.train_from_files(tokenizer, &1, trainer))
+|> then(&Tokenizer.train_from_files(tokenizer, &1, trainer: trainer))
```

 This should only take a few seconds to train our tokenizer on the full wikitext dataset! To save the tokenizer in one file that contains all its configuration and vocabulary, just use the `Tokenizer.save/2` function:
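A minimal sketch of that saving step (the file name below is only an example, and `Tokenizer` refers to the alias already used in this notebook); the saved file can later be loaded back with `Tokenizer.from_file/2`:

```elixir
# Save the trained tokenizer to a single JSON file (example path)
{:ok, _} = Tokenizer.save(tokenizer, "tokenizer-wiki.json")

# Load the tokenizer back from the saved file
{:ok, tokenizer} = Tokenizer.from_file("tokenizer-wiki.json")
```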