From 8ea49e9403b141bd10fd8faa387749ea210b1d4a Mon Sep 17 00:00:00 2001 From: Grant Smith <57376089+ggsmith842@users.noreply.github.com> Date: Wed, 26 Jun 2024 15:14:42 -0600 Subject: [PATCH 1/7] Cookbook Extract Links from HTML --- .../cookbook/extract-links-from-html/00-Re.ml | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 data/cookbook/extract-links-from-html/00-Re.ml diff --git a/data/cookbook/extract-links-from-html/00-Re.ml b/data/cookbook/extract-links-from-html/00-Re.ml new file mode 100644 index 0000000000..75f2b4534c --- /dev/null +++ b/data/cookbook/extract-links-from-html/00-Re.ml @@ -0,0 +1,68 @@ +--- +packages: + - name: "re" + tested_version: "1.11.0" + used_libraries: + - re +--- + +(* + +Given an HTML document or string, we can use the `re` library to create a regular expression that finds the href attributes containing web links. +For example, in the sample HTML document below, we would expect to find three links. + +Sample HTML: + + + +
+ +