diff --git a/articles/tech-dm-class.html b/articles/tech-dm-class.html index bbec10891..b488c2714 100644 --- a/articles/tech-dm-class.html +++ b/articles/tech-dm-class.html @@ -214,7 +214,7 @@
src
in
flights_dm <- dm_from_con(sqlite_con)
flights_dm
#> ── Table source ───────────────────────────────────────────────────────────
-#> src: sqlite 3.44.2 [/tmp/RtmpqSPcvW/nycflights13.sqlite]
+#> src: sqlite 3.44.2 [/tmp/RtmpFjvJH1/nycflights13.sqlite]
#> ── Metadata ───────────────────────────────────────────────────────────────
#> Tables: `airlines`, `airports`, `flights`, `planes`, `sqlite_stat1`, … (7 total)
#> Columns: 62
@@ -263,7 +263,7 @@ Access tables
flights_dm[["airports"]]
#> # Source: table<airports> [?? x 8]
-#> # Database: sqlite 3.44.2 [/tmp/RtmpqSPcvW/nycflights13.sqlite]
+#> # Database: sqlite 3.44.2 [/tmp/RtmpFjvJH1/nycflights13.sqlite]
#> faa name lat lon alt tz dst tzone
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
#> 1 04G Lansdowne Airport 41.1 -80.6 1044 -5 A Amer…
@@ -301,7 +301,7 @@ Primary keys of dm objects
flights_dm_with_key <- dm_add_pk(flights_dm, airports, faa)
flights_dm_with_key
#> ── Table source ───────────────────────────────────────────────────────────
-#> src: sqlite 3.44.2 [/tmp/RtmpqSPcvW/nycflights13.sqlite]
+#> src: sqlite 3.44.2 [/tmp/RtmpFjvJH1/nycflights13.sqlite]
#> ── Metadata ───────────────────────────────────────────────────────────────
#> Tables: `airlines`, `airports`, `flights`, `planes`, `sqlite_stat1`, … (7 total)
#> Columns: 62
@@ -383,7 +383,7 @@ Foreign keys
flights_dm_with_key %>% dm_add_fk(flights, origin, airports)
#> ── Table source ───────────────────────────────────────────────────────────
-#> src: sqlite 3.44.2 [/tmp/RtmpqSPcvW/nycflights13.sqlite]
+#> src: sqlite 3.44.2 [/tmp/RtmpFjvJH1/nycflights13.sqlite]
#> ── Metadata ───────────────────────────────────────────────────────────────
#> Tables: `airlines`, `airports`, `flights`, `planes`, `sqlite_stat1`, … (7 total)
#> Columns: 62
diff --git a/news/index.html b/news/index.html
index aecb7fe7f..b64d89980 100644
--- a/news/index.html
+++ b/news/index.html
@@ -78,22 +78,8 @@
dm 1.0.10
Chore
-- Establish compatibility with igraph >= 2.0.0 (#2187).
-- Unblock every three hours.
-- Fix withr 3.0.0 compatibility (#2184).
-- Use dev odbc to fix SQL Server problem (#2178).
-- Configure Aviator to avoid setting the blocked label.
-- Snapshot updates for rcc-smoke (#2182).
-- Reexport
tibble::glimpse()
instead of pillar::glimpse()
to avoid pillar dependency with roxygen2 7.3.0 (#2179).
-
-
-
-Uncategorized
-- Merge branch ‘cran-1.0.9’.
+Establish compatibility with igraph >= 2.0.0 (#2187) and withr 3.0.0 (#2184).
+Reexport tibble::glimpse()
instead of pillar::glimpse()
to avoid pillar dependency with roxygen2 7.3.0 (#2179).
diff --git a/pkgdown.yml b/pkgdown.yml
index 12cfbad26..d7c77cb69 100644
--- a/pkgdown.yml
+++ b/pkgdown.yml
@@ -22,7 +22,7 @@ articles:
tech-dm-low-level: tech-dm-low-level.html
tech-dm-naming: tech-dm-naming.html
tech-dm-zoom: tech-dm-zoom.html
-last_built: 2024-01-21T10:55Z
+last_built: 2024-01-21T11:43Z
urls:
reference: https://dm.cynkra.com/reference
article: https://dm.cynkra.com/articles
diff --git a/search.json b/search.json
index 8841c64b8..40b0a4092 100644
--- a/search.json
+++ b/search.json
@@ -1 +1 @@
-[{"path":"https://dm.cynkra.com/CODE_OF_CONDUCT.html","id":null,"dir":"","previous_headings":"","what":"Contributor Code of Conduct","title":"Contributor Code of Conduct","text":"contributors maintainers project, pledge respect people contribute reporting issues, posting feature requests, updating documentation, submitting pull requests patches, activities. committed making participation project harassment-free experience everyone, regardless level experience, gender, gender identity expression, sexual orientation, disability, personal appearance, body size, race, ethnicity, age, religion. Examples unacceptable behavior participants include use sexual language imagery, derogatory comments personal attacks, trolling, public private harassment, insults, unprofessional conduct. Project maintainers right responsibility remove, edit, reject comments, commits, code, wiki edits, issues, contributions aligned Code Conduct. Project maintainers follow Code Conduct may removed project team. Instances abusive, harassing, otherwise unacceptable behavior may reported opening issue contacting one project maintainers. Code Conduct adapted Contributor Covenant (https://www.contributor-covenant.org), version 1.0.0, available https://contributor-covenant.org/version/1/0/0/.","code":""},{"path":"https://dm.cynkra.com/CONTRIBUTING.html","id":null,"dir":"","previous_headings":"","what":"Contributing to dm","title":"Contributing to dm","text":"outlines propose change dm set local development environment test databases.","code":""},{"path":"https://dm.cynkra.com/CONTRIBUTING.html","id":"fixing-typos","dir":"","previous_headings":"","what":"Fixing typos","title":"Contributing to dm","text":"can fix typos, spelling mistakes, grammatical errors documentation directly using GitHub web interface, long changes made source file. generally means ’ll need edit roxygen2 comments .R, .Rd file. 
can find .R file generates .Rd reading comment first line.","code":""},{"path":"https://dm.cynkra.com/CONTRIBUTING.html","id":"bigger-changes","dir":"","previous_headings":"","what":"Bigger changes","title":"Contributing to dm","text":"want make bigger change, ’s good idea first file issue make sure someone team agrees ’s needed. ’ve found bug, please file issue illustrates bug minimal reprex (also help write unit test, needed).","code":""},{"path":"https://dm.cynkra.com/CONTRIBUTING.html","id":"pull-request-process","dir":"","previous_headings":"Bigger changes","what":"Pull request process","title":"Contributing to dm","text":"Fork package clone onto computer. haven’t done , recommend using usethis::create_from_github(\"cynkra/dm\", fork = TRUE). Install development dependencies devtools::install_dev_deps(), make sure package passes R CMD check running devtools::check(). R CMD check doesn’t pass cleanly, ’s good idea ask help continuing. Create Git branch pull request (PR). recommend using usethis::pr_init(\"brief-description--change\"). Make changes, commit git, create PR running usethis::pr_push(), following prompts browser. title PR briefly describe change. body PR contain Fixes #issue-number. user-facing changes, add bullet top NEWS.md (.e. just first header). Follow style described https://style.tidyverse.org/news.html.","code":""},{"path":[]},{"path":"https://dm.cynkra.com/CONTRIBUTING.html","id":"general-remarks","dir":"","previous_headings":"Code style & design","what":"General remarks","title":"Contributing to dm","text":"New code follow tidyverse style guide. can use styler package apply styles, please don’t restyle code nothing PR. use roxygen2, Markdown syntax, documentation. use specific branch downlit knitting README.","code":""},{"path":"https://dm.cynkra.com/CONTRIBUTING.html","id":"testing","dir":"","previous_headings":"Code style & design","what":"Testing","title":"Contributing to dm","text":"use {testthat}. exported functions tested. 
R scripts “tests/testthat/”” whose name start “helper” loaded devtools::load_all() available tests interactive debugging, can’t found :::. “helper-src.R” implements complex mechanism dm creating functions, dm_for_filter(), create remote dm different database management system depending context. allows tests use run different setups github actions. Additionally my_db_test_src() return relevant database. order test databases locally (typically debug CI tests fails can’t debug online log) can set environ variable “DM_TEST_SRC” “postgres”, “mariadb”, “mssql”, “duckdb” “sqlite”. might setup credentials “helper-config-db.R” . useful expectations can found “helper-expectations.R”. “helper-skip.R” helpers skip tests contexts, might useful instance feature supported databases. using expect_snapshot() DBMS dependent call (.e call uses dm_for_filter() copies my_db_test_src()), variant argument set my_test_src_name (global variable created loading helpers) snapshots end different directories.","code":""},{"path":"https://dm.cynkra.com/CONTRIBUTING.html","id":"function-naming","dir":"","previous_headings":"Code style & design","what":"Function naming","title":"Contributing to dm","text":"See vignette function naming logic.","code":""},{"path":"https://dm.cynkra.com/CONTRIBUTING.html","id":"error-messages","dir":"","previous_headings":"Code style & design","what":"Error messages","title":"Contributing to dm","text":"strive standardise error messages {dm}. failure triggered abort() function defined “error-helpers.R” error class defined dm_error_full() error message created separate function. Please follow pattern used \"error-helpers.R\". Exceptions might exist mostly waiting harmonised.","code":""},{"path":"https://dm.cynkra.com/CONTRIBUTING.html","id":"test-databases","dir":"","previous_headings":"","what":"Test databases","title":"Contributing to dm","text":"repository comes docker-compose.yml file sets databases required testing. 
shell commands expected run top-level directory clone repository.","code":""},{"path":"https://dm.cynkra.com/CONTRIBUTING.html","id":"macos-only-install-colima-to-run-docker-containers","dir":"","previous_headings":"Test databases","what":"macOS only: Install colima to run Docker containers","title":"Contributing to dm","text":"Take note address output, used later. See also https://docs.google.com/document/d/1axInaYK6oK6riRio72uTAeQazuork1X0clY9UL9gYoE/edit?usp=sharing details colima.","code":"brew install colima docker-compose colima start -c 4 -m 4 --vm-type vz --vz-rosetta --network-address colima status # INFO[0000] colima is running using macOS Virtualization.Framework # INFO[0000] arch: aarch64 # INFO[0000] runtime: docker # INFO[0000] mountType: virtiofs # INFO[0000] address: 192.168.64.2 # INFO[0000] socket: unix:///Users/kirill/.colima/default/docker.sock"},{"path":"https://dm.cynkra.com/CONTRIBUTING.html","id":"mssql-odbc-drivers","dir":"","previous_headings":"Test databases","what":"mssql: ODBC drivers","title":"Contributing to dm","text":"Linux: https://learn.microsoft.com/en-us/sql/connect/odbc/linux-mac/installing--microsoft-odbc-driver--sql-server?view=sql-server-ver16&tabs=ubuntu18-install%2Calpine17-install%2Cdebian8-install%2Credhat7-13-install%2Crhel7-offline macOS: https://learn.microsoft.com/en-us/sql/connect/odbc/linux-mac/install-microsoft-odbc-driver-sql-server-macos?view=sql-server-ver16","code":""},{"path":"https://dm.cynkra.com/CONTRIBUTING.html","id":"start-new-database-containers","dir":"","previous_headings":"Test databases","what":"Start new database containers","title":"Contributing to dm","text":"","code":"make db-start # May take several minutes to pull the images"},{"path":"https://dm.cynkra.com/CONTRIBUTING.html","id":"start-database-containers-without-forcing-recreation","dir":"","previous_headings":"Test databases","what":"Start database containers without forcing recreation","title":"Contributing to dm","text":"","code":"make 
db-restart # May take several minutes to pull the images"},{"path":"https://dm.cynkra.com/CONTRIBUTING.html","id":"connectivity-test","dir":"","previous_headings":"Test databases","what":"Connectivity test","title":"Contributing to dm","text":"macOS: Linux: Controlled environment variables: DM_TEST_DOCKER_HOST: 127.0.0.1 localhost Linux, see output colima status macOS (Linux, using localhost instead 127.0.0.1 may cause problems MariaDB.) See also Makefile. recommended adapt .Renviron connectivity established. subsequent instructions omit setting environment variables explicitly.","code":"DM_TEST_DOCKER_HOST=192.168.64.2 make connect DM_TEST_DOCKER_HOST=127.0.0.1 make connect"},{"path":"https://dm.cynkra.com/CONTRIBUTING.html","id":"test-against-a-specific-database-backend","dir":"","previous_headings":"Test databases","what":"Test against a specific database backend","title":"Contributing to dm","text":"","code":"make test-postgres"},{"path":"https://dm.cynkra.com/CONTRIBUTING.html","id":"test-against-all-backends","dir":"","previous_headings":"Test databases","what":"Test against all backends","title":"Contributing to dm","text":"","code":"make -j1 test"},{"path":"https://dm.cynkra.com/CONTRIBUTING.html","id":"test-on-docker","dir":"","previous_headings":"Test databases","what":"Test on Docker","title":"Contributing to dm","text":"","code":"# make docker-build # not necessary, image available on ghcr.io make docker-test"},{"path":"https://dm.cynkra.com/CONTRIBUTING.html","id":"code-of-conduct","dir":"","previous_headings":"","what":"Code of Conduct","title":"Contributing to dm","text":"Please note dm project released Contributor Code Conduct. contributing project agree abide terms.","code":""},{"path":"https://dm.cynkra.com/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2022 cynkra GmbH. 
Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"https://dm.cynkra.com/TODO.html","id":null,"dir":"","previous_headings":"","what":"FIXME NEXT","title":"FIXME NEXT","text":"un-skip tests","code":""},{"path":"https://dm.cynkra.com/TODO.html","id":"later","dir":"","previous_headings":"","what":"Later","title":"FIXME NEXT","text":"Store keys objects dm, much easier debugging Add “strict mode” GitHub Actions: validation dm_from_def() Named PK unique constraints: https://github.com/r-dbi/DBI/pull/351#issuecomment-833438890 dm_paste(): remove select argument documentation, via _impl() function takes dots argument, like dm_rm_pk() Use copy_to(temporary = FALSE) sqlite duckdb: use file can discarded Requires schema support databases","code":""},{"path":[]},{"path":"https://dm.cynkra.com/articles/cheatsheet.html","id":"from-database-dm_from_con","dir":"Articles","previous_headings":"Create dm…","what":"from database: dm_from_con()","title":"Relational data with dm: Cheat sheet","text":"","code":"con <- DBI::dbConnect(...) 
dm_from_con(con)"},{"path":"https://dm.cynkra.com/articles/cheatsheet.html","id":"from-data-frames-dmdf1-df2----","dir":"Articles","previous_headings":"Create dm…","what":"from data frames: dm(df1, df2, ...)","title":"Relational data with dm: Cheat sheet","text":"","code":"dm(df1, df2, df3)"},{"path":"https://dm.cynkra.com/articles/cheatsheet.html","id":"from-dm-dmdm1-df1---","dir":"Articles","previous_headings":"Create dm…","what":"from dm: dm(dm1, df1...)","title":"Relational data with dm: Cheat sheet","text":"","code":"dm(dm1, df1) dm(dm1, dm2)"},{"path":"https://dm.cynkra.com/articles/cheatsheet.html","id":"add-keys-dm_add_pk-dm_add_fk","dir":"Articles","previous_headings":"Create dm…","what":"Add keys: dm_add_pk(), dm_add_fk()","title":"Relational data with dm: Cheat sheet","text":"Automatic MariaDB, SQL Server, Postgres, others.","code":""},{"path":"https://dm.cynkra.com/articles/cheatsheet.html","id":"primary-keys","dir":"Articles","previous_headings":"Create dm… > Add keys: dm_add_pk(), dm_add_fk()","what":"Primary keys","title":"Relational data with dm: Cheat sheet","text":"Identify potential primary keys: dm_enum_pk_candidates(): columns, candidate, . Add primary keys: Identify potential foreign keys: dm_enum_fk_candidates(): columns, candidate, . Add foreign keys:","code":"dm1 |> dm_add_pk(table, columns) dm1 |> dm_add_fk(table, column)"},{"path":"https://dm.cynkra.com/articles/cheatsheet.html","id":"dm-objects-relational-data-models","dir":"Articles","previous_headings":"","what":"dm objects: relational data models","title":"Relational data with dm: Cheat sheet","text":"dm package provides grammar relational data models. helps maintain referential integrity. 
dm behaves like list tables (data frames lazy tables) capturing relationships tables.","code":""},{"path":[]},{"path":"https://dm.cynkra.com/articles/cheatsheet.html","id":"select-tables-dm_select_tbldm1----","dir":"Articles","previous_headings":"Resize dm","what":"Select tables: dm_select_tbl(dm1, ...)","title":"Relational data with dm: Cheat sheet","text":"","code":"dm1 |> dm_select_tbl(-df3)"},{"path":[]},{"path":"https://dm.cynkra.com/articles/cheatsheet.html","id":"select-columns-dm_selectdm1-table----","dir":"Articles","previous_headings":"Resize dm","what":"Select columns: dm_select(dm1, table, ...)","title":"Relational data with dm: Cheat sheet","text":"Automatic update dm meta-information table relations.","code":"dm1 |> dm_select(df3, -c3, -c4)"},{"path":[]},{"path":"https://dm.cynkra.com/articles/cheatsheet.html","id":"filter-rows-dm_filterdm1-table-pred","dir":"Articles","previous_headings":"Resize dm","what":"Filter rows: dm_filter(dm1, table = (pred))","title":"Relational data with dm: Cheat sheet","text":"Filter rows table condition defined, also directly/indirectly connected tables.","code":"dm1 |> dm_filter(df3 = (x == \"val\"))"},{"path":[]},{"path":"https://dm.cynkra.com/articles/cheatsheet.html","id":"control-diagram-level-of-detail-display","dir":"Articles","previous_headings":"Visualize dm: dm_draw()","what":"Control diagram level of detail: display…","title":"Relational data with dm: Cheat sheet","text":"keys (default): dm_draw(view_type = \"keys_only\"). variables: dm_draw(view_type = \"\"). 
table names: dm_draw(view_type = \"title_only\").","code":"dm |> dm_draw( view_type = \"title_only\", rankdir = \"TB\" )"},{"path":"https://dm.cynkra.com/articles/cheatsheet.html","id":"control-diagram-scope","dir":"Articles","previous_headings":"Visualize dm: dm_draw()","what":"Control diagram scope","title":"Relational data with dm: Cheat sheet","text":"visualize fewer tables first use dm_select_tbl().","code":""},{"path":"https://dm.cynkra.com/articles/cheatsheet.html","id":"control-diagram-colors-dm_set_colors","dir":"Articles","previous_headings":"Visualize dm: dm_draw()","what":"Control diagram colors: dm_set_colors()","title":"Relational data with dm: Cheat sheet","text":"","code":"dm |> dm_set_colors( pink = flights, orange = starts_with(\"air\") ) |> dm_draw()"},{"path":[]},{"path":"https://dm.cynkra.com/articles/cheatsheet.html","id":"dm_examine_constraints","dir":"Articles","previous_headings":"Data checks","what":"dm_examine_constraints()","title":"Relational data with dm: Cheat sheet","text":"tibble information key constraints met violated.","code":""},{"path":"https://dm.cynkra.com/articles/cheatsheet.html","id":"dm_examine_cardinalities","dir":"Articles","previous_headings":"Data checks","what":"dm_examine_cardinalities()","title":"Relational data with dm: Cheat sheet","text":"tibble information cardinality foreign keys constraints.","code":""},{"path":"https://dm.cynkra.com/articles/cheatsheet.html","id":"check_keydf-col1","dir":"Articles","previous_headings":"Data checks","what":"check_key(df, col1)","title":"Relational data with dm: Cheat sheet","text":"returns error unique key.","code":""},{"path":"https://dm.cynkra.com/articles/cheatsheet.html","id":"check_subsetdf1-df2","dir":"Articles","previous_headings":"Data checks","what":"check_subset(df1, df2)","title":"Relational data with dm: Cheat sheet","text":"returns error df1 subset 
df2.","code":""},{"path":"https://dm.cynkra.com/articles/cheatsheet.html","id":"check_set_equalitydf1-df2","dir":"Articles","previous_headings":"Data checks","what":"check_set_equality(df1, df2)","title":"Relational data with dm: Cheat sheet","text":"returns error df1 df2 sets.","code":""},{"path":"https://dm.cynkra.com/articles/cheatsheet.html","id":"fix-column-names-dm_disambiguate_cols","dir":"Articles","previous_headings":"Data checks","what":"Fix column names: dm_disambiguate_cols()","title":"Relational data with dm: Cheat sheet","text":"dm_disambiguate_cols(dm1) ensures columns dm unique names.","code":""},{"path":[]},{"path":"https://dm.cynkra.com/articles/cheatsheet.html","id":"wide-tibble-cascade-joins-with-dm_flatten_to_tbl","dir":"Articles","previous_headings":"Transform dm into tibble","what":"Wide tibble: Cascade joins with dm_flatten_to_tbl()","title":"Relational data with dm: Cheat sheet","text":"direct neighbours: dm_flatten_to_tbl() neighbours: dm_flatten_to_tbl(.recursive = TRUE)","code":"dm1 |> dm_flatten_to_tbl( .start = df1 ) dm1 |> dm_flatten_to_tbl( .start = df1, .recursive = TRUE )"},{"path":"https://dm.cynkra.com/articles/cheatsheet.html","id":"single-tibble-dm-dm_wrap_tbl","dir":"Articles","previous_headings":"Transform dm into tibble","what":"Single tibble dm: dm_wrap_tbl()","title":"Relational data with dm: Cheat sheet","text":"Parent tables packed — dm_pack_tbl(). 
Child tables nested — dm_nest_tbl().","code":"dm1 |> dm_wrap_tbl( root = green_df )"},{"path":"https://dm.cynkra.com/articles/cheatsheet.html","id":"retrieve-one-table-of-the-dm-pull_tbl","dir":"Articles","previous_headings":"Transform dm into tibble","what":"Retrieve one table of the dm: pull_tbl()","title":"Relational data with dm: Cheat sheet","text":"dm zoomed, retrieve zoomed table automatically.","code":"dm1 |> pull_tbl( dm1, green_df, keyed = TRUE ) dm1 |> dm_zoom_to(green_df) |> pull_tbl()"},{"path":[]},{"path":"https://dm.cynkra.com/articles/cheatsheet.html","id":"method-1-deconstruct-and-reconstruct","dir":"Articles","previous_headings":"Mutate, create, analyze tables","what":"Method 1: deconstruct and reconstruct","title":"Relational data with dm: Cheat sheet","text":"dm_get_tables(keyed = TRUE): keep information primary foreign keys). tidyverse pipeline table interest. Optional: update dm object:","code":"dm_tbl <- dm1 |> dm_get_tables(keyed = TRUE) new_table1 <- dm_tbl$table1 |> mutate(...) dm1 |> dm_select_tbl(-table1) |> dm(table1 = new_table1)"},{"path":"https://dm.cynkra.com/articles/cheatsheet.html","id":"method-2-zoom","dir":"Articles","previous_headings":"Mutate, create, analyze tables","what":"Method 2: zoom","title":"Relational data with dm: Cheat sheet","text":"dm_zoom_to(): Zoom table. tidyverse pipeline (mutate(), etc.). dm_update_zoomed() (replace) / dm_insert_zoomed()","code":"zoomed_dm1 <- dm1 |> dm_zoom_to(green_df) zoomed_dm2 <- zoomed_dm1 |> mutate(var = thing) dm3 <- zoomed_dm2 |> dm_update_zoomed()"},{"path":[]},{"path":"https://dm.cynkra.com/articles/cheatsheet.html","id":"export-dm-object-to-database-copy_dm_to","dir":"Articles","previous_headings":"Modify database source of a dm","what":"Export dm object to database: copy_dm_to()","title":"Relational data with dm: Cheat sheet","text":"Need database connection — DBI::dbConnect().","code":"con <- DBI::dbConnect(...) 
# Persistent tables: persistent_dm <- copy_dm_to( con, dm1, temporary = FALSE ) DBI::dbDisconnect(con)"},{"path":"https://dm.cynkra.com/articles/cheatsheet.html","id":"insert-update-or-remove-rows-in-a-dm","dir":"Articles","previous_headings":"","what":"Insert, update or remove rows in a dm","title":"Relational data with dm: Cheat sheet","text":"Methods: dm_rows_insert(dm1, dm2): adds new rows dm_rows_update(dm1, dm2): changes values rows dm_rows_patch(dm1, dm2): fills missing values dm_rows_upsert(dm1, dm2): adds new changes rows dm_rows_delete(dm1, dm2): deletes rows dm immutable, except functions mutable backend (database) in_place = TRUE.","code":"dm1 |> dm_rows_insert(dm2, in_place = FALSE) dm1 |> dm_rows_insert(dm2, in_place = TRUE)"},{"path":"https://dm.cynkra.com/articles/dm.html","id":"creating-a-dm-object","dir":"Articles","previous_headings":"","what":"Creating a dm object","title":"Getting started with dm","text":"dm objects can created individual tables loaded directly relational data model RDBMS (relational database management system). demonstration, ’re going work model hosted public server. first thing need connection RDBMS hosting data. create dm object RDBMS using dm_from_con(), passing connection object just created first argument. dm object interrogates RDBMS table column information, primary foreign keys. Currently, primary foreign keys available SQL Server, Postgres MariaDB.","code":"library(RMariaDB) fin_db <- dbConnect( MariaDB(), username = \"guest\", password = \"relational\", dbname = \"Financial_ijs\", host = \"relational.fit.cvut.cz\" ) #> Error in `dm:::financial_db_con()`: #> ! 
Can't connect to relational.fit.cvut.cz or databases.pacha.dev: #> Failed to connect: Can't connect to MySQL server on 'relational.fit.cvut.cz:3306' (101) #> Failed to connect: Can't connect to MySQL server on 'databases.pacha.dev:3306' (110) library(dm) fin_dm <- dm_from_con(fin_db) #> Error in eval(expr, envir, enclos): object 'fin_db' not found fin_dm #> Error in eval(expr, envir, enclos): object 'fin_dm' not found"},{"path":"https://dm.cynkra.com/articles/dm.html","id":"selecting-tables","dir":"Articles","previous_headings":"","what":"Selecting tables","title":"Getting started with dm","text":"dm object can accessed like named list tables: Additionally, dm functions pipe-friendly support tidy evaluation. can use [ dm_select_tbl() verb derive smaller dm loans, accounts, districts trans tables:","code":"names(fin_dm) #> Error in eval(expr, envir, enclos): object 'fin_dm' not found fin_dm$loans #> Error in eval(expr, envir, enclos): object 'fin_dm' not found dplyr::count(fin_dm$trans) #> Error in eval(expr, envir, enclos): object 'fin_dm' not found fin_dm_small <- fin_dm[c(\"loans\", \"accounts\", \"districts\", \"trans\")] #> Error in eval(expr, envir, enclos): object 'fin_dm' not found fin_dm_small <- fin_dm %>% dm_select_tbl(loans, accounts, districts, trans) #> Error in eval(expr, envir, enclos): object 'fin_dm' not found"},{"path":"https://dm.cynkra.com/articles/dm.html","id":"linking-tables-by-adding-keys","dir":"Articles","previous_headings":"","what":"Linking tables by adding keys","title":"Getting started with dm","text":"many cases, dm_from_con() already returns dm keys set. , dm allows us define primary foreign keys . , use learn_keys = FALSE obtain dm object tables. data model, id columns uniquely identify records accounts loans tables, used primary key. primary key defined dm_add_pk(). 
loan linked one account via account_id column loans table, relationship established dm_add_fk().","code":"library(dm) fin_dm_small <- dm_from_con(fin_db, learn_keys = FALSE) %>% dm_select_tbl(loans, accounts, districts, trans) #> Error in eval(expr, envir, enclos): object 'fin_db' not found fin_dm_keys <- fin_dm_small %>% dm_add_pk(table = accounts, columns = id) %>% dm_add_pk(loans, id) %>% dm_add_fk(table = loans, columns = account_id, ref_table = accounts) %>% dm_add_pk(trans, id) %>% dm_add_fk(trans, account_id, accounts) %>% dm_add_pk(districts, id) %>% dm_add_fk(accounts, district_id, districts) #> Error in eval(expr, envir, enclos): object 'fin_dm_small' not found"},{"path":"https://dm.cynkra.com/articles/dm.html","id":"visualizing-a-data-model","dir":"Articles","previous_headings":"","what":"Visualizing a data model","title":"Getting started with dm","text":"diagram data model quickest way verify ’re right track. can display visual summary dm time. default display table name, defined keys, links tables. Visualizing dm current state, can see keys created link tables together. Color guides eye.","code":"fin_dm_keys %>% dm_set_colors(darkgreen = c(loans, accounts), darkblue = trans, grey = districts) %>% dm_draw() #> Error in eval(expr, envir, enclos): object 'fin_dm_keys' not found"},{"path":"https://dm.cynkra.com/articles/dm.html","id":"accessing-a-data-model-as-a-table","dir":"Articles","previous_headings":"","what":"Accessing a data model as a table","title":"Getting started with dm","text":"want perform modeling analysis relational model, need transform tabular format R functions can work . argument recursive = TRUE, dm_flatten_to_tbl() automatically follow foreign keys across tables gather available columns single table. Apart rows printed , data fetched database. 
Use select() reduce number columns fetched, collect() retrieve entire result local processing.","code":"fin_dm_keys %>% dm_flatten_to_tbl(loans, .recursive = TRUE) #> Error in eval(expr, envir, enclos): object 'fin_dm_keys' not found loans_df <- fin_dm_keys %>% dm_flatten_to_tbl(loans, .recursive = TRUE) %>% select(id, amount, duration, A3) %>% collect() #> Error in eval(expr, envir, enclos): object 'fin_dm_keys' not found model <- lm(amount ~ duration + A3, data = loans_df) #> Error in eval(mf, parent.frame()): object 'loans_df' not found model #> Error in eval(expr, envir, enclos): object 'model' not found"},{"path":"https://dm.cynkra.com/articles/dm.html","id":"operations-on-table-data-within-a-dm","dir":"Articles","previous_headings":"","what":"Operations on table data within a dm","title":"Getting started with dm","text":"don’t need take extra step exporting data work . dm object, complete access dplyr’s data manipulation verbs. operate data within individual tables. work particular table use dm_zoom_to() set context chosen table. can perform dplyr operations want. Note , example, use dm_insert_zoomed() add results new table data model. table temporary deleted session ends. want make permanent changes data model RDBMS, please see “Persisting results” section vignette(\"howto-dm-db\").","code":"fin_dm_total <- fin_dm_keys %>% dm_zoom_to(loans) %>% group_by(account_id) %>% summarize(total_amount = sum(amount, na.rm = TRUE)) %>% ungroup() %>% dm_insert_zoomed(\"total_loans\") #> Error in eval(expr, envir, enclos): object 'fin_dm_keys' not found fin_dm_total$total_loans #> Error in eval(expr, envir, enclos): object 'fin_dm_total' not found"},{"path":"https://dm.cynkra.com/articles/dm.html","id":"checking-constraints","dir":"Articles","previous_headings":"","what":"Checking constraints","title":"Getting started with dm","text":"’s always smart check data model follows specifications. 
building model changing existing models adding tables keys, even important new model validated. dm_examine_constraints() checks primary foreign keys reports violate expected constraints. constraint checking, including cardinality, finding candidate columns keys, normalization, see vignette(\"tech-dm-low-level\").","code":"fin_dm_total %>% dm_examine_constraints() #> Error in eval(expr, envir, enclos): object 'fin_dm_total' not found #> Error in h(simpleError(msg, call)): error in evaluating the argument 'conn' in selecting a method for function 'dbDisconnect': object 'fin_db' not found"},{"path":"https://dm.cynkra.com/articles/dm.html","id":"next-steps","dir":"Articles","previous_headings":"","what":"Next Steps","title":"Getting started with dm","text":"Now introduced basic operation dm, next step learn dm methods particular use case requires. data RDBMS? move vignette(\"howto-dm-db\") detailed look working existing relational data model. data data frames, want read vignette(\"howto-dm-df\") next. like know relational data models order get dm, check vignette(\"howto-dm-theory\"). ’re familiar relational data models, want know work dm, vignette(\"tech-dm-join\"), vignette(\"tech-dm-filter\"), vignette(\"tech-dm-zoom\") good next step.","code":""},{"path":"https://dm.cynkra.com/articles/dm.html","id":"standing-on-the-shoulders-of-giants","dir":"Articles","previous_headings":"","what":"Standing on the shoulders of giants","title":"Getting started with dm","text":"{dm} package follows tidyverse principles: dm objects immutable (data never overwritten place) functions used dm objects pipeable (.e., return new dm table objects) tidy evaluation used (unquoted function arguments supported) {dm} package builds heavily upon {datamodelr} package, upon tidyverse. ’re looking forward good collaboration! {polyply} package similar intent slightly different interface. {data.cube} package quite intent using array-like interface. 
Articles {rquery} package discuss join controllers join dependency sorting, intent move declaration table relationships code data. {tidygraph} package stores network two related tables nodes edges, compatible {dplyr} workflows. object-oriented programming languages, object-relational mapping similar concept attempts map set related tables class hierarchy.","code":""},{"path":"https://dm.cynkra.com/articles/howto-dm-copy.html","id":"copy-models-or-copy-tables","dir":"Articles","previous_headings":"","what":"Copy models or copy tables?","title":"Copy tables to and from a database","text":"Using {dm} can persist entire relational data model single function call. copy_dm_to() move entire model destination RDBMS. may need deploy new model. may want add new tables existing model RDBMS. requirements can handled using compute() copy_to() methods. Calling compute() copy_to() requires write permission RDBMS; otherwise, error returned. Therefore, following examples, instantiate test dm object move local SQLite database full permissions. {dm} {dbplyr} designed treat code used manipulate local SQLite database remote RDBMS similarly. steps already introduced vignette(\"howto-dm-db\") discussed detail Copying relational model section.","code":"library(dm) library(dbplyr) fin_dm <- dm_financial() %>% dm_select_tbl(-trans) %>% collect() #> Error in `financial_db_con()` at dm/R/financial.R:18:3: #> ! 
Can't connect to relational.fit.cvut.cz or databases.pacha.dev: #> Failed to connect: Can't connect to MySQL server on 'relational.fit.cvut.cz:3306' (101) #> Failed to connect: Can't connect to MySQL server on 'databases.pacha.dev:3306' (110) local_db <- DBI::dbConnect(RSQLite::SQLite()) deployed_dm <- copy_dm_to(local_db, fin_dm, temporary = FALSE) #> Error in eval(expr, envir, enclos): object 'fin_dm' not found"},{"path":"https://dm.cynkra.com/articles/howto-dm-copy.html","id":"copying-tables","dir":"Articles","previous_headings":"","what":"Copying and persisting individual tables","title":"Copy tables to and from a database","text":"part data analysis, may combine tables multiple sources create links existing tables via foreign keys, create new tables holding data summaries. example , already discussed vignette(\"howto-dm-db\"), computes total amount loans account. derived table total_loans lazy table powered {dbplyr} package: results materialized, instead SQL query built executed time data requested. avoid recomputing query every time use total_loans, call compute() right inserting derived table dm_insert_tbl(). compute() forces computation query stores full results table RDBMS. Note differences queries returned sql_render(). my_dm_total$total_loans still lazily evaluated full query constructed chain operations generated still place needs run access . Contrast my_dm_total_computed$total_loans, query realized accessing rows requires simple SELECT * statement. table name, dbplyr_001, automatically generated name argument supplied compute(). default create temporary tables. want results persist across sessions permanent tables, compute() must called argument temporary = FALSE table name name argument. See ?compute details. called whole dm object (without zoom), compute() materializes tables new temporary tables executing associated SQL query storing full results. Depending size data, may take considerable time may even infeasible. 
may useful occasionally create snapshots data subject change.","code":"my_dm_total <- deployed_dm %>% dm_zoom_to(loans) %>% group_by(account_id) %>% summarize(total_amount = sum(amount, na.rm = TRUE)) %>% ungroup() %>% dm_insert_zoomed(\"total_loans\") #> Error in eval(expr, envir, enclos): object 'deployed_dm' not found my_dm_total$total_loans %>% sql_render() #> Error in eval(expr, envir, enclos): object 'my_dm_total' not found my_dm_total_computed <- deployed_dm %>% dm_zoom_to(loans) %>% group_by(account_id) %>% summarize(total_amount = sum(amount, na.rm = TRUE)) %>% ungroup() %>% compute() %>% dm_insert_zoomed(\"total_loans\") #> Error in eval(expr, envir, enclos): object 'deployed_dm' not found my_dm_total_computed$total_loans %>% sql_render() #> Error in eval(expr, envir, enclos): object 'my_dm_total_computed' not found #> Error in eval(expr, envir, enclos): object 'my_dm_total_computed' not found my_dm_total_snapshot <- my_dm_total %>% compute() #> Error in eval(expr, envir, enclos): object 'my_dm_total' not found"},{"path":"https://dm.cynkra.com/articles/howto-dm-copy.html","id":"data-frames","dir":"Articles","previous_headings":"","what":"Adding local data frames to an RDBMS","title":"Copy tables to and from a database","text":"need add local data frames existing dm object, use copy_to() method. takes arguments copy_dm_to(), except second argument takes data frame rather dm. result derived dm object contains new table. demonstrate use copy_to(), example use {dm} pull consolidated data several tables RDBMS, estimate linear model data, insert residuals back RDBMS link existing tables. done local SQLite database, process work unchanged supported RDBMS. Please note use recursive = TRUE dm_flatten_to_tbl(). method gathers linked information single wide table. follows foreign key relations starting table supplied argument gathers columns related tables, disambiguating column names goes. code, select() statement isolates columns need model. 
collect() works similarly compute() forcing execution underlying SQL query, returns results local tibble. , local tibble, loans_df, used estimate linear model residuals stored along original associated id new tibble, loans_residuals. id column necessary link new tibble tables dm collected . Adding loans_residuals dm done using copy_to(). call method includes argument temporary = FALSE want table persist beyond current session. pipeline create necessary primary foreign keys integrate table rest relational model. information key creation, see vignette(\"howto-dm-db\") vignette(\"howto-dm-theory\").","code":"loans_df <- deployed_dm %>% dm_flatten_to_tbl(loans, .recursive = TRUE) %>% select(id, amount, duration, A3) %>% collect() #> Error in eval(expr, envir, enclos): object 'deployed_dm' not found model <- lm(amount ~ duration + A3, data = loans_df) #> Error in eval(mf, parent.frame()): object 'loans_df' not found loans_residuals <- tibble::tibble( id = loans_df$id, resid = unname(residuals(model)) ) #> Error: object 'loans_df' not found loans_residuals #> Error in eval(expr, envir, enclos): object 'loans_residuals' not found my_dm_sqlite_resid <- copy_to(deployed_dm, loans_residuals, temporary = FALSE) %>% dm_add_pk(loans_residuals, id) %>% dm_add_fk(loans_residuals, id, loans) #> Error in eval(expr, envir, enclos): object 'deployed_dm' not found my_dm_sqlite_resid %>% dm_set_colors(violet = loans_residuals) %>% dm_draw() #> Error in eval(expr, envir, enclos): object 'my_dm_sqlite_resid' not found my_dm_sqlite_resid %>% dm_examine_constraints() #> Error in eval(expr, envir, enclos): object 'my_dm_sqlite_resid' not found my_dm_sqlite_resid$loans_residuals #> Error in eval(expr, envir, enclos): object 'my_dm_sqlite_resid' not found"},{"path":"https://dm.cynkra.com/articles/howto-dm-copy.html","id":"copy-model","dir":"Articles","previous_headings":"","what":"Persisting a relational model with copy_dm_to()","title":"Copy tables to and from a 
database","text":"Persistence, intended make permanent changes, requires write access source RDBMS. code repeat code opened Copying persisting individual tables section beginning tutorial. uses {dm} convenience function dm_financial() create dm object corresponding data model public dataset repository. dm object downloaded locally first, deploying local SQLite database. dm_select_tbl() used exclude transaction table trans due size, collect() method retrieves remaining tables returns local dm object. just simple move local relational model RDBMS. Note call copy_dm_to() argument temporary = FALSE supplied. Without argument, model still copied database, argument default temporary = TRUE data deleted session ends. output can observe src deployed_dm SQLite, fin_dm source indicated local data model. Copying relational model empty database simplest use case copy_dm_to(). want copy model RDBMS already populated, aware copy_dm_to() overwrite pre-existing tables. case need use table_names argument give tables unique names. table_names can named character vector, names matching table names dm object values containing desired names RDBMS, function one-sided formula. example , paste0() used add prefix table names provide uniqueness. Note different table names dup_dm$accounts deployed_dm$accounts. , table name accounts dm object, link different tables database. dup_dm, table backed table dup_accounts RDBMS. dm_deployed$accounts shows us table still backed accounts table copy_dm_to() operation performed preceding example. Managing tables RDBMS outside scope dm. find need remove tables perform operations directly RDBMS, see {DBI} package. done, forget disconnect:","code":"dm_financial() %>% dm_nrow() #> Error in `financial_db_con()` at dm/R/financial.R:18:3: #> ! 
Can't connect to relational.fit.cvut.cz or databases.pacha.dev: #> Failed to connect: Can't connect to MySQL server on 'relational.fit.cvut.cz:3306' (101) #> Failed to connect: Can't connect to MySQL server on 'databases.pacha.dev:3306' (110) fin_dm <- dm_financial() %>% dm_select_tbl(-trans) %>% collect() #> Error in `financial_db_con()` at dm/R/financial.R:18:3: #> ! Can't connect to relational.fit.cvut.cz or databases.pacha.dev: #> Failed to connect: Can't connect to MySQL server on 'relational.fit.cvut.cz:3306' (101) #> Failed to connect: Can't connect to MySQL server on 'databases.pacha.dev:3306' (110) fin_dm #> Error in eval(expr, envir, enclos): object 'fin_dm' not found destination_db <- DBI::dbConnect(RSQLite::SQLite()) deployed_dm <- copy_dm_to(destination_db, fin_dm, temporary = FALSE) #> Error in eval(expr, envir, enclos): object 'fin_dm' not found deployed_dm #> Error in eval(expr, envir, enclos): object 'deployed_dm' not found dup_dm <- copy_dm_to(destination_db, fin_dm, temporary = FALSE, table_names = ~ paste0(\"dup_\", .x)) #> Error in eval(expr, envir, enclos): object 'fin_dm' not found dup_dm #> Error in eval(expr, envir, enclos): object 'dup_dm' not found remote_name(dup_dm$accounts) #> Error in eval(expr, envir, enclos): object 'dup_dm' not found remote_name(deployed_dm$accounts) #> Error in eval(expr, envir, enclos): object 'deployed_dm' not found DBI::dbDisconnect(destination_db) DBI::dbDisconnect(local_db)"},{"path":"https://dm.cynkra.com/articles/howto-dm-copy.html","id":"conclusion","dir":"Articles","previous_headings":"","what":"Conclusion","title":"Copy tables to and from a database","text":"dm makes straightforward deploy complete relational model RDBMS using copy_dm_to() function. tables created relational model analysis development, compute() copy_to() can used persist (using argument temporary = FALSE) sessions copy local tables database dm. 
collect() method downloads entire dm object fits memory database.","code":""},{"path":"https://dm.cynkra.com/articles/howto-dm-copy.html","id":"further-reading","dir":"Articles","previous_headings":"","what":"Further Reading","title":"Copy tables to and from a database","text":"need finer-grained control modifications relational model, see vignette(\"howto-dm-rows\") introduction row level operations, including updates, insertions, deletions patching. like know relational data models order get dm, check vignette(\"howto-dm-theory\"). ’re familiar relational data models want know work dm, vignette(\"tech-dm-join\"), vignette(\"tech-dm-filter\"), vignette(\"tech-dm-zoom\") good next step.","code":""},{"path":"https://dm.cynkra.com/articles/howto-dm-db.html","id":"building-a-dm-from-a-subset-of-tables","dir":"Articles","previous_headings":"","what":"Building a dm from a subset of tables","title":"Create a dm object from a database","text":"dm can also constructed individual tables views. useful want work subset database’s tables, perhaps different schemas. , use $ notation extract two tables financial database. create dm passing tables arguments. Note tables arguments source, case my_db.","code":"dbListTables(my_db) #> Error in h(simpleError(msg, call)): error in evaluating the argument 'conn' in selecting a method for function 'dbListTables': object 'my_db' not found library(dbplyr) loans <- tbl(my_db, \"loans\") #> Error in eval(expr, envir, enclos): object 'my_db' not found accounts <- tbl(my_db, \"accounts\") #> Error in eval(expr, envir, enclos): object 'my_db' not found my_manual_dm <- dm(loans, accounts) #> Error in `map()` at dm/R/dm.R:58:3: #> ℹ In index: 1. #> Caused by error: #> ! 
object 'loans' not found my_manual_dm #> Error in eval(expr, envir, enclos): object 'my_manual_dm' not found"},{"path":"https://dm.cynkra.com/articles/howto-dm-db.html","id":"defining-primary-and-foreign-keys","dir":"Articles","previous_headings":"","what":"Defining Primary and Foreign Keys","title":"Create a dm object from a database","text":"Primary keys foreign keys relational database tables linked . primary key column column tuple unique value row within table. foreign key column column tuple containing primary key row another table. Foreign keys act cross references tables. specify relationships gives us relational database. information keys crash course databases, see vignette(\"howto-dm-theory\"). many cases, dm_from_con() already returns dm keys set. , dm allows us define primary foreign keys . , use learn_keys = FALSE obtain dm object tables. model diagram provided test database loosely illustrates intended relationships tables. diagram, can see loans table linked accounts table. , create links 3 steps: Add primary key id accounts table Add primary key id loans table Add foreign key account_id loans table referencing accounts table assign colors tables draw structure dm. Note foreign key created, primary key referenced table need specified, primary key must already defined. , mentioned , primary foreign key constraints database currently imported Postgres, SQL Server databases MariaDB, dm_from_con() used. process key definition needs done manually databases. instantiated dm object, can continue add tables . 
tables original source dm, use dm()","code":"library(dm) fin_dm <- dm_from_con(my_db, learn_keys = FALSE) #> Error in eval(expr, envir, enclos): object 'my_db' not found fin_dm #> Error in eval(expr, envir, enclos): object 'fin_dm' not found my_dm_keys <- my_manual_dm %>% dm_add_pk(accounts, id) %>% dm_add_pk(loans, id) %>% dm_add_fk(loans, account_id, accounts) %>% dm_set_colors(green = loans, orange = accounts) #> Error in eval(expr, envir, enclos): object 'my_manual_dm' not found my_dm_keys %>% dm_draw() #> Error in eval(expr, envir, enclos): object 'my_dm_keys' not found trans <- tbl(my_db, \"trans\") #> Error in eval(expr, envir, enclos): object 'my_db' not found my_dm_keys %>% dm(trans) #> Error in `map()` at dm/R/dm.R:58:3: #> ℹ In index: 1. #> Caused by error: #> ! object 'my_dm_keys' not found"},{"path":"https://dm.cynkra.com/articles/howto-dm-db.html","id":"serializing-a-dm-object","dir":"Articles","previous_headings":"","what":"Serializing a dm object","title":"Create a dm object from a database","text":"dm object always linked database connection. connection lost dm object saved disk, e.g., saving workspace R Posit Workbench, using knitr chunks: connection tightly coupled tables dm object replaced. 
practical solution define, dm object project uses, function recreates using new database connection: avoid reconnecting /recreating every time need dm object, can use memoise::memoise() memoize connection /dm functions.","code":"unserialize(serialize(my_dm_keys, NULL)) #> Error in eval(expr, envir, enclos): object 'my_dm_keys' not found my_db_fun <- function() { dbConnect( MariaDB(), username = \"guest\", password = \"relational\", dbname = \"Financial_ijs\", host = \"relational.fit.cvut.cz\" ) } my_dm_fun <- function(my_db = my_db_fun()) { loans <- tbl(my_db, \"loans\") accounts <- tbl(my_db, \"accounts\") dm(loans, accounts) %>% dm_add_pk(accounts, id) %>% dm_add_pk(loans, id) %>% dm_add_fk(loans, account_id, accounts) %>% dm_set_colors(green = loans, orange = accounts) } #> Error in `dm:::financial_db_con()`: #> ! Can't connect to relational.fit.cvut.cz or databases.pacha.dev: #> Failed to connect: Can't connect to MySQL server on 'relational.fit.cvut.cz:3306' (101) #> Failed to connect: Can't connect to MySQL server on 'databases.pacha.dev:3306' (110)"},{"path":"https://dm.cynkra.com/articles/howto-dm-db.html","id":"transient-nature-of-operations","dir":"Articles","previous_headings":"","what":"Transient nature of operations","title":"Create a dm object from a database","text":"Like R objects, dm immutable operations performed transient unless stored new variable. , like {dbplyr}, results never written database unless explicitly requested.","code":"my_dm_keys #> Error in eval(expr, envir, enclos): object 'my_dm_keys' not found my_dm_trans <- my_dm_keys %>% dm(trans) #> Error in `map()` at dm/R/dm.R:58:3: #> ℹ In index: 1. #> Caused by error: #> ! 
object 'my_dm_keys' not found my_dm_trans #> Error in eval(expr, envir, enclos): object 'my_dm_trans' not found my_dm_keys %>% dm_flatten_to_tbl(loans) #> Error in eval(expr, envir, enclos): object 'my_dm_keys' not found my_dm_keys %>% dm_flatten_to_tbl(loans) %>% sql_render() #> Error in eval(expr, envir, enclos): object 'my_dm_keys' not found"},{"path":"https://dm.cynkra.com/articles/howto-dm-db.html","id":"performing-operations-on-tables-by-zooming","dir":"Articles","previous_headings":"","what":"Performing operations on tables by “zooming”","title":"Create a dm object from a database","text":"dm collection tables, wish perform operations individual table, set context operations using dm_zoom_to(). See vignette(\"tech-dm-zoom\") detail zooming. dm operations transient unless persistence explicitly requested. make chain manipulations selected table permanent, assign result dm_insert_zoomed() new object, my_dm_total. new dm object, derived my_dm_keys, new lazy table total_loans linked accounts table. Context set table “loans” using dm_zoom_to(loans). can learn zooming tutorial vignette(\"tech-dm-zoom\"). use {dplyr} functions zoomed table generate new summary table. summarize() returns temporary table one row group created preceding group_by() function. columns temporary table constrained columns passed arguments group_by() function column(s) created summarize() function. dm_insert_zoomed(\"total_loans\") adds temporary table created summarize() data model new name, total_loans. grouping variable account_id primary key, new derived table automatically linked accounts table. resulting table total_loans can accessed like table dm object. lazy table powered {dbplyr} package: results materialized; instead, SQL query built executed time data requested. Use compute() zoomed table materialize temporary table avoid recomputing. 
See vignette(\"howto-dm-copy\") details.","code":"my_dm_total <- my_dm_keys %>% dm_zoom_to(loans) %>% group_by(account_id) %>% summarize(total_amount = sum(amount, na.rm = TRUE)) %>% ungroup() %>% dm_insert_zoomed(\"total_loans\") #> Error in eval(expr, envir, enclos): object 'my_dm_keys' not found my_dm_total %>% dm_set_colors(violet = total_loans) %>% dm_draw() #> Error in eval(expr, envir, enclos): object 'my_dm_total' not found my_dm_total$total_loans #> Error in eval(expr, envir, enclos): object 'my_dm_total' not found my_dm_total$total_loans %>% sql_render() #> Error in eval(expr, envir, enclos): object 'my_dm_total' not found"},{"path":"https://dm.cynkra.com/articles/howto-dm-db.html","id":"downloading-data","dir":"Articles","previous_headings":"","what":"Downloading data","title":"Create a dm object from a database","text":"becomes necessary move data locally analysis reporting, {dm} method collect() used. Operations dm objects databases limited report first ten results. collect() forces evaluation SQL queries generation complete set results. resulting tables transferred RDBMS stored local tibbles. Use method caution. sure size dataset downloading, can call dm_nrow() dm row count data model’s tables.","code":"my_dm_local <- my_dm_total %>% collect() #> Error in eval(expr, envir, enclos): object 'my_dm_total' not found my_dm_local$total_loans #> Error in eval(expr, envir, enclos): object 'my_dm_local' not found my_dm_total %>% dm_nrow() #> Error in eval(expr, envir, enclos): object 'my_dm_total' not found"},{"path":"https://dm.cynkra.com/articles/howto-dm-db.html","id":"persist","dir":"Articles","previous_headings":"","what":"Persisting results","title":"Create a dm object from a database","text":"just simple move local relational model RDBMS using collect() download . method used copy_dm_to() takes arguments database connection dm object. example , local SQLite database used demonstrate , {dm} designed work RDBMS supported {DBI}. 
output, can observe src deployed_dm SQLite database, my_dm_local source local R environment. Persisting tables covered detail vignette(\"howto-dm-copy\"). done, forget disconnect:","code":"destination_db <- DBI::dbConnect(RSQLite::SQLite()) deployed_dm <- copy_dm_to(destination_db, my_dm_local) #> Error in eval(expr, envir, enclos): object 'my_dm_local' not found deployed_dm #> Error in eval(expr, envir, enclos): object 'deployed_dm' not found my_dm_local #> Error in eval(expr, envir, enclos): object 'my_dm_local' not found DBI::dbDisconnect(destination_db) DBI::dbDisconnect(my_db) #> Error in h(simpleError(msg, call)): error in evaluating the argument 'conn' in selecting a method for function 'dbDisconnect': object 'my_db' not found"},{"path":"https://dm.cynkra.com/articles/howto-dm-db.html","id":"conclusion","dir":"Articles","previous_headings":"","what":"Conclusion","title":"Create a dm object from a database","text":"tutorial, demonstrated simple load database dm object begin working . Currently, loading dm RDBMS requires manually set key relations, {dm} provides methods make straightforward. planned future versions dm support automatic key creation RDBMS. next step read vignette(\"howto-dm-copy\"), copying tables RDBMS covered. vignette(\"howto-dm-rows\") discusses manipulation individual rows database.","code":""},{"path":"https://dm.cynkra.com/articles/howto-dm-db.html","id":"further-reading","dir":"Articles","previous_headings":"","what":"Further reading","title":"Create a dm object from a database","text":"vignette(\"howto-dm-df\") – data local data frames? article covers creating data model local data frames, including building relationships data model, verifying model, leveraging power dplyr operate data model. vignette(\"howto-dm-theory\") – know data frames little relational data models? 
quick introduction walk key similarities differences, show move individual data frames relational data model.","code":""},{"path":"https://dm.cynkra.com/articles/howto-dm-df.html","id":"creating-a-dm-object-from-data-frames","dir":"Articles","previous_headings":"","what":"Creating a dm object from data frames","title":"Create a dm object from data frames","text":"example data set using available nycflights13 package. five tables working contain information flights departed airports New York destinations United States 2013: flights represents trips taken planes names transport organizations (name) abbreviated codes (carrier) airports indicates ports departure (origin) destination (dest) weather contains meteorological information hour planes describes characteristics aircraft ’ve loaded {nycflights13}, aforementioned tables work environment, ready accessed. data probably available R package. Whatever format , need able load data frames R session. data large, consider using dm connect database instead. See vignette(\"howto-dm-db\") details using dm databases.","code":"library(nycflights13) airports #> # A tibble: 1,458 × 8 #> faa name lat lon alt tz dst tzone #> #> 1 04G Lansdowne Airport 41.1 -80.6 1044 -5 A Amer… #> 2 06A Moton Field Municipal Airport 32.5 -85.7 264 -6 A Amer… #> 3 06C Schaumburg Regional 42.0 -88.1 801 -6 A Amer… #> 4 06N Randall Airport 41.4 -74.4 523 -5 A Amer… #> 5 09J Jekyll Island Airport 31.1 -81.4 11 -5 A Amer… #> 6 0A9 Elizabethton Municipal Airpo… 36.4 -82.2 1593 -5 A Amer… #> 7 0G6 Williams County Airport 41.5 -84.5 730 -5 A Amer… #> 8 0G7 Finger Lakes Regional Airport 42.9 -76.8 492 -5 A Amer… #> 9 0P2 Shoestring Aviation Airfield 39.8 -76.6 1000 -5 U Amer… #> 10 0S9 Jefferson County Intl 48.1 -123. 
108 -8 A Amer… #> # ℹ 1,448 more rows"},{"path":"https://dm.cynkra.com/articles/howto-dm-df.html","id":"adding-tables","dir":"Articles","previous_headings":"","what":"Adding Tables","title":"Create a dm object from data frames","text":"first step tell dm tables want work connected. can use dm(), passing table names arguments. as_dm() function alternative works already list tables.","code":"library(dm) flights_dm_no_keys <- dm(airlines, airports, flights, planes, weather) flights_dm_no_keys #> ── Metadata ─────────────────────────────────────────────────────────────── #> Tables: `airlines`, `airports`, `flights`, `planes`, `weather` #> Columns: 53 #> Primary keys: 0 #> Foreign keys: 0"},{"path":"https://dm.cynkra.com/articles/howto-dm-df.html","id":"a-dm-is-a-list","dir":"Articles","previous_headings":"","what":"A dm is a list","title":"Create a dm object from data frames","text":"dm objects behave like lists user- console-friendly print format. fact, using dm nicer list organizing data frames environment easy first step towards using dm data modeling functionality. Subsetting syntax dm object (either subscript name) similar syntax lists, don’t need learn additional syntax work dm objects.","code":"names(flights_dm_no_keys) #> [1] \"airlines\" \"airports\" \"flights\" \"planes\" \"weather\" flights_dm_no_keys$airports #> # A tibble: 1,458 × 8 #> faa name lat lon alt tz dst tzone #> #> 1 04G Lansdowne Airport 41.1 -80.6 1044 -5 A Amer… #> 2 06A Moton Field Municipal Airport 32.5 -85.7 264 -6 A Amer… #> 3 06C Schaumburg Regional 42.0 -88.1 801 -6 A Amer… #> 4 06N Randall Airport 41.4 -74.4 523 -5 A Amer… #> 5 09J Jekyll Island Airport 31.1 -81.4 11 -5 A Amer… #> 6 0A9 Elizabethton Municipal Airpo… 36.4 -82.2 1593 -5 A Amer… #> 7 0G6 Williams County Airport 41.5 -84.5 730 -5 A Amer… #> 8 0G7 Finger Lakes Regional Airport 42.9 -76.8 492 -5 A Amer… #> 9 0P2 Shoestring Aviation Airfield 39.8 -76.6 1000 -5 U Amer… #> 10 0S9 Jefferson County Intl 48.1 -123. 
108 -8 A Amer… #> # ℹ 1,448 more rows flights_dm_no_keys[c(\"airports\", \"flights\")] #> ── Metadata ─────────────────────────────────────────────────────────────── #> Tables: `airports`, `flights` #> Columns: 27 #> Primary keys: 0 #> Foreign keys: 0"},{"path":"https://dm.cynkra.com/articles/howto-dm-df.html","id":"defining-keys","dir":"Articles","previous_headings":"","what":"Defining Keys","title":"Create a dm object from data frames","text":"Even though now dm object contains data, specified five tables connected. , need define primary keys foreign keys tables. Primary keys foreign keys relational database tables linked . primary key column column tuple unique value row within table. foreign key column column tuple containing primary key row another table. Foreign keys act cross references tables. specify relationships gives us relational database. information keys crash course databases, see vignette(\"howto-dm-theory\").","code":""},{"path":"https://dm.cynkra.com/articles/howto-dm-df.html","id":"primary-keys","dir":"Articles","previous_headings":"","what":"Primary Keys","title":"Create a dm object from data frames","text":"dm offers dm_enum_pk_candidates() identify viable primary keys table dm object, dm_add_pk() add . 
Now, can add identified primary keys: Note {dm} functions work named positional argument specification, compound keys can specified using vector argument.","code":"dm_enum_pk_candidates( dm = flights_dm_no_keys, table = planes ) #> # A tibble: 9 × 3 #> columns candidate why #> #> 1 tailnum TRUE \"\" #> 2 year FALSE \"has duplicate values: 2001 (284), 2000 (244), 20… #> 3 type FALSE \"has duplicate values: Fixed wing multi engine (3… #> 4 manufacturer FALSE \"has duplicate values: BOEING (1630), AIRBUS INDU… #> 5 model FALSE \"has duplicate values: 737-7H4 (361), A320-232 (2… #> 6 engines FALSE \"has duplicate values: 2 (3288), 1 (27), 4 (4), 3… #> 7 seats FALSE \"has duplicate values: 149 (452), 140 (411), 55 (… #> 8 speed FALSE \"has 3299 missing values, and duplicate values: 4… #> 9 engine FALSE \"has duplicate values: Turbo-fan (2750), Turbo-je… flights_dm_only_pks <- flights_dm_no_keys %>% dm_add_pk(table = airlines, columns = carrier) %>% dm_add_pk(airports, faa) %>% dm_add_pk(planes, tailnum) %>% dm_add_pk(weather, c(origin, time_hour)) flights_dm_only_pks #> ── Metadata ─────────────────────────────────────────────────────────────── #> Tables: `airlines`, `airports`, `flights`, `planes`, `weather` #> Columns: 53 #> Primary keys: 4 #> Foreign keys: 0"},{"path":"https://dm.cynkra.com/articles/howto-dm-df.html","id":"foreign-keys","dir":"Articles","previous_headings":"","what":"Foreign Keys","title":"Create a dm object from data frames","text":"define tables related, use dm_add_fk() add foreign keys. Naturally, function deal two tables: table looking reference, table providing reference. Accordingly, calling dm_add_fk(), table argument specifies table needs foreign key link second table, ref_table argument specifies table linked , needs primary key already defined . chosen column successful candidates provided dm_enum_fk_candidates(), use dm_add_fk() function establish foreign key linking tables. 
second call dm_add_fk() complete process flights airlines tables started . carrier column airlines table added foreign key flights. created required primary foreign keys link tables together, now relational data model can work .","code":"dm_enum_fk_candidates( dm = flights_dm_only_pks, table = flights, ref_table = airlines ) #> # A tibble: 19 × 3 #> columns candidate why #> #> 1 carrier TRUE \"\" #> 2 year FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 3 month FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 4 day FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 5 dep_time FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 6 sched_dep_time FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 7 dep_delay FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 8 arr_time FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 9 sched_arr_time FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 10 arr_delay FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 11 flight FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 12 tailnum FALSE \"values of `flights$tailnum` not in `airlines$… #> 13 origin FALSE \"values of `flights$origin` not in `airlines$c… #> 14 dest FALSE \"values of `flights$dest` not in `airlines$car… #> 15 air_time FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 16 distance FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 17 hour FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 18 minute FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 19 time_hour FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… flights_dm_all_keys <- flights_dm_only_pks %>% dm_add_fk(table = flights, columns = tailnum, ref_table = planes) %>% dm_add_fk(flights, carrier, airlines) %>% dm_add_fk(flights, origin, airports) %>% dm_add_fk(flights, c(origin, time_hour), weather) flights_dm_all_keys #> ── Metadata 
─────────────────────────────────────────────────────────────── #> Tables: `airlines`, `airports`, `flights`, `planes`, `weather` #> Columns: 53 #> Primary keys: 4 #> Foreign keys: 4"},{"path":"https://dm.cynkra.com/articles/howto-dm-df.html","id":"visualization","dir":"Articles","previous_headings":"","what":"Visualization","title":"Create a dm object from data frames","text":"Visualizing data model quick easy way verify successfully created model aiming . can use dm_draw() stage process generate visual representation tables links :","code":"flights_dm_no_keys %>% dm_draw(rankdir = \"TB\", view_type = \"all\") flights_dm_no_keys %>% dm_add_pk(airlines, carrier) %>% dm_draw() flights_dm_only_pks %>% dm_add_fk(flights, tailnum, planes) %>% dm_draw() flights_dm_all_keys %>% dm_draw()"},{"path":"https://dm.cynkra.com/articles/howto-dm-df.html","id":"integrity-checks","dir":"Articles","previous_headings":"","what":"Integrity Checks","title":"Create a dm object from data frames","text":"well checking data model visually, dm can examine constraints created addition keys verify sensible. results presented human-readable form, available tibble programmatic inspection.","code":"flights_dm_no_keys %>% dm_examine_constraints() #> ℹ No constraints defined. flights_dm_only_pks %>% dm_examine_constraints() #> ℹ All constraints satisfied. flights_dm_all_keys %>% dm_examine_constraints() #> ! 
Unsatisfied constraints: #> • Table `flights`: foreign key `tailnum` into table `planes`: values of `flights$tailnum` not in `planes$tailnum`: N725MQ (575), N722MQ (513), N723MQ (507), N713MQ (483), N735MQ (396), … #> • Table `flights`: foreign key `origin`, `time_hour` into table `weather`: values of `flights$origin`, `flights$time_hour` not in `weather$origin`, `weather$time_hour`: EWR, 2013-10-23 06:00:00 (34), EWR, 2013-08-19 17:00:00 (26), EWR, 2013-12-31 06:00:00 (26), EWR, 2013-12-31 07:00:00 (26), JFK, 2013-08-19 17:00:00 (26), …"},{"path":"https://dm.cynkra.com/articles/howto-dm-df.html","id":"programming","dir":"Articles","previous_headings":"","what":"Programming","title":"Create a dm object from data frames","text":"Helper functions available access details keys check results. data frame primary keys retrieved dm_get_all_pks(): Similarly, data frame foreign keys retrieved dm_get_all_fks(): can use tibble::as_tibble() result dm_examine_constraints() programmatically inspect constraints satisfied:","code":"flights_dm_only_pks %>% dm_get_all_pks() #> # A tibble: 4 × 3 #> table pk_col autoincrement #> #> 1 airlines carrier FALSE #> 2 airports faa FALSE #> 3 planes tailnum FALSE #> 4 weather origin, time_hour FALSE flights_dm_all_keys %>% dm_get_all_fks() #> # A tibble: 4 × 5 #> child_table child_fk_cols parent_table parent_key_cols on_delete #> #> 1 flights carrier airlines carrier no_action #> 2 flights origin airports faa no_action #> 3 flights tailnum planes tailnum no_action #> 4 flights origin, time_hour weather origin, time_hour no_action flights_dm_all_keys %>% dm_examine_constraints() %>% tibble::as_tibble() #> # A tibble: 8 × 6 #> table kind columns ref_table is_key problem #> #> 1 flights FK tailnum planes FALSE \"values of `flights$ta… #> 2 flights FK origin, time_hour weather FALSE \"values of `flights$or… #> 3 airlines PK carrier NA TRUE \"\" #> 4 airports PK faa NA TRUE \"\" #> 5 planes PK tailnum NA TRUE \"\" #> 6 weather PK origin, time_hour 
NA TRUE \"\" #> 7 flights FK carrier airlines TRUE \"\" #> 8 flights FK origin airports TRUE \"\""},{"path":"https://dm.cynkra.com/articles/howto-dm-df.html","id":"conclusion","dir":"Articles","previous_headings":"","what":"Conclusion","title":"Create a dm object from data frames","text":"tutorial, demonstrated simple create relational data models local data frames using {dm}, including setting primary foreign keys visualizing resulting relational model.","code":""},{"path":"https://dm.cynkra.com/articles/howto-dm-df.html","id":"further-reading","dir":"Articles","previous_headings":"","what":"Further reading","title":"Create a dm object from data frames","text":"vignette(\"howto-dm-db\") – article covers accessing working RDBMSs within R session, including manipulating data, filling missing relationships tables, getting data RDBMS model, deploying data model RDBMS. vignette(\"howto-dm-theory\") – know data frames little relational data models? quick introduction walk key similarities differences, show move individual data frames relational data model.","code":""},{"path":"https://dm.cynkra.com/articles/howto-dm-rows.html","id":"the-dm_rows_-process","dir":"Articles","previous_headings":"","what":"The dm_rows_* process","title":"Insert, update, or remove rows in a database","text":"six methods take arguments using follows process: Create temporary changeset dm object defines intended changes RDBMS desired, simulate changes in_place = FALSE double-check Apply changes in_place = TRUE. start, dm object created containing tables rows want change. changeset dm copied source dm want modify. dm RDBMS destination dm, call appropriate method, dm_rows_insert(), make planned changes, along argument in_place = FALSE can confirm achieve changes want. verification can done visually, looking row counts like, using {dm}’s constraint checking method, dm_examine_constraints(). biggest danger damaging key relations data spread across multiple tables deleting duplicating rows keys. 
dm_examine_constraints() catch errors primary keys duplicated foreign keys matching primary key (unless foreign key value NA). changes confirmed, execute method , time argument in_place = TRUE make changes permanent. Note in_place = FALSE default: must opt actually change data database. method requirements order maintain database consistency. involve constraints primary key values uniquely identify rows. ensure integrity relations process, methods automatically determine correct processing order tables involved. operations create records, parent tables (hold primary keys) processed child tables (hold foreign keys). dm_rows_delete(), child tables processed parent tables. Note user still responsible setting transactions ensure integrity operations across multiple tables. details see vignette(\"howto-dm-theory\") vignette(\"howto-dm-db\").","code":""},{"path":"https://dm.cynkra.com/articles/howto-dm-rows.html","id":"usage","dir":"Articles","previous_headings":"","what":"Usage","title":"Insert, update, or remove rows in a database","text":"demonstrate use table modifying methods, create simple dm object two tables linked foreign key. Note child table foreign key missing (NA). {dm} doesn’t check key values create dm, add check:1 copy demo_dm SQLite database. Note: default method used, copy_dm_to(), create temporary tables automatically deleted session ends. demo_sql destination dm examples, argument temporary = FALSE used make distinction apparent. {dm}’s table modification methods can piped together create repeatable sequence operations returns dm incorporating changes required. 
common use case {dm} – manually building sequence operations using temporary results complete correct, committing result.","code":"library(dm) parent <- tibble(value = c(\"A\", \"B\", \"C\"), pk = 1:3) parent #> # A tibble: 3 × 2 #> value pk #> #> 1 A 1 #> 2 B 2 #> 3 C 3 child <- tibble(value = c(\"a\", \"b\", \"c\"), pk = 1:3, fk = c(1, 1, NA)) child #> # A tibble: 3 × 3 #> value pk fk #> #> 1 a 1 1 #> 2 b 2 1 #> 3 c 3 NA demo_dm <- dm(parent = parent, child = child) %>% dm_add_pk(parent, pk) %>% dm_add_pk(child, pk) %>% dm_add_fk(child, fk, parent) demo_dm %>% dm_draw(view_type = \"all\") dm_examine_constraints(demo_dm) #> ℹ All constraints satisfied. library(DBI) sqlite_db <- DBI::dbConnect(RSQLite::SQLite()) demo_sql <- copy_dm_to(sqlite_db, demo_dm, temporary = FALSE) demo_sql #> ── Table source ─────────────────────────────────────────────────────────── #> src: sqlite 3.44.2 [] #> ── Metadata ─────────────────────────────────────────────────────────────── #> Tables: `parent`, `child` #> Columns: 5 #> Primary keys: 2 #> Foreign keys: 1"},{"path":"https://dm.cynkra.com/articles/howto-dm-rows.html","id":"insert","dir":"Articles","previous_headings":"","what":"dm_rows_insert()","title":"Insert, update, or remove rows in a database","text":"demonstrate dm_rows_insert(), create dm tables containing rows insert copy sqlite_db, source demo_sql. dm_rows_...() methods, source destination dm objects must RDBMS. get error message case. code adds parent child table entries letter “D”. First, changeset dm created temporarily copied database: changeset dm used argument dm_rows_insert(). gives us warning changes persist (.e., temporary). Inspecting child table resulting dm_insert_out demo_sql, can see ’s exactly happened. {dm} returned us dm object inserted rows place, underlying database changed. 
repeat operation, time argument in_place = TRUE changes now persist demo_sql.","code":"new_parent <- tibble(value = \"D\", pk = 4) new_parent #> # A tibble: 1 × 2 #> value pk #> #> 1 D 4 new_child <- tibble(value = \"d\", pk = 4, fk = 4) new_child #> # A tibble: 1 × 3 #> value pk fk #> #> 1 d 4 4 dm_insert_in <- dm(parent = new_parent, child = new_child) %>% copy_dm_to(sqlite_db, ., temporary = TRUE) dm_insert_out <- demo_sql %>% dm_rows_insert(dm_insert_in) #> Result is returned as a dm object with lazy tables. Use `in_place = FALSE` #> to mute this message, or `in_place = TRUE` to write to the underlying #> tables. dm_insert_out$child #> # Source: SQL [4 x 3] #> # Database: sqlite 3.44.2 [] #> value pk fk #> #> 1 a 1 1 #> 2 b 2 1 #> 3 c 3 NA #> 4 d 4 4 demo_sql$child #> # Source: table [3 x 3] #> # Database: sqlite 3.44.2 [] #> value pk fk #> #> 1 a 1 1 #> 2 b 2 1 #> 3 c 3 NA dm_insert_out <- demo_sql %>% dm_rows_insert(dm_insert_in, in_place = TRUE) demo_sql$child #> # Source: table [4 x 3] #> # Database: sqlite 3.44.2 [] #> value pk fk #> #> 1 a 1 1 #> 2 b 2 1 #> 3 c 3 NA #> 4 d 4 4"},{"path":"https://dm.cynkra.com/articles/howto-dm-rows.html","id":"update","dir":"Articles","previous_headings":"","what":"dm_rows_update()","title":"Insert, update, or remove rows in a database","text":"dm_rows_update() works dm_rows_insert(). create dm object copy source destination. change foreign key row child containing “b” point correct row parent. 
persist changes.","code":"updated_child <- tibble(value = \"b\", pk = 2, fk = 2) updated_child #> # A tibble: 1 × 3 #> value pk fk #> #> 1 b 2 2 dm_update_in <- dm(child = updated_child) %>% copy_dm_to(sqlite_db, ., temporary = TRUE) dm_update_out <- demo_sql %>% dm_rows_update(dm_update_in, in_place = TRUE) demo_sql$child #> # Source: table [4 x 3] #> # Database: sqlite 3.44.2 [] #> value pk fk #> #> 1 a 1 1 #> 2 b 2 2 #> 3 c 3 NA #> 4 d 4 4"},{"path":"https://dm.cynkra.com/articles/howto-dm-rows.html","id":"delete","dir":"Articles","previous_headings":"","what":"dm_rows_delete()","title":"Insert, update, or remove rows in a database","text":"dm_rows_delete() currently implemented work RDBMS, shift demonstrations back local R environment. ’ve made changes demo_sql, use collect() copy current tables SQLite. Note persistence concern local dm objects. Every operation returns new dm object containing changes made.","code":"local_dm <- collect(demo_sql) local_dm$parent #> # A tibble: 4 × 2 #> value pk #> #> 1 A 1 #> 2 B 2 #> 3 C 3 #> 4 D 4 local_dm$child #> # A tibble: 4 × 3 #> value pk fk #> #> 1 a 1 1 #> 2 b 2 2 #> 3 c 3 NA #> 4 d 4 4 dm_deleted <- dm(parent = new_parent, child = new_child) %>% dm_rows_delete(local_dm, .) #> Result is returned as a dm object with lazy tables. Use `in_place = FALSE` to mute this message, or `in_place = TRUE` to write to the underlying tables. #> Ignoring extra `y` columns: value, fk #> Ignoring extra `y` columns: value dm_deleted$child #> # A tibble: 3 × 3 #> value pk fk #> #> 1 a 1 1 #> 2 b 2 2 #> 3 c 3 NA"},{"path":"https://dm.cynkra.com/articles/howto-dm-rows.html","id":"patch","dir":"Articles","previous_headings":"","what":"dm_rows_patch()","title":"Insert, update, or remove rows in a database","text":"dm_rows_patch() updates missing values existing records. 
use fix missing foreign key child table.","code":"patched_child <- tibble(value = \"c\", pk = 3, fk = 3) patched_child #> # A tibble: 1 × 3 #> value pk fk #> #> 1 c 3 3 dm_patched <- dm(child = patched_child) %>% dm_rows_patch(dm_deleted, .) #> Result is returned as a dm object with lazy tables. Use `in_place = FALSE` #> to mute this message, or `in_place = TRUE` to write to the underlying #> tables. dm_patched$child #> # A tibble: 3 × 3 #> value pk fk #> #> 1 a 1 1 #> 2 b 2 2 #> 3 c 3 3"},{"path":"https://dm.cynkra.com/articles/howto-dm-rows.html","id":"upsert","dir":"Articles","previous_headings":"","what":"dm_rows_upsert()","title":"Insert, update, or remove rows in a database","text":"dm_rows_upsert() updates rows supplied values exist inserts supplied values new rows don’t. example add letter “D” back dm, update foreign key “b”. done, forget disconnect:","code":"upserted_parent <- tibble(value = \"D\", pk = 4) upserted_parent #> # A tibble: 1 × 2 #> value pk #> #> 1 D 4 upserted_child <- tibble(value = c(\"b\", \"d\"), pk = c(2, 4), fk = c(3, 4)) upserted_child #> # A tibble: 2 × 3 #> value pk fk #> #> 1 b 2 3 #> 2 d 4 4 dm_upserted <- dm(parent = upserted_parent, child = upserted_child) %>% dm_rows_upsert(dm_patched, .) #> Result is returned as a dm object with lazy tables. Use `in_place = FALSE` #> to mute this message, or `in_place = TRUE` to write to the underlying #> tables. dm_upserted$parent #> # A tibble: 4 × 2 #> value pk #> #> 1 A 1 #> 2 B 2 #> 3 C 3 #> 4 D 4 dm_upserted$child #> # A tibble: 4 × 3 #> value pk fk #> #> 1 a 1 1 #> 2 b 2 3 #> 3 c 3 3 #> 4 d 4 4 DBI::dbDisconnect(sqlite_db)"},{"path":"https://dm.cynkra.com/articles/howto-dm-rows.html","id":"conclusion","dir":"Articles","previous_headings":"","what":"Conclusion","title":"Insert, update, or remove rows in a database","text":"dm_rows_...() methods give row-level granularity modifications need make relational model. 
Using common in_place argument, can construct verify modifications committing . limitations, mentioned tutorial, addressed future updates {dm}.","code":""},{"path":"https://dm.cynkra.com/articles/howto-dm-rows.html","id":"further-reading","dir":"Articles","previous_headings":"","what":"Further Reading","title":"Insert, update, or remove rows in a database","text":"tutorial answered questions, opened others, resources might assistance. data RDBMS? vignette(\"howto-dm-db\") offers detailed look working existing relational data model. data data frames, may want read vignette(\"howto-dm-df\") next. like know relational data models order get dm, check vignette(\"howto-dm-theory\"). ’re familiar relational data models want know work dm, vignette(\"tech-dm-join\"), vignette(\"tech-dm-filter\"), vignette(\"tech-dm-zoom\") good next step.","code":""},{"path":"https://dm.cynkra.com/articles/howto-dm-theory.html","id":"data-frames-and-tables","dir":"Articles","previous_headings":"","what":"1. Data Frames and Tables","title":"Introduction to relational data models","text":"data frame fundamental data structure R. Columns represent variables, rows represent observations. technical terms, data frame list variables identical length unique row names. imagine visually, result typical table structure. working data spreadsheets convenient users popular {dplyr} package data wrangling mainly rely data frames. downside data frames flat file systems like spreadsheets can result bloated tables hold many repetitive values. worst case, data frame can contain multiple columns single value different row. calls better data organization utilizing resemblance data frames database tables, also consist columns rows. elements just named differently: Additionally, number rows columns data frame , respectively, analogous cardinality degree table. Relational databases, unlike data frames, keep data one large table instead split multiple smaller tables. 
separation sub-tables several advantages: information stored , avoiding redundancy conserving memory information needs updated one place, improving consistency avoiding errors may result updating (forgetting update) value multiple locations information organized topic segmented smaller tables easier handle reasons separation data helps data quality, explain popularity relational databases production-level data management. downside approach harder merge together information different data sources identify entities refer object, common task modeling plotting data. Thus, take full advantage relational database approach, associated data model needed overcome challenges arise working multiple tables. Let’s illustrate challenge data nycflights13 dataset contains detailed information 336,776 flights departed New York City 2013. information stored five tables. Details like full name airport available immediately; can obtained joining merging constituent tables, can result long inflated pipe chains full left_join(), anti_join() forms data merging. classical {dplyr} notation, need four left_join() calls gradually merge flights table airlines, planes, airports, weather tables create one wide data frame. {dm} offers elegant shorter way combine tables augmenting {dplyr}/{dbplyr} workflows. possible best worlds: manage data {dm} linked tables, , necessary, flatten multiple tables single data frame analysis {dplyr}. 
next step create data model based multiple tables:","code":"library(dm) library(nycflights13) nycflights13_df <- flights %>% left_join(airlines, by = \"carrier\") %>% left_join(planes, by = \"tailnum\") %>% left_join(airports, by = c(\"origin\" = \"faa\")) %>% left_join(weather, by = c(\"origin\", \"time_hour\")) nycflights13_df #> # A tibble: 336,776 × 48 #> year.x month.x day.x dep_time sched_dep_time dep_delay arr_time #> #> 1 2013 1 1 517 515 2 830 #> 2 2013 1 1 533 529 4 850 #> 3 2013 1 1 542 540 2 923 #> 4 2013 1 1 544 545 -1 1004 #> 5 2013 1 1 554 600 -6 812 #> 6 2013 1 1 554 558 -4 740 #> 7 2013 1 1 555 600 -5 913 #> 8 2013 1 1 557 600 -3 709 #> 9 2013 1 1 557 600 -3 838 #> 10 2013 1 1 558 600 -2 753 #> # ℹ 336,766 more rows #> # ℹ 41 more variables: sched_arr_time , arr_delay , #> # carrier , flight , tailnum , origin , dest , #> # air_time , distance , hour.x , minute , #> # time_hour , name.x , year.y , type , #> # manufacturer , model , engines , seats , #> # speed , engine , name.y , lat , lon , …"},{"path":"https://dm.cynkra.com/articles/howto-dm-theory.html","id":"model","dir":"Articles","previous_headings":"","what":"2. Data Model","title":"Introduction to relational data models","text":"data model shows structure multiple tables linked together. nycflights13 relations can transferred following graphical representation: flights table linked four tables: airlines, planes, weather, airports. using directed arrows, visualization shows explicitly connection different columns (called attributes relational data sphere). example: column carrier flights can joined column carrier airlines table. links tables established primary keys foreign keys. 
aside, can also now see avoiding redundant data building data models multiple tables can save memory compared storing data single data frame: Reading: {dm} methods visualizing data models.","code":"dm <- dm_nycflights13(cycle = TRUE) dm %>% dm_draw() object.size(dm) #> 476256 bytes object.size(nycflights13_df) #> 108020824 bytes"},{"path":"https://dm.cynkra.com/articles/howto-dm-theory.html","id":"pk","dir":"Articles","previous_headings":"","what":"3. Primary Keys","title":"Introduction to relational data models","text":"relational data model, table one several columns uniquely identify row. columns define primary key (abbreviated “pk”). key consists single column, called simple key. key consisting one column called compound key. Example: airlines table nycflights13 column carrier primary key, simple key. weather table combination origin time_hour primary key, compound key. can get primary keys dm calling dm_get_all_pks(): dm_enum_pk_candidates() checks suitability column serve simple primary key: Reading: {dm} package offers several functions dealing primary keys.","code":"dm %>% dm_get_all_pks() #> # A tibble: 4 × 3 #> table pk_col autoincrement #> #> 1 airlines carrier FALSE #> 2 airports faa FALSE #> 3 planes tailnum FALSE #> 4 weather origin, time_hour FALSE dm %>% dm_enum_pk_candidates(airports) #> # A tibble: 8 × 3 #> columns candidate why #> #> 1 faa TRUE \"\" #> 2 name TRUE \"\" #> 3 lat TRUE \"\" #> 4 lon TRUE \"\" #> 5 alt FALSE \"has duplicate values: 30 (4), 13 (3), 9 (2), 19 (2), … #> 6 tz FALSE \"has duplicate values: -5 (48), -6 (21), -8 (12), -7 (… #> 7 dst FALSE \"has duplicate values: A (84), N (2)\" #> 8 tzone FALSE \"has duplicate values: America/New_York (48), America/…"},{"path":"https://dm.cynkra.com/articles/howto-dm-theory.html","id":"fk","dir":"Articles","previous_headings":"","what":"4. Foreign Keys","title":"Introduction to relational data models","text":"counterpart primary key one table foreign key another table. 
order join two tables, primary key first table needs referenced second table. column columns called foreign key (abbreviated “fk”). example, want link airlines table flights table, primary key airlines needs match foreign key flights. condition satisfied column carrier present primary key airlines table well foreign key flights table. case compound keys, origin time_hour columns (form primary key weather table) also present flights table. can find foreign key candidates simple keys function dm_enum_fk_candidates(); marked TRUE candidate column. Additionally, can also extract summary foreign key relations present dm object using dm_get_all_fks(): Reading: {dm} functions working foreign keys.","code":"dm %>% dm_enum_fk_candidates(flights, airlines) #> # A tibble: 19 × 3 #> columns candidate why #> #> 1 carrier TRUE \"\" #> 2 year FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 3 month FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 4 day FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 5 dep_time FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 6 sched_dep_time FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 7 dep_delay FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 8 arr_time FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 9 sched_arr_time FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 10 arr_delay FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 11 flight FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 12 tailnum FALSE \"values of `flights$tailnum` not in `airlines$… #> 13 origin FALSE \"values of `flights$origin` not in `airlines$c… #> 14 dest FALSE \"values of `flights$dest` not in `airlines$car… #> 15 air_time FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 16 distance FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 17 hour FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 18 minute FALSE 
\"\\u001b[1m\\u001b[22mCan't join `x$value1` with… #> 19 time_hour FALSE \"\\u001b[1m\\u001b[22mCan't join `x$value1` with… dm %>% dm_get_all_fks() #> # A tibble: 5 × 5 #> child_table child_fk_cols parent_table parent_key_cols on_delete #> #> 1 flights carrier airlines carrier no_action #> 2 flights origin airports faa no_action #> 3 flights dest airports faa no_action #> 4 flights tailnum planes tailnum no_action #> 5 flights origin, time_hour weather origin, time_hour no_action"},{"path":"https://dm.cynkra.com/articles/howto-dm-theory.html","id":"referential-integrity","dir":"Articles","previous_headings":"","what":"5. Referential Integrity","title":"Introduction to relational data models","text":"data set referential integrity relations tables valid. , every foreign key holds primary key present parent table. foreign key contains reference corresponding row parent table available, row orphan row database longer referential integrity. {dm} allows checking referential integrity dm_examine_constraints() function. following conditions checked: primary key values must unique missing (.e., NAs allowed). foreign key value must corresponding primary key value. example data model, substantial share flights, detailed information corresponding airplane available: Establishing referential integrity important providing clean data analysis downstream users. See vignette(\"howto-dm-rows\") information adding, deleting, updating individual rows, vignette(\"tech-dm-zoom\") operations data data model.","code":"dm %>% dm_examine_constraints() #> ! 
Unsatisfied constraints: #> • Table `flights`: foreign key `dest` into table `airports`: values of `flights$dest` not in `airports$faa`: SJU (30), BQN (6), STT (4), PSE (2) #> • Table `flights`: foreign key `tailnum` into table `planes`: values of `flights$tailnum` not in `planes$tailnum`: N725MQ (6), N537MQ (5), N722MQ (5), N730MQ (5), N736MQ (5), …"},{"path":"https://dm.cynkra.com/articles/howto-dm-theory.html","id":"normalization","dir":"Articles","previous_headings":"","what":"6. Normalization","title":"Introduction to relational data models","text":"Normalization technical term describes central design principle relational data model: splitting data multiple tables. normalized data schema consists several relations (tables) linked attributes (columns). relations can joined together means primary foreign keys. main goal normalization keep data organization clean simple possible avoiding redundant data entries. example, want change name one airport nycflights13 dataset, need update single data value. principle sometimes called single point truth. Another way demonstrate normalization splitting table two parts. Let’s look planes table, consists 3322 individual tail numbers corresponding information specific airplane, like year manufactured average cruising speed. function decompose_table() extracts two new tables creates new key model_id, links tables. results parent_table child_table differ massively number rows: child_table contains 3322 unique tailnum rows therefore consists 3322 rows, just like original planes table, parent_table shrunk just 147 rows, enough store relevant combinations avoid storing redundant information. Reading: See Simple English Wikipedia article database normalization details.","code":"# Update in one single location... 
airlines[airlines$carrier == \"UA\", \"name\"] <- \"United broke my guitar\" airlines %>% filter(carrier == \"UA\") #> # A tibble: 1 × 2 #> carrier name #> #> 1 UA United broke my guitar # ...propagates to all related records flights %>% left_join(airlines) %>% select(flight, name) #> Joining with `by = join_by(carrier)` #> # A tibble: 336,776 × 2 #> flight name #> #> 1 1545 United broke my guitar #> 2 1714 United broke my guitar #> 3 1141 American Airlines Inc. #> 4 725 JetBlue Airways #> 5 461 Delta Air Lines Inc. #> 6 1696 United broke my guitar #> 7 507 JetBlue Airways #> 8 5708 ExpressJet Airlines Inc. #> 9 79 JetBlue Airways #> 10 301 American Airlines Inc. #> # ℹ 336,766 more rows planes %>% decompose_table(model_id, model, manufacturer, type, engines, seats, speed) #> $child_table #> # A tibble: 3,322 × 4 #> tailnum year engine model_id #> #> 1 N10156 2004 Turbo-fan 120 #> 2 N102UW 1998 Turbo-fan 93 #> 3 N103US 1999 Turbo-fan 93 #> 4 N104UW 1999 Turbo-fan 93 #> 5 N10575 2002 Turbo-fan 119 #> 6 N105UW 1999 Turbo-fan 93 #> 7 N107US 1999 Turbo-fan 93 #> 8 N108UW 1999 Turbo-fan 93 #> 9 N109UW 1999 Turbo-fan 93 #> 10 N110UW 1999 Turbo-fan 93 #> # ℹ 3,312 more rows #> #> $parent_table #> # A tibble: 147 × 7 #> model_id model manufacturer type engines seats speed #> #> 1 120 EMB-145XR EMBRAER Fixed wing mu… 2 55 NA #> 2 93 A320-214 AIRBUS INDUSTRIE Fixed wing mu… 2 182 NA #> 3 119 EMB-145LR EMBRAER Fixed wing mu… 2 55 NA #> 4 39 737-824 BOEING Fixed wing mu… 2 149 NA #> 5 68 767-332 BOEING Fixed wing mu… 2 330 NA #> 6 52 757-224 BOEING Fixed wing mu… 2 178 NA #> 7 94 A320-214 AIRBUS Fixed wing mu… 2 182 NA #> 8 112 CL-600-2D24 BOMBARDIER INC Fixed wing mu… 2 95 NA #> 9 30 737-724 BOEING Fixed wing mu… 2 149 NA #> 10 27 737-524 BOEING Fixed wing mu… 2 149 NA #> # ℹ 137 more rows"},{"path":"https://dm.cynkra.com/articles/howto-dm-theory.html","id":"relational-databases","dir":"Articles","previous_headings":"","what":"7. 
Relational Databases","title":"Introduction to relational data models","text":"{dm} built upon relational data models database . Databases systems data management many constructed relational databases (e.g., SQLite, MySQL, MSSQL, Postgres, etc.). can guess names databases, SQL, short Structured Querying Language, plays important role: invented purpose querying relational databases. production, data stored relational database {dm} used work data. Therefore, {dm} can copy data databases, works transparently -memory data relational database systems. example, let’s create local SQLite database copy dm object : opposite direction, dm can also populated data database. Unfortunately, keys currently can learned Microsoft SQL Server Postgres, SQLite. Therefore, dm contains tables keys: Remember terminate database connection:","code":"con_sqlite <- DBI::dbConnect(RSQLite::SQLite()) con_sqlite #> #> Path: #> Extensions: TRUE DBI::dbListTables(con_sqlite) #> character(0) copy_dm_to(con_sqlite, dm) DBI::dbListTables(con_sqlite) #> [1] \"airlines_1_20200828_071303_12345\" \"airports_1_20200828_071303_12345\" #> [3] \"flights_1_20200828_071303_12345\" \"planes_1_20200828_071303_12345\" #> [5] \"weather_1_20200828_071303_12345\" dm_from_con(con_sqlite) #> ! unable to fetch autoincrement metadata for src 'src_SQLiteConnection' #> Keys could not be queried. 
#> ── Table source ─────────────────────────────────────────────────────────── #> src: sqlite 3.44.2 [] #> ── Metadata ─────────────────────────────────────────────────────────────── #> Tables: `airlines_1_20200828_071303_12345`, `airports_1_20200828_071303_12345`, `flights_1_20200828_071303_12345`, `planes_1_20200828_071303_12345`, `weather_1_20200828_071303_12345` #> Columns: 53 #> Primary keys: 0 #> Foreign keys: 0 DBI::dbDisconnect(con_sqlite)"},{"path":"https://dm.cynkra.com/articles/howto-dm-theory.html","id":"conclusion","dir":"Articles","previous_headings":"","what":"Conclusion","title":"Introduction to relational data models","text":"article, learned fundamental concepts data structures associated relational database management system (RDBMS).","code":""},{"path":"https://dm.cynkra.com/articles/howto-dm-theory.html","id":"further-reading","dir":"Articles","previous_headings":"","what":"Further reading","title":"Introduction to relational data models","text":"vignette(\"howto-dm-db\") – article covers accessing working RDBMSs within R session, including manipulating data, filling missing relationships tables, getting data RDBMS model, deploying data model RDBMS. vignette(\"howto-dm-df\") – data local data frames? article covers creating data model local data frames, including building relationships data model, verifying model, leveraging power dplyr operate data model.","code":""},{"path":[]},{"path":"https://dm.cynkra.com/articles/tech-dm-cdm.html","id":"replace-cdm-with-dm","dir":"Articles","previous_headings":"Changes required when updating from version 0.0.5 to 0.0.6","what":"Replace cdm with dm","title":"Migration guide: 'cdm' -> 'dm'","text":"update prevalent prefix cdm discarded favor dm. old prefix still job, warning message issued time function beginning cdm used, informing function soft-deprecated suggesting use newer version. script based older {dm} version, still work newer version, albeit complaining time outdated function used. 
can repaired : either going script step step, testing output line code use new function names provided generated warnings update function calls. just replacing occurrences cdm dm script. can e.g. done RStudio using “Find” terminal using sed -e 's/cdm/dm/g' path--file Windows sed -'' -e 's/cdm/dm/g' path--file Mac. script errors step, need check exactly error happens manually repair damage.","code":""},{"path":"https://dm.cynkra.com/articles/tech-dm-cdm.html","id":"be-careful-with-methods-for-dm-tbl","dir":"Articles","previous_headings":"Changes required when updating from version 0.0.5 to 0.0.6","what":"Be careful with methods for dm: tbl, [[, $","title":"Migration guide: 'cdm' -> 'dm'","text":"Furthermore, need pay attention used one tbl.dm(), [[.dm(), $.dm(). update implementation methods changed well, don’t get convenient warning messages. change , update, mentioned methods return table “filtering” just contain rows values relate via foreign key relations tables filtered earlier. update just table returned. want retain former behavior, need replace methods function dm_apply_filters_to_tbl(), made available update. methods course avoided general, filters set anyway result change update. short example different cases: Formerly access “filtered” tables using following syntax: update result achieved type function call:","code":"library(dm) flights_dm <- dm_nycflights13() tbl(flights_dm, \"airports\") #> Warning: `tbl.dm()` was deprecated in dm 0.2.0. #> ℹ Use `dm[[table_name]]` instead to access a specific table. #> This warning is displayed once every 8 hours. #> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was #> generated. 
#> # A tibble: 86 × 8 #> faa name lat lon alt tz dst tzone #> #> 1 ALB Albany Intl 42.7 -73.8 285 -5 A Amer… #> 2 ATL Hartsfield Jackson Atlanta I… 33.6 -84.4 1026 -5 A Amer… #> 3 AUS Austin Bergstrom Intl 30.2 -97.7 542 -6 A Amer… #> 4 BDL Bradley Intl 41.9 -72.7 173 -5 A Amer… #> 5 BHM Birmingham Intl 33.6 -86.8 644 -6 A Amer… #> 6 BNA Nashville Intl 36.1 -86.7 599 -6 A Amer… #> 7 BOS General Edward Lawrence Loga… 42.4 -71.0 19 -5 A Amer… #> 8 BTV Burlington Intl 44.5 -73.2 335 -5 A Amer… #> 9 BUF Buffalo Niagara Intl 42.9 -78.7 724 -5 A Amer… #> 10 BUR Bob Hope 34.2 -118. 778 -8 A Amer… #> # ℹ 76 more rows flights_dm$planes #> # A tibble: 945 × 9 #> tailnum year type manufacturer model engines seats speed engine #> #> 1 N10156 2004 Fixed wing … EMBRAER EMB-… 2 55 NA Turbo… #> 2 N104UW 1999 Fixed wing … AIRBUS INDU… A320… 2 182 NA Turbo… #> 3 N10575 2002 Fixed wing … EMBRAER EMB-… 2 55 NA Turbo… #> 4 N105UW 1999 Fixed wing … AIRBUS INDU… A320… 2 182 NA Turbo… #> 5 N110UW 1999 Fixed wing … AIRBUS INDU… A320… 2 182 NA Turbo… #> 6 N11106 2002 Fixed wing … EMBRAER EMB-… 2 55 NA Turbo… #> 7 N11107 2002 Fixed wing … EMBRAER EMB-… 2 55 NA Turbo… #> 8 N11109 2002 Fixed wing … EMBRAER EMB-… 2 55 NA Turbo… #> 9 N11121 2003 Fixed wing … EMBRAER EMB-… 2 55 NA Turbo… #> 10 N11137 2003 Fixed wing … EMBRAER EMB-… 2 55 NA Turbo… #> # ℹ 935 more rows flights_dm[[\"weather\"]] #> # A tibble: 144 × 15 #> origin year month day hour temp dewp humid wind_dir wind_speed #> #> 1 EWR 2013 1 10 0 41 32 70.1 230 8.06 #> 2 EWR 2013 1 10 1 39.0 30.0 69.9 210 9.21 #> 3 EWR 2013 1 10 2 39.0 28.9 66.8 230 6.90 #> 4 EWR 2013 1 10 3 39.9 27.0 59.5 270 5.75 #> 5 EWR 2013 1 10 4 41 26.1 55.0 320 6.90 #> 6 EWR 2013 1 10 5 41 26.1 55.0 300 12.7 #> 7 EWR 2013 1 10 6 39.9 25.0 54.8 280 6.90 #> 8 EWR 2013 1 10 7 41 25.0 52.6 330 6.90 #> 9 EWR 2013 1 10 8 43.0 25.0 48.7 330 8.06 #> 10 EWR 2013 1 10 9 45.0 23 41.6 320 17.3 #> # ℹ 134 more rows #> # ℹ 5 more variables: wind_gust , precip , pressure , #> # 
visib , time_hour dm_apply_filters_to_tbl(flights_dm, airlines) #> Warning: `dm_apply_filters_to_tbl()` was deprecated in dm 1.0.0. #> ℹ Access tables directly after `dm_filter()`. #> This warning is displayed once every 8 hours. #> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was #> generated. #> # A tibble: 15 × 2 #> carrier name #> #> 1 9E Endeavor Air Inc. #> 2 AA American Airlines Inc. #> 3 AS Alaska Airlines Inc. #> 4 B6 JetBlue Airways #> 5 DL Delta Air Lines Inc. #> 6 EV ExpressJet Airlines Inc. #> 7 F9 Frontier Airlines Inc. #> 8 FL AirTran Airways Corporation #> 9 HA Hawaiian Airlines Inc. #> 10 MQ Envoy Air #> 11 UA United Air Lines Inc. #> 12 US US Airways Inc. #> 13 VX Virgin America #> 14 WN Southwest Airlines Co. #> 15 YV Mesa Airlines Inc."},{"path":"https://dm.cynkra.com/articles/tech-dm-class.html","id":"class-dm","dir":"Articles","previous_headings":"","what":"Class dm","title":"Class dm and basic operations","text":"dm class consists collection tables metadata tables, names tables names columns tables primary foreign keys tables link tables together data (either data frames references database tables) tables dm must obtained data source; csv files spreadsheets need imported data frames R.","code":""},{"path":"https://dm.cynkra.com/articles/tech-dm-class.html","id":"ex_dm","dir":"Articles","previous_headings":"","what":"Examples of dm objects","title":"Class dm and basic operations","text":"currently three options available creating dm object. relevant functions creating dm objects : dm() as_dm() new_dm() dm_from_con() illustrate options, now create dm several different ways. 
can use tables well-known {nycflights13} package.","code":""},{"path":"https://dm.cynkra.com/articles/tech-dm-class.html","id":"pass-the-tables-directly","dir":"Articles","previous_headings":"Examples of dm objects","what":"Pass the tables directly","title":"Class dm and basic operations","text":"Create dm object directly providing data frames dm():","code":"library(nycflights13) library(dm) dm(airlines, airports, flights, planes, weather) #> ── Metadata ─────────────────────────────────────────────────────────────── #> Tables: `airlines`, `airports`, `flights`, `planes`, `weather` #> Columns: 53 #> Primary keys: 0 #> Foreign keys: 0"},{"path":"https://dm.cynkra.com/articles/tech-dm-class.html","id":"start-with-an-empty-dm","dir":"Articles","previous_headings":"Examples of dm objects","what":"Start with an empty dm","title":"Class dm and basic operations","text":"Start empty dm object created dm() new_dm(), add tables object:","code":"library(nycflights13) library(dm) empty_dm <- dm() empty_dm #> dm() dm(empty_dm, airlines, airports, flights, planes, weather) #> ── Metadata ─────────────────────────────────────────────────────────────── #> Tables: `airlines`, `airports`, `flights`, `planes`, `weather` #> Columns: 53 #> Primary keys: 0 #> Foreign keys: 0"},{"path":"https://dm.cynkra.com/articles/tech-dm-class.html","id":"coerce-a-list-of-tables","dir":"Articles","previous_headings":"Examples of dm objects","what":"Coerce a list of tables","title":"Class dm and basic operations","text":"Turn named list tables dm as_dm():","code":"as_dm(list( airlines = airlines, airports = airports, flights = flights, planes = planes, weather = weather )) #> ── Metadata ─────────────────────────────────────────────────────────────── #> Tables: `airlines`, `airports`, `flights`, `planes`, `weather` #> Columns: 53 #> Primary keys: 0 #> Foreign keys: 
0"},{"path":"https://dm.cynkra.com/articles/tech-dm-class.html","id":"turn-tables-from-a-src-into-a-dm","dir":"Articles","previous_headings":"Examples of dm objects","what":"Turn tables from a src into a dm","title":"Class dm and basic operations","text":"Squeeze (subset ) tables belonging src object dm using dm_from_con(): function dm_from_con(con, table_names = NULL) includes available tables source dm object. means can use , example, postgres database access via DBI::dbConnect(RPostgres::Postgres()) (appropriate arguments dbname, host, port, …), produce dm object tables database.","code":"sqlite_con <- dbplyr::nycflights13_sqlite() flights_dm <- dm_from_con(sqlite_con) flights_dm #> ── Table source ─────────────────────────────────────────────────────────── #> src: sqlite 3.44.2 [/tmp/RtmpqSPcvW/nycflights13.sqlite] #> ── Metadata ─────────────────────────────────────────────────────────────── #> Tables: `airlines`, `airports`, `flights`, `planes`, `sqlite_stat1`, … (7 total) #> Columns: 62 #> Primary keys: 0 #> Foreign keys: 0"},{"path":"https://dm.cynkra.com/articles/tech-dm-class.html","id":"low-level-construction","dir":"Articles","previous_headings":"Examples of dm objects","what":"Low-level construction","title":"Class dm and basic operations","text":"Another way creating dm object calling new_dm() list tbl objects: constructor optimized speed perform integrity checks. 
Use caution, validate using dm_validate() necessary.","code":"base_dm <- new_dm(list( airlines = airlines, airports = airports, flights = flights, planes = planes, weather = weather )) base_dm #> ── Metadata ─────────────────────────────────────────────────────────────── #> Tables: `airlines`, `airports`, `flights`, `planes`, `weather` #> Columns: 53 #> Primary keys: 0 #> Foreign keys: 0 dm_validate(base_dm)"},{"path":"https://dm.cynkra.com/articles/tech-dm-class.html","id":"access-tables","dir":"Articles","previous_headings":"","what":"Access tables","title":"Class dm and basic operations","text":"can get list tables dm_get_tables() src object dm_get_con(). order pull specific table dm, use: can use {dm}-functions manage primary keys tables dm object?","code":"flights_dm[[\"airports\"]] #> # Source: table [?? x 8] #> # Database: sqlite 3.44.2 [/tmp/RtmpqSPcvW/nycflights13.sqlite] #> faa name lat lon alt tz dst tzone #> #> 1 04G Lansdowne Airport 41.1 -80.6 1044 -5 A Amer… #> 2 06A Moton Field Municipal Airport 32.5 -85.7 264 -6 A Amer… #> 3 06C Schaumburg Regional 42.0 -88.1 801 -6 A Amer… #> 4 06N Randall Airport 41.4 -74.4 523 -5 A Amer… #> 5 09J Jekyll Island Airport 31.1 -81.4 11 -5 A Amer… #> 6 0A9 Elizabethton Municipal Airpo… 36.4 -82.2 1593 -5 A Amer… #> 7 0G6 Williams County Airport 41.5 -84.5 730 -5 A Amer… #> 8 0G7 Finger Lakes Regional Airport 42.9 -76.8 492 -5 A Amer… #> 9 0P2 Shoestring Aviation Airfield 39.8 -76.6 1000 -5 U Amer… #> 10 0S9 Jefferson County Intl 48.1 -123. 108 -8 A Amer… #> # ℹ more rows"},{"path":"https://dm.cynkra.com/articles/tech-dm-class.html","id":"pk","dir":"Articles","previous_headings":"","what":"Primary keys of dm objects","title":"Class dm and basic operations","text":"useful functions managing primary key settings : dm_add_pk() dm_get_all_pks() dm_rm_pk() dm_enum_pk_candidates() created dm object according examples “Examples dm objects”, object yet primary keys set. let’s add one. use nycflights13 tables, .e. flights_dm . 
dm now primary key: get overview tables primary keys, use dm_get_all_pks(): Remove primary key: still need get know data better, already available form dm object, can use dm_enum_pk_candidates() function order get information columns table unique keys: flights table one-column primary key candidates: dm_add_pk() check argument. set TRUE, function checks column table given user unique. performance reasons, default check = FALSE. See also [dm_examine_constraints()] checking constraints dm.","code":"dm_has_pk(flights_dm, airports) #> [1] FALSE flights_dm_with_key <- dm_add_pk(flights_dm, airports, faa) flights_dm_with_key #> ── Table source ─────────────────────────────────────────────────────────── #> src: sqlite 3.44.2 [/tmp/RtmpqSPcvW/nycflights13.sqlite] #> ── Metadata ─────────────────────────────────────────────────────────────── #> Tables: `airlines`, `airports`, `flights`, `planes`, `sqlite_stat1`, … (7 total) #> Columns: 62 #> Primary keys: 1 #> Foreign keys: 0 dm_has_pk(flights_dm_with_key, airports) #> [1] TRUE dm_get_all_pks(flights_dm_with_key) #> # A tibble: 1 × 3 #> table pk_col autoincrement #> #> 1 airports faa FALSE dm_rm_pk(flights_dm_with_key, airports) %>% dm_has_pk(airports) #> [1] FALSE dm_enum_pk_candidates(flights_dm_with_key, airports) #> # A tibble: 8 × 3 #> columns candidate why #> #> 1 faa TRUE \"\" #> 2 lon TRUE \"\" #> 3 name FALSE \"has duplicate values: Municipal Airport (5), All Airp… #> 4 lat FALSE \"has duplicate values: 38.88944 (2), 40.63975 (2)\" #> 5 alt FALSE \"has duplicate values: 0 (51), 13 (13), 14 (12), 15 (1… #> 6 tz FALSE \"has duplicate values: -5 (521), -6 (342), -9 (240), -… #> 7 dst FALSE \"has duplicate values: A (1388), U (47), N (23)\" #> 8 tzone FALSE \"has duplicate values: America/New_York (519), America… dm_enum_pk_candidates(flights_dm_with_key, flights) %>% dplyr::count(candidate) #> # A tibble: 1 × 2 #> candidate n #> #> 1 FALSE 19 try( dm_add_pk(flights_dm, airports, tzone, check = TRUE) ) #> Error in 
abort_not_unique_key(x_label, orig_names) : #> (`tzone`) not a unique key of `airports`."},{"path":"https://dm.cynkra.com/articles/tech-dm-class.html","id":"foreign-keys","dir":"Articles","previous_headings":"","what":"Foreign keys","title":"Class dm and basic operations","text":"Useful functions managing foreign key relations include: dm_add_fk() dm_get_all_fks() dm_rm_fk() dm_enum_fk_candidates() Now gets (even ) interesting: want define relations different tables. dm_add_fk() function can define column table points another table’s column. done choosing foreign key one table point primary key another table. primary key referred table must set dm_add_pk(). dm_add_fk() find primary key column referenced table make indicated column child table point . throw error: Let’s create dm object foreign key relation work later : tried add another foreign key relation flights airports object? Column dest might work, since also contains airport codes: Checks opt-executed check = TRUE. can still add foreign key default check = FALSE. See also dm_examine_constraints() checking constraints dm. Get overview foreign key relations withdm_get_all_fks(): Remove foreign key relations dm_rm_fk() (parameter columns = NULL means relations removed, message): Since primary keys defined dm object, usually need provide referenced column name ref_table. Another function getting know data better (cf. dm_enum_pk_candidates() “Primary keys dm objects”) dm_enum_fk_candidates(). 
Use get overview foreign key candidates point one table another:","code":"flights_dm_with_key %>% dm_add_fk(flights, origin, airports) #> ── Table source ─────────────────────────────────────────────────────────── #> src: sqlite 3.44.2 [/tmp/RtmpqSPcvW/nycflights13.sqlite] #> ── Metadata ─────────────────────────────────────────────────────────────── #> Tables: `airlines`, `airports`, `flights`, `planes`, `sqlite_stat1`, … (7 total) #> Columns: 62 #> Primary keys: 1 #> Foreign keys: 1 try( flights_dm %>% dm_add_fk(flights, origin, airports) ) #> Error in abort_ref_tbl_has_no_pk(ref_table_name) : #> ref_table `airports` needs a primary key first. Use `dm_enum_pk_candidates()` to find appropriate columns and `dm_add_pk()` to define a primary key. flights_dm_with_fk <- dm_add_fk(flights_dm_with_key, flights, origin, airports) try( flights_dm_with_fk %>% dm_add_fk(flights, dest, airports, check = TRUE) ) #> Error in abort_not_subset_of(table_name, col_name, ref_table_name, ref_col_name) : #> Column (`dest`) of table `flights` contains values (see examples above) that are not present in column (`faa`) of table `airports`. dm_get_all_fks(dm_nycflights13(cycle = TRUE)) #> # A tibble: 5 × 5 #> child_table child_fk_cols parent_table parent_key_cols on_delete #> #> 1 flights carrier airlines carrier no_action #> 2 flights origin airports faa no_action #> 3 flights dest airports faa no_action #> 4 flights tailnum planes tailnum no_action #> 5 flights origin, time_hour weather origin, time_hour no_action try( flights_dm_with_fk %>% dm_rm_fk(table = flights, column = dest, ref_table = airports) %>% dm_get_all_fks(c(flights, airports)) ) #> Error in abort_is_not_fkc() : No foreign keys to remove. 
flights_dm_with_fk %>% dm_rm_fk(flights, origin, airports) %>% dm_get_all_fks(c(flights, airports)) #> # A tibble: 0 × 5 #> # ℹ 5 variables: child_table , child_fk_cols , #> # parent_table , parent_key_cols , on_delete flights_dm_with_fk %>% dm_rm_fk(flights, columns = NULL, airports) %>% dm_get_all_fks(c(flights, airports)) #> Removing foreign keys: %>% #> dm_rm_fk(flights, origin, airports) #> # A tibble: 0 × 5 #> # ℹ 5 variables: child_table , child_fk_cols , #> # parent_table , parent_key_cols , on_delete dm_enum_fk_candidates(flights_dm_with_key, weather, airports) #> # A tibble: 15 × 3 #> columns candidate why #> #> 1 origin TRUE \"\" #> 2 year FALSE \"values of `weather$year` not in `airports$faa`: 2… #> 3 month FALSE \"values of `weather$month` not in `airports$faa`: … #> 4 day FALSE \"values of `weather$day` not in `airports$faa`: 3 … #> 5 hour FALSE \"values of `weather$hour` not in `airports$faa`: 1… #> 6 temp FALSE \"values of `weather$temp` not in `airports$faa`: 3… #> 7 dewp FALSE \"values of `weather$dewp` not in `airports$faa`: 2… #> 8 humid FALSE \"values of `weather$humid` not in `airports$faa`: … #> 9 wind_dir FALSE \"values of `weather$wind_dir` not in `airports$faa… #> 10 wind_speed FALSE \"values of `weather$wind_speed` not in `airports$f… #> 11 wind_gust FALSE \"values of `weather$wind_gust` not in `airports$fa… #> 12 precip FALSE \"values of `weather$precip` not in `airports$faa`:… #> 13 pressure FALSE \"values of `weather$pressure` not in `airports$faa… #> 14 visib FALSE \"values of `weather$visib` not in `airports$faa`: … #> 15 time_hour FALSE \"values of `weather$time_hour` not in `airports$fa…"},{"path":"https://dm.cynkra.com/articles/tech-dm-filter.html","id":"data-nycflights13","dir":"Articles","previous_headings":"","what":"Data: nycflights13","title":"Filtering in relational data models","text":"explore filtering {dm}, ’ll use {nycflights13} data flights, planes, airlines, airports weather tables. 
dataset contains information 336 776 flights departed New York City 2013, 3322 different planes 1458 airports involved. data comes US Bureau Transportation Statistics, documented ?nycflights13::flights. start exploration, create dm object {nycflights13} data. built-dm::dm_nycflights13() function takes care . default uses subset complete data though: flights 10th month considered, reducing number rows flights table 11 227. data model object contains data source tables, metadata tables. like create dm object tables example data, can use new_dm(), dm() as_dm() functions. See vignette(\"howto-dm-df\") details. console output ’dm` object shows data metadata, colored clarity: Now know five tables dm object. connected? relations best displayed visualization entity-relationship model: can look single table tbl. print airports table, call","code":"dm <- dm_nycflights13() dm #> ── Metadata ─────────────────────────────────────────────────────────────── #> Tables: `airlines`, `airports`, `flights`, `planes`, `weather` #> Columns: 53 #> Primary keys: 4 #> Foreign keys: 4 dm_draw(dm) tbl(dm, \"airports\") #> Warning: `tbl.dm()` was deprecated in dm 0.2.0. #> ℹ Use `dm[[table_name]]` instead to access a specific table. #> This warning is displayed once every 8 hours. #> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was #> generated. #> # A tibble: 86 × 8 #> faa name lat lon alt tz dst tzone #> #> 1 ALB Albany Intl 42.7 -73.8 285 -5 A Amer… #> 2 ATL Hartsfield Jackson Atlanta I… 33.6 -84.4 1026 -5 A Amer… #> 3 AUS Austin Bergstrom Intl 30.2 -97.7 542 -6 A Amer… #> 4 BDL Bradley Intl 41.9 -72.7 173 -5 A Amer… #> 5 BHM Birmingham Intl 33.6 -86.8 644 -6 A Amer… #> 6 BNA Nashville Intl 36.1 -86.7 599 -6 A Amer… #> 7 BOS General Edward Lawrence Loga… 42.4 -71.0 19 -5 A Amer… #> 8 BTV Burlington Intl 44.5 -73.2 335 -5 A Amer… #> 9 BUF Buffalo Niagara Intl 42.9 -78.7 724 -5 A Amer… #> 10 BUR Bob Hope 34.2 -118. 
778 -8 A Amer… #> # ℹ 76 more rows"},{"path":"https://dm.cynkra.com/articles/tech-dm-filter.html","id":"filter","dir":"Articles","previous_headings":"","what":"Filtering a dm object","title":"Filtering in relational data models","text":"dm_filter() allows select subset dm object.","code":""},{"path":"https://dm.cynkra.com/articles/tech-dm-filter.html","id":"how-it-works","dir":"Articles","previous_headings":"Filtering a dm object","what":"How it works","title":"Filtering in relational data models","text":"Filtering dm object different filtering dataframe tibble dplyr::filter(). corresponding {dm} function dm::dm_filter(). function one filtering conditions can set one tables dm object. conditions immediately evaluated respective tables related tables. resulting table, related tables (directly indirectly) filter condition taken account following way: - filtering semi-joins successively performed along paths filtered tables requested table, join reducing left-hand side tables joins rows key values corresponding values key columns right-hand side tables join. - eventually requested table returned, containing remaining rows filtering joins Currently, works graph induced foreign key relations cycle free. Fortunately, default dm_nycflights13().","code":""},{"path":"https://dm.cynkra.com/articles/tech-dm-filter.html","id":"filtering-examples","dir":"Articles","previous_headings":"Filtering a dm object","what":"Filtering Examples","title":"Filtering in relational data models","text":"Let’s see filtering action: want data related John F. Kennedy International Airport. can get numbers rows table dm_nrow(). total number rows dm drops 2 951 987 (unaffected table disconnected weather table). Next example: Get dm object containing data flights New York Dulles International Airport Washington D.C., abbreviated IAD. Applying multiple filters different tables also supported. example: Get January 2013 flights Delta Air Lines didn’t depart John F. Kennedy International Airport. 
can inspect filtered tables subsetting . airlines table, Delta remaining carrier: planes used service flights? indeed, included flights departed January (month == 1): comparison, let’s review equivalent manual query flights dplyr syntax: {dm} code leaner foreign key information encoded object.","code":"filtered_dm <- dm %>% dm_filter(airports = (name == \"John F Kennedy Intl\")) filtered_dm #> ── Metadata ─────────────────────────────────────────────────────────────── #> Tables: `airlines`, `airports`, `flights`, `planes`, `weather` #> Columns: 53 #> Primary keys: 4 #> Foreign keys: 4 rows_per_table <- filtered_dm %>% dm_nrow() rows_per_table #> airlines airports flights planes weather #> 10 1 602 336 38 sum(rows_per_table) #> [1] 987 sum_nrow <- sum(dm_nrow(dm)) sum_nrow_filtered <- sum(dm_nrow(dm_apply_filters(filtered_dm))) #> Warning: `dm_apply_filters()` was deprecated in dm 1.0.0. #> ℹ Calling `dm_apply_filters()` after `dm_filter()` is no longer necessary. #> This warning is displayed once every 8 hours. #> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was #> generated. dm %>% dm_filter(flights = (dest == \"IAD\")) %>% dm_nrow() #> airlines airports flights planes weather #> 4 3 32 28 30 dm_delta_may <- dm %>% dm_filter( airlines = (name == \"Delta Air Lines Inc.\"), airports = (name != \"John F Kennedy Intl\"), flights = (month == 1) ) dm_delta_may #> ── Metadata ─────────────────────────────────────────────────────────────── #> Tables: `airlines`, `airports`, `flights`, `planes`, `weather` #> Columns: 53 #> Primary keys: 4 #> Foreign keys: 4 dm_delta_may %>% dm_nrow() #> airlines airports flights planes weather #> 1 2 75 58 25 dm_delta_may$airlines #> # A tibble: 1 × 2 #> carrier name #> #> 1 DL Delta Air Lines Inc. 
dm_delta_may$planes #> # A tibble: 58 × 9 #> tailnum year type manufacturer model engines seats speed engine #> #> 1 N302NB 1999 Fixed wing … AIRBUS INDU… A319… 2 145 NA Turbo… #> 2 N304DQ 2008 Fixed wing … BOEING 737-… 2 149 NA Turbo… #> 3 N306DQ 2009 Fixed wing … BOEING 737-… 2 149 NA Turbo… #> 4 N307DQ 2009 Fixed wing … BOEING 737-… 2 149 NA Turbo… #> 5 N309US 1990 Fixed wing … AIRBUS INDU… A320… 2 182 NA Turbo… #> 6 N316US 1991 Fixed wing … AIRBUS INDU… A320… 2 182 NA Turbo… #> 7 N317NB 2000 Fixed wing … AIRBUS INDU… A319… 2 145 NA Turbo… #> 8 N318NB 2000 Fixed wing … AIRBUS INDU… A319… 2 145 NA Turbo… #> 9 N318US 1991 Fixed wing … AIRBUS INDU… A320… 2 182 NA Turbo… #> 10 N322NB 2001 Fixed wing … AIRBUS INDU… A319… 2 145 NA Turbo… #> # ℹ 48 more rows dm_delta_may$flights %>% dplyr::count(month) #> # A tibble: 1 × 2 #> month n #> #> 1 1 75 airlines_filtered <- filter(airlines, name == \"Delta Air Lines Inc.\") airports_filtered <- filter(airports, name != \"John F Kennedy Intl\") flights %>% semi_join(airlines_filtered, by = \"carrier\") %>% semi_join(airports_filtered, by = c(\"origin\" = \"faa\")) %>% filter(month == 5) #> # A tibble: 2,340 × 19 #> year month day dep_time sched_dep_time dep_delay arr_time #>