-
Notifications
You must be signed in to change notification settings - Fork 0
/
Script_improved.R
141 lines (92 loc) · 3.7 KB
/
Script_improved.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#- - - - - - - - - - - - - - - - - - -#
# #
# Good coding practice #
# Good example #
# #
# author: Romy Zeiss #
# date: 2022-11-08 #
# #
#- - - - - - - - - - - - - - - - - - -#
# This code presents the improved version of "Script_raw.R" using
# good coding practices.
#- - - - - - - - - - - - - - - - - - -
## Set working directory ####
#- - - - - - - - - - - - - - - - - - -
# please set your working directory using setwd()
# Note:
# Four or more "#" at the end of a comment line turn that line into a section
# header, which RStudio uses for code folding and navigation.
#- - - - - - - - - - - - - - - - - - -
## Load required packages ####
#- - - - - - - - - - - - - - - - - - -
library(tidyverse)
#- - - - - - - - - - - - - - - - - - -
## Load data ####
#- - - - - - - - - - - - - - - - - - -
# We will use the mtcars data set, which ships with base R (package "datasets").
# make the mtcars data set (properties of different car types) available
data(list = "mtcars")
# show the first six rows to get a first impression of the columns
head(mtcars, n = 6L)
#- - - - - - - - - - - - - - - - - - -
## Define functions ####
#- - - - - - - - - - - - - - - - - - -
# Investigate the linear relationship between two variables of a data set.
#
# Fits the linear model var_response ~ var_predictor, prints the model
# summary and draws a scatterplot of both variables with the fitted
# regression line.
#
# Arguments:
#   var_response:  name of the response column (single character string)
#   var_predictor: name of the predictor column (single character string)
#   input_data:    data set in long format (data frame or tibble)
#
# Value:
#   The ggplot object, invisibly. If an input check fails, a hint is printed
#   and NULL is returned invisibly; no model is fitted in that case.
lm_2_vars <- function(var_response, var_predictor, input_data) {
  # print one line to the console; the newline is appended explicitly because
  # cat(x, sep = "\n") does not terminate a single string with a newline
  say <- function(...) cat(..., "\n", sep = "")

  # TRUE only for a single string that names a column of input_data
  is_column <- function(name) {
    is.character(name) && length(name) == 1L &&
      name %in% colnames(input_data)
  }

  ## Check input objects
  if (!is_column(var_response)) {
    say("Please check the name of your response variable.")
    return(invisible(NULL))
  }
  if (!is_column(var_predictor)) {
    say("Please check the name of your predictor column.")
    return(invisible(NULL))
  }
  if (var_response == var_predictor) {
    say(
      "Please check your variables. ",
      "Response and predictor variable seem to be exactly the same."
    )
    return(invisible(NULL))
  }

  ## Tell the user how each variable is typed
  # [[ ]] extracts the column as a vector for data frames and tibbles alike
  note_type <- function(role, name) {
    column <- input_data[[name]]
    is_categorical <- is.character(column) || is.factor(column)
    type <- if (is_categorical) "factor" else "numeric"
    say("Note: ", role, " variable will be treated as ", type, ".")
  }
  note_type("Response", var_response)
  note_type("Predictor", var_predictor)

  ## Main purpose of the function (actual analysis)
  # build the formula from the column names so that the coefficients are
  # labelled with the real variable names; backticks protect unusual names
  model_formula <- stats::reformulate(
    sprintf("`%s`", var_predictor),
    response = as.name(var_response)
  )
  model <- stats::lm(model_formula, data = input_data)
  # results are not auto-printed inside a function, so print explicitly
  print(summary(model))

  # produce scatterplot with (linear) regression line
  scatterplot <- ggplot(
    data = input_data,
    aes(x = .data[[var_predictor]], y = .data[[var_response]])
  ) +
    geom_point() +
    geom_smooth(method = "lm", formula = y ~ x) +
    labs(x = var_predictor, y = var_response)
  print(scatterplot)

  invisible(scatterplot)
}
#- - - - - - - - - - - - - - - - - - -
## Explore data ####
#- - - - - - - - - - - - - - - - - - -
# distribution of the number of cylinders (cyl)
hist(x = mtcars$cyl)
# storage type of cyl (i.e., numeric, factor, date)
str(object = mtcars$cyl)
# scatterplot matrix of all variables against each other
plot(x = mtcars)
#- - - - - - - - - - - - - - - - - - -
## Analyse data ####
#- - - - - - - - - - - - - - - - - - -
### Influence of cyl on other variables ####
# response variables whose dependence on cyl is examined
response_variables <- c("mpg", "disp", "hp", "vs", "gear")

# run lm_2_vars once per response variable, always with the number of
# cylinders (cyl) as predictor; a separator and a status line are printed
# before each analysis
for (variable in response_variables) {
  print("---------------------------------------------")
  print(paste("Analysing variable", variable, "now.", sep = " "))
  lm_2_vars(
    input_data = mtcars,
    var_predictor = "cyl",
    var_response = variable
  )
}