-
Notifications
You must be signed in to change notification settings - Fork 0
/
0.2.Prepare_Samples_Tables_Re_Sequencing.Rmd
122 lines (78 loc) · 2.65 KB
/
0.2.Prepare_Samples_Tables_Re_Sequencing.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
---
title: "Prepare Samples Tables Re Sequencing"
author: "Gerard Temprano Sagrera"
date: "2022-12-09"
output: html_document
---
```{r}
library(openxlsx)
```
```{r}
# Load the randomized sample sheet and the table read into `bio`, attach the
# Case label to each sample, and split the result by the FC column
# (presumably "flow cell" -- confirm).
#
# Alternative input kept for reference. It used to be read here and was then
# immediately overwritten by the next read, so it never affected the results:
# ss <- read.xlsx("C://Users/Gerard/Desktop/AAA/RNAseq/_Propuesta-Randomized_AAA_SP-S1.xlsx", sheet = 1)
ss <- read.xlsx("C://Users/Gerard/Downloads/_Propuesta-Randomized_AAA_SP-S1-Gerard.xlsx")
bio <- read.xlsx("C://Users/Gerard/Desktop/AAA/RNAseq/Tables/Technical_CompleteV2.xlsx")

# Proportion of each Case value across the whole `bio` table.
table(bio$Case) / nrow(bio)

# merge() performs an inner join by default: samples whose SantPau_ID has no
# matching SampleID in `bio` are dropped from `ss` at this point.
ss <- merge(ss, bio[, c("SampleID", "Case")], by.x = "SantPau_ID", by.y = "SampleID")
length(unique(ss$SantPau_ID))

# which() discards NA comparisons. Indexing with the bare logical
# `ss$FC == "SP"` would insert an all-NA row for every sample with a missing FC
# and inflate the counts and proportions below.
sp <- ss[which(ss$FC == "SP"), ]
s1 <- ss[which(ss$FC == "S1"), ]

# Number of samples assigned to either subset.
nrow(sp) + nrow(s1)

# Case proportions within each subset, to compare with the overall ones above.
table(sp$Case) / nrow(sp)
table(s1$Case) / nrow(s1)
```
## Check Index in each table
### SP
#### Duplicated i7:
```{r}
# Flag the rows of `sp` whose i7 index is used by more than one row.
# Frequency of every i7 index in `sp` (table() leaves NA out by default).
n.occur.i7 <- data.frame(table(sp$index.i7))
# Display the indexes that occur more than once.
n.occur.i7[n.occur.i7$Freq > 1, ]
# TRUE for each row that carries one of the repeated indexes.
is.dup.i7 <- sp$index.i7 %in% n.occur.i7$Var1[n.occur.i7$Freq > 1]
# Sample IDs of the affected rows, kept for inspection.
dup <- sp$SantPau_ID[is.dup.i7]
# Build the 1/0 flag from the index mask itself. Going through SantPau_ID would
# also flag rows with a unique index whenever a sample ID is repeated or NA.
sp$Dup.i7 <- as.numeric(is.dup.i7)
```
#### Duplicated i5:
```{r}
# Flag the rows of `sp` whose i5 index is used by more than one row.
# NOTE(review): the column is spelled `indext.i5` throughout this file; it
# presumably mirrors the spreadsheet header -- confirm before renaming.
# Frequency of every i5 index in `sp` (table() leaves NA out by default).
n.occur.i5 <- data.frame(table(sp$indext.i5))
# Number of distinct indexes that occur more than once.
nrow(n.occur.i5[n.occur.i5$Freq > 1, ])
# TRUE for each row that carries one of the repeated indexes.
is.dup.i5 <- sp$indext.i5 %in% n.occur.i5$Var1[n.occur.i5$Freq > 1]
# Sample IDs of the affected rows, kept for inspection.
dup <- sp$SantPau_ID[is.dup.i5]
# Build the 1/0 flag from the index mask itself. Going through SantPau_ID would
# also flag rows with a unique index whenever a sample ID is repeated or NA.
sp$Dup.i5 <- as.numeric(is.dup.i5)
```
## S1
### Duplicated i7:
```{r}
# Flag the rows of `s1` whose i7 index is used by more than one row.
# Frequency of every i7 index in `s1` (table() leaves NA out by default).
n.occur.i7 <- data.frame(table(s1$index.i7))
# Display the indexes that occur more than once.
n.occur.i7[n.occur.i7$Freq > 1, ]
# TRUE for each row that carries one of the repeated indexes.
is.dup.i7 <- s1$index.i7 %in% n.occur.i7$Var1[n.occur.i7$Freq > 1]
# Sample IDs of the affected rows, kept for inspection.
dup <- s1$SantPau_ID[is.dup.i7]
# Build the 1/0 flag from the index mask itself. Going through SantPau_ID would
# also flag rows with a unique index whenever a sample ID is repeated or NA.
s1$Dup.i7 <- as.numeric(is.dup.i7)
```
### Duplicated i5:
```{r}
# Flag the rows of `s1` whose i5 index is used by more than one row, then
# produce row-shuffled copies of both tables.
# NOTE(review): the column is spelled `indext.i5` throughout this file; it
# presumably mirrors the spreadsheet header -- confirm before renaming.
# Frequency of every i5 index in `s1` (table() leaves NA out by default).
n.occur.i5 <- data.frame(table(s1$indext.i5))
# Number of distinct indexes that occur more than once.
nrow(n.occur.i5[n.occur.i5$Freq > 1, ])
# TRUE for each row that carries one of the repeated indexes.
is.dup.i5 <- s1$indext.i5 %in% n.occur.i5$Var1[n.occur.i5$Freq > 1]
# Sample IDs of the affected rows, kept for inspection.
dup <- s1$SantPau_ID[is.dup.i5]
# Build the 1/0 flag from the index mask itself. Going through SantPau_ID would
# also flag rows with a unique index whenever a sample ID is repeated or NA.
s1$Dup.i5 <- as.numeric(is.dup.i5)

# Random row order for each table. seq_len() keeps an empty table empty, whereas
# 1:nrow(x) would expand to c(1, 0) and index a non-existent row.
# No seed is set in this chunk, so the order differs between runs.
sp.r <- sp[sample(seq_len(nrow(sp))), ]
s1.r <- s1[sample(seq_len(nrow(s1))), ]
```
```{r}
# Export of the shuffled tables (sp.r, s1.r) to xlsx; disabled -- uncomment to write the files.
#write.xlsx(sp.r, "C://Users/Gerard/Desktop/AAA/RNAseq/SP.xlsx")
#write.xlsx(s1.r, "C://Users/Gerard/Desktop/AAA/RNAseq/S1.xlsx")
```
## Final Checks:
```{r}
# Final sanity checks on the sample sheet: reload it, re-attach the Case label,
# and verify that sample and sequencing IDs are unique within each FC subset.
#
# Alternative location of the same workbook, kept for reference. It used to be
# read here and was then immediately overwritten by the next read:
# ss <- read.xlsx("C://Users/Gerard/Desktop/AAA/RNAseq/_Propuesta-Randomized_AAA_SP-S1-Gerard.xlsx")
ss <- read.xlsx("C://Users/Gerard/Downloads/_Propuesta-Randomized_AAA_SP-S1-Gerard.xlsx")
bio <- read.xlsx("C://Users/Gerard/Desktop/AAA/RNAseq/Tables/Technical_CompleteV2.xlsx")

# Proportion of each Case value across the whole `bio` table.
table(bio$Case) / nrow(bio)

# merge() performs an inner join by default: samples whose SantPau_ID has no
# matching SampleID in `bio` are dropped from `ss2`.
ss2 <- merge(ss, bio[, c("SampleID", "Case")], by.x = "SantPau_ID", by.y = "SampleID")

# Distinct sample IDs and sequencing IDs after the merge.
length(unique(ss2$SantPau_ID))
length(unique(ss2$Seq_ID))

# which() discards NA comparisons. Indexing with the bare logical
# `ss2$FC == "SP"` would insert an all-NA row for every sample with a missing
# FC and distort the uniqueness checks and proportions below.
sp <- ss2[which(ss2$FC == "SP"), ]
s1 <- ss2[which(ss2$FC == "S1"), ]

# Number of samples assigned to either subset.
nrow(sp) + nrow(s1)

# Each of these prints TRUE when the ID is unique within the subset.
length(unique(sp$SantPau_ID)) == nrow(sp)
length(unique(s1$SantPau_ID)) == nrow(s1)
length(unique(sp$Seq_ID)) == nrow(sp)
length(unique(s1$Seq_ID)) == nrow(s1)

# Case proportions within each subset, to compare with the overall ones above.
table(sp$Case) / nrow(sp)
table(s1$Case) / nrow(s1)
```