C.3 Compare different imputation methods

For the variables we created ourselves, we can compare the imputed values against the “true” values; this is obviously not possible for the empirical data.

Visual inspection

The function imputeTS::ggplot_na_imputations() is an excellent way to visualise the results of an imputation.

# Reference ("truth") series, indexed in parallel with the columns of df_vars
# in the loop below. The first four entries are separate objects; the last
# three are the df_vars columns themselves, so for those the reference is
# simply the observed series.
# NOTE(review): assumes df_vars has exactly these seven columns in this
# order -- confirm against where df_vars is built.
truth <- list(
  zscore, unif_discrete, cat_unordered, cat_ordered,
  df_vars$angry, df_vars$ruminate, df_vars$hours
)

# One plot per variable and per imputation method:
# g1 = linear interpolation, g2 = automatic method selection, g3 = CART.
g1 <- g2 <- g3 <- vector("list", NCOL(df_vars))

# seq_len() yields an empty sequence for zero columns (1:0 would iterate
# over c(1, 0)), and the index is called `i` so that it does not mask base::c.
for (i in seq_len(NCOL(df_vars))) {

  var_name <- colnames(df_vars)[i]

  # Series as numeric vectors with the NA positions retained (keepNA = TRUE).
  withNA <- as.numeric_discrete(df_vars[, i], keepNA = TRUE)
  Truth  <- as.numeric_discrete(truth[[i]], keepNA = TRUE)

  # out.linear is passed as is; the other two results are converted with
  # as.numeric_discrete() first.
  g1[[i]] <- ggplot_na_imputations(
    x_with_na          = withNA,
    x_with_imputations = out.linear[, i],
    x_with_truth       = Truth,
    title              = "linear interpolation",
    ylab               = var_name,
    legend             = FALSE
  )

  # The title shows the method recorded in imp.mice$method for this variable.
  g2[[i]] <- ggplot_na_imputations(
    x_with_na          = withNA,
    x_with_imputations = as.numeric_discrete(out.auto[, i]),
    x_with_truth       = Truth,
    title              = paste("auto:", imp.mice$method)[i],
    ylab               = var_name,
    legend             = FALSE
  )

  # Only this last panel keeps the default legend setting.
  g3[[i]] <- ggplot_na_imputations(
    x_with_na          = withNA,
    x_with_imputations = as.numeric_discrete(out.cart[, i]),
    x_with_truth       = Truth,
    title              = "regression trees",
    ylab               = var_name
  )

  # Print the variable name, then the three panels stacked in one column.
  print(var_name)
  print(cowplot::plot_grid(g1[[i]], g2[[i]], g3[[i]], ncol = 1))

}

Effect on analysis results

Finally, we compare the effect of the different imputation methods on the results of analyses, here the mean and standard deviation of each variable.

# One 2 x 5 summary table (rows: Mean, SD) per column of df_vars.
# Preallocated so the list is not grown inside the loop.
df <- vector("list", NCOL(df_vars))

# seq_len() yields an empty sequence for zero columns (1:0 would iterate
# over c(1, 0)), and the index is called `i` so that the c() calls below
# are not shadowed by an integer named `c`.
for (i in seq_len(NCOL(df_vars))) {

  # Observed and reference series are converted with keepNA = FALSE; the
  # imputed series use the defaults of as.numeric_discrete().
  withNA <- as.numeric_discrete(df_vars[, i], keepNA = FALSE)
  Truth  <- as.numeric_discrete(truth[[i]], keepNA = FALSE)
  LINEAR <- as.numeric_discrete(unname(out.linear[, i]))
  AUTO   <- as.numeric_discrete(out.auto[, i])
  CART   <- as.numeric_discrete(out.cart[, i])

  # Columns of the summary table, in display order.
  series <- list(
    NAremoved = withNA,
    Truth     = Truth,
    LINEAR    = LINEAR,
    AUTO      = AUTO,
    CART      = CART
  )

  # Mean and standard deviation of every series; na.rm = TRUE guards
  # against any NA that is still present after conversion.
  df[[i]] <- data.frame(
    lapply(series, function(x) {
      c(mean(x, na.rm = TRUE), sd(x, na.rm = TRUE))
    })
  )
  rownames(df[[i]]) <- c("Mean", "SD")
}
Table C.1: zscore
x
1 34
2 50
3 32
4 45
5 27
6 35
7 26
8 28
9 31
10 23
11 42
12 NA
13 42
14 59
15 25
16 40
17 44
18 39
19 37
20 61
21 21
22 23
23 29
24 55
25 55
26 67
27 47
28 65
29 19
30 36
31 24
32 59
33 42
34 63
35 39
36 53
37 26
38 64
39 34
40 17
41 29
42 15
43 37
44 30
45 27
46 44
47 32
48 30
49 15
50 16
51 64
52 30
53 33
54 60
55 50
56 33
57 23
58 17
59 35
60 41
61 16
62 20
63 40
64 28
65 30
66 38
67 35
68 50
69 NA
70 68
71 42
72 59
73 31
74 NA
75 53
76 32
77 59
78 35
79 61
80 61
81 56
82 61
83 57
84 66
85 58
86 50
87 54
88 25
89 42
90 12
91 NA
92 56
93 39
94 35
95 67
96 79
97 55
98 56
99 34
100 54
101 72
102 60
103 65
104 NA
105 60
106 66
107 39
108 56
109 34
110 59
111 53
112 27
113 47
114 NA
115 64
116 62
117 70
118 NA
119 39
120 NA
121 NA
122 37
Table C.1: unif_discrete
x
1 28
2 26
3 32
4 24
5 24
6 16
7 26
8 21
9 29
10 26
11 39
12 NA
13 22
14 29
15 27
16 33
17 18
18 27
19 25
20 39
21 11
22 19
23 49
24 65
25 27
26 32
27 33
28 18
29 25
30 19
31 27
32 49
33 37
34 20
35 26
36 32
37 27
38 35
39 30
40 27
41 28
42 35
43 16
44 30
45 22
46 31
47 35
48 19
49 23
50 18
51 30
52 35
53 20
54 26
55 33
56 31
57 35
58 12
59 28
60 40
61 12
62 18
63 50
64 42
65 40
66 20
67 20
68 50
69 NA
70 57
71 50
72 58
73 28
74 NA
75 50
76 26
77 60
78 33
79 41
80 43
81 23
82 31
83 24
84 32
85 43
86 40
87 39
88 33
89 20
90 20
91 NA
92 43
93 55
94 28
95 39
96 30
97 31
98 59
99 21
100 33
101 70
102 63
103 31
104 NA
105 61
106 57
107 28
108 59
109 50
110 63
111 59
112 58
113 26
114 NA
115 44
116 63
117 45
118 NA
119 47
120 NA
121 NA
122 19
Table C.1: cat_unordered
x
1 34
2 31
3 35
4 26
5 21
6 41
7 39
8 19
9 29
10 26
11 33
12 NA
13 34
14 39
15 31
16 25
17 27
18 24
19 17
20 29
21 16
22 21
23 43
24 15
25 35
26 50
27 28
28 31
29 30
30 50
31 22
32 58
33 22
34 38
35 47
36 31
37 37
38 32
39 28
40 31
41 35
42 29
43 28
44 23
45 30
46 23
47 26
48 29
49 18
50 20
51 31
52 29
53 26
54 27
55 19
56 33
57 39
58 18
59 34
60 44
61 13
62 29
63 24
64 50
65 31
66 50
67 18
68 40
69 NA
70 50
71 39
72 59
73 39
74 NA
75 48
76 39
77 67
78 40
79 45
80 50
81 17
82 20
83 29
84 50
85 39
86 60
87 40
88 33
89 11
90 12
91 NA
92 41
93 50
94 56
95 59
96 41
97 27
98 81
99 43
100 45
101 72
102 46
103 64
104 NA
105 35
106 27
107 39
108 39
109 35
110 37
111 60
112 61
113 34
114 NA
115 30
116 50
117 34
118 NA
119 39
120 NA
121 NA
122 41
Table C.1: cat_ordered
x
1 73
2 70
3 63
4 63
5 60
6 32
7 33
8 58
9 61
10 61
11 53
12 NA
13 71
14 68
15 27
16 66
17 63
18 78
19 72
20 54
21 67
22 27
23 73
24 25
25 24
26 27
27 56
28 33
29 67
30 47
31 29
32 23
33 24
34 61
35 61
36 60
37 60
38 57
39 79
40 62
41 60
42 60
43 76
44 82
45 71
46 61
47 71
48 62
49 34
50 79
51 78
52 72
53 55
54 78
55 62
56 32
57 23
58 20
59 60
60 27
61 21
62 74
63 31
64 36
65 55
66 31
67 66
68 22
69 NA
70 66
71 52
72 19
73 21
74 NA
75 63
76 58
77 29
78 62
79 53
80 57
81 83
82 74
83 31
84 33
85 38
86 36
87 20
88 30
89 64
90 27
91 NA
92 55
93 23
94 58
95 64
96 68
97 59
98 25
99 69
100 57
101 31
102 56
103 35
104 NA
105 69
106 60
107 78
108 55
109 62
110 37
111 44
112 33
113 85
114 NA
115 77
116 59
117 79
118 NA
119 61
120 NA
121 NA
122 65
Table C.1: angry
x
1 60
2 60
3 56
4 66
5 61
6 27
7 38
8 56
9 58
10 70
11 23
12 NA
13 67
14 36
15 80
16 80
17 83
18 76
19 62
20 44
21 29
22 29
23 34
24 19
25 23
26 23
27 63
28 22
29 26
30 40
31 62
32 54
33 74
34 57
35 77
36 65
37 64
38 53
39 80
40 59
41 62
42 59
43 74
44 71
45 65
46 62
47 74
48 61
49 28
50 77
51 67
52 70
53 23
54 67
55 63
56 63
57 37
58 17
59 28
60 28
61 90
62 72
63 38
64 57
65 19
66 42
67 71
68 22
69 NA
70 32
71 50
72 22
73 44
74 NA
75 70
76 63
77 61
78 57
79 68
80 52
81 19
82 74
83 78
84 40
85 34
86 61
87 19
88 32
89 83
90 74
91 NA
92 35
93 52
94 31
95 32
96 76
97 38
98 22
99 63
100 64
101 64
102 56
103 27
104 NA
105 68
106 68
107 73
108 34
109 31
110 59
111 50
112 61
113 85
114 NA
115 72
116 61
117 67
118 NA
119 62
120 NA
121 NA
122 62
Table C.1: ruminate
x
1 68
2 64
3 58
4 65
5 64
6 65
7 50
8 63
9 60
10 72
11 54
12 NA
13 72
14 61
15 45
16 68
17 70
18 86
19 76
20 56
21 75
22 31
23 57
24 60
25 57
26 29
27 23
28 24
29 64
30 18
31 56
32 24
33 36
34 56
35 75
36 59
37 73
38 67
39 68
40 55
41 62
42 62
43 70
44 43
45 66
46 66
47 61
48 77
49 70
50 72
51 44
52 72
53 65
54 75
55 67
56 70
57 32
58 9
59 57
60 57
61 87
62 77
63 31
64 61
65 63
66 59
67 71
68 30
69 NA
70 53
71 28
72 26
73 55
74 NA
75 66
76 63
77 70
78 31
79 77
80 71
81 69
82 68
83 75
84 61
85 67
86 40
87 66
88 71
89 69
90 81
91 NA
92 62
93 41
94 67
95 40
96 80
97 60
98 28
99 73
100 65
101 51
102 55
103 65
104 NA
105 65
106 70
107 67
108 58
109 68
110 44
111 50
112 61
113 72
114 NA
115 77
116 66
117 61
118 NA
119 76
120 NA
121 NA
122 67
Table C.1: hours
x
1 59
2 65
3 34
4 24
5 31
6 51
7 37
8 40
9 37
10 38
11 22
12 NA
13 29
14 62
15 62
16 66
17 38
18 43
19 55
20 31
21 39
22 63
23 13
24 15
25 32
26 62
27 22
28 44
29 43
30 36
31 31
32 21
33 29
34 39
35 26
36 56
37 39
38 66
39 19
40 52
41 27
42 23
43 63
44 82
45 62
46 31
47 31
48 43
49 72
50 67
51 68
52 76
53 59
54 67
55 55
56 65
57 41
58 67
59 31
60 38
61 70
62 77
63 46
64 34
65 26
66 64
67 68
68 50
69 NA
70 38
71 50
72 27
73 21
74 NA
75 28
76 37
77 26
78 38
79 37
80 27
81 93
82 79
83 69
84 32
85 35
86 53
87 26
88 39
89 70
90 68
91 NA
92 58
93 36
94 25
95 66
96 16
97 56
98 21
99 30
100 42
101 31
102 27
103 29
104 NA
105 37
106 40
107 60
108 36
109 24
110 42
111 33
112 35
113 24
114 NA
115 23
116 36
117 64
118 NA
119 30
120 NA
121 NA
122 37