data.table

기본 문법

  • i: 행 선택
  • j: 열 선택 or 함수 적용
  • by: 집단 나눔

fread / fwrite

속도가 매우 빠름 (R의 base 함수보다 40배 더 빠름)

fread("https://raw.githubusercontent.com/jinseob2kim/lecture-snuhlab/master/data/example_g1e.csv")
      EXMD_BZ_YYYY RN_INDI HME_YYYYMM Q_PHX_DX_STK Q_PHX_DX_HTDZ Q_PHX_DX_HTN
   1:         2009  562083     200909            0             0            1
   2:         2009  334536     200911            0             0            0
   3:         2009  911867     200903            0             0            0
   4:         2009  183321     200908           NA            NA           NA
   5:         2009  942671     200909           NA            NA           NA
  ---                                                                        
1640:         2015  266734     201504           NA            NA           NA
1641:         2015  343874     201511            0             0            0
1642:         2015  798472     201507           NA            NA           NA
1643:         2015  720080     201505           NA            NA           NA
1644:         2015  127519     201511            0             0            1
      Q_PHX_DX_DM Q_PHX_DX_DLD Q_PHX_DX_PTB Q_HBV_AG Q_SMK_YN Q_DRK_FRQ_V09N
   1:           0            0           NA        3        1              0
   2:           0            0           NA        2        1              0
   3:           0            0           NA        3        1              0
   4:          NA           NA           NA        3        1              0
   5:          NA           NA           NA        3        1              0
  ---                                                                       
1640:          NA           NA           NA        1        2              2
1641:           1            0            0        2        2              0
1642:          NA           NA           NA        3        2              0
1643:          NA            1           NA        2        2              1
1644:           0            0            0        3        2              4
      HGHT WGHT WSTC  BMI VA_LT VA_RT BP_SYS BP_DIA URN_PROT  HGB FBS TOT_CHOL
   1:  144   61   90 29.4   0.7   0.8    120     80        1 12.6 117      264
   2:  162   51   63 19.4   0.8   1.0    120     80        1 13.8  96      169
   3:  163   65   82 24.5   0.7   0.6    130     80        1 15.0 118      216
   4:  152   51   70 22.1   0.8   0.9    101     62        1 13.1  90      199
   5:  159   50   73 19.8   0.7   0.8    132     78        1 13.0  92      162
  ---                                                                         
1640:  171   76   88 26.0   1.2   1.2    125     89        1 14.1  77      363
1641:  179   80   88 25.0   1.0   0.7    126     83        1 14.9 119      106
1642:  163   63   80 23.7   1.5   1.5    118     77        1 16.1 160      271
1643:  165   70   92 25.7   0.6   0.8    120     80        1 13.6  81      289
1644:  168   84  103 29.8   0.9   0.8    140     85        1 15.2 134      210
       TG HDL LDL CRTN SGOT SGPT GGT GFR
   1: 128  60 179  0.9   25   20  25  59
   2:  92  70  80  0.9   18   15  28  74
   3: 132  55 134  0.8   26   30  30  79
   4: 100  65 114  0.9   18   14  11  61
   5:  58  40 111  0.9   24   23  15  49
  ---                                   
1640: 280  63 244  0.9   19   32  95  96
1641: 247  29  50  0.9  101  157  62  79
1642: 514  51 151  1.0   40   66 104  85
1643: 810  52  NA  1.0   37   33  76  86
1644: 298  56  94  1.1   55   49  66  66

Exam data

data.frame

df <- read.csv("https://raw.githubusercontent.com/jinseob2kim/lecture-snuhlab/master/data/example_g1e.csv")
head(df)
  EXMD_BZ_YYYY RN_INDI HME_YYYYMM Q_PHX_DX_STK Q_PHX_DX_HTDZ Q_PHX_DX_HTN
1         2009  562083     200909            0             0            1
2         2009  334536     200911            0             0            0
3         2009  911867     200903            0             0            0
4         2009  183321     200908           NA            NA           NA
5         2009  942671     200909           NA            NA           NA
6         2009  979358     200912           NA            NA           NA
  Q_PHX_DX_DM Q_PHX_DX_DLD Q_PHX_DX_PTB Q_HBV_AG Q_SMK_YN Q_DRK_FRQ_V09N HGHT
1           0            0           NA        3        1              0  144
2           0            0           NA        2        1              0  162
3           0            0           NA        3        1              0  163
4          NA           NA           NA        3        1              0  152
5          NA           NA           NA        3        1              0  159
6          NA           NA           NA        2        1              0  157
  WGHT WSTC  BMI VA_LT VA_RT BP_SYS BP_DIA URN_PROT  HGB FBS TOT_CHOL  TG HDL
1   61   90 29.4   0.7   0.8    120     80        1 12.6 117      264 128  60
2   51   63 19.4   0.8   1.0    120     80        1 13.8  96      169  92  70
3   65   82 24.5   0.7   0.6    130     80        1 15.0 118      216 132  55
4   51   70 22.1   0.8   0.9    101     62        1 13.1  90      199 100  65
5   50   73 19.8   0.7   0.8    132     78        1 13.0  92      162  58  40
6   55   73 22.3   1.5   1.5    110     70        1 11.9 100      192 109  53
  LDL CRTN SGOT SGPT GGT GFR
1 179  0.9   25   20  25  59
2  80  0.9   18   15  28  74
3 134  0.8   26   30  30  79
4 114  0.9   18   14  11  61
5 111  0.9   24   23  15  49
6 117  0.7   15   12  14  83
class(df)
[1] "data.frame"

Exam data

data.table

dt <- fread("https://raw.githubusercontent.com/jinseob2kim/lecture-snuhlab/master/data/example_g1e.csv")
dt
      EXMD_BZ_YYYY RN_INDI HME_YYYYMM Q_PHX_DX_STK Q_PHX_DX_HTDZ Q_PHX_DX_HTN
   1:         2009  562083     200909            0             0            1
   2:         2009  334536     200911            0             0            0
   3:         2009  911867     200903            0             0            0
   4:         2009  183321     200908           NA            NA           NA
   5:         2009  942671     200909           NA            NA           NA
  ---                                                                        
1640:         2015  266734     201504           NA            NA           NA
1641:         2015  343874     201511            0             0            0
1642:         2015  798472     201507           NA            NA           NA
1643:         2015  720080     201505           NA            NA           NA
1644:         2015  127519     201511            0             0            1
      Q_PHX_DX_DM Q_PHX_DX_DLD Q_PHX_DX_PTB Q_HBV_AG Q_SMK_YN Q_DRK_FRQ_V09N
   1:           0            0           NA        3        1              0
   2:           0            0           NA        2        1              0
   3:           0            0           NA        3        1              0
   4:          NA           NA           NA        3        1              0
   5:          NA           NA           NA        3        1              0
  ---                                                                       
1640:          NA           NA           NA        1        2              2
1641:           1            0            0        2        2              0
1642:          NA           NA           NA        3        2              0
1643:          NA            1           NA        2        2              1
1644:           0            0            0        3        2              4
      HGHT WGHT WSTC  BMI VA_LT VA_RT BP_SYS BP_DIA URN_PROT  HGB FBS TOT_CHOL
   1:  144   61   90 29.4   0.7   0.8    120     80        1 12.6 117      264
   2:  162   51   63 19.4   0.8   1.0    120     80        1 13.8  96      169
   3:  163   65   82 24.5   0.7   0.6    130     80        1 15.0 118      216
   4:  152   51   70 22.1   0.8   0.9    101     62        1 13.1  90      199
   5:  159   50   73 19.8   0.7   0.8    132     78        1 13.0  92      162
  ---                                                                         
1640:  171   76   88 26.0   1.2   1.2    125     89        1 14.1  77      363
1641:  179   80   88 25.0   1.0   0.7    126     83        1 14.9 119      106
1642:  163   63   80 23.7   1.5   1.5    118     77        1 16.1 160      271
1643:  165   70   92 25.7   0.6   0.8    120     80        1 13.6  81      289
1644:  168   84  103 29.8   0.9   0.8    140     85        1 15.2 134      210
       TG HDL LDL CRTN SGOT SGPT GGT GFR
   1: 128  60 179  0.9   25   20  25  59
   2:  92  70  80  0.9   18   15  28  74
   3: 132  55 134  0.8   26   30  30  79
   4: 100  65 114  0.9   18   14  11  61
   5:  58  40 111  0.9   24   23  15  49
  ---                                   
1640: 280  63 244  0.9   19   32  95  96
1641: 247  29  50  0.9  101  157  62  79
1642: 514  51 151  1.0   40   66 104  85
1643: 810  52  NA  1.0   37   33  76  86
1644: 298  56  94  1.1   55   49  66  66
class(dt)
[1] "data.table" "data.frame"

Row Operation

첫 번째 열부터 다섯 번째 열까지 선택

dt[1:5]
dt[1:5, ]
   EXMD_BZ_YYYY RN_INDI HME_YYYYMM Q_PHX_DX_STK Q_PHX_DX_HTDZ Q_PHX_DX_HTN
1:         2009  562083     200909            0             0            1
2:         2009  334536     200911            0             0            0
3:         2009  911867     200903            0             0            0
4:         2009  183321     200908           NA            NA           NA
5:         2009  942671     200909           NA            NA           NA
   Q_PHX_DX_DM Q_PHX_DX_DLD Q_PHX_DX_PTB Q_HBV_AG Q_SMK_YN Q_DRK_FRQ_V09N HGHT
1:           0            0           NA        3        1              0  144
2:           0            0           NA        2        1              0  162
3:           0            0           NA        3        1              0  163
4:          NA           NA           NA        3        1              0  152
5:          NA           NA           NA        3        1              0  159
   WGHT WSTC  BMI VA_LT VA_RT BP_SYS BP_DIA URN_PROT  HGB FBS TOT_CHOL  TG HDL
1:   61   90 29.4   0.7   0.8    120     80        1 12.6 117      264 128  60
2:   51   63 19.4   0.8   1.0    120     80        1 13.8  96      169  92  70
3:   65   82 24.5   0.7   0.6    130     80        1 15.0 118      216 132  55
4:   51   70 22.1   0.8   0.9    101     62        1 13.1  90      199 100  65
5:   50   73 19.8   0.7   0.8    132     78        1 13.0  92      162  58  40
   LDL CRTN SGOT SGPT GGT GFR
1: 179  0.9   25   20  25  59
2:  80  0.9   18   15  28  74
3: 134  0.8   26   30  30  79
4: 114  0.9   18   14  11  61
5: 111  0.9   24   23  15  49

Row Operation

EXMD_BZ_YYYY가 2009년-2012년 사이이고 BMI가 25 이상인 행만 추출

dt[(EXMD_BZ_YYYY %in% 2009:2012) & (BMI >= 25)]
     EXMD_BZ_YYYY RN_INDI HME_YYYYMM Q_PHX_DX_STK Q_PHX_DX_HTDZ Q_PHX_DX_HTN
  1:         2009  562083     200909            0             0            1
  2:         2009  318669     200904           NA            NA           NA
  3:         2009  668438     200904           NA            NA           NA
  4:         2009  560878     200903           NA            NA           NA
  5:         2009  375694     200906            0             0            1
 ---                                                                        
317:         2012  582044     201210            0             0            0
318:         2012  126561     201210            0             0            0
319:         2012  266734     201204           NA            NA           NA
320:         2012  942204     201209            0             0            0
321:         2012  637160     201209            0             0            0
     Q_PHX_DX_DM Q_PHX_DX_DLD Q_PHX_DX_PTB Q_HBV_AG Q_SMK_YN Q_DRK_FRQ_V09N
  1:           0            0           NA        3        1              0
  2:          NA           NA           NA        2        1              0
  3:          NA           NA           NA        3        1              0
  4:          NA           NA           NA        2        1              0
  5:           1            1           NA        2        1              0
 ---                                                                       
317:           0            0            0        3        3              2
318:           0            0            0        2        3              1
319:          NA           NA           NA        1        3              1
320:           0            0            0        2        3              7
321:           0            0            0        3        3              3
     HGHT WGHT WSTC  BMI VA_LT VA_RT BP_SYS BP_DIA URN_PROT  HGB FBS TOT_CHOL
  1:  144   61   90 29.4   0.7   0.8    120     80        1 12.6 117      264
  2:  155   66   78 27.5   0.8   0.6     95     58        1 13.6 101      294
  3:  160   71   94 27.7   0.7   0.2    100     60        1 13.0 104      195
  4:  144   58   93 28.0   1.0   1.0    110     60        1 13.2  74      178
  5:  151   70   94 30.7   1.0   1.0    119     71        1 13.0 129      173
 ---                                                                         
317:  158   69   90 27.6   1.2   1.2    132     98        1 16.1  84      166
318:  183  103  108 30.8   1.2   1.2    139     89        1 16.6  83      175
319:  170   78   90 27.0   1.0   1.0    134     86        1 15.5  93      300
320:  176   86   91 27.8   1.0   1.5    128     76        1 16.4  94      179
321:  175   95  101 31.0   0.9   0.7    115     69        1 15.4 100      214
      TG HDL LDL CRTN SGOT SGPT GGT GFR
  1: 128  60 179  0.9   25   20  25  59
  2: 119  50 220  0.6   22   22  22 116
  3: 123  63 107  0.7   27   22  43  79
  4: 177  49  93  0.7   15   17   7  86
  5: 100  58  95  1.0   14   10  12  64
 ---                                   
317: 193  48  79  0.8   36   49  69 137
318: 264  44  78  0.9   40   74  68  98
319: 287  50 192  1.1   40   65  85  77
320: 158  47 100  0.9   48   48  95  98
321: 412  53 127  1.5   58   70  97  53

Row Operation

HME_YYYYMM에 따라 오름차순으로 정렬

dt[order(HME_YYYYMM)]
      EXMD_BZ_YYYY RN_INDI HME_YYYYMM Q_PHX_DX_STK Q_PHX_DX_HTDZ Q_PHX_DX_HTN
   1:         2009  480569     200901            0             1            1
   2:         2009   79250     200901            0             0            0
   3:         2009  418137     200901            0             0            0
   4:         2009  669498     200902            0             0            0
   5:         2009  486441     200902            0             0            1
  ---                                                                        
1640:         2015  284267     201512           NA            NA           NA
1641:         2015  606125     201512           NA            NA           NA
1642:         2015  356551     201512            0             0            1
1643:         2015    4263     201512            0             0            0
1644:         2015  887943     201512           NA            NA           NA
      Q_PHX_DX_DM Q_PHX_DX_DLD Q_PHX_DX_PTB Q_HBV_AG Q_SMK_YN Q_DRK_FRQ_V09N
   1:           0            0           NA        3        1              0
   2:           0            0           NA        1        1              0
   3:           0            0           NA       NA        2             NA
   4:           0            0           NA        2        2              1
   5:           0            0           NA        1        3              4
  ---                                                                       
1640:          NA           NA           NA        3        2              1
1641:           1           NA           NA        2        2              2
1642:           0            0            0        3        2              0
1643:           0            0            0        3        2              0
1644:          NA           NA           NA        2        2              1
      HGHT WGHT WSTC  BMI VA_LT VA_RT BP_SYS BP_DIA URN_PROT  HGB FBS TOT_CHOL
   1:  154   58   84 24.5   0.1   0.8    140     70        1 10.4  84      242
   2:  141   40   69 20.1   0.1   0.7    100     60        1 11.8  86      160
   3:  176   64   80 20.7   0.7   0.6    100     60        1 16.8  75      137
   4:  139   47   86 24.3   0.1   0.1    143     83        1 14.2 114      225
   5:  174   86   86 28.4   1.0   1.2    160    100        1 16.4  84      144
  ---                                                                         
1640:  174   69   82 22.8   1.2   1.5    119     73        1 15.3  97      205
1641:  171   87   98 29.8   0.8   1.5    130     88        1 15.3 134      235
1642:  166   73   89 26.5   1.0   1.0    139     77        1 13.4 110      223
1643:  174   67   79 22.1   1.5   1.2    115     70        1 13.6  86      217
1644:  173   81   89 27.1   1.5   1.5    120     80        1 15.1  65      197
       TG HDL LDL CRTN SGOT SGPT GGT GFR
   1: 134  53 162  1.1   15   10  10  37
   2: 144  37  94  0.6   14   16  11  59
   3:  54  48  78  0.9   17   15  17 105
   4: 189  53 134  0.5   49   55  32  73
   5: 180  44  64  1.1   42   65 174 104
  ---                                   
1640: 123  72 108  0.8   23    8  32 112
1641: 144  48 158  1.0   24   23  40  78
1642: 198  58 125  0.7   22   27  20 102
1643:  38  71 138  1.0   18   18  28  80
1644: 345  34  94  0.8   20   37  90 105

Row Operation

HME_YYYYMM은 오름차순으로, HGHT내림차순으로 정렬

dt[order(HME_YYYYMM, -HGHT)]
      EXMD_BZ_YYYY RN_INDI HME_YYYYMM Q_PHX_DX_STK Q_PHX_DX_HTDZ Q_PHX_DX_HTN
   1:         2009  418137     200901            0             0            0
   2:         2009  480569     200901            0             1            1
   3:         2009   79250     200901            0             0            0
   4:         2009  486441     200902            0             0            1
   5:         2009   26776     200902           NA            NA           NA
  ---                                                                        
1640:         2015  979090     201512            0             0            0
1641:         2015  181909     201512            0             0            1
1642:         2015  875758     201512           NA            NA           NA
1643:         2015  992522     201512            0             0            0
1644:         2015  975124     201512            0             0            1
      Q_PHX_DX_DM Q_PHX_DX_DLD Q_PHX_DX_PTB Q_HBV_AG Q_SMK_YN Q_DRK_FRQ_V09N
   1:           0            0           NA       NA        2             NA
   2:           0            0           NA        3        1              0
   3:           0            0           NA        1        1              0
   4:           0            0           NA        1        3              4
   5:          NA           NA           NA        2        2              2
  ---                                                                       
1640:           0            0            0        2        1              0
1641:           0            0            0        2        1              0
1642:          NA           NA           NA        2        1              1
1643:           0            0            0        2        1              2
1644:           0            1            0        3        1              3
      HGHT WGHT WSTC  BMI VA_LT VA_RT BP_SYS BP_DIA URN_PROT  HGB FBS TOT_CHOL
   1:  176   64   80 20.7   0.7   0.6    100     60        1 16.8  75      137
   2:  154   58   84 24.5   0.1   0.8    140     70        1 10.4  84      242
   3:  141   40   69 20.1   0.1   0.7    100     60        1 11.8  86      160
   4:  174   86   86 28.4   1.0   1.2    160    100        1 16.4  84      144
   5:  172   78   86 26.4   0.8   1.0    118     73        1 15.6  86      189
  ---                                                                         
1640:  153   58   75 24.8   1.0   1.5    140     95        1 13.3  94      201
1641:  151   70   94 30.7   0.7   0.5    147     78        1 11.2  97      163
1642:  151   51   79 22.4   0.8   1.0    105     78        1 12.9  90      188
1643:  149   40   62 18.0   0.7   0.5    116     67        1  9.2  84      180
1644:  149   49   68 22.1   0.5   0.3    144     84        1 13.2  85      200
       TG HDL LDL CRTN SGOT SGPT GGT GFR
   1:  54  48  78  0.9   17   15  17 105
   2: 134  53 162  1.1   15   10  10  37
   3: 144  37  94  0.6   14   16  11  59
   4: 180  44  64  1.1   42   65 174 104
   5:  62  59 117  0.9   19   16  44 111
  ---                                   
1640: 109  69 110  0.9   20   17  21  71
1641: 105  63  79  0.6   22   18  13 101
1642:  47  51 127  0.6   20   17  13 112
1643:  81 105  59  0.7   27   19  29  95
1644: 154  66 100  0.8   25   15  18  75

Row Operation

EXMD_BZ_YYYY가 2009년-2012년 사이이고 BMI가 25 이상인 행만 추출한 후

HGHT에 따라 오름차순으로 정렬

dt[(EXMD_BZ_YYYY %in% 2009:2012) & (BMI >= 25)][order(HGHT)]
dt[(EXMD_BZ_YYYY %in% 2009:2012) & (BMI >= 25)] %>% .[order(HGHT)]   # same
     EXMD_BZ_YYYY RN_INDI HME_YYYYMM Q_PHX_DX_STK Q_PHX_DX_HTDZ Q_PHX_DX_HTN
  1:         2009  562083     200909            0             0            1
  2:         2009  560878     200903           NA            NA           NA
  3:         2011  562083     201111            0             0            1
  4:         2011  519824     201109           NA            NA           NA
  5:         2011  914987     201103            0             0            1
 ---                                                                        
317:         2011  550255     201104            0             0            0
318:         2009  115809     200905            0             0            0
319:         2010  115809     201006            0             0            0
320:         2011  115809     201109            0             0            0
321:         2012  115809     201209            0             0            0
     Q_PHX_DX_DM Q_PHX_DX_DLD Q_PHX_DX_PTB Q_HBV_AG Q_SMK_YN Q_DRK_FRQ_V09N
  1:           0            0           NA        3        1              0
  2:          NA           NA           NA        2        1              0
  3:           0            0            0        2        1              0
  4:          NA           NA           NA        2        1              0
  5:           0            0            0        3        1              2
 ---                                                                       
317:           0            0            0        2        3              1
318:           0            0           NA        3        3              2
319:           0            0            0        3        3              1
320:           0            0            0        3        3              1
321:           0            0            0        3        3              1
     HGHT WGHT WSTC  BMI VA_LT VA_RT BP_SYS BP_DIA URN_PROT  HGB FBS TOT_CHOL
  1:  144   61   90 29.4   0.7   0.8    120     80        1 12.6 117      264
  2:  144   58   93 28.0   1.0   1.0    110     60        1 13.2  74      178
  3:  144   59   88 28.5   0.6   0.7    148     81        1 12.0 109      242
  4:  145   58   79 27.6   0.8   0.5    144     91        3 13.9 107      300
  5:  145   70   95 33.3   0.5   0.5    130     69        1  8.0 107      183
 ---                                                                         
317:  184   94   94 27.8   0.9   0.7    140    100        1 16.0 100      277
318:  186  110  111 31.8   0.9   1.0    125     90        1 13.2 109      202
319:  186  114  111 33.0   0.8   0.9    130     80        1 14.6  90      194
320:  188  114  104 32.3   0.7   1.0    139     83        1 14.9  86      189
321:  188  111  107 31.4   0.6   0.9    130     82        1 14.9  93      172
      TG HDL LDL CRTN SGOT SGPT GGT GFR
  1: 128  60 179  0.9   25   20  25  59
  2: 177  49  93  0.7   15   17   7  86
  3: 178  67 139  0.9   30   34  28  NA
  4: 427  43  NA  0.5   31   32  36  NA
  5: 104  54 108  0.9   22   17  33  NA
 ---                                   
317: 285  51 188  1.1   16    8  58  NA
318: 150  52 120  1.1   39   18  34 147
319: 297  32 103  1.0   22   13  32  NA
320: 151  46 113  0.9   31   20  36  NA
321:  63  49 110  0.9   33   17  38 100

Column Operation

첫 번째 열부터 다섯 번째 열까지 추출

dt[, 1:5]
      EXMD_BZ_YYYY RN_INDI HME_YYYYMM Q_PHX_DX_STK Q_PHX_DX_HTDZ
   1:         2009  562083     200909            0             0
   2:         2009  334536     200911            0             0
   3:         2009  911867     200903            0             0
   4:         2009  183321     200908           NA            NA
   5:         2009  942671     200909           NA            NA
  ---                                                           
1640:         2015  266734     201504           NA            NA
1641:         2015  343874     201511            0             0
1642:         2015  798472     201507           NA            NA
1643:         2015  720080     201505           NA            NA
1644:         2015  127519     201511            0             0

Column Operation

HGHT 열WGHT 열만 추출

dt[, c("HGHT", "WGHT")]
dt[, .(HGHT, WGHT)]
      HGHT WGHT
   1:  144   61
   2:  162   51
   3:  163   65
   4:  152   51
   5:  159   50
  ---          
1640:  171   76
1641:  179   80
1642:  163   63
1643:  165   70
1644:  168   84

Column Operation

HGHT 열을 Height로, WGHT 열을 Weight로 이름을 바꿔서 추출

dt[, .(Height = HGHT, Weight = WGHT)]   # rename
      Height Weight
   1:    144     61
   2:    162     51
   3:    163     65
   4:    152     51
   5:    159     50
  ---              
1640:    171     76
1641:    179     80
1642:    163     63
1643:    165     70
1644:    168     84

Column Operation

data.table 형식으로 추출

dt[, .(HGHT)]
dt[, "HGHT"]
      HGHT
   1:  144
   2:  162
   3:  163
   4:  152
   5:  159
  ---     
1640:  171
1641:  179
1642:  163
1643:  165
1644:  168

Column Operation

vector 형식으로 추출

dt[, HGHT]   # vector
   [1] 144 162 163 152 159 157 160 159 156 146 164 154 155 155 165 180 149 160
  [19] 162 154 144 155 151 158 165 160 154 177 156 145 156 165 172 148 148 158
  [37] 158 162 168 164 158 169 160 157 173 163 151 156 154 141 160 163 170 149
  [55] 144 145 163 154 160 169 170 151 152 175 160 157 139 163 148 166 165 150
  [73] 157 150 161 158 171 166 165 166 165 166 169 156 170 171 179 159 147 159
  [91] 173 162 150 157 159 175 161 172 159 167 176 170 167 166 165 178 169 172
 [109] 161 164 166 150 158 165 159 155 170 154 164 170 160 173 159 150 151 164
 [127] 162 178 176 165 169 163 181 180 170 180 186 167 175 169 176 172 167 180
 [145] 160 171 170 159 182 181 172 160 172 165 173 165 180 184 163 165 170 162
 [163] 160 176 168 170 176 171 161 173 171 172 174 173 165 173 170 150 166 169
 [181] 162 163 167 172 174 172 164 166 166 166 174 176 161 164 159 165 161 167
 [199] 179 164 172 176 164 174 159 174 169 167 170 176 163 159 175 165 172 160
 [217] 170 156 157 151 160 157 164 162 156 166 166 162 147 163 160 163 168 155
 [235] 172 159 163 151 157 153 154 164 160 176 166 173 164 164 171 166 140 139
 [253] 156 178 150 170 155 166 163 159 168 157 166 151 160 145 150 168 160 158
 [271] 166 147 164 158 165 151 155 156 159 155 161 159 180 152 167 172 148 161
 [289] 171 156 160 165 166 163 154 164 172 161 149 160 164 178 164 164 160 170
 [307] 172 150 159 167 159 158 160 165 158 159 164 160 170 153 169 153 162 151
 [325] 164 170 165 157 150 175 157 160 162 159 167 160 152 169 177 160 155 169
 [343] 157 167 162 155 172 172 165 171 177 170 160 173 164 171 182 159 180 176
 [361] 174 168 165 173 172 164 167 161 173 167 179 176 167 171 163 163 168 175
 [379] 176 186 162 183 184 181 174 161 175 164 159 171 177 166 151 172 180 169
 [397] 178 170 183 177 172 171 174 163 171 174 179 162 165 166 174 159 166 163
 [415] 175 172 164 180 168 177 162 173 170 155 171 166 174 179 163 153 169 177
 [433] 168 168 172 172 175 167 171 172 167 162 166 174 154 144 168 173 163 181
 [451] 144 169 151 165 161 146 160 156 159 160 150 172 156 152 153 139 154 166
 [469] 154 170 154 152 150 171 153 162 159 157 155 152 147 160 168 169 158 156
 [487] 178 168 157 153 152 159 163 163 156 161 163 151 152 152 134 178 160 157
 [505] 155 155 155 164 168 176 168 164 160 160 154 145 144 157 156 160 145 167
 [523] 157 157 159 157 146 165 150 154 167 159 164 161 155 155 179 158 149 157
 [541] 163 163 164 166 163 160 165 163 151 167 155 171 165 172 157 158 170 158
 [559] 169 157 171 169 145 159 156 158 175 146 155 154 169 170 149 154 167 170
 [577] 165 166 166 172 158 164 166 179 170 174 164 170 176 160 156 162 164 170
 [595] 175 168 172 167 166 171 178 168 175 178 175 166 174 174 165 169 164 171
 [613] 171 168 165 169 170 179 173 172 166 163 176 171 175 157 168 161 181 185
 [631] 171 179 165 164 179 182 169 170 169 153 174 173 172 172 169 174 171 173
 [649] 188 163 170 177 176 170 174 180 172 173 180 176 161 167 177 178 165 170
 [667] 182 162 172 173 184 163 182 145 159 163 159 147 151 153 172 140 176 183
 [685] 158 161 158 174 159 161 146 168 151 156 170 166 160 162 149 150 169 155
 [703] 163 167 161 160 158 156 163 149 157 157 164 154 155 160 145 153 158 162
 [721] 170 167 156 164 174 156 158 151 162 160 165 166 175 148 166 158 169 168
 [739] 156 177 152 164 164 163 147 166 160 153 153 144 155 158 151 162 163 159
 [757] 174 166 148 162 156 156 163 175 167 177 161 163 159 161 158 155 161 155
 [775] 169 165 164 166 148 161 178 149 160 154 158 150 172 177 160 169 160 164
 [793] 160 173 157 159 173 164 161 180 169 167 169 160 176 170 173 163 170 164
 [811] 165 156 160 177 168 170 170 175 145 171 169 172 174 177 160 170 167 167
 [829] 166 172 172 162 172 180 165 169 172 179 172 169 167 160 175 172 167 163
 [847] 165 166 172 163 164 156 163 178 179 181 167 176 176 166 171 180 170 176
 [865] 151 157 161 171 183 172 177 183 174 181 180 165 177 174 179 170 181 182
 [883] 178 178 162 175 188 172 174 172 165 162 163 161 170 170 158 183 161 170
 [901] 159 176 162 166 172 173 175 159 159 158 165 149 156 170 166 164 150 158
 [919] 152 153 158 162 153 167 178 159 147 160 168 148 150 169 154 146 139 146
 [937] 156 154 159 158 162 162 164 150 148 164 143 156 157 155 171 158 157 156
 [955] 169 160 174 153 161 153 157 169 154 163 164 169 153 159 152 155 162 178
 [973] 163 170 162 155 156 158 155 154 164 168 165 159 160 174 161 164 164 174
 [991] 166 161 155 165 163 180 156 151 156 151 160 167 160 180 157 158 163 166
[1009] 168 160 157 145 156 160 165 155 171 161 153 159 175 157 178 163 160 165
[1027] 171 149 170 160 160 163 152 173 156 164 176 162 150 159 169 168 160 166
[1045] 177 179 183 165 169 168 166 166 180 178 171 164 183 174 158 168 172 169
[1063] 174 169 167 174 171 173 169 179 169 178 171 162 169 167 173 172 167 173
[1081] 165 174 172 181 163 169 164 160 159 183 175 168 174 165 173 171 172 171
[1099] 181 180 160 177 177 165 167 181 183 173 170 162 162 171 171 177 182 169
[1117] 170 172 172 170 170 164 164 179 175 176 175 167 172 169 168 165 186 171
[1135] 166 172 166 172 172 162 157 176 166 162 175 175 159 162 170 167 159 156
[1153] 160 151 154 159 154 147 163 162 172 151 165 144 162 151 152 161 150 167
[1171] 157 158 157 155 157 158 165 153 160 168 159 164 149 163 149 151 158 155
[1189] 165 146 168 164 140 148 157 156 167 136 162 156 157 159 154 159 163 156
[1207] 168 161 162 158 149 150 155 172 170 162 160 152 160 167 165 160 155 153
[1225] 153 167 162 167 163 152 168 164 165 156 169 160 159 154 158 163 170 164
[1243] 149 159 169 161 163 157 164 153 167 177 163 164 153 161 160 163 168 160
[1261] 167 156 164 177 178 165 166 160 154 158 148 180 170 145 171 176 147 155
[1279] 153 155 165 151 164 169 175 156 149 178 177 169 149 156 161 152 175 172
[1297] 168 146 179 168 159 162 160 170 158 159 159 168 180 162 169 166 158 171
[1315] 165 173 169 169 164 174 177 166 166 180 172 166 163 155 173 170 175 165
[1333] 165 175 174 174 174 177 179 170 169 172 162 163 162 173 177 160 185 169
[1351] 172 176 176 163 180 176 175 162 168 149 173 171 172 166 176 171 168 177
[1369] 181 160 172 171 179 172 183 178 169 165 183 168 176 162 174 181 158 175
[1387] 175 164 162 180 180 178 180 174 176 162 173 162 173 170 184 179 176 171
[1405] 161 155 163 153 162 162 151 153 161 141 178 153 163 157 154 151 154 159
[1423] 151 166 168 152 145 159 159 145 164 154 159 160 153 153 161 158 169 157
[1441] 162 155 164 162 163 158 160 157 161 152 160 164 170 164 158 155 153 170
[1459] 153 161 155 159 163 168 148 158 156 179 159 156 162 174 167 138 169 169
[1477] 145 181 166 152 158 160 159 178 163 150 168 161 144 169 171 159 153 172
[1495] 165 166 168 165 167 157 152 149 151 156 177 166 156 151 154 164 165 160
[1513] 163 162 170 178 163 165 163 181 158 164 162 156 166 169 159 145 167 163
[1531] 161 160 172 158 164 149 169 172 175 149 166 159 149 166 155 138 160 167
[1549] 180 144 158 174 165 155 162 179 176 176 178 172 181 165 182 161 169 182
[1567] 168 172 177 185 174 172 174 169 160 177 179 171 175 180 179 164 158 170
[1585] 175 176 161 183 169 178 168 173 176 171 173 172 182 171 174 173 173 164
[1603] 173 171 167 170 173 178 165 175 153 168 170 160 157 162 174 173 182 161
[1621] 163 172 166 167 174 163 171 170 160 172 167 166 174 181 175 170 171 180
[1639] 173 171 179 163 165 168

Column Operation

변수로 열 이름 선택

colvars <- grep("Q_", names(dt), value = T)
colvars
[1] "Q_PHX_DX_STK"   "Q_PHX_DX_HTDZ"  "Q_PHX_DX_HTN"   "Q_PHX_DX_DM"   
[5] "Q_PHX_DX_DLD"   "Q_PHX_DX_PTB"   "Q_HBV_AG"       "Q_SMK_YN"      
[9] "Q_DRK_FRQ_V09N"
dt[, ..colvars]
dt[, colvars, with = F]
      Q_PHX_DX_STK Q_PHX_DX_HTDZ Q_PHX_DX_HTN Q_PHX_DX_DM Q_PHX_DX_DLD
   1:            0             0            1           0            0
   2:            0             0            0           0            0
   3:            0             0            0           0            0
   4:           NA            NA           NA          NA           NA
   5:           NA            NA           NA          NA           NA
  ---                                                                 
1640:           NA            NA           NA          NA           NA
1641:            0             0            0           1            0
1642:           NA            NA           NA          NA           NA
1643:           NA            NA           NA          NA            1
1644:            0             0            1           0            0
      Q_PHX_DX_PTB Q_HBV_AG Q_SMK_YN Q_DRK_FRQ_V09N
   1:           NA        3        1              0
   2:           NA        2        1              0
   3:           NA        3        1              0
   4:           NA        3        1              0
   5:           NA        3        1              0
  ---                                              
1640:           NA        1        2              2
1641:            0        2        2              0
1642:           NA        3        2              0
1643:           NA        2        2              1
1644:            0        3        2              4

Column Operation

.SD: Subset of Data

dt[, .SD, .SDcols = colvars]
      Q_PHX_DX_STK Q_PHX_DX_HTDZ Q_PHX_DX_HTN Q_PHX_DX_DM Q_PHX_DX_DLD
   1:            0             0            1           0            0
   2:            0             0            0           0            0
   3:            0             0            0           0            0
   4:           NA            NA           NA          NA           NA
   5:           NA            NA           NA          NA           NA
  ---                                                                 
1640:           NA            NA           NA          NA           NA
1641:            0             0            0           1            0
1642:           NA            NA           NA          NA           NA
1643:           NA            NA           NA          NA            1
1644:            0             0            1           0            0
      Q_PHX_DX_PTB Q_HBV_AG Q_SMK_YN Q_DRK_FRQ_V09N
   1:           NA        3        1              0
   2:           NA        2        1              0
   3:           NA        3        1              0
   4:           NA        3        1              0
   5:           NA        3        1              0
  ---                                              
1640:           NA        1        2              2
1641:            0        2        2              0
1642:           NA        3        2              0
1643:           NA        2        2              1
1644:            0        3        2              4

Column Operation

EXMD_BZ_YYYY가 2009년-2012년 사이이고 BMI가 25 이상인 행 중 colvars 열만 추출

dt[(EXMD_BZ_YYYY %in% 2009:2012) & (BMI >= 25), ..colvars]
     Q_PHX_DX_STK Q_PHX_DX_HTDZ Q_PHX_DX_HTN Q_PHX_DX_DM Q_PHX_DX_DLD
  1:            0             0            1           0            0
  2:           NA            NA           NA          NA           NA
  3:           NA            NA           NA          NA           NA
  4:           NA            NA           NA          NA           NA
  5:            0             0            1           1            1
 ---                                                                 
317:            0             0            0           0            0
318:            0             0            0           0            0
319:           NA            NA           NA          NA           NA
320:            0             0            0           0            0
321:            0             0            0           0            0
     Q_PHX_DX_PTB Q_HBV_AG Q_SMK_YN Q_DRK_FRQ_V09N
  1:           NA        3        1              0
  2:           NA        2        1              0
  3:           NA        3        1              0
  4:           NA        2        1              0
  5:           NA        2        1              0
 ---                                              
317:            0        3        3              2
318:            0        2        3              1
319:           NA        1        3              1
320:            0        2        3              7
321:            0        3        3              3

Column Operation

colvars 열 제외

dt[, !..colvars]
dt[, -..colvars]
dt[, .SD, .SDcols = -colvars]
      EXMD_BZ_YYYY RN_INDI HME_YYYYMM HGHT WGHT WSTC  BMI VA_LT VA_RT BP_SYS
   1:         2009  562083     200909  144   61   90 29.4   0.7   0.8    120
   2:         2009  334536     200911  162   51   63 19.4   0.8   1.0    120
   3:         2009  911867     200903  163   65   82 24.5   0.7   0.6    130
   4:         2009  183321     200908  152   51   70 22.1   0.8   0.9    101
   5:         2009  942671     200909  159   50   73 19.8   0.7   0.8    132
  ---                                                                       
1640:         2015  266734     201504  171   76   88 26.0   1.2   1.2    125
1641:         2015  343874     201511  179   80   88 25.0   1.0   0.7    126
1642:         2015  798472     201507  163   63   80 23.7   1.5   1.5    118
1643:         2015  720080     201505  165   70   92 25.7   0.6   0.8    120
1644:         2015  127519     201511  168   84  103 29.8   0.9   0.8    140
      BP_DIA URN_PROT  HGB FBS TOT_CHOL  TG HDL LDL CRTN SGOT SGPT GGT GFR
   1:     80        1 12.6 117      264 128  60 179  0.9   25   20  25  59
   2:     80        1 13.8  96      169  92  70  80  0.9   18   15  28  74
   3:     80        1 15.0 118      216 132  55 134  0.8   26   30  30  79
   4:     62        1 13.1  90      199 100  65 114  0.9   18   14  11  61
   5:     78        1 13.0  92      162  58  40 111  0.9   24   23  15  49
  ---                                                                     
1640:     89        1 14.1  77      363 280  63 244  0.9   19   32  95  96
1641:     83        1 14.9 119      106 247  29  50  0.9  101  157  62  79
1642:     77        1 16.1 160      271 514  51 151  1.0   40   66 104  85
1643:     80        1 13.6  81      289 810  52  NA  1.0   37   33  76  86
1644:     85        1 15.2 134      210 298  56  94  1.1   55   49  66  66

Column Summary

HGHT의 평균, WGHT의 평균, BMI의 평균

dt[, .(mean(HGHT), mean(WGHT), mean(BMI))]
         V1       V2       V3
1: 164.5487 65.09672 23.92257

Column Summary

HGHT의 평균, WGHT의 평균, BMI의 평균을 이름 지정해서 출력

dt[, .(HGHT = mean(HGHT), WGHT = mean(WGHT), BMI = mean(BMI))]
       HGHT     WGHT      BMI
1: 164.5487 65.09672 23.92257

Column Summary

lapply로 HGHT, WGHT, BMI에 한번에 mean 함수 적용

dt[, lapply(.SD, mean), .SDcols = c("HGHT", "WGHT", "BMI")]
       HGHT     WGHT      BMI
1: 164.5487 65.09672 23.92257

By Operation

EXMD_BZ_YYYY으로 그룹화하여 HGHT의 평균, WGHT의 평균, BMI의 평균 구하기

dt[, .(HGHT = mean(HGHT), WGHT = mean(WGHT), BMI = mean(BMI)), by = EXMD_BZ_YYYY]
dt[, .(HGHT = mean(HGHT), WGHT = mean(WGHT), BMI = mean(BMI)), by = "EXMD_BZ_YYYY"]
dt[, lapply(.SD, mean), .SDcols = c("HGHT", "WGHT", "BMI"), by = EXMD_BZ_YYYY]
   EXMD_BZ_YYYY     HGHT     WGHT      BMI
1:         2009 164.0841 64.32710 23.76402
2:         2010 164.9280 65.13983 23.82712
3:         2011 164.1480 64.89686 23.96009
4:         2012 164.9188 65.82051 24.08932
5:         2013 164.9095 64.90535 23.75391
6:         2014 164.3189 64.47244 23.78031
7:         2015 164.4792 66.07500 24.28167

By Operation

HGHT가 175 이상인 데이터를 EXMD_BZ_YYYY, Q_SMK_YN으로 그룹화하여 개수 구함

.N: length()

dt[HGHT >= 175, .N, by = .(EXMD_BZ_YYYY, Q_SMK_YN)]
dt[HGHT >= 175, .N, by = c("EXMD_BZ_YYYY", "Q_SMK_YN")]
    EXMD_BZ_YYYY Q_SMK_YN  N
 1:         2009        1  7
 2:         2009       NA  1
 3:         2009        3 14
 4:         2009        2  5
 5:         2010        1  6
 6:         2010        3 19
 7:         2010        2  7
 8:         2011        1  7
 9:         2011        2  8
10:         2011        3 16
11:         2012        1 12
12:         2012        2  6
13:         2012        3 21
14:         2013        1 10
15:         2013        3 16
16:         2013        2  8
17:         2014        1 10
18:         2014        2  7
19:         2014        3 26
20:         2015        1 12
21:         2015        3 18
22:         2015        2  6
    EXMD_BZ_YYYY Q_SMK_YN  N

By Operation

keyby를 통해 정렬할 수 있음

dt[HGHT >= 175, .N, keyby = c("EXMD_BZ_YYYY", "Q_SMK_YN")]
    EXMD_BZ_YYYY Q_SMK_YN  N
 1:         2009       NA  1
 2:         2009        1  7
 3:         2009        2  5
 4:         2009        3 14
 5:         2010        1  6
 6:         2010        2  7
 7:         2010        3 19
 8:         2011        1  7
 9:         2011        2  8
10:         2011        3 16
11:         2012        1 12
12:         2012        2  6
13:         2012        3 21
14:         2013        1 10
15:         2013        2  8
16:         2013        3 16
17:         2014        1 10
18:         2014        2  7
19:         2014        3 26
20:         2015        1 12
21:         2015        2  6
22:         2015        3 18
    EXMD_BZ_YYYY Q_SMK_YN  N

By Operation

조건으로 그룹화할 수도 있음

dt[HGHT >= 175, .N, keyby= .(EXMD_BZ_YYYY >= 2015, Q_PHX_DX_STK == 1)]
   EXMD_BZ_YYYY Q_PHX_DX_STK   N
1:        FALSE           NA  50
2:        FALSE        FALSE 156
3:         TRUE           NA   6
4:         TRUE        FALSE  30

By Operation

조건으로 그룹화할 수도 있음

dt[HGHT >= 175, .N, keyby= .(get("EXMD_BZ_YYYY") >= 2015, get("Q_PHX_DX_STK") == 1)]
     get get.1   N
1: FALSE    NA  50
2: FALSE FALSE 156
3:  TRUE    NA   6
4:  TRUE FALSE  30

By Operation

조건으로 그룹화할 수도 있음

dt[HGHT >= 175, .N, keyby= .(Y2015 = ifelse(EXMD_BZ_YYYY >= 2015, ">=2015", "<2015"))]
    Y2015   N
1:  <2015 206
2: >=2015  36

Merge

예시 데이터

dt1 <- dt[1:10, .SD, .SDcols = c("EXMD_BZ_YYYY", "RN_INDI", "HME_YYYYMM", colvars)]
dt1
    EXMD_BZ_YYYY RN_INDI HME_YYYYMM Q_PHX_DX_STK Q_PHX_DX_HTDZ Q_PHX_DX_HTN
 1:         2009  562083     200909            0             0            1
 2:         2009  334536     200911            0             0            0
 3:         2009  911867     200903            0             0            0
 4:         2009  183321     200908           NA            NA           NA
 5:         2009  942671     200909           NA            NA           NA
 6:         2009  979358     200912           NA            NA           NA
 7:         2009  554112     200911           NA            NA           NA
 8:         2009  487160     200908           NA            NA           NA
 9:         2009  793017     200906           NA            NA           NA
10:         2009  219397     200912            0             0            1
    Q_PHX_DX_DM Q_PHX_DX_DLD Q_PHX_DX_PTB Q_HBV_AG Q_SMK_YN Q_DRK_FRQ_V09N
 1:           0            0           NA        3        1              0
 2:           0            0           NA        2        1              0
 3:           0            0           NA        3        1              0
 4:          NA           NA           NA        3        1              0
 5:          NA           NA           NA        3        1              0
 6:          NA           NA           NA        2        1              0
 7:          NA           NA           NA        2        1              0
 8:          NA           NA           NA        3        1              0
 9:          NA           NA           NA        3        1              0
10:           0            0           NA        3        1              0
dt2 <- dt[6:15, -..colvars]
dt2
    EXMD_BZ_YYYY RN_INDI HME_YYYYMM HGHT WGHT WSTC  BMI VA_LT VA_RT BP_SYS
 1:         2009  979358     200912  157   55   73 22.3   1.5   1.5    110
 2:         2009  554112     200911  160   56   67 21.9   1.5   1.5    119
 3:         2009  487160     200908  159   54   66 21.4   1.2   1.5    111
 4:         2009  793017     200906  156   53   67 21.8   1.2   1.0    138
 5:         2009  219397     200912  146   48   78 22.5   1.5   1.5    138
 6:         2009  831349     200912  164   66   85 24.5   1.2   1.0    130
 7:         2009  480569     200901  154   58   84 24.5   0.1   0.8    140
 8:         2009  559370     200912  155   51   75 21.2   1.0   1.0     93
 9:         2009  318669     200904  155   66   78 27.5   0.8   0.6     95
10:         2009  395781     200907  165   60   71 22.0   0.6   0.4    120
    BP_DIA URN_PROT  HGB FBS TOT_CHOL  TG HDL LDL CRTN SGOT SGPT GGT GFR
 1:     70        1 11.9 100      192 109  53 117  0.7   15   12  14  83
 2:     78        1 11.2  84      152  38  43 101  0.8    8    6  10  97
 3:     60        1 12.2  88      166  42  58  99  1.0   16   11  12  65
 4:     72        1 11.0  74      155  86  52  85  0.6   15   13  13  96
 5:     84        1 12.8 107      178  87  35 125  0.7   21   21  23  70
 6:     90        1 16.3 108      209  57  59 138  0.8   32   38  16  98
 7:     70        1 10.4  84      242 134  53 162  1.1   15   10  10  37
 8:     53        1 11.1  92      212  44  69 134  1.0   20   10  10  59
 9:     58        1 13.6 101      294 119  50 220  0.6   22   22  22 116
10:     70        1 15.3  85      237 112  56 158  1.0   24   20  18  70

Merge

Merge

Full join

merge(dt1, dt2, by = c("EXMD_BZ_YYYY", "RN_INDI", "HME_YYYYMM"), all = T)
    EXMD_BZ_YYYY RN_INDI HME_YYYYMM Q_PHX_DX_STK Q_PHX_DX_HTDZ Q_PHX_DX_HTN
 1:         2009  183321     200908           NA            NA           NA
 2:         2009  219397     200912            0             0            1
 3:         2009  318669     200904           NA            NA           NA
 4:         2009  334536     200911            0             0            0
 5:         2009  395781     200907           NA            NA           NA
 6:         2009  480569     200901           NA            NA           NA
 7:         2009  487160     200908           NA            NA           NA
 8:         2009  554112     200911           NA            NA           NA
 9:         2009  559370     200912           NA            NA           NA
10:         2009  562083     200909            0             0            1
11:         2009  793017     200906           NA            NA           NA
12:         2009  831349     200912           NA            NA           NA
13:         2009  911867     200903            0             0            0
14:         2009  942671     200909           NA            NA           NA
15:         2009  979358     200912           NA            NA           NA
    Q_PHX_DX_DM Q_PHX_DX_DLD Q_PHX_DX_PTB Q_HBV_AG Q_SMK_YN Q_DRK_FRQ_V09N HGHT
 1:          NA           NA           NA        3        1              0   NA
 2:           0            0           NA        3        1              0  146
 3:          NA           NA           NA       NA       NA             NA  155
 4:           0            0           NA        2        1              0   NA
 5:          NA           NA           NA       NA       NA             NA  165
 6:          NA           NA           NA       NA       NA             NA  154
 7:          NA           NA           NA        3        1              0  159
 8:          NA           NA           NA        2        1              0  160
 9:          NA           NA           NA       NA       NA             NA  155
10:           0            0           NA        3        1              0   NA
11:          NA           NA           NA        3        1              0  156
12:          NA           NA           NA       NA       NA             NA  164
13:           0            0           NA        3        1              0   NA
14:          NA           NA           NA        3        1              0   NA
15:          NA           NA           NA        2        1              0  157
    WGHT WSTC  BMI VA_LT VA_RT BP_SYS BP_DIA URN_PROT  HGB FBS TOT_CHOL  TG HDL
 1:   NA   NA   NA    NA    NA     NA     NA       NA   NA  NA       NA  NA  NA
 2:   48   78 22.5   1.5   1.5    138     84        1 12.8 107      178  87  35
 3:   66   78 27.5   0.8   0.6     95     58        1 13.6 101      294 119  50
 4:   NA   NA   NA    NA    NA     NA     NA       NA   NA  NA       NA  NA  NA
 5:   60   71 22.0   0.6   0.4    120     70        1 15.3  85      237 112  56
 6:   58   84 24.5   0.1   0.8    140     70        1 10.4  84      242 134  53
 7:   54   66 21.4   1.2   1.5    111     60        1 12.2  88      166  42  58
 8:   56   67 21.9   1.5   1.5    119     78        1 11.2  84      152  38  43
 9:   51   75 21.2   1.0   1.0     93     53        1 11.1  92      212  44  69
10:   NA   NA   NA    NA    NA     NA     NA       NA   NA  NA       NA  NA  NA
11:   53   67 21.8   1.2   1.0    138     72        1 11.0  74      155  86  52
12:   66   85 24.5   1.2   1.0    130     90        1 16.3 108      209  57  59
13:   NA   NA   NA    NA    NA     NA     NA       NA   NA  NA       NA  NA  NA
14:   NA   NA   NA    NA    NA     NA     NA       NA   NA  NA       NA  NA  NA
15:   55   73 22.3   1.5   1.5    110     70        1 11.9 100      192 109  53
    LDL CRTN SGOT SGPT GGT GFR
 1:  NA   NA   NA   NA  NA  NA
 2: 125  0.7   21   21  23  70
 3: 220  0.6   22   22  22 116
 4:  NA   NA   NA   NA  NA  NA
 5: 158  1.0   24   20  18  70
 6: 162  1.1   15   10  10  37
 7:  99  1.0   16   11  12  65
 8: 101  0.8    8    6  10  97
 9: 134  1.0   20   10  10  59
10:  NA   NA   NA   NA  NA  NA
11:  85  0.6   15   13  13  96
12: 138  0.8   32   38  16  98
13:  NA   NA   NA   NA  NA  NA
14:  NA   NA   NA   NA  NA  NA
15: 117  0.7   15   12  14  83

Merge

Inner join

merge(dt1, dt2, by = c("EXMD_BZ_YYYY", "RN_INDI", "HME_YYYYMM"), all = F)
   EXMD_BZ_YYYY RN_INDI HME_YYYYMM Q_PHX_DX_STK Q_PHX_DX_HTDZ Q_PHX_DX_HTN
1:         2009  219397     200912            0             0            1
2:         2009  487160     200908           NA            NA           NA
3:         2009  554112     200911           NA            NA           NA
4:         2009  793017     200906           NA            NA           NA
5:         2009  979358     200912           NA            NA           NA
   Q_PHX_DX_DM Q_PHX_DX_DLD Q_PHX_DX_PTB Q_HBV_AG Q_SMK_YN Q_DRK_FRQ_V09N HGHT
1:           0            0           NA        3        1              0  146
2:          NA           NA           NA        3        1              0  159
3:          NA           NA           NA        2        1              0  160
4:          NA           NA           NA        3        1              0  156
5:          NA           NA           NA        2        1              0  157
   WGHT WSTC  BMI VA_LT VA_RT BP_SYS BP_DIA URN_PROT  HGB FBS TOT_CHOL  TG HDL
1:   48   78 22.5   1.5   1.5    138     84        1 12.8 107      178  87  35
2:   54   66 21.4   1.2   1.5    111     60        1 12.2  88      166  42  58
3:   56   67 21.9   1.5   1.5    119     78        1 11.2  84      152  38  43
4:   53   67 21.8   1.2   1.0    138     72        1 11.0  74      155  86  52
5:   55   73 22.3   1.5   1.5    110     70        1 11.9 100      192 109  53
   LDL CRTN SGOT SGPT GGT GFR
1: 125  0.7   21   21  23  70
2:  99  1.0   16   11  12  65
3: 101  0.8    8    6  10  97
4:  85  0.6   15   13  13  96
5: 117  0.7   15   12  14  83

Merge

Left join

merge(dt1, dt2, by = c("EXMD_BZ_YYYY", "RN_INDI", "HME_YYYYMM"), all.x = T)
    EXMD_BZ_YYYY RN_INDI HME_YYYYMM Q_PHX_DX_STK Q_PHX_DX_HTDZ Q_PHX_DX_HTN
 1:         2009  183321     200908           NA            NA           NA
 2:         2009  219397     200912            0             0            1
 3:         2009  334536     200911            0             0            0
 4:         2009  487160     200908           NA            NA           NA
 5:         2009  554112     200911           NA            NA           NA
 6:         2009  562083     200909            0             0            1
 7:         2009  793017     200906           NA            NA           NA
 8:         2009  911867     200903            0             0            0
 9:         2009  942671     200909           NA            NA           NA
10:         2009  979358     200912           NA            NA           NA
    Q_PHX_DX_DM Q_PHX_DX_DLD Q_PHX_DX_PTB Q_HBV_AG Q_SMK_YN Q_DRK_FRQ_V09N HGHT
 1:          NA           NA           NA        3        1              0   NA
 2:           0            0           NA        3        1              0  146
 3:           0            0           NA        2        1              0   NA
 4:          NA           NA           NA        3        1              0  159
 5:          NA           NA           NA        2        1              0  160
 6:           0            0           NA        3        1              0   NA
 7:          NA           NA           NA        3        1              0  156
 8:           0            0           NA        3        1              0   NA
 9:          NA           NA           NA        3        1              0   NA
10:          NA           NA           NA        2        1              0  157
    WGHT WSTC  BMI VA_LT VA_RT BP_SYS BP_DIA URN_PROT  HGB FBS TOT_CHOL  TG HDL
 1:   NA   NA   NA    NA    NA     NA     NA       NA   NA  NA       NA  NA  NA
 2:   48   78 22.5   1.5   1.5    138     84        1 12.8 107      178  87  35
 3:   NA   NA   NA    NA    NA     NA     NA       NA   NA  NA       NA  NA  NA
 4:   54   66 21.4   1.2   1.5    111     60        1 12.2  88      166  42  58
 5:   56   67 21.9   1.5   1.5    119     78        1 11.2  84      152  38  43
 6:   NA   NA   NA    NA    NA     NA     NA       NA   NA  NA       NA  NA  NA
 7:   53   67 21.8   1.2   1.0    138     72        1 11.0  74      155  86  52
 8:   NA   NA   NA    NA    NA     NA     NA       NA   NA  NA       NA  NA  NA
 9:   NA   NA   NA    NA    NA     NA     NA       NA   NA  NA       NA  NA  NA
10:   55   73 22.3   1.5   1.5    110     70        1 11.9 100      192 109  53
    LDL CRTN SGOT SGPT GGT GFR
 1:  NA   NA   NA   NA  NA  NA
 2: 125  0.7   21   21  23  70
 3:  NA   NA   NA   NA  NA  NA
 4:  99  1.0   16   11  12  65
 5: 101  0.8    8    6  10  97
 6:  NA   NA   NA   NA  NA  NA
 7:  85  0.6   15   13  13  96
 8:  NA   NA   NA   NA  NA  NA
 9:  NA   NA   NA   NA  NA  NA
10: 117  0.7   15   12  14  83

Merge

Left join 다른 방법

dt2[dt1, on = c("EXMD_BZ_YYYY", "RN_INDI", "HME_YYYYMM")]
    EXMD_BZ_YYYY RN_INDI HME_YYYYMM HGHT WGHT WSTC  BMI VA_LT VA_RT BP_SYS
 1:         2009  562083     200909   NA   NA   NA   NA    NA    NA     NA
 2:         2009  334536     200911   NA   NA   NA   NA    NA    NA     NA
 3:         2009  911867     200903   NA   NA   NA   NA    NA    NA     NA
 4:         2009  183321     200908   NA   NA   NA   NA    NA    NA     NA
 5:         2009  942671     200909   NA   NA   NA   NA    NA    NA     NA
 6:         2009  979358     200912  157   55   73 22.3   1.5   1.5    110
 7:         2009  554112     200911  160   56   67 21.9   1.5   1.5    119
 8:         2009  487160     200908  159   54   66 21.4   1.2   1.5    111
 9:         2009  793017     200906  156   53   67 21.8   1.2   1.0    138
10:         2009  219397     200912  146   48   78 22.5   1.5   1.5    138
    BP_DIA URN_PROT  HGB FBS TOT_CHOL  TG HDL LDL CRTN SGOT SGPT GGT GFR
 1:     NA       NA   NA  NA       NA  NA  NA  NA   NA   NA   NA  NA  NA
 2:     NA       NA   NA  NA       NA  NA  NA  NA   NA   NA   NA  NA  NA
 3:     NA       NA   NA  NA       NA  NA  NA  NA   NA   NA   NA  NA  NA
 4:     NA       NA   NA  NA       NA  NA  NA  NA   NA   NA   NA  NA  NA
 5:     NA       NA   NA  NA       NA  NA  NA  NA   NA   NA   NA  NA  NA
 6:     70        1 11.9 100      192 109  53 117  0.7   15   12  14  83
 7:     78        1 11.2  84      152  38  43 101  0.8    8    6  10  97
 8:     60        1 12.2  88      166  42  58  99  1.0   16   11  12  65
 9:     72        1 11.0  74      155  86  52  85  0.6   15   13  13  96
10:     84        1 12.8 107      178  87  35 125  0.7   21   21  23  70
    Q_PHX_DX_STK Q_PHX_DX_HTDZ Q_PHX_DX_HTN Q_PHX_DX_DM Q_PHX_DX_DLD
 1:            0             0            1           0            0
 2:            0             0            0           0            0
 3:            0             0            0           0            0
 4:           NA            NA           NA          NA           NA
 5:           NA            NA           NA          NA           NA
 6:           NA            NA           NA          NA           NA
 7:           NA            NA           NA          NA           NA
 8:           NA            NA           NA          NA           NA
 9:           NA            NA           NA          NA           NA
10:            0             0            1           0            0
    Q_PHX_DX_PTB Q_HBV_AG Q_SMK_YN Q_DRK_FRQ_V09N
 1:           NA        3        1              0
 2:           NA        2        1              0
 3:           NA        3        1              0
 4:           NA        3        1              0
 5:           NA        3        1              0
 6:           NA        2        1              0
 7:           NA        2        1              0
 8:           NA        3        1              0
 9:           NA        3        1              0
10:           NA        3        1              0

Merge

Right join

merge(dt1, dt2, by = c("EXMD_BZ_YYYY", "RN_INDI", "HME_YYYYMM"), all.y = T)
    EXMD_BZ_YYYY RN_INDI HME_YYYYMM Q_PHX_DX_STK Q_PHX_DX_HTDZ Q_PHX_DX_HTN
 1:         2009  219397     200912            0             0            1
 2:         2009  318669     200904           NA            NA           NA
 3:         2009  395781     200907           NA            NA           NA
 4:         2009  480569     200901           NA            NA           NA
 5:         2009  487160     200908           NA            NA           NA
 6:         2009  554112     200911           NA            NA           NA
 7:         2009  559370     200912           NA            NA           NA
 8:         2009  793017     200906           NA            NA           NA
 9:         2009  831349     200912           NA            NA           NA
10:         2009  979358     200912           NA            NA           NA
    Q_PHX_DX_DM Q_PHX_DX_DLD Q_PHX_DX_PTB Q_HBV_AG Q_SMK_YN Q_DRK_FRQ_V09N HGHT
 1:           0            0           NA        3        1              0  146
 2:          NA           NA           NA       NA       NA             NA  155
 3:          NA           NA           NA       NA       NA             NA  165
 4:          NA           NA           NA       NA       NA             NA  154
 5:          NA           NA           NA        3        1              0  159
 6:          NA           NA           NA        2        1              0  160
 7:          NA           NA           NA       NA       NA             NA  155
 8:          NA           NA           NA        3        1              0  156
 9:          NA           NA           NA       NA       NA             NA  164
10:          NA           NA           NA        2        1              0  157
    WGHT WSTC  BMI VA_LT VA_RT BP_SYS BP_DIA URN_PROT  HGB FBS TOT_CHOL  TG HDL
 1:   48   78 22.5   1.5   1.5    138     84        1 12.8 107      178  87  35
 2:   66   78 27.5   0.8   0.6     95     58        1 13.6 101      294 119  50
 3:   60   71 22.0   0.6   0.4    120     70        1 15.3  85      237 112  56
 4:   58   84 24.5   0.1   0.8    140     70        1 10.4  84      242 134  53
 5:   54   66 21.4   1.2   1.5    111     60        1 12.2  88      166  42  58
 6:   56   67 21.9   1.5   1.5    119     78        1 11.2  84      152  38  43
 7:   51   75 21.2   1.0   1.0     93     53        1 11.1  92      212  44  69
 8:   53   67 21.8   1.2   1.0    138     72        1 11.0  74      155  86  52
 9:   66   85 24.5   1.2   1.0    130     90        1 16.3 108      209  57  59
10:   55   73 22.3   1.5   1.5    110     70        1 11.9 100      192 109  53
    LDL CRTN SGOT SGPT GGT GFR
 1: 125  0.7   21   21  23  70
 2: 220  0.6   22   22  22 116
 3: 158  1.0   24   20  18  70
 4: 162  1.1   15   10  10  37
 5:  99  1.0   16   11  12  65
 6: 101  0.8    8    6  10  97
 7: 134  1.0   20   10  10  59
 8:  85  0.6   15   13  13  96
 9: 138  0.8   32   38  16  98
10: 117  0.7   15   12  14  83

Merge

Right join 다른 방법

dt1[dt2, on = c("EXMD_BZ_YYYY", "RN_INDI", "HME_YYYYMM")]
    EXMD_BZ_YYYY RN_INDI HME_YYYYMM Q_PHX_DX_STK Q_PHX_DX_HTDZ Q_PHX_DX_HTN
 1:         2009  979358     200912           NA            NA           NA
 2:         2009  554112     200911           NA            NA           NA
 3:         2009  487160     200908           NA            NA           NA
 4:         2009  793017     200906           NA            NA           NA
 5:         2009  219397     200912            0             0            1
 6:         2009  831349     200912           NA            NA           NA
 7:         2009  480569     200901           NA            NA           NA
 8:         2009  559370     200912           NA            NA           NA
 9:         2009  318669     200904           NA            NA           NA
10:         2009  395781     200907           NA            NA           NA
    Q_PHX_DX_DM Q_PHX_DX_DLD Q_PHX_DX_PTB Q_HBV_AG Q_SMK_YN Q_DRK_FRQ_V09N HGHT
 1:          NA           NA           NA        2        1              0  157
 2:          NA           NA           NA        2        1              0  160
 3:          NA           NA           NA        3        1              0  159
 4:          NA           NA           NA        3        1              0  156
 5:           0            0           NA        3        1              0  146
 6:          NA           NA           NA       NA       NA             NA  164
 7:          NA           NA           NA       NA       NA             NA  154
 8:          NA           NA           NA       NA       NA             NA  155
 9:          NA           NA           NA       NA       NA             NA  155
10:          NA           NA           NA       NA       NA             NA  165
    WGHT WSTC  BMI VA_LT VA_RT BP_SYS BP_DIA URN_PROT  HGB FBS TOT_CHOL  TG HDL
 1:   55   73 22.3   1.5   1.5    110     70        1 11.9 100      192 109  53
 2:   56   67 21.9   1.5   1.5    119     78        1 11.2  84      152  38  43
 3:   54   66 21.4   1.2   1.5    111     60        1 12.2  88      166  42  58
 4:   53   67 21.8   1.2   1.0    138     72        1 11.0  74      155  86  52
 5:   48   78 22.5   1.5   1.5    138     84        1 12.8 107      178  87  35
 6:   66   85 24.5   1.2   1.0    130     90        1 16.3 108      209  57  59
 7:   58   84 24.5   0.1   0.8    140     70        1 10.4  84      242 134  53
 8:   51   75 21.2   1.0   1.0     93     53        1 11.1  92      212  44  69
 9:   66   78 27.5   0.8   0.6     95     58        1 13.6 101      294 119  50
10:   60   71 22.0   0.6   0.4    120     70        1 15.3  85      237 112  56
    LDL CRTN SGOT SGPT GGT GFR
 1: 117  0.7   15   12  14  83
 2: 101  0.8    8    6  10  97
 3:  99  1.0   16   11  12  65
 4:  85  0.6   15   13  13  96
 5: 125  0.7   21   21  23  70
 6: 138  0.8   32   38  16  98
 7: 162  1.1   15   10  10  37
 8: 134  1.0   20   10  10  59
 9: 220  0.6   22   22  22 116
10: 158  1.0   24   20  18  70

Merge

Left anti join

dt1[!dt2, on = c("EXMD_BZ_YYYY", "RN_INDI", "HME_YYYYMM")]
   EXMD_BZ_YYYY RN_INDI HME_YYYYMM Q_PHX_DX_STK Q_PHX_DX_HTDZ Q_PHX_DX_HTN
1:         2009  562083     200909            0             0            1
2:         2009  334536     200911            0             0            0
3:         2009  911867     200903            0             0            0
4:         2009  183321     200908           NA            NA           NA
5:         2009  942671     200909           NA            NA           NA
   Q_PHX_DX_DM Q_PHX_DX_DLD Q_PHX_DX_PTB Q_HBV_AG Q_SMK_YN Q_DRK_FRQ_V09N
1:           0            0           NA        3        1              0
2:           0            0           NA        2        1              0
3:           0            0           NA        3        1              0
4:          NA           NA           NA        3        1              0
5:          NA           NA           NA        3        1              0

Merge

Right anti join

dt2[!dt1, on = c("EXMD_BZ_YYYY", "RN_INDI", "HME_YYYYMM")]
   EXMD_BZ_YYYY RN_INDI HME_YYYYMM HGHT WGHT WSTC  BMI VA_LT VA_RT BP_SYS
1:         2009  831349     200912  164   66   85 24.5   1.2   1.0    130
2:         2009  480569     200901  154   58   84 24.5   0.1   0.8    140
3:         2009  559370     200912  155   51   75 21.2   1.0   1.0     93
4:         2009  318669     200904  155   66   78 27.5   0.8   0.6     95
5:         2009  395781     200907  165   60   71 22.0   0.6   0.4    120
   BP_DIA URN_PROT  HGB FBS TOT_CHOL  TG HDL LDL CRTN SGOT SGPT GGT GFR
1:     90        1 16.3 108      209  57  59 138  0.8   32   38  16  98
2:     70        1 10.4  84      242 134  53 162  1.1   15   10  10  37
3:     53        1 11.1  92      212  44  69 134  1.0   20   10  10  59
4:     58        1 13.6 101      294 119  50 220  0.6   22   22  22 116
5:     70        1 15.3  85      237 112  56 158  1.0   24   20  18  70

Mutate

새로운 변수 생성

dt[, BMI2 := round(WGHT/(HGHT/100)^2, 1)][]
      EXMD_BZ_YYYY RN_INDI HME_YYYYMM Q_PHX_DX_STK Q_PHX_DX_HTDZ Q_PHX_DX_HTN
   1:         2009  562083     200909            0             0            1
   2:         2009  334536     200911            0             0            0
   3:         2009  911867     200903            0             0            0
   4:         2009  183321     200908           NA            NA           NA
   5:         2009  942671     200909           NA            NA           NA
  ---                                                                        
1640:         2015  266734     201504           NA            NA           NA
1641:         2015  343874     201511            0             0            0
1642:         2015  798472     201507           NA            NA           NA
1643:         2015  720080     201505           NA            NA           NA
1644:         2015  127519     201511            0             0            1
      Q_PHX_DX_DM Q_PHX_DX_DLD Q_PHX_DX_PTB Q_HBV_AG Q_SMK_YN Q_DRK_FRQ_V09N
   1:           0            0           NA        3        1              0
   2:           0            0           NA        2        1              0
   3:           0            0           NA        3        1              0
   4:          NA           NA           NA        3        1              0
   5:          NA           NA           NA        3        1              0
  ---                                                                       
1640:          NA           NA           NA        1        2              2
1641:           1            0            0        2        2              0
1642:          NA           NA           NA        3        2              0
1643:          NA            1           NA        2        2              1
1644:           0            0            0        3        2              4
      HGHT WGHT WSTC  BMI VA_LT VA_RT BP_SYS BP_DIA URN_PROT  HGB FBS TOT_CHOL
   1:  144   61   90 29.4   0.7   0.8    120     80        1 12.6 117      264
   2:  162   51   63 19.4   0.8   1.0    120     80        1 13.8  96      169
   3:  163   65   82 24.5   0.7   0.6    130     80        1 15.0 118      216
   4:  152   51   70 22.1   0.8   0.9    101     62        1 13.1  90      199
   5:  159   50   73 19.8   0.7   0.8    132     78        1 13.0  92      162
  ---                                                                         
1640:  171   76   88 26.0   1.2   1.2    125     89        1 14.1  77      363
1641:  179   80   88 25.0   1.0   0.7    126     83        1 14.9 119      106
1642:  163   63   80 23.7   1.5   1.5    118     77        1 16.1 160      271
1643:  165   70   92 25.7   0.6   0.8    120     80        1 13.6  81      289
1644:  168   84  103 29.8   0.9   0.8    140     85        1 15.2 134      210
       TG HDL LDL CRTN SGOT SGPT GGT GFR BMI2
   1: 128  60 179  0.9   25   20  25  59 29.4
   2:  92  70  80  0.9   18   15  28  74 19.4
   3: 132  55 134  0.8   26   30  30  79 24.5
   4: 100  65 114  0.9   18   14  11  61 22.1
   5:  58  40 111  0.9   24   23  15  49 19.8
  ---                                        
1640: 280  63 244  0.9   19   32  95  96 26.0
1641: 247  29  50  0.9  101  157  62  79 25.0
1642: 514  51 151  1.0   40   66 104  85 23.7
1643: 810  52  NA  1.0   37   33  76  86 25.7
1644: 298  56  94  1.1   55   49  66  66 29.8

Mutate

새로운 변수 생성

dt[, `:=`(BP_SYS140 = factor(as.integer(BP_SYS >= 140)), BMI25 = factor(as.integer(BMI >= 25)))][]
      EXMD_BZ_YYYY RN_INDI HME_YYYYMM Q_PHX_DX_STK Q_PHX_DX_HTDZ Q_PHX_DX_HTN
   1:         2009  562083     200909            0             0            1
   2:         2009  334536     200911            0             0            0
   3:         2009  911867     200903            0             0            0
   4:         2009  183321     200908           NA            NA           NA
   5:         2009  942671     200909           NA            NA           NA
  ---                                                                        
1640:         2015  266734     201504           NA            NA           NA
1641:         2015  343874     201511            0             0            0
1642:         2015  798472     201507           NA            NA           NA
1643:         2015  720080     201505           NA            NA           NA
1644:         2015  127519     201511            0             0            1
      Q_PHX_DX_DM Q_PHX_DX_DLD Q_PHX_DX_PTB Q_HBV_AG Q_SMK_YN Q_DRK_FRQ_V09N
   1:           0            0           NA        3        1              0
   2:           0            0           NA        2        1              0
   3:           0            0           NA        3        1              0
   4:          NA           NA           NA        3        1              0
   5:          NA           NA           NA        3        1              0
  ---                                                                       
1640:          NA           NA           NA        1        2              2
1641:           1            0            0        2        2              0
1642:          NA           NA           NA        3        2              0
1643:          NA            1           NA        2        2              1
1644:           0            0            0        3        2              4
      HGHT WGHT WSTC  BMI VA_LT VA_RT BP_SYS BP_DIA URN_PROT  HGB FBS TOT_CHOL
   1:  144   61   90 29.4   0.7   0.8    120     80        1 12.6 117      264
   2:  162   51   63 19.4   0.8   1.0    120     80        1 13.8  96      169
   3:  163   65   82 24.5   0.7   0.6    130     80        1 15.0 118      216
   4:  152   51   70 22.1   0.8   0.9    101     62        1 13.1  90      199
   5:  159   50   73 19.8   0.7   0.8    132     78        1 13.0  92      162
  ---                                                                         
1640:  171   76   88 26.0   1.2   1.2    125     89        1 14.1  77      363
1641:  179   80   88 25.0   1.0   0.7    126     83        1 14.9 119      106
1642:  163   63   80 23.7   1.5   1.5    118     77        1 16.1 160      271
1643:  165   70   92 25.7   0.6   0.8    120     80        1 13.6  81      289
1644:  168   84  103 29.8   0.9   0.8    140     85        1 15.2 134      210
       TG HDL LDL CRTN SGOT SGPT GGT GFR BMI2 BP_SYS140 BMI25
   1: 128  60 179  0.9   25   20  25  59 29.4         0     1
   2:  92  70  80  0.9   18   15  28  74 19.4         0     0
   3: 132  55 134  0.8   26   30  30  79 24.5         0     0
   4: 100  65 114  0.9   18   14  11  61 22.1         0     0
   5:  58  40 111  0.9   24   23  15  49 19.8         0     0
  ---                                                        
1640: 280  63 244  0.9   19   32  95  96 26.0         0     1
1641: 247  29  50  0.9  101  157  62  79 25.0         0     1
1642: 514  51 151  1.0   40   66 104  85 23.7         0     0
1643: 810  52  NA  1.0   37   33  76  86 25.7         0     1
1644: 298  56  94  1.1   55   49  66  66 29.8         1     1

Mutate

BMI2 열 삭제

dt[, BMI2 := NULL][]
      EXMD_BZ_YYYY RN_INDI HME_YYYYMM Q_PHX_DX_STK Q_PHX_DX_HTDZ Q_PHX_DX_HTN
   1:         2009  562083     200909            0             0            1
   2:         2009  334536     200911            0             0            0
   3:         2009  911867     200903            0             0            0
   4:         2009  183321     200908           NA            NA           NA
   5:         2009  942671     200909           NA            NA           NA
  ---                                                                        
1640:         2015  266734     201504           NA            NA           NA
1641:         2015  343874     201511            0             0            0
1642:         2015  798472     201507           NA            NA           NA
1643:         2015  720080     201505           NA            NA           NA
1644:         2015  127519     201511            0             0            1
      Q_PHX_DX_DM Q_PHX_DX_DLD Q_PHX_DX_PTB Q_HBV_AG Q_SMK_YN Q_DRK_FRQ_V09N
   1:           0            0           NA        3        1              0
   2:           0            0           NA        2        1              0
   3:           0            0           NA        3        1              0
   4:          NA           NA           NA        3        1              0
   5:          NA           NA           NA        3        1              0
  ---                                                                       
1640:          NA           NA           NA        1        2              2
1641:           1            0            0        2        2              0
1642:          NA           NA           NA        3        2              0
1643:          NA            1           NA        2        2              1
1644:           0            0            0        3        2              4
      HGHT WGHT WSTC  BMI VA_LT VA_RT BP_SYS BP_DIA URN_PROT  HGB FBS TOT_CHOL
   1:  144   61   90 29.4   0.7   0.8    120     80        1 12.6 117      264
   2:  162   51   63 19.4   0.8   1.0    120     80        1 13.8  96      169
   3:  163   65   82 24.5   0.7   0.6    130     80        1 15.0 118      216
   4:  152   51   70 22.1   0.8   0.9    101     62        1 13.1  90      199
   5:  159   50   73 19.8   0.7   0.8    132     78        1 13.0  92      162
  ---                                                                         
1640:  171   76   88 26.0   1.2   1.2    125     89        1 14.1  77      363
1641:  179   80   88 25.0   1.0   0.7    126     83        1 14.9 119      106
1642:  163   63   80 23.7   1.5   1.5    118     77        1 16.1 160      271
1643:  165   70   92 25.7   0.6   0.8    120     80        1 13.6  81      289
1644:  168   84  103 29.8   0.9   0.8    140     85        1 15.2 134      210
       TG HDL LDL CRTN SGOT SGPT GGT GFR BP_SYS140 BMI25
   1: 128  60 179  0.9   25   20  25  59         0     1
   2:  92  70  80  0.9   18   15  28  74         0     0
   3: 132  55 134  0.8   26   30  30  79         0     0
   4: 100  65 114  0.9   18   14  11  61         0     0
   5:  58  40 111  0.9   24   23  15  49         0     0
  ---                                                   
1640: 280  63 244  0.9   19   32  95  96         0     1
1641: 247  29  50  0.9  101  157  62  79         0     1
1642: 514  51 151  1.0   40   66 104  85         0     0
1643: 810  52  NA  1.0   37   33  76  86         0     1
1644: 298  56  94  1.1   55   49  66  66         1     1

Specific symbol

.SD: Subset of Data

dt[, .SD]   # all column
      EXMD_BZ_YYYY RN_INDI HME_YYYYMM Q_PHX_DX_STK Q_PHX_DX_HTDZ Q_PHX_DX_HTN
   1:         2009  562083     200909            0             0            1
   2:         2009  334536     200911            0             0            0
   3:         2009  911867     200903            0             0            0
   4:         2009  183321     200908           NA            NA           NA
   5:         2009  942671     200909           NA            NA           NA
  ---                                                                        
1640:         2015  266734     201504           NA            NA           NA
1641:         2015  343874     201511            0             0            0
1642:         2015  798472     201507           NA            NA           NA
1643:         2015  720080     201505           NA            NA           NA
1644:         2015  127519     201511            0             0            1
      Q_PHX_DX_DM Q_PHX_DX_DLD Q_PHX_DX_PTB Q_HBV_AG Q_SMK_YN Q_DRK_FRQ_V09N
   1:           0            0           NA        3        1              0
   2:           0            0           NA        2        1              0
   3:           0            0           NA        3        1              0
   4:          NA           NA           NA        3        1              0
   5:          NA           NA           NA        3        1              0
  ---                                                                       
1640:          NA           NA           NA        1        2              2
1641:           1            0            0        2        2              0
1642:          NA           NA           NA        3        2              0
1643:          NA            1           NA        2        2              1
1644:           0            0            0        3        2              4
      HGHT WGHT WSTC  BMI VA_LT VA_RT BP_SYS BP_DIA URN_PROT  HGB FBS TOT_CHOL
   1:  144   61   90 29.4   0.7   0.8    120     80        1 12.6 117      264
   2:  162   51   63 19.4   0.8   1.0    120     80        1 13.8  96      169
   3:  163   65   82 24.5   0.7   0.6    130     80        1 15.0 118      216
   4:  152   51   70 22.1   0.8   0.9    101     62        1 13.1  90      199
   5:  159   50   73 19.8   0.7   0.8    132     78        1 13.0  92      162
  ---                                                                         
1640:  171   76   88 26.0   1.2   1.2    125     89        1 14.1  77      363
1641:  179   80   88 25.0   1.0   0.7    126     83        1 14.9 119      106
1642:  163   63   80 23.7   1.5   1.5    118     77        1 16.1 160      271
1643:  165   70   92 25.7   0.6   0.8    120     80        1 13.6  81      289
1644:  168   84  103 29.8   0.9   0.8    140     85        1 15.2 134      210
       TG HDL LDL CRTN SGOT SGPT GGT GFR BP_SYS140 BMI25
   1: 128  60 179  0.9   25   20  25  59         0     1
   2:  92  70  80  0.9   18   15  28  74         0     0
   3: 132  55 134  0.8   26   30  30  79         0     0
   4: 100  65 114  0.9   18   14  11  61         0     0
   5:  58  40 111  0.9   24   23  15  49         0     0
  ---                                                   
1640: 280  63 244  0.9   19   32  95  96         0     1
1641: 247  29  50  0.9  101  157  62  79         0     1
1642: 514  51 151  1.0   40   66 104  85         0     0
1643: 810  52  NA  1.0   37   33  76  86         0     1
1644: 298  56  94  1.1   55   49  66  66         1     1

Specific symbol

.SD: Subset of Data

dt[, lapply(.SD, class)]
   EXMD_BZ_YYYY RN_INDI HME_YYYYMM Q_PHX_DX_STK Q_PHX_DX_HTDZ Q_PHX_DX_HTN
1:      integer integer    integer      integer       integer      integer
   Q_PHX_DX_DM Q_PHX_DX_DLD Q_PHX_DX_PTB Q_HBV_AG Q_SMK_YN Q_DRK_FRQ_V09N
1:     integer      integer      integer  integer  integer        integer
      HGHT    WGHT    WSTC     BMI   VA_LT   VA_RT  BP_SYS  BP_DIA URN_PROT
1: integer integer integer numeric numeric numeric integer integer  integer
       HGB     FBS TOT_CHOL      TG     HDL     LDL    CRTN    SGOT    SGPT
1: numeric integer  integer integer integer integer numeric integer integer
       GGT     GFR BP_SYS140  BMI25
1: integer integer    factor factor

Specific symbol

.SD: Subset of Data

dt[order(EXMD_BZ_YYYY), .SD[1], keyby = "RN_INDI"]
     RN_INDI EXMD_BZ_YYYY HME_YYYYMM Q_PHX_DX_STK Q_PHX_DX_HTDZ Q_PHX_DX_HTN
  1:    2270         2010     201012            0             0            0
  2:    3690         2010     201005            0             0            0
  3:    4263         2009     200910           NA            NA           NA
  4:    4664         2009     200910            0             0            0
  5:    5707         2010     201007           NA            NA           NA
 ---                                                                        
543:  995838         2013     201312            0             0            0
544:  999693         2010     201003            0             0            0
545: 1005250         2012     201204            0             0            0
546: 1009573         2009     200904            0             0            0
547: 1010623         2010     201005            0             0            0
     Q_PHX_DX_DM Q_PHX_DX_DLD Q_PHX_DX_PTB Q_HBV_AG Q_SMK_YN Q_DRK_FRQ_V09N
  1:           0            0            0        2        1              2
  2:           0            0            0        2        3              1
  3:          NA           NA           NA        2        2              1
  4:           0            0           NA        2        1              0
  5:          NA           NA           NA        3        1              1
 ---                                                                       
543:           0            0            0        2        1              0
544:           0            0            0        2        2              1
545:           0            0            0        3        3              1
546:           0            1           NA        2        2              2
547:           0            0            0        3        3              1
     HGHT WGHT WSTC  BMI VA_LT VA_RT BP_SYS BP_DIA URN_PROT  HGB FBS TOT_CHOL
  1:  161   58   68 22.4   2.0   1.5    119     72        1 14.1 102      220
  2:  159   51   77 20.2   1.0   0.9    110     60        1 13.3  89      145
  3:  175   66   78 21.6   1.5   2.0    110     70        1 13.5  79      264
  4:  160   53   63 20.7   0.9   0.9    110     80        1 12.7  84      192
  5:  160   50   67 19.5   0.7   1.0     98     51        1 13.1  83      182
 ---                                                                         
543:  156   44   66 18.1   1.2   1.0    100     60        1 13.6 118      242
544:  167   70   92 25.1   1.0   1.2    127     79        1 14.8  97      289
545:  181   72   78 22.0   1.5   1.5    115     70        1 14.0 102      171
546:  170   74   88 25.6   1.2   1.2    110     70        1 13.8  70      185
547:  173   80   86 26.7   0.6   0.9    110     72        1 15.5  92      234
      TG HDL LDL CRTN SGOT SGPT GGT GFR BP_SYS140 BMI25
  1:  99  55 145  0.8   17   13  17  NA         0     0
  2: 101  47  78  0.6   20   13  22  NA         0     0
  3:  54  81 172  1.2   15   15  17  77         0     0
  4:  74  57 120  0.9   17    9   9  70         0     0
  5:  68  67 101  0.7   11    7  10  NA         0     0
 ---                                                   
543: 170  62 146  0.9   17   13  11  73         0     0
544: 685  45  NA  1.0   28   38  64  NA         0     1
545:  58  69  NA  1.2   21   18  19  80         0     0
546:  46 117  58  1.0   21   31  24  89         0     1
547: 427  52  NA  1.1   19   20  21  NA         0     1

Specific symbol

.SDcols: 연산 대상이 되는 특정 열 지정

dt[order(EXMD_BZ_YYYY), .SD[1], .SDcols = colvars, keyby = "RN_INDI"]
     RN_INDI Q_PHX_DX_STK Q_PHX_DX_HTDZ Q_PHX_DX_HTN Q_PHX_DX_DM Q_PHX_DX_DLD
  1:    2270            0             0            0           0            0
  2:    3690            0             0            0           0            0
  3:    4263           NA            NA           NA          NA           NA
  4:    4664            0             0            0           0            0
  5:    5707           NA            NA           NA          NA           NA
 ---                                                                         
543:  995838            0             0            0           0            0
544:  999693            0             0            0           0            0
545: 1005250            0             0            0           0            0
546: 1009573            0             0            0           0            1
547: 1010623            0             0            0           0            0
     Q_PHX_DX_PTB Q_HBV_AG Q_SMK_YN Q_DRK_FRQ_V09N
  1:            0        2        1              2
  2:            0        2        3              1
  3:           NA        2        2              1
  4:           NA        2        1              0
  5:           NA        3        1              1
 ---                                              
543:            0        2        1              0
544:            0        2        2              1
545:            0        3        3              1
546:           NA        2        2              2
547:            0        3        3              1

Specific symbol

.N: length()

dt[, .N, keyby = "RN_INDI"]
     RN_INDI N
  1:    2270 2
  2:    3690 1
  3:    4263 7
  4:    4664 4
  5:    5707 2
 ---          
543:  995838 2
544:  999693 2
545: 1005250 4
546: 1009573 6
547: 1010623 4

melt

melt

wide to long

dt.long1 <- melt(dt, 
                 id.vars = c("EXMD_BZ_YYYY", "RN_INDI", "HME_YYYYMM"),   # 고정할 열
                 measure.vars = c("TOT_CHOL", "TG", "HDL", "LDL"),   # 재구조화할 열
                 variable.name = "Lipid",   # 재구조화한 후 variable 열의 이름
                 value.name = "Value")   # 재구조화한 후 value 열의 이름
dt.long1
      EXMD_BZ_YYYY RN_INDI HME_YYYYMM    Lipid Value
   1:         2009  562083     200909 TOT_CHOL   264
   2:         2009  334536     200911 TOT_CHOL   169
   3:         2009  911867     200903 TOT_CHOL   216
   4:         2009  183321     200908 TOT_CHOL   199
   5:         2009  942671     200909 TOT_CHOL   162
  ---                                               
6572:         2015  266734     201504      LDL   244
6573:         2015  343874     201511      LDL    50
6574:         2015  798472     201507      LDL   151
6575:         2015  720080     201505      LDL    NA
6576:         2015  127519     201511      LDL    94

melt

동시에 여러 개의 열로 melt 가능

col1 <- c("BP_SYS", "BP_DIA")
col2 <- c("VA_LT", "VA_RT")
dt.long2 <- melt(dt,
                 id.vars = c("EXMD_BZ_YYYY", "RN_INDI", "HME_YYYYMM"),
                 measure = list(col1, col2),
                 value.name = c("BP", "VA"))
dt.long2
      EXMD_BZ_YYYY RN_INDI HME_YYYYMM variable  BP  VA
   1:         2009  562083     200909        1 120 0.7
   2:         2009  334536     200911        1 120 0.8
   3:         2009  911867     200903        1 130 0.7
   4:         2009  183321     200908        1 101 0.8
   5:         2009  942671     200909        1 132 0.7
  ---                                                 
3284:         2015  266734     201504        2  89 1.2
3285:         2015  343874     201511        2  83 0.7
3286:         2015  798472     201507        2  77 1.5
3287:         2015  720080     201505        2  80 0.8
3288:         2015  127519     201511        2  85 0.8

dcast

long to wide

dt.wide1 <- dcast(dt.long1, EXMD_BZ_YYYY + RN_INDI + HME_YYYYMM ~ Lipid, value.var = "Value")
dt.wide1
      EXMD_BZ_YYYY RN_INDI HME_YYYYMM TOT_CHOL  TG HDL LDL
   1:         2009    4263     200910      264  54  81 172
   2:         2009    4664     200910      192  74  57 120
   3:         2009    9866     200912      112 116  48  40
   4:         2009   17079     200911      161  52  72  78
   5:         2009   26776     200902      189  62  59 117
  ---                                                     
1640:         2015  985608     201512      189  61  70 106
1641:         2015  992522     201512      180  81 105  59
1642:         2015  995240     201511      222 319  74  84
1643:         2015 1005250     201504      159 105  60  78
1644:         2015 1010623     201502      244 440  48  NA

dcast

그룹별 요약 통계량을 계산한 결과를 재구조화하여 반환

dt.wide2 <- dcast(dt.long1, RN_INDI ~ Lipid, value.var = "Value", fun.aggregate = mean, na.rm =T)
dt.wide2
     RN_INDI TOT_CHOL        TG      HDL      LDL
  1:    2270 205.5000 105.50000 58.00000 126.5000
  2:    3690 145.0000 101.00000 47.00000  78.0000
  3:    4263 243.4286  47.14286 75.71429 158.0000
  4:    4664 180.5000  88.25000 52.50000 110.0000
  5:    5707 209.0000  78.00000 68.50000 124.5000
 ---                                             
543:  995838 243.5000 122.00000 64.00000 155.0000
544:  999693 268.5000 454.00000 42.50000 163.0000
545: 1005250 158.0000  65.00000 64.50000  77.0000
546: 1009573 229.6667 153.16667 52.83333 145.8333
547: 1010623 222.5000 351.00000 48.75000 104.5000

dcast

여러 열을 동시에 dcast 가능

dt.wide3 <- dcast(dt.long2, ... ~ variable, value.var = c("BP", "VA"))
dt.wide3
      EXMD_BZ_YYYY RN_INDI HME_YYYYMM BP_1 BP_2 VA_1 VA_2
   1:         2009    4263     200910  110   70  1.5  2.0
   2:         2009    4664     200910  110   80  0.9  0.9
   3:         2009    9866     200912  121   63  1.0  0.5
   4:         2009   17079     200911  106   60  1.0  0.8
   5:         2009   26776     200902  118   73  0.8  1.0
  ---                                                    
1640:         2015  985608     201512  117   70  1.0  1.0
1641:         2015  992522     201512  116   67  0.7  0.5
1642:         2015  995240     201511  130   80  1.5  1.5
1643:         2015 1005250     201504  110   75  1.2  1.5
1644:         2015 1010623     201502  123   85  0.8  0.8