class: center, middle, inverse, title-slide .title[ # Introduction to Data Science ] .subtitle[ ## Week 5: Data Manipulation (Cont’d) and Visualization ] .author[ ### Ugur Aytun ] .institute[ ### METU, Department of Economics | ECON 413 ] --- --- # Import larger data -- - BACI 2022 trade network data by CEPII: -- - t: year, i: origin (UN country code), j: destination (UN country code), k: six-digit HS product, v: flow value (thousand USD), q: weight (tonnage) ``` r library(data.table) ``` ``` ## Warning: package 'data.table' was built under R version 4.3.3 ``` ``` r setwd("H:/My Drive/ECON413") trade_dataset_2022 = setDT(fread("Data/BACI_HS02_Y2022_V202401b.csv")) trade_dataset_2022[, .N] # number of observations ``` ``` ## [1] 11113911 ``` ``` r summary(trade_dataset_2022) ``` ``` ## t i j k ## Min. :2022 Min. : 4.0 Min. : 4.0 Min. : 10110 ## 1st Qu.:2022 1st Qu.:246.0 1st Qu.:226.0 1st Qu.:392099 ## Median :2022 Median :442.0 Median :440.0 Median :640399 ## Mean :2022 Mean :460.6 Mean :443.1 Mean :609260 ## 3rd Qu.:2022 3rd Qu.:704.0 3rd Qu.:686.0 3rd Qu.:846791 ## Max. :2022 Max. :894.0 Max. :894.0 Max. :970600 ## ## v q ## Min. : 0 Min. : 0 ## 1st Qu.: 1 1st Qu.: 0 ## Median : 10 Median : 1 ## Mean : 2119 Mean : 1314 ## 3rd Qu.: 105 3rd Qu.: 10 ## Max. :117397713 Max. :743788774 ## NA's :360796 ``` --- # Import larger data (cont'd) -- - Turkey's total export in 2022? ``` r setwd("H:/My Drive/ECON413") trade_dataset_2022 = setDT(fread("Data/BACI_HS02_Y2022_V202401b.csv")) trade_dataset_2022[i == 792, sum(v, na.rm = TRUE)] ``` ``` ## [1] 262640430 ``` -- - Turkey's total export to Germany in 2022? ``` r trade_dataset_2022[i == 792 & j == 276, sum(v, na.rm = TRUE)] ``` ``` ## [1] 21828137 ``` --- # Import larger data (cont'd) -- - Create a dataset showing Turkey's total export by each destination country in descending order ``` r partners <- trade_dataset_2022[i == 792, .(total_export = sum(v, na.rm = TRUE)), by = c("j")][order(-total_export)] partners ``` ``` ## j total_export ## <int> <num> ## 1: 276 21828137.114 ## 2: 842 17135635.726 ## 3: 826 13753341.300 ## 4: 368 13742031.076 ## 5: 380 12567485.738 ## --- ## 215: 304 16.555 ## 216: 534 0.839 ## 217: 500 0.354 ## 218: 162 0.020 ## 219: 520 0.007 ``` --- # Binding two dataset -- - 2021 data? Let's call it and merge with 2022. ``` r setwd("H:/My Drive/ECON413") trade_dataset_2021 <- setDT(fread("Data/BACI_HS02_Y2021_V202401b.csv")) trade_dataset <- rbind(trade_dataset_2022, # 11,113,911 obs. trade_dataset_2021) # 11,358,186 obs. trade_dataset[, .N] ``` ``` ## [1] 22472097 ``` -- - Turkey's total robot export by year ``` r trade_dataset[j == 792 & k == 847950, sum(v, na.rm = TRUE), by = c("t")] ``` ``` ## t V1 ## <int> <num> ## 1: 2022 140851.3 ## 2: 2021 101729.9 ``` --- # Merging country and product names -- - Tired of checking the country codes online? Lets merge them. ``` r library(countrycode) ``` ``` ## Warning: package 'countrycode' was built under R version 4.3.3 ``` ``` r countries <- codelist countries <- setDT(countries) # convert countries to data.table names(countries) # see the variable names and find appropriate columns ``` ``` ## [1] "ar5" "cctld" ## [3] "continent" "country.name.de" ## [5] "country.name.de.regex" "country.name.en" ## [7] "country.name.en.regex" "country.name.fr" ## [9] "country.name.fr.regex" "country.name.it" ## [11] "country.name.it.regex" "cow.name" ## [13] "cowc" "cown" ## [15] "currency" "dhs" ## [17] "ecb" "eu28" ## [19] "eurocontrol_pru" "eurocontrol_statfor" ## [21] "eurostat" "fao" ## [23] "fips" "gaul" ## [25] "genc2c" "genc3c" ## [27] "genc3n" "gwc" ## [29] "gwn" "icao.region" ## [31] "imf" "ioc" ## [33] "iso.name.en" "iso.name.fr" ## [35] "iso2c" "iso3c" ## [37] "iso3n" "iso4217c" ## [39] "iso4217n" "p4.name" ## [41] "p4c" "p4n" ## [43] "p5c" "p5n" ## [45] "region" "region23" ## [47] "un" "un.name.ar" ## [49] "un.name.en" "un.name.es" ## [51] "un.name.fr" "un.name.ru" ## [53] "un.name.zh" "un.region.code" ## [55] "un.region.name" "un.regionintermediate.code" ## [57] "un.regionintermediate.name" "un.regionsub.code" ## [59] "un.regionsub.name" "unhcr" ## [61] "unhcr.region" "unicode.symbol" ## [63] "unpd" "vdem" ## [65] "vdem.name" "wb" ## [67] "wb_api2c" "wb_api3c" ## [69] "wvs" "cldr.name.af" ## [71] "cldr.name.agq" "cldr.name.ak" ## [73] "cldr.name.am" "cldr.name.ar" ## [75] "cldr.name.ar_ly" "cldr.name.ar_sa" ## [77] "cldr.name.as" "cldr.name.asa" ## [79] "cldr.name.ast" "cldr.name.az" ## [81] "cldr.name.az_cyrl" "cldr.name.bas" ## [83] "cldr.name.be" "cldr.name.bem" ## [85] "cldr.name.bez" "cldr.name.bg" ## [87] "cldr.name.bm" "cldr.name.bn" ## [89] "cldr.name.bo" "cldr.name.br" ## [91] "cldr.name.brx" "cldr.name.bs" ## [93] "cldr.name.bs_cyrl" "cldr.name.ca" ## [95] "cldr.name.ccp" "cldr.name.ce" ## [97] "cldr.name.ceb" "cldr.name.cgg" ## [99] "cldr.name.chr" "cldr.name.ckb" ## [101] "cldr.name.cs" "cldr.name.cu" ## [103] "cldr.name.cy" "cldr.name.da" ## [105] "cldr.name.dav" "cldr.name.de" ## [107] "cldr.name.de_at" "cldr.name.de_ch" ## [109] "cldr.name.dje" "cldr.name.dsb" ## [111] "cldr.name.dua" "cldr.name.dyo" ## [113] "cldr.name.dz" "cldr.name.ee" ## [115] "cldr.name.el" "cldr.name.en" ## [117] "cldr.name.en_001" "cldr.name.en_au" ## [119] "cldr.name.eo" "cldr.name.es" ## [121] "cldr.name.es_419" "cldr.name.es_ar" ## [123] "cldr.name.es_cl" "cldr.name.es_mx" ## [125] "cldr.name.es_us" "cldr.name.et" ## [127] "cldr.name.eu" "cldr.name.ewo" ## [129] "cldr.name.fa" "cldr.name.fa_af" ## [131] "cldr.name.ff" "cldr.name.fi" ## [133] "cldr.name.fil" "cldr.name.fo" ## [135] "cldr.name.fr" "cldr.name.fr_be" ## [137] "cldr.name.fr_ca" "cldr.name.fur" ## [139] "cldr.name.fy" "cldr.name.ga" ## [141] "cldr.name.gd" "cldr.name.gl" ## [143] "cldr.name.gsw" "cldr.name.gu" ## [145] "cldr.name.gv" "cldr.name.ha" ## [147] "cldr.name.haw" "cldr.name.he" ## [149] "cldr.name.hi" "cldr.name.hr" ## [151] "cldr.name.hsb" "cldr.name.hu" ## [153] "cldr.name.hy" "cldr.name.ia" ## [155] "cldr.name.id" "cldr.name.ig" ## [157] "cldr.name.ii" "cldr.name.is" ## [159] "cldr.name.it" "cldr.name.ja" ## [161] "cldr.name.jgo" "cldr.name.jv" ## [163] "cldr.name.ka" "cldr.name.kab" ## [165] "cldr.name.kam" "cldr.name.kde" ## [167] "cldr.name.kea" "cldr.name.khq" ## [169] "cldr.name.ki" "cldr.name.kk" ## [171] "cldr.name.kkj" "cldr.name.kl" ## [173] "cldr.name.kln" "cldr.name.km" ## [175] "cldr.name.kn" "cldr.name.ko" ## [177] "cldr.name.ko_kp" "cldr.name.kok" ## [179] "cldr.name.ks" "cldr.name.ksb" ## [181] "cldr.name.ksf" "cldr.name.ksh" ## [183] "cldr.name.ku" "cldr.name.kw" ## [185] "cldr.name.ky" "cldr.name.lag" ## [187] "cldr.name.lb" "cldr.name.lg" ## [189] "cldr.name.lkt" "cldr.name.ln" ## [191] "cldr.name.lo" "cldr.name.lrc" ## [193] "cldr.name.lt" "cldr.name.lu" ## [195] "cldr.name.luo" "cldr.name.luy" ## [197] "cldr.name.lv" "cldr.name.mas" ## [199] "cldr.name.mer" "cldr.name.mfe" ## [201] "cldr.name.mg" "cldr.name.mgh" ## [203] "cldr.name.mgo" "cldr.name.mi" ## [205] "cldr.name.mk" "cldr.name.ml" ## [207] "cldr.name.mn" "cldr.name.mr" ## [209] "cldr.name.ms" "cldr.name.mt" ## [211] "cldr.name.mua" "cldr.name.my" ## [213] "cldr.name.mzn" "cldr.name.naq" ## [215] "cldr.name.nb" "cldr.name.nd" ## [217] "cldr.name.ne" "cldr.name.nl" ## [219] "cldr.name.nmg" "cldr.name.nn" ## [221] "cldr.name.nnh" "cldr.name.nus" ## [223] "cldr.name.nyn" "cldr.name.om" ## [225] "cldr.name.or" "cldr.name.os" ## [227] "cldr.name.pa" "cldr.name.pa_arab" ## [229] "cldr.name.pl" "cldr.name.ps" ## [231] "cldr.name.ps_pk" "cldr.name.pt" ## [233] "cldr.name.pt_ao" "cldr.name.qu" ## [235] "cldr.name.rm" "cldr.name.rn" ## [237] "cldr.name.ro" "cldr.name.ro_md" ## [239] "cldr.name.rof" "cldr.name.ru" ## [241] "cldr.name.ru_ua" "cldr.name.rw" ## [243] "cldr.name.sah" "cldr.name.sbp" ## [245] "cldr.name.sd" "cldr.name.se" ## [247] "cldr.name.se_fi" "cldr.name.seh" ## [249] "cldr.name.sg" "cldr.name.shi" ## [251] "cldr.name.shi_latn" "cldr.name.si" ## [253] "cldr.name.sk" "cldr.name.sl" ## [255] "cldr.name.smn" "cldr.name.sn" ## [257] "cldr.name.so" "cldr.name.sq" ## [259] "cldr.name.sr" "cldr.name.sr_cyrl_ba" ## [261] "cldr.name.sr_cyrl_me" "cldr.name.sr_cyrl_xk" ## [263] "cldr.name.sr_latn" "cldr.name.sr_latn_ba" ## [265] "cldr.name.sr_latn_me" "cldr.name.sr_latn_xk" ## [267] "cldr.name.sv" "cldr.name.sw" ## [269] "cldr.name.sw_cd" "cldr.name.sw_ke" ## [271] "cldr.name.ta" "cldr.name.te" ## [273] "cldr.name.teo" "cldr.name.tg" ## [275] "cldr.name.th" "cldr.name.ti" ## [277] "cldr.name.tk" "cldr.name.to" ## [279] "cldr.name.tr" "cldr.name.tt" ## [281] "cldr.name.twq" "cldr.name.tzm" ## [283] "cldr.name.ug" "cldr.name.uk" ## [285] "cldr.name.ur" "cldr.name.ur_in" ## [287] "cldr.name.uz" "cldr.name.uz_arab" ## [289] "cldr.name.uz_cyrl" "cldr.name.vai" ## [291] "cldr.name.vai_latn" "cldr.name.vi" ## [293] "cldr.name.wae" "cldr.name.wo" ## [295] "cldr.name.xh" "cldr.name.xog" ## [297] "cldr.name.yav" "cldr.name.yi" ## [299] "cldr.name.yo" "cldr.name.yo_bj" ## [301] "cldr.name.yue" "cldr.name.yue_hans" ## [303] "cldr.name.zgh" "cldr.name.zh" ## [305] "cldr.name.zh_hant" "cldr.name.zh_hant_hk" ## [307] "cldr.name.zu" "cldr.short.af" ## [309] "cldr.short.am" "cldr.short.ar" ## [311] "cldr.short.ar_ly" "cldr.short.ar_sa" ## [313] "cldr.short.as" "cldr.short.ast" ## [315] "cldr.short.az" "cldr.short.az_cyrl" ## [317] "cldr.short.be" "cldr.short.bg" ## [319] "cldr.short.bn" "cldr.short.br" ## [321] "cldr.short.bs" "cldr.short.bs_cyrl" ## [323] "cldr.short.ca" "cldr.short.ccp" ## [325] "cldr.short.ce" "cldr.short.ceb" ## [327] "cldr.short.chr" "cldr.short.ckb" ## [329] "cldr.short.cs" "cldr.short.cy" ## [331] "cldr.short.da" "cldr.short.de" ## [333] "cldr.short.de_at" "cldr.short.de_ch" ## [335] "cldr.short.dsb" "cldr.short.dz" ## [337] "cldr.short.ee" "cldr.short.el" ## [339] "cldr.short.en" "cldr.short.en_001" ## [341] "cldr.short.en_au" "cldr.short.es" ## [343] "cldr.short.es_419" "cldr.short.es_ar" ## [345] "cldr.short.es_cl" "cldr.short.es_mx" ## [347] "cldr.short.es_us" "cldr.short.et" ## [349] "cldr.short.eu" "cldr.short.fa" ## [351] "cldr.short.fa_af" "cldr.short.fi" ## [353] "cldr.short.fil" "cldr.short.fo" ## [355] "cldr.short.fr" "cldr.short.fr_be" ## [357] "cldr.short.fr_ca" "cldr.short.fur" ## [359] "cldr.short.fy" "cldr.short.ga" ## [361] "cldr.short.gd" "cldr.short.gl" ## [363] "cldr.short.gsw" "cldr.short.gu" ## [365] "cldr.short.he" "cldr.short.hi" ## [367] "cldr.short.hr" "cldr.short.hsb" ## [369] "cldr.short.hu" "cldr.short.hy" ## [371] "cldr.short.ia" "cldr.short.id" ## [373] "cldr.short.ig" "cldr.short.is" ## [375] "cldr.short.it" "cldr.short.ja" ## [377] "cldr.short.jv" "cldr.short.ka" ## [379] "cldr.short.kea" "cldr.short.kk" ## [381] "cldr.short.km" "cldr.short.kn" ## [383] "cldr.short.ko" "cldr.short.ko_kp" ## [385] "cldr.short.kok" "cldr.short.ksh" ## [387] "cldr.short.ku" "cldr.short.ky" ## [389] "cldr.short.lb" "cldr.short.lo" ## [391] "cldr.short.lt" "cldr.short.lv" ## [393] "cldr.short.mk" "cldr.short.ml" ## [395] "cldr.short.mn" "cldr.short.mr" ## [397] "cldr.short.ms" "cldr.short.mt" ## [399] "cldr.short.my" "cldr.short.mzn" ## [401] "cldr.short.nb" "cldr.short.ne" ## [403] "cldr.short.nl" "cldr.short.nn" ## [405] "cldr.short.or" "cldr.short.pa" ## [407] "cldr.short.pl" "cldr.short.ps" ## [409] "cldr.short.ps_pk" "cldr.short.pt" ## [411] "cldr.short.pt_ao" "cldr.short.ro" ## [413] "cldr.short.ro_md" "cldr.short.ru" ## [415] "cldr.short.ru_ua" "cldr.short.sah" ## [417] "cldr.short.sd" "cldr.short.se" ## [419] "cldr.short.se_fi" "cldr.short.si" ## [421] "cldr.short.sk" "cldr.short.sl" ## [423] "cldr.short.smn" "cldr.short.so" ## [425] "cldr.short.sq" "cldr.short.sr" ## [427] "cldr.short.sr_cyrl_ba" "cldr.short.sr_cyrl_me" ## [429] "cldr.short.sr_cyrl_xk" "cldr.short.sr_latn" ## [431] "cldr.short.sr_latn_ba" "cldr.short.sr_latn_me" ## [433] "cldr.short.sr_latn_xk" "cldr.short.sv" ## [435] "cldr.short.sw" "cldr.short.sw_cd" ## [437] "cldr.short.sw_ke" "cldr.short.ta" ## [439] "cldr.short.te" "cldr.short.tg" ## [441] "cldr.short.th" "cldr.short.ti" ## [443] "cldr.short.tk" "cldr.short.to" ## [445] "cldr.short.tr" "cldr.short.tt" ## [447] "cldr.short.ug" "cldr.short.uk" ## [449] "cldr.short.ur" "cldr.short.ur_in" ## [451] "cldr.short.uz" "cldr.short.uz_cyrl" ## [453] "cldr.short.vai" "cldr.short.vi" ## [455] "cldr.short.wae" "cldr.short.wo" ## [457] "cldr.short.yi" "cldr.short.yo" ## [459] "cldr.short.yo_bj" "cldr.short.yue" ## [461] "cldr.short.yue_hans" "cldr.short.zh" ## [463] "cldr.short.zh_hant" "cldr.short.zh_hant_hk" ## [465] "cldr.short.zu" "cldr.variant.af" ## [467] "cldr.variant.am" "cldr.variant.ar" ## [469] "cldr.variant.ar_ae" "cldr.variant.ar_ly" ## [471] "cldr.variant.ar_sa" "cldr.variant.as" ## [473] "cldr.variant.ast" "cldr.variant.az" ## [475] "cldr.variant.az_cyrl" "cldr.variant.be" ## [477] "cldr.variant.bg" "cldr.variant.bn" ## [479] "cldr.variant.bn_in" "cldr.variant.br" ## [481] "cldr.variant.bs" "cldr.variant.bs_cyrl" ## [483] "cldr.variant.ca" "cldr.variant.ccp" ## [485] "cldr.variant.ce" "cldr.variant.ceb" ## [487] "cldr.variant.chr" "cldr.variant.ckb" ## [489] "cldr.variant.cs" "cldr.variant.cy" ## [491] "cldr.variant.da" "cldr.variant.de" ## [493] "cldr.variant.de_at" "cldr.variant.de_ch" ## [495] "cldr.variant.dsb" "cldr.variant.dz" ## [497] "cldr.variant.ee" "cldr.variant.el" ## [499] "cldr.variant.en" "cldr.variant.en_001" ## [501] "cldr.variant.en_au" "cldr.variant.es" ## [503] "cldr.variant.es_419" "cldr.variant.es_ar" ## [505] "cldr.variant.es_cl" "cldr.variant.es_mx" ## [507] "cldr.variant.es_us" "cldr.variant.et" ## [509] "cldr.variant.eu" "cldr.variant.fa" ## [511] "cldr.variant.fa_af" "cldr.variant.fi" ## [513] "cldr.variant.fil" "cldr.variant.fo" ## [515] "cldr.variant.fr" "cldr.variant.fr_be" ## [517] "cldr.variant.fr_ca" "cldr.variant.fur" ## [519] "cldr.variant.fy" "cldr.variant.ga" ## [521] "cldr.variant.gd" "cldr.variant.gl" ## [523] "cldr.variant.gu" "cldr.variant.ha" ## [525] "cldr.variant.he" "cldr.variant.hi" ## [527] "cldr.variant.hr" "cldr.variant.hsb" ## [529] "cldr.variant.hu" "cldr.variant.hy" ## [531] "cldr.variant.ia" "cldr.variant.id" ## [533] "cldr.variant.ig" "cldr.variant.is" ## [535] "cldr.variant.it" "cldr.variant.ja" ## [537] "cldr.variant.jv" "cldr.variant.ka" ## [539] "cldr.variant.kea" "cldr.variant.kk" ## [541] "cldr.variant.km" "cldr.variant.kn" ## [543] "cldr.variant.ko" "cldr.variant.ko_kp" ## [545] "cldr.variant.kok" "cldr.variant.ksh" ## [547] "cldr.variant.ku" "cldr.variant.ky" ## [549] "cldr.variant.lb" "cldr.variant.ln" ## [551] "cldr.variant.lo" "cldr.variant.lt" ## [553] "cldr.variant.lv" "cldr.variant.mk" ## [555] "cldr.variant.ml" "cldr.variant.mn" ## [557] "cldr.variant.mr" "cldr.variant.ms" ## [559] "cldr.variant.mt" "cldr.variant.my" ## [561] "cldr.variant.mzn" "cldr.variant.nb" ## [563] "cldr.variant.ne" "cldr.variant.nl" ## [565] "cldr.variant.nn" "cldr.variant.or" ## [567] "cldr.variant.pa" "cldr.variant.pl" ## [569] "cldr.variant.ps" "cldr.variant.ps_pk" ## [571] "cldr.variant.pt" "cldr.variant.pt_ao" ## [573] "cldr.variant.qu" "cldr.variant.ro" ## [575] "cldr.variant.ro_md" "cldr.variant.ru" ## [577] "cldr.variant.ru_ua" "cldr.variant.sd" ## [579] "cldr.variant.se_fi" "cldr.variant.si" ## [581] "cldr.variant.sk" "cldr.variant.sl" ## [583] "cldr.variant.smn" "cldr.variant.so" ## [585] "cldr.variant.sq" "cldr.variant.sr" ## [587] "cldr.variant.sr_cyrl_ba" "cldr.variant.sr_cyrl_me" ## [589] "cldr.variant.sr_cyrl_xk" "cldr.variant.sr_latn" ## [591] "cldr.variant.sr_latn_ba" "cldr.variant.sr_latn_me" ## [593] "cldr.variant.sr_latn_xk" "cldr.variant.sv" ## [595] "cldr.variant.sw" "cldr.variant.sw_cd" ## [597] "cldr.variant.sw_ke" "cldr.variant.ta" ## [599] "cldr.variant.te" "cldr.variant.tg" ## [601] "cldr.variant.th" "cldr.variant.ti" ## [603] "cldr.variant.tk" "cldr.variant.to" ## [605] "cldr.variant.tr" "cldr.variant.tt" ## [607] "cldr.variant.ug" "cldr.variant.uk" ## [609] "cldr.variant.ur" "cldr.variant.ur_in" ## [611] "cldr.variant.uz" "cldr.variant.uz_cyrl" ## [613] "cldr.variant.vi" "cldr.variant.wae" ## [615] "cldr.variant.wo" "cldr.variant.yi" ## [617] "cldr.variant.yo" "cldr.variant.yo_bj" ## [619] "cldr.variant.yue" "cldr.variant.yue_hans" ## [621] "cldr.variant.zh" "cldr.variant.zh_hant" ## [623] "cldr.variant.zh_hant_hk" "cldr.variant.zu" ``` --- # Merging country names -- - we need to create two dataset to merge origin (i) and destination (j) columns ``` r countries_i = countries[, .(i = un, i_name = un.name.en)] countries_j = countries[, .(j = un, j_name = un.name.en)] # merge origins trade_dataset <- merge(trade_dataset, countries_i, by = c("i"), all.x = TRUE) # merge destinations trade_dataset <- merge(trade_dataset, countries_j, by = c("j"), all.x = TRUE) ``` --- # Check the merged dataset ``` r # merge origins head(trade_dataset) ``` ``` ## Key: <j> ## j i t k v q i_name j_name ## <int> <int> <int> <int> <num> <num> <char> <char> ## 1: 4 20 2021 610463 0.200 NA Andorra Afghanistan ## 2: 4 31 2022 40700 42.900 32.604 Azerbaijan Afghanistan ## 3: 4 31 2022 150200 81.485 200.966 Azerbaijan Afghanistan ## 4: 4 31 2022 170490 60.020 25.850 Azerbaijan Afghanistan ## 5: 4 31 2022 180690 83.936 34.626 Azerbaijan Afghanistan ## 6: 4 31 2022 271000 364.714 409.617 Azerbaijan Afghanistan ``` ``` r # Total exports of Turkey to Russia, sorted by year trade_dataset[i_name == "Türkiye" & j_name == "Russian Federation", sum(v), by = c("t")][order(t)] ``` ``` ## t V1 ## <int> <num> ## 1: 2021 5936627 ## 2: 2022 9239736 ``` --- # Merge product descriptions ``` r library(concordance) ``` ``` ## Warning: package 'concordance' was built under R version 4.3.3 ``` ``` r products <- hs1_desc products <- setDT(products) head(products) ``` ``` ## code desc ## <char> <char> ## 1: 01 Animals; live ## 2: 0101 Horses, asses, mules and hinnies; live ## 3: 010111 Horses; live, pure-bred breeding animals ## 4: 010119 Horses; live, other than pure-bred breeding animals ## 5: 010120 Asses, mules and hinnies; live ## 6: 0102 Bovine animals; live ``` --- # Merge product descriptions -- - Product codes are string (character) and consists aggregated groupings such as two or four digit. We need to get rid of them. ``` r products[, code := as.integer(code)] ``` ``` ## Warning in eval(jsub, SDenv, parent.frame()): NAs introduced by coercion ``` ``` r products <- products[code > 9999] # five digit products are agriculture sector. We need keep them. # rename the code name code setnames(products, c("code"), c("k")) # now we merge trade_dataset <- merge(trade_dataset, products, by = c("k"), all.x = TRUE) ``` -- - Some product descriptions may be not available in product dataset. We keep them by adding the option "all.x = TRUE" --- # Exercise -- - What is the total export from Türkiye to Russia containing the word "orange" in product description? -- - We can use the function "str_detect" from the package "stringr" to check if a string contains another string. ``` r library(stringr) ``` ``` ## Warning: package 'stringr' was built under R version 4.3.3 ``` ``` r trade_dataset[i_name == "Türkiye" & j_name == "Russian Federation" & str_detect(desc, "orange"), sum(v), by = c("t")][order(t)] ``` ``` ## t V1 ## <int> <num> ## 1: 2021 48251.75 ## 2: 2022 35782.40 ``` --- # ggplot2 -- - ggplot2 is a data visualization package. It is very powerful and flexible. It is based on the grammar of graphics. -- - It is a bit complicated at the beginning but it is worth to learn. -- - It is a bit slow for large datasets. But it is very flexible and powerful. -- - more? https://ggplot2.tidyverse.org/ --- # ggplot2 (cont'd) -- - Example: Let's plot the Total external debt of Turkey ``` r library(ggplot2) ``` ``` ## Warning: package 'ggplot2' was built under R version 4.3.3 ``` ``` r library(CBRT) ``` ``` ## Warning: package 'CBRT' was built under R version 4.3.3 ``` ``` ## Loading required package: curl ``` ``` ## Warning: package 'curl' was built under R version 4.3.3 ``` ``` ## Using libcurl 8.10.1 with Schannel ``` ``` r # data catagories catagories <- allCBRTCategories # data groups groups <- (allCBRTGroups) # series series <- (allCBRTSeries) ``` --- # ggplot2 (cont'd) -- - Example: Let's plot the Total external debt of Turkey ``` r library(ggplot2) library(CBRT) mySeries <- getDataSeries("TP.D1TOP", # External debt, A.Total Payments(Million USD) CBRTKey = "MQbGGYULHO", # my API freq = 5, # monthly startDate="01-01-2013") mySeries <- setDT(mySeries) setnames(mySeries, c("TP.D1TOP"), c("external_debt")) p <- ggplot(data = mySeries, aes(x = time, y = external_debt)) + geom_line() + theme_minimal() + labs(title = "External debt of Turkey", x = "Date", y = "External debt (million USD)" ) ``` --- # ggplot2 (cont'd) -- - Example: Let's plot the Total external debt of Turkey ``` r print(p) ``` <!-- -->