7  その他メモ

便利だが普段はあまり使わないため忘れがちな点についてメモしておく. 本テキストの他の箇所で扱う内容についてもメモする.

7.1 文字列の処理

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.2     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.2     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stringr)
fruit
 [1] "apple"             "apricot"           "avocado"          
 [4] "banana"            "bell pepper"       "bilberry"         
 [7] "blackberry"        "blackcurrant"      "blood orange"     
[10] "blueberry"         "boysenberry"       "breadfruit"       
[13] "canary melon"      "cantaloupe"        "cherimoya"        
[16] "cherry"            "chili pepper"      "clementine"       
[19] "cloudberry"        "coconut"           "cranberry"        
[22] "cucumber"          "currant"           "damson"           
[25] "date"              "dragonfruit"       "durian"           
[28] "eggplant"          "elderberry"        "feijoa"           
[31] "fig"               "goji berry"        "gooseberry"       
[34] "grape"             "grapefruit"        "guava"            
[37] "honeydew"          "huckleberry"       "jackfruit"        
[40] "jambul"            "jujube"            "kiwi fruit"       
[43] "kumquat"           "lemon"             "lime"             
[46] "loquat"            "lychee"            "mandarine"        
[49] "mango"             "mulberry"          "nectarine"        
[52] "nut"               "olive"             "orange"           
[55] "pamelo"            "papaya"            "passionfruit"     
[58] "peach"             "pear"              "persimmon"        
[61] "physalis"          "pineapple"         "plum"             
[64] "pomegranate"       "pomelo"            "purple mangosteen"
[67] "quince"            "raisin"            "rambutan"         
[70] "raspberry"         "redcurrant"        "rock melon"       
[73] "salal berry"       "satsuma"           "star fruit"       
[76] "strawberry"        "tamarillo"         "tangerine"        
[79] "ugli fruit"        "watermelon"       
# 長さ
str_length(fruit)
 [1]  5  7  7  6 11  8 10 12 12  9 11 10 12 10  9  6 12 10 10  7  9  8  7  6  4
[26] 11  6  8 10  6  3 10 10  5 10  5  8 11  9  6  6 10  7  5  4  6  6  9  5  8
[51]  9  3  5  6  6  6 12  5  4  9  8  9  4 11  6 17  6  6  8  9 10 10 11  7 10
[76] 10  9  9 10 10
# くっつける
str_c("fruit_", fruit)
 [1] "fruit_apple"             "fruit_apricot"          
 [3] "fruit_avocado"           "fruit_banana"           
 [5] "fruit_bell pepper"       "fruit_bilberry"         
 [7] "fruit_blackberry"        "fruit_blackcurrant"     
 [9] "fruit_blood orange"      "fruit_blueberry"        
[11] "fruit_boysenberry"       "fruit_breadfruit"       
[13] "fruit_canary melon"      "fruit_cantaloupe"       
[15] "fruit_cherimoya"         "fruit_cherry"           
[17] "fruit_chili pepper"      "fruit_clementine"       
[19] "fruit_cloudberry"        "fruit_coconut"          
[21] "fruit_cranberry"         "fruit_cucumber"         
[23] "fruit_currant"           "fruit_damson"           
[25] "fruit_date"              "fruit_dragonfruit"      
[27] "fruit_durian"            "fruit_eggplant"         
[29] "fruit_elderberry"        "fruit_feijoa"           
[31] "fruit_fig"               "fruit_goji berry"       
[33] "fruit_gooseberry"        "fruit_grape"            
[35] "fruit_grapefruit"        "fruit_guava"            
[37] "fruit_honeydew"          "fruit_huckleberry"      
[39] "fruit_jackfruit"         "fruit_jambul"           
[41] "fruit_jujube"            "fruit_kiwi fruit"       
[43] "fruit_kumquat"           "fruit_lemon"            
[45] "fruit_lime"              "fruit_loquat"           
[47] "fruit_lychee"            "fruit_mandarine"        
[49] "fruit_mango"             "fruit_mulberry"         
[51] "fruit_nectarine"         "fruit_nut"              
[53] "fruit_olive"             "fruit_orange"           
[55] "fruit_pamelo"            "fruit_papaya"           
[57] "fruit_passionfruit"      "fruit_peach"            
[59] "fruit_pear"              "fruit_persimmon"        
[61] "fruit_physalis"          "fruit_pineapple"        
[63] "fruit_plum"              "fruit_pomegranate"      
[65] "fruit_pomelo"            "fruit_purple mangosteen"
[67] "fruit_quince"            "fruit_raisin"           
[69] "fruit_rambutan"          "fruit_raspberry"        
[71] "fruit_redcurrant"        "fruit_rock melon"       
[73] "fruit_salal berry"       "fruit_satsuma"          
[75] "fruit_star fruit"        "fruit_strawberry"       
[77] "fruit_tamarillo"         "fruit_tangerine"        
[79] "fruit_ugli fruit"        "fruit_watermelon"       
# パターンの数
str_count(fruit, "[a-b]")
 [1] 1 1 2 4 1 2 3 3 2 2 2 2 2 2 1 0 0 0 1 0 2 1 1 1 1 1 1 1 1 1 0 1 1 1 1 2 0 1
[39] 1 2 1 0 1 0 0 1 0 2 1 1 1 0 0 1 1 3 1 1 1 0 1 1 0 2 0 1 0 1 3 2 1 0 3 2 1 2
[77] 2 1 0 1
# パターンの発見
str_detect(fruit, "[a-b]")
 [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
[13]  TRUE  TRUE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
[25]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
[37] FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE  TRUE
[49]  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
[61]  TRUE  TRUE FALSE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE
[73]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
# 繰り返す
str_dup(letters, 5)
 [1] "aaaaa" "bbbbb" "ccccc" "ddddd" "eeeee" "fffff" "ggggg" "hhhhh" "iiiii"
[10] "jjjjj" "kkkkk" "lllll" "mmmmm" "nnnnn" "ooooo" "ppppp" "qqqqq" "rrrrr"
[19] "sssss" "ttttt" "uuuuu" "vvvvv" "wwwww" "xxxxx" "yyyyy" "zzzzz"
# 終わる
fruit[str_ends(fruit, "a")]
[1] "banana"    "cherimoya" "feijoa"    "guava"     "papaya"    "satsuma"  
# 同じ
str_equal(fruit, "banana")
 [1] FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# エスケープ
str_escape("^")
[1] "\\^"
str_extract(fruit, "[a-c]")
 [1] "a" "a" "a" "b" "b" "b" "b" "b" "b" "b" "b" "b" "c" "c" "c" "c" "c" "c" "c"
[20] "c" "c" "c" "c" "a" "a" "a" "a" "a" "b" "a" NA  "b" "b" "a" "a" "a" NA  "c"
[39] "a" "a" "b" NA  "a" NA  NA  "a" "c" "a" "a" "b" "c" NA  NA  "a" "a" "a" "a"
[58] "a" "a" NA  "a" "a" NA  "a" NA  "a" "c" "a" "a" "a" "c" "c" "a" "a" "a" "a"
[77] "a" "a" NA  "a"
str_extract(fruit, "b[a-n]")
 [1] NA   NA   NA   "ba" "be" "bi" "bl" "bl" "bl" "bl" "be" NA   NA   NA   NA  
[16] NA   NA   NA   "be" NA   "be" "be" NA   NA   NA   NA   NA   NA   "be" NA  
[31] NA   "be" "be" NA   NA   NA   NA   "be" NA   NA   "be" NA   NA   NA   NA  
[46] NA   NA   NA   NA   "be" NA   NA   NA   NA   NA   NA   NA   NA   NA   NA  
[61] NA   NA   NA   NA   NA   NA   NA   NA   NA   "be" NA   NA   "be" NA   NA  
[76] "be" NA   NA   NA   NA  
str_extract_all(fruit, "apple", simplify = TRUE)
      [,1]   
 [1,] "apple"
 [2,] ""     
 [3,] ""     
 [4,] ""     
 [5,] ""     
 [6,] ""     
 [7,] ""     
 [8,] ""     
 [9,] ""     
[10,] ""     
[11,] ""     
[12,] ""     
[13,] ""     
[14,] ""     
[15,] ""     
[16,] ""     
[17,] ""     
[18,] ""     
[19,] ""     
[20,] ""     
[21,] ""     
[22,] ""     
[23,] ""     
[24,] ""     
[25,] ""     
[26,] ""     
[27,] ""     
[28,] ""     
[29,] ""     
[30,] ""     
[31,] ""     
[32,] ""     
[33,] ""     
[34,] ""     
[35,] ""     
[36,] ""     
[37,] ""     
[38,] ""     
[39,] ""     
[40,] ""     
[41,] ""     
[42,] ""     
[43,] ""     
[44,] ""     
[45,] ""     
[46,] ""     
[47,] ""     
[48,] ""     
[49,] ""     
[50,] ""     
[51,] ""     
[52,] ""     
[53,] ""     
[54,] ""     
[55,] ""     
[56,] ""     
[57,] ""     
[58,] ""     
[59,] ""     
[60,] ""     
[61,] ""     
[62,] "apple"
[63,] ""     
[64,] ""     
[65,] ""     
[66,] ""     
[67,] ""     
[68,] ""     
[69,] ""     
[70,] ""     
[71,] ""     
[72,] ""     
[73,] ""     
[74,] ""     
[75,] ""     
[76,] ""     
[77,] ""     
[78,] ""     
[79,] ""     
[80,] ""     
str_detect(c("a", "."), str_escape("."))
[1] FALSE  TRUE
x <- "Hello, World!"
str_extract_all(x, boundary("character"), simplify = TRUE)
     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
[1,] "H"  "e"  "l"  "l"  "o"  ","  " "  "W"  "o"  "r"   "l"   "d"   "!"  
str_extract_all(x, boundary("line_break"), simplify = TRUE)
     [,1]      [,2]    
[1,] "Hello, " "World!"
str_extract_all(x, boundary("sentence"), simplify = TRUE)
     [,1]           
[1,] "Hello, World!"
str_extract_all(x, boundary("word"), simplify = TRUE)
     [,1]    [,2]   
[1,] "Hello" "World"
str_flatten(fruit)
[1] "appleapricotavocadobananabell pepperbilberryblackberryblackcurrantblood orangeblueberryboysenberrybreadfruitcanary meloncantaloupecherimoyacherrychili pepperclementinecloudberrycoconutcranberrycucumbercurrantdamsondatedragonfruitdurianeggplantelderberryfeijoafiggoji berrygooseberrygrapegrapefruitguavahoneydewhuckleberryjackfruitjambuljujubekiwi fruitkumquatlemonlimeloquatlycheemandarinemangomulberrynectarinenutoliveorangepamelopapayapassionfruitpeachpearpersimmonphysalispineappleplumpomegranatepomelopurple mangosteenquinceraisinrambutanraspberryredcurrantrock melonsalal berrysatsumastar fruitstrawberrytamarillotangerineugli fruitwatermelon"
str_flatten_comma(fruit)
[1] "apple, apricot, avocado, banana, bell pepper, bilberry, blackberry, blackcurrant, blood orange, blueberry, boysenberry, breadfruit, canary melon, cantaloupe, cherimoya, cherry, chili pepper, clementine, cloudberry, coconut, cranberry, cucumber, currant, damson, date, dragonfruit, durian, eggplant, elderberry, feijoa, fig, goji berry, gooseberry, grape, grapefruit, guava, honeydew, huckleberry, jackfruit, jambul, jujube, kiwi fruit, kumquat, lemon, lime, loquat, lychee, mandarine, mango, mulberry, nectarine, nut, olive, orange, pamelo, papaya, passionfruit, peach, pear, persimmon, physalis, pineapple, plum, pomegranate, pomelo, purple mangosteen, quince, raisin, rambutan, raspberry, redcurrant, rock melon, salal berry, satsuma, star fruit, strawberry, tamarillo, tangerine, ugli fruit, watermelon"
str_glue("I bought ", "{fruit}", " by {rnorm(length(fruit), mean = 100, sd = 10) |> format(digits = 3)} yen")
I bought apple by  90.2 yen
I bought apricot by  92.7 yen
I bought avocado by  73.7 yen
I bought banana by  97.2 yen
I bought bell pepper by 102.6 yen
I bought bilberry by 117.3 yen
I bought blackberry by 118.3 yen
I bought blackcurrant by  98.3 yen
I bought blood orange by 104.6 yen
I bought blueberry by 101.8 yen
I bought boysenberry by 101.9 yen
I bought breadfruit by  99.0 yen
I bought canary melon by  95.0 yen
I bought cantaloupe by  90.9 yen
I bought cherimoya by  86.2 yen
I bought cherry by 101.5 yen
I bought chili pepper by  86.6 yen
I bought clementine by  95.8 yen
I bought cloudberry by  83.2 yen
I bought coconut by 103.6 yen
I bought cranberry by  88.3 yen
I bought cucumber by  97.6 yen
I bought currant by  99.1 yen
I bought damson by 106.2 yen
I bought date by  77.6 yen
I bought dragonfruit by 110.4 yen
I bought durian by  92.0 yen
I bought eggplant by 102.2 yen
I bought elderberry by  95.3 yen
I bought feijoa by 104.8 yen
I bought fig by  94.7 yen
I bought goji berry by 105.5 yen
I bought gooseberry by  97.8 yen
I bought grape by 109.0 yen
I bought grapefruit by  96.5 yen
I bought guava by 103.1 yen
I bought honeydew by 104.5 yen
I bought huckleberry by 105.7 yen
I bought jackfruit by 101.7 yen
I bought jambul by  93.4 yen
I bought jujube by  91.3 yen
I bought kiwi fruit by 101.3 yen
I bought kumquat by  91.7 yen
I bought lemon by 103.5 yen
I bought lime by  96.1 yen
I bought loquat by  89.4 yen
I bought lychee by  88.1 yen
I bought mandarine by 106.6 yen
I bought mango by 111.8 yen
I bought mulberry by 100.0 yen
I bought nectarine by  92.8 yen
I bought nut by  97.1 yen
I bought olive by 101.4 yen
I bought orange by  96.4 yen
I bought pamelo by  90.7 yen
I bought papaya by 102.5 yen
I bought passionfruit by 103.1 yen
I bought peach by 100.7 yen
I bought pear by 108.8 yen
I bought persimmon by 100.8 yen
I bought physalis by 104.1 yen
I bought pineapple by 110.3 yen
I bought plum by  91.1 yen
I bought pomegranate by  96.6 yen
I bought pomelo by  75.1 yen
I bought purple mangosteen by 112.2 yen
I bought quince by  97.8 yen
I bought raisin by  93.3 yen
I bought rambutan by  86.0 yen
I bought raspberry by 108.2 yen
I bought redcurrant by 106.1 yen
I bought rock melon by 100.5 yen
I bought salal berry by  75.5 yen
I bought satsuma by  98.6 yen
I bought star fruit by 102.5 yen
I bought strawberry by 113.2 yen
I bought tamarillo by  94.9 yen
I bought tangerine by  95.5 yen
I bought ugli fruit by  92.1 yen
I bought watermelon by  99.9 yen
starwars |> str_glue_data("Is {height} over 100? {ifelse(height >= 100, 'Yes','No')}.")
Is 172 over 100? Yes.
Is 167 over 100? Yes.
Is 96 over 100? No.
Is 202 over 100? Yes.
Is 150 over 100? Yes.
Is 178 over 100? Yes.
Is 165 over 100? Yes.
Is 97 over 100? No.
Is 183 over 100? Yes.
Is 182 over 100? Yes.
Is 188 over 100? Yes.
Is 180 over 100? Yes.
Is 228 over 100? Yes.
Is 180 over 100? Yes.
Is 173 over 100? Yes.
Is 175 over 100? Yes.
Is 170 over 100? Yes.
Is 180 over 100? Yes.
Is 66 over 100? No.
Is 170 over 100? Yes.
Is 183 over 100? Yes.
Is 200 over 100? Yes.
Is 190 over 100? Yes.
Is 177 over 100? Yes.
Is 175 over 100? Yes.
Is 180 over 100? Yes.
Is 150 over 100? Yes.
Is NA over 100? NA.
Is 88 over 100? No.
Is 160 over 100? Yes.
Is 193 over 100? Yes.
Is 191 over 100? Yes.
Is 170 over 100? Yes.
Is 196 over 100? Yes.
Is 224 over 100? Yes.
Is 206 over 100? Yes.
Is 183 over 100? Yes.
Is 137 over 100? Yes.
Is 112 over 100? Yes.
Is 183 over 100? Yes.
Is 163 over 100? Yes.
Is 175 over 100? Yes.
Is 180 over 100? Yes.
Is 178 over 100? Yes.
Is 94 over 100? No.
Is 122 over 100? Yes.
Is 163 over 100? Yes.
Is 188 over 100? Yes.
Is 198 over 100? Yes.
Is 196 over 100? Yes.
Is 171 over 100? Yes.
Is 184 over 100? Yes.
Is 188 over 100? Yes.
Is 264 over 100? Yes.
Is 188 over 100? Yes.
Is 196 over 100? Yes.
Is 185 over 100? Yes.
Is 157 over 100? Yes.
Is 183 over 100? Yes.
Is 183 over 100? Yes.
Is 170 over 100? Yes.
Is 166 over 100? Yes.
Is 165 over 100? Yes.
Is 193 over 100? Yes.
Is 191 over 100? Yes.
Is 183 over 100? Yes.
Is 168 over 100? Yes.
Is 198 over 100? Yes.
Is 229 over 100? Yes.
Is 213 over 100? Yes.
Is 167 over 100? Yes.
Is 79 over 100? No.
Is 96 over 100? No.
Is 193 over 100? Yes.
Is 191 over 100? Yes.
Is 178 over 100? Yes.
Is 216 over 100? Yes.
Is 234 over 100? Yes.
Is 188 over 100? Yes.
Is 178 over 100? Yes.
Is 206 over 100? Yes.
Is NA over 100? NA.
Is NA over 100? NA.
Is NA over 100? NA.
Is NA over 100? NA.
Is NA over 100? NA.
Is 165 over 100? Yes.
x <- c(NA,"a","b","c",NA)
x
[1] NA  "a" "b" "c" NA 
str_c("fruit_", x)
[1] NA        "fruit_a" "fruit_b" "fruit_c" NA       
str_c("fruit_", str_replace_na(x, replacement = "DKNA"))
[1] "fruit_DKNA" "fruit_a"    "fruit_b"    "fruit_c"    "fruit_DKNA"
str_to_lower(LETTERS)
 [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
[20] "t" "u" "v" "w" "x" "y" "z"
str_sub(fruit, 1, 1) <- str_to_upper(str_sub(fruit, 1, 1))
fruit
 [1] "Apple"             "Apricot"           "Avocado"          
 [4] "Banana"            "Bell pepper"       "Bilberry"         
 [7] "Blackberry"        "Blackcurrant"      "Blood orange"     
[10] "Blueberry"         "Boysenberry"       "Breadfruit"       
[13] "Canary melon"      "Cantaloupe"        "Cherimoya"        
[16] "Cherry"            "Chili pepper"      "Clementine"       
[19] "Cloudberry"        "Coconut"           "Cranberry"        
[22] "Cucumber"          "Currant"           "Damson"           
[25] "Date"              "Dragonfruit"       "Durian"           
[28] "Eggplant"          "Elderberry"        "Feijoa"           
[31] "Fig"               "Goji berry"        "Gooseberry"       
[34] "Grape"             "Grapefruit"        "Guava"            
[37] "Honeydew"          "Huckleberry"       "Jackfruit"        
[40] "Jambul"            "Jujube"            "Kiwi fruit"       
[43] "Kumquat"           "Lemon"             "Lime"             
[46] "Loquat"            "Lychee"            "Mandarine"        
[49] "Mango"             "Mulberry"          "Nectarine"        
[52] "Nut"               "Olive"             "Orange"           
[55] "Pamelo"            "Papaya"            "Passionfruit"     
[58] "Peach"             "Pear"              "Persimmon"        
[61] "Physalis"          "Pineapple"         "Plum"             
[64] "Pomegranate"       "Pomelo"            "Purple mangosteen"
[67] "Quince"            "Raisin"            "Rambutan"         
[70] "Raspberry"         "Redcurrant"        "Rock melon"       
[73] "Salal berry"       "Satsuma"           "Star fruit"       
[76] "Strawberry"        "Tamarillo"         "Tangerine"        
[79] "Ugli fruit"        "Watermelon"       
str_to_upper("gender")
[1] "GENDER"
str_to_upper("全角のa")
[1] "全角のA"
str_to_lower("ESTSIMATE")
[1] "estsimate"
str_to_lower("A")
[1] "a"
str_view(fruit, "a")
 [3] │ Avoc<a>do
 [4] │ B<a>n<a>n<a>
 [7] │ Bl<a>ckberry
 [8] │ Bl<a>ckcurr<a>nt
 [9] │ Blood or<a>nge
[12] │ Bre<a>dfruit
[13] │ C<a>n<a>ry melon
[14] │ C<a>nt<a>loupe
[15] │ Cherimoy<a>
[21] │ Cr<a>nberry
[23] │ Curr<a>nt
[24] │ D<a>mson
[25] │ D<a>te
[26] │ Dr<a>gonfruit
[27] │ Duri<a>n
[28] │ Eggpl<a>nt
[30] │ Feijo<a>
[34] │ Gr<a>pe
[35] │ Gr<a>pefruit
[36] │ Gu<a>v<a>
... and 28 more
str_view(fruit, "[ae]")
 [1] │ Appl<e>
 [3] │ Avoc<a>do
 [4] │ B<a>n<a>n<a>
 [5] │ B<e>ll p<e>pp<e>r
 [6] │ Bilb<e>rry
 [7] │ Bl<a>ckb<e>rry
 [8] │ Bl<a>ckcurr<a>nt
 [9] │ Blood or<a>ng<e>
[10] │ Blu<e>b<e>rry
[11] │ Boys<e>nb<e>rry
[12] │ Br<e><a>dfruit
[13] │ C<a>n<a>ry m<e>lon
[14] │ C<a>nt<a>loup<e>
[15] │ Ch<e>rimoy<a>
[16] │ Ch<e>rry
[17] │ Chili p<e>pp<e>r
[18] │ Cl<e>m<e>ntin<e>
[19] │ Cloudb<e>rry
[21] │ Cr<a>nb<e>rry
[22] │ Cucumb<e>r
... and 53 more
str_view(fruit, ".a.")
 [3] │ Avo<cad>o
 [4] │ <Ban>ana
 [7] │ B<lac>kberry
 [8] │ B<lac>kcur<ran>t
 [9] │ Blood o<ran>ge
[12] │ Br<ead>fruit
[13] │ <Can>ary melon
[14] │ <Can><tal>oupe
[21] │ C<ran>berry
[23] │ Cur<ran>t
[24] │ <Dam>son
[25] │ <Dat>e
[26] │ D<rag>onfruit
[27] │ Dur<ian>
[28] │ Eggp<lan>t
[34] │ G<rap>e
[35] │ G<rap>efruit
[36] │ G<uav>a
[39] │ <Jac>kfruit
[40] │ <Jam>bul
... and 26 more
str_view(fruit, "^A")
[1] │ <A>pple
[2] │ <A>pricot
[3] │ <A>vocado
str_view(fruit, "a$")
 [4] │ Banan<a>
[15] │ Cherimoy<a>
[30] │ Feijo<a>
[36] │ Guav<a>
[56] │ Papay<a>
[74] │ Satsum<a>
str_view(fruit, ".a.|.b.")
 [3] │ Avo<cad>o
 [4] │ <Ban>ana
 [6] │ Bi<lbe>rry
 [7] │ B<lac><kbe>rry
 [8] │ B<lac>kcur<ran>t
 [9] │ Blood o<ran>ge
[10] │ Blu<ebe>rry
[11] │ Boyse<nbe>rry
[12] │ Br<ead>fruit
[13] │ <Can>ary melon
[14] │ <Can><tal>oupe
[19] │ Clou<dbe>rry
[21] │ C<ran>berry
[22] │ Cucu<mbe>r
[23] │ Cur<ran>t
[24] │ <Dam>son
[25] │ <Dat>e
[26] │ D<rag>onfruit
[27] │ Dur<ian>
[28] │ Eggp<lan>t
... and 37 more
str_view(words, "^y")
[975] │ <y>ear
[976] │ <y>es
[977] │ <y>esterday
[978] │ <y>et
[979] │ <y>ou
[980] │ <y>oung
str_view(words, "x$")
[108] │ bo<x>
[747] │ se<x>
[772] │ si<x>
[841] │ ta<x>
str_view(words, "^...$")
  [9] │ <act>
 [12] │ <add>
 [22] │ <age>
 [24] │ <ago>
 [26] │ <air>
 [27] │ <all>
 [38] │ <and>
 [41] │ <any>
 [51] │ <arm>
 [54] │ <art>
 [56] │ <ask>
 [68] │ <bad>
 [69] │ <bag>
 [73] │ <bar>
 [82] │ <bed>
 [89] │ <bet>
 [91] │ <big>
 [94] │ <bit>
[108] │ <box>
[109] │ <boy>
... and 90 more
str_view(words, "^*.......*$")
 [4] │ <absolute>
 [5] │ <accept>
 [6] │ <account>
 [7] │ <achieve>
 [8] │ <across>
[10] │ <active>
[11] │ <actual>
[13] │ <address>
[15] │ <advertise>
[16] │ <affect>
[17] │ <afford>
[19] │ <afternoon>
[21] │ <against>
[29] │ <almost>
[31] │ <already>
[32] │ <alright>
[34] │ <although>
[35] │ <always>
[36] │ <america>
[37] │ <amount>
... and 368 more
str_view(words, "[^a-u]")
  [7] │ achie<v>e
 [10] │ acti<v>e
 [15] │ ad<v>ertise
 [28] │ allo<w>
 [31] │ alread<y>
 [35] │ al<w>a<y>s
 [40] │ ans<w>er
 [41] │ an<y>
 [45] │ appl<y>
 [61] │ authorit<y>
 [62] │ a<v>ailable
 [63] │ a<w>are
 [64] │ a<w>a<y>
 [65] │ a<w>ful
 [66] │ bab<y>
 [79] │ beaut<y>
 [86] │ belie<v>e
 [90] │ bet<w>een
 [98] │ blo<w>
[102] │ bod<y>
... and 229 more
str_view(words, "a|b")
 [1] │ <a>
 [2] │ <a><b>le
 [3] │ <a><b>out
 [4] │ <a><b>solute
 [5] │ <a>ccept
 [6] │ <a>ccount
 [7] │ <a>chieve
 [8] │ <a>cross
 [9] │ <a>ct
[10] │ <a>ctive
[11] │ <a>ctu<a>l
[12] │ <a>dd
[13] │ <a>ddress
[14] │ <a>dmit
[15] │ <a>dvertise
[16] │ <a>ffect
[17] │ <a>fford
[18] │ <a>fter
[19] │ <a>fternoon
[20] │ <a>g<a>in
... and 391 more
str_view(words, "(a|b)")
 [1] │ <a>
 [2] │ <a><b>le
 [3] │ <a><b>out
 [4] │ <a><b>solute
 [5] │ <a>ccept
 [6] │ <a>ccount
 [7] │ <a>chieve
 [8] │ <a>cross
 [9] │ <a>ct
[10] │ <a>ctive
[11] │ <a>ctu<a>l
[12] │ <a>dd
[13] │ <a>ddress
[14] │ <a>dmit
[15] │ <a>dvertise
[16] │ <a>ffect
[17] │ <a>fford
[18] │ <a>fter
[19] │ <a>fternoon
[20] │ <a>g<a>in
... and 391 more

7.2 正規表現(regular expression)

  • extended regular expressions (the default)
  • Perl-like regular expressions (perl = TRUE で指定)
  • literal regular expressions (fixed = TRUE で指定.パターンを正規表現ではなく文字列そのものとして扱う)

メタキャラクター: . \ | ( ) [ { ^ $ * + ? (これらを文字そのものとして検索したい場合は,"\\." のようにバックスラッシュでエスケープする)

letters
 [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
[20] "t" "u" "v" "w" "x" "y" "z"
grep("j|k", fruit)
[1]  7  8 30 32 38 39 41 72
grep("[jk]", fruit)
[1]  7  8 30 32 38 39 41 72
grepl("j|k", fruit)
 [1] FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE
[13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[25] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE
[37] FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
[73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
grepl("[jk]", fruit)
 [1] FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE
[13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[25] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE
[37] FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
[73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
sub("[jk]","XXXXX", fruit)
 [1] "Apple"             "Apricot"           "Avocado"          
 [4] "Banana"            "Bell pepper"       "Bilberry"         
 [7] "BlacXXXXXberry"    "BlacXXXXXcurrant"  "Blood orange"     
[10] "Blueberry"         "Boysenberry"       "Breadfruit"       
[13] "Canary melon"      "Cantaloupe"        "Cherimoya"        
[16] "Cherry"            "Chili pepper"      "Clementine"       
[19] "Cloudberry"        "Coconut"           "Cranberry"        
[22] "Cucumber"          "Currant"           "Damson"           
[25] "Date"              "Dragonfruit"       "Durian"           
[28] "Eggplant"          "Elderberry"        "FeiXXXXXoa"       
[31] "Fig"               "GoXXXXXi berry"    "Gooseberry"       
[34] "Grape"             "Grapefruit"        "Guava"            
[37] "Honeydew"          "HucXXXXXleberry"   "JacXXXXXfruit"    
[40] "Jambul"            "JuXXXXXube"        "Kiwi fruit"       
[43] "Kumquat"           "Lemon"             "Lime"             
[46] "Loquat"            "Lychee"            "Mandarine"        
[49] "Mango"             "Mulberry"          "Nectarine"        
[52] "Nut"               "Olive"             "Orange"           
[55] "Pamelo"            "Papaya"            "Passionfruit"     
[58] "Peach"             "Pear"              "Persimmon"        
[61] "Physalis"          "Pineapple"         "Plum"             
[64] "Pomegranate"       "Pomelo"            "Purple mangosteen"
[67] "Quince"            "Raisin"            "Rambutan"         
[70] "Raspberry"         "Redcurrant"        "RocXXXXX melon"   
[73] "Salal berry"       "Satsuma"           "Star fruit"       
[76] "Strawberry"        "Tamarillo"         "Tangerine"        
[79] "Ugli fruit"        "Watermelon"       
gsub("[jk]","XXXXX", fruit)
 [1] "Apple"             "Apricot"           "Avocado"          
 [4] "Banana"            "Bell pepper"       "Bilberry"         
 [7] "BlacXXXXXberry"    "BlacXXXXXcurrant"  "Blood orange"     
[10] "Blueberry"         "Boysenberry"       "Breadfruit"       
[13] "Canary melon"      "Cantaloupe"        "Cherimoya"        
[16] "Cherry"            "Chili pepper"      "Clementine"       
[19] "Cloudberry"        "Coconut"           "Cranberry"        
[22] "Cucumber"          "Currant"           "Damson"           
[25] "Date"              "Dragonfruit"       "Durian"           
[28] "Eggplant"          "Elderberry"        "FeiXXXXXoa"       
[31] "Fig"               "GoXXXXXi berry"    "Gooseberry"       
[34] "Grape"             "Grapefruit"        "Guava"            
[37] "Honeydew"          "HucXXXXXleberry"   "JacXXXXXfruit"    
[40] "Jambul"            "JuXXXXXube"        "Kiwi fruit"       
[43] "Kumquat"           "Lemon"             "Lime"             
[46] "Loquat"            "Lychee"            "Mandarine"        
[49] "Mango"             "Mulberry"          "Nectarine"        
[52] "Nut"               "Olive"             "Orange"           
[55] "Pamelo"            "Papaya"            "Passionfruit"     
[58] "Peach"             "Pear"              "Persimmon"        
[61] "Physalis"          "Pineapple"         "Plum"             
[64] "Pomegranate"       "Pomelo"            "Purple mangosteen"
[67] "Quince"            "Raisin"            "Rambutan"         
[70] "Raspberry"         "Redcurrant"        "RocXXXXX melon"   
[73] "Salal berry"       "Satsuma"           "Star fruit"       
[76] "Strawberry"        "Tamarillo"         "Tangerine"        
[79] "Ugli fruit"        "Watermelon"       
x <- "Hello World!"

# 文頭
grepl("^e", x)
[1] FALSE
grepl("^H", x)
[1] TRUE
grepl("^h", x)
[1] FALSE
# 文末
grepl("d$", x)
[1] FALSE
grepl("!$", x)
[1] TRUE
# いずれか
grepl("[!]", x)
[1] TRUE
grepl("[a]", x)
[1] FALSE
grepl("[A-Za-z0-9]", "!!!!!")
[1] FALSE
grepl("[A-Za-z0-9]", "99999")
[1] TRUE
grepl("[A-Za-z0-9]", "#####")
[1] FALSE
grepl("[A-Za-z0-9]", "####1#")
[1] TRUE
# グルーピング
grepl("(e)", x)
[1] TRUE
grepl("(e) +(d)", x)
[1] FALSE
grepl("(e)+(w)", x)
[1] FALSE
grepl("(H)(e)", x)
[1] TRUE
grepl("(H)(l)", x)
[1] FALSE
grep("[e]", x)
[1] 1
grepl("[e]", x)
[1] TRUE
grepl("e", x)
[1] TRUE
grepl("l|k", x)
[1] TRUE
grepl("e", x)
[1] TRUE
grepl("h", x)
[1] FALSE
grepl(".", x)
[1] TRUE
grep("[e]", letters)
[1] 5
grepl("[e]", letters)
 [1] FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[25] FALSE FALSE
grepl("e", letters)
 [1] FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[25] FALSE FALSE
grepl("l|k", letters)
 [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE
[13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[25] FALSE FALSE
grepl("e", letters)
 [1] FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[25] FALSE FALSE
grepl("h", letters)
 [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE
[13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[25] FALSE FALSE
grepl(".", letters)
 [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
library(tidyverse)
starwars$name[grepl("aa|[k]", starwars$name)]
 [1] "Luke Skywalker"        "Biggs Darklighter"     "Anakin Skywalker"     
 [4] "Wilhuff Tarkin"        "Jek Tono Porkins"      "Bossk"                
 [7] "Ackbar"                "Wicket Systri Warrick" "Jar Jar Binks"        
[10] "Quarsh Panaka"         "Shmi Skywalker"        "Dooku"                
[13] "Shaak Ti"             

7.3 any_of, all_of

変数名を文字列ベクトルとしてオブジェクトに格納し,それを使って変数を選択するときに便利.all_of()は指定した変数がひとつでも存在しないとエラーになるが,any_of()は存在する変数だけを選択する.

select_vars <- c("height", "mass")
starwars |> select(select_vars) 
Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
ℹ Please use `all_of()` or `any_of()` instead.
  # Was:
  data %>% select(select_vars)

  # Now:
  data %>% select(all_of(select_vars))

See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
# A tibble: 87 × 2
   height  mass
    <int> <dbl>
 1    172    77
 2    167    75
 3     96    32
 4    202   136
 5    150    49
 6    178   120
 7    165    75
 8     97    32
 9    183    84
10    182    77
# ℹ 77 more rows
starwars |> select(any_of(select_vars))
# A tibble: 87 × 2
   height  mass
    <int> <dbl>
 1    172    77
 2    167    75
 3     96    32
 4    202   136
 5    150    49
 6    178   120
 7    165    75
 8     97    32
 9    183    84
10    182    77
# ℹ 77 more rows

7.4 map

library(purrr)
x <- 1:10
sqrt(x)
 [1] 1.000000 1.414214 1.732051 2.000000 2.236068 2.449490 2.645751 2.828427
 [9] 3.000000 3.162278
map(x, sqrt)
[[1]]
[1] 1

[[2]]
[1] 1.414214

[[3]]
[1] 1.732051

[[4]]
[1] 2

[[5]]
[1] 2.236068

[[6]]
[1] 2.44949

[[7]]
[1] 2.645751

[[8]]
[1] 2.828427

[[9]]
[1] 3

[[10]]
[1] 3.162278
map_vec(x, sqrt)
 [1] 1.000000 1.414214 1.732051 2.000000 2.236068 2.449490 2.645751 2.828427
 [9] 3.000000 3.162278

d <- d %>% mutate(gpa_std = (gpa_sum - mean(gpa_sum, na.rm = TRUE)) / sd(gpa_sum, na.rm = TRUE))

7.5 部分一致

data.frame形式であれば$で変数名の部分一致が利用できるが,tibble形式ではできない(警告とともにNULLが返る).部分一致は意図しない変数の参照やエラーの原因になりうるので,変数名は省略せずに入力するほうが良い.

library(tidyverse)
starwars
# A tibble: 87 × 14
   name     height  mass hair_color skin_color eye_color birth_year sex   gender
   <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
 1 Luke Sk…    172    77 blond      fair       blue            19   male  mascu…
 2 C-3PO       167    75 <NA>       gold       yellow         112   none  mascu…
 3 R2-D2        96    32 <NA>       white, bl… red             33   none  mascu…
 4 Darth V…    202   136 none       white      yellow          41.9 male  mascu…
 5 Leia Or…    150    49 brown      light      brown           19   fema… femin…
 6 Owen La…    178   120 brown, gr… light      blue            52   male  mascu…
 7 Beru Wh…    165    75 brown      light      blue            47   fema… femin…
 8 R5-D4        97    32 <NA>       white, red red             NA   none  mascu…
 9 Biggs D…    183    84 black      light      brown           24   male  mascu…
10 Obi-Wan…    182    77 auburn, w… fair       blue-gray       57   male  mascu…
# ℹ 77 more rows
# ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
#   vehicles <list>, starships <list>
starwars$hei
Warning: Unknown or uninitialised column: `hei`.
NULL
starwars$ha
Warning: Unknown or uninitialised column: `ha`.
NULL
starwars_df <- as.data.frame(starwars) 
starwars_df$hei
 [1] 172 167  96 202 150 178 165  97 183 182 188 180 228 180 173 175 170 180  66
[20] 170 183 200 190 177 175 180 150  NA  88 160 193 191 170 196 224 206 183 137
[39] 112 183 163 175 180 178  94 122 163 188 198 196 171 184 188 264 188 196 185
[58] 157 183 183 170 166 165 193 191 183 168 198 229 213 167  79  96 193 191 178
[77] 216 234 188 178 206  NA  NA  NA  NA  NA 165
starwars_df$ha
 [1] "blond"         NA              NA              "none"         
 [5] "brown"         "brown, grey"   "brown"         NA             
 [9] "black"         "auburn, white" "blond"         "auburn, grey" 
[13] "brown"         "brown"         NA              NA             
[17] "brown"         "brown"         "white"         "grey"         
[21] "black"         "none"          "none"          "black"        
[25] "none"          "none"          "auburn"        "brown"        
[29] "brown"         "none"          "brown"         "none"         
[33] "blond"         "none"          "none"          "none"         
[37] "brown"         "black"         "none"          "black"        
[41] "black"         "none"          "none"          "none"         
[45] "none"          "none"          "none"          "none"         
[49] "white"         "none"          "black"         "none"         
[53] "none"          "none"          "none"          "none"         
[57] "black"         "brown"         "brown"         "none"         
[61] "black"         "black"         "brown"         "white"        
[65] "black"         "black"         "blonde"        "none"         
[69] "none"          "none"          "white"         "none"         
[73] "none"          "none"          "none"          "none"         
[77] "none"          "brown"         "brown"         "none"         
[81] "none"          "black"         "brown"         "brown"        
[85] "none"          "unknown"       "brown"        

7.6 因子

x <- c(0,1,1,1,0)
y <- factor(x)
y
[1] 0 1 1 1 0
Levels: 0 1
str(y)
 Factor w/ 2 levels "0","1": 1 2 2 2 1
attributes(y)
$levels
[1] "0" "1"

$class
[1] "factor"
levels(y)
[1] "0" "1"
as.numeric(y)
[1] 1 2 2 2 1
as.numeric(as.character(y))
[1] 0 1 1 1 0

7.7 集計データを個票データに変換する

# クロス表
occupationalStatus
      destination
origin   1   2   3   4   5   6   7   8
     1  50  19  26   8   7  11   6   2
     2  16  40  34  18  11  20   8   3
     3  12  35  65  66  35  88  23  21
     4  11  20  58 110  40 183  64  32
     5   2   8  12  23  25  46  28  12
     6  12  28 102 162  90 554 230 177
     7   0   6  19  40  21 158 143  71
     8   0   3  14  32  15 126  91 106
# 集計データ
data.frame(occupationalStatus)
   origin destination Freq
1       1           1   50
2       2           1   16
3       3           1   12
4       4           1   11
5       5           1    2
6       6           1   12
7       7           1    0
8       8           1    0
9       1           2   19
10      2           2   40
11      3           2   35
12      4           2   20
13      5           2    8
14      6           2   28
15      7           2    6
16      8           2    3
17      1           3   26
18      2           3   34
19      3           3   65
20      4           3   58
21      5           3   12
22      6           3  102
23      7           3   19
24      8           3   14
25      1           4    8
26      2           4   18
27      3           4   66
28      4           4  110
29      5           4   23
30      6           4  162
31      7           4   40
32      8           4   32
33      1           5    7
34      2           5   11
35      3           5   35
36      4           5   40
37      5           5   25
38      6           5   90
39      7           5   21
40      8           5   15
41      1           6   11
42      2           6   20
43      3           6   88
44      4           6  183
45      5           6   46
46      6           6  554
47      7           6  158
48      8           6  126
49      1           7    6
50      2           7    8
51      3           7   23
52      4           7   64
53      5           7   28
54      6           7  230
55      7           7  143
56      8           7   91
57      1           8    2
58      2           8    3
59      3           8   21
60      4           8   32
61      5           8   12
62      6           8  177
63      7           8   71
64      8           8  106
# 個票データ
df_occupationalStatus <- vcdExtra::expand.dft(data.frame(occupationalStatus), freq = "Freq") |> 
  mutate(across(origin:destination, factor))
# 
df_occupationalStatus |> summary()
     origin      destination  
 6      :1355   6      :1186  
 4      : 518   7      : 593  
 7      : 458   4      : 459  
 8      : 387   8      : 424  
 3      : 345   3      : 330  
 5      : 156   5      : 244  
 (Other): 279   (Other): 262  

7.8 関数の中身を確認する

mean()関数がどのようにして作成されているのかを確認したい場合,meanのように()をとって入力すればよい.しかし,UseMethod("mean")のように関数が表示され,中身を確認できない場合もある.

mean
function (x, ...) 
UseMethod("mean")
<bytecode: 0x1232d37c8>
<environment: namespace:base>

このような場合は,methods()関数を用いて,その関数に登録されているメソッドの一覧を確認するとよい.

methods(mean)
 [1] mean,ANY-method          mean,Matrix-method       mean,sparseMatrix-method
 [4] mean,sparseVector-method mean.Date*               mean.default*           
 [7] mean.difftime*           mean.POSIXct*            mean.POSIXlt*           
[10] mean.quosure*            mean.vctrs_vctr*         mean.yearmon*           
[13] mean.yearqtr*            mean.zoo*               
see '?methods' for accessing help and source code

様々な結果が示されるがここでは,6つめの要素にあるmean.default*を確認しよう.*をとってmean.defaultと入力するだけでよい.この方法で表示されない場合(パッケージからエクスポートされていないメソッドなど)は,getAnywhere()やパッケージ名:::メソッド名を用いればよい.

mean.default
function (x, trim = 0, na.rm = FALSE, ...) 
{
    if (!is.numeric(x) && !is.complex(x) && !is.logical(x)) {
        warning("argument is not numeric or logical: returning NA")
        return(NA_real_)
    }
    if (isTRUE(na.rm)) 
        x <- x[!is.na(x)]
    if (!is.numeric(trim) || length(trim) != 1L) 
        stop("'trim' must be numeric of length one")
    n <- length(x)
    if (trim > 0 && n) {
        if (is.complex(x)) 
            stop("trimmed means are not defined for complex data")
        if (anyNA(x)) 
            return(NA_real_)
        if (trim >= 0.5) 
            return(stats::median(x, na.rm = FALSE))
        lo <- floor(n * trim) + 1
        hi <- n + 1 - lo
        x <- sort.int(x, partial = unique(c(lo, hi)))[lo:hi]
    }
    .Internal(mean(x))
}
<bytecode: 0x124693e28>
<environment: namespace:base>
getAnywhere(mean.default)
A single object matching 'mean.default' was found
It was found in the following places
  package:base
  registered S3 method for mean from namespace base
  namespace:base
with value

function (x, trim = 0, na.rm = FALSE, ...) 
{
    if (!is.numeric(x) && !is.complex(x) && !is.logical(x)) {
        warning("argument is not numeric or logical: returning NA")
        return(NA_real_)
    }
    if (isTRUE(na.rm)) 
        x <- x[!is.na(x)]
    if (!is.numeric(trim) || length(trim) != 1L) 
        stop("'trim' must be numeric of length one")
    n <- length(x)
    if (trim > 0 && n) {
        if (is.complex(x)) 
            stop("trimmed means are not defined for complex data")
        if (anyNA(x)) 
            return(NA_real_)
        if (trim >= 0.5) 
            return(stats::median(x, na.rm = FALSE))
        lo <- floor(n * trim) + 1
        hi <- n + 1 - lo
        x <- sort.int(x, partial = unique(c(lo, hi)))[lo:hi]
    }
    .Internal(mean(x))
}
<bytecode: 0x124693e28>
<environment: namespace:base>
library(DescTools)
OddsRatio
function (x, conf.level = NULL, ...) 
{
    UseMethod("OddsRatio")
}
<bytecode: 0x142c60b50>
<environment: namespace:DescTools>
methods(OddsRatio)
[1] OddsRatio.default*  OddsRatio.glm*      OddsRatio.multinom*
[4] OddsRatio.zeroinfl*
see '?methods' for accessing help and source code
DescTools:::OddsRatio.default
function (x, conf.level = NULL, y = NULL, method = c("wald", 
    "mle", "midp"), interval = c(0, 1000), ...) 
{
    if (!is.null(y)) 
        x <- table(x, y, ...)
    if (is.null(conf.level)) 
        conf.level <- NA
    p <- (d <- dim(x))[1L]
    if (!is.numeric(x) || length(d) != 2L || p != d[2L] || p != 
        2L) 
        stop("'x' is not a 2x2 numeric matrix")
    switch(match.arg(arg = method, choices = c("wald", "mle", 
        "midp")), wald = {
        if (any(x == 0)) x <- x + 0.5
        lx <- log(x)
        or <- exp(lx[1, 1] + lx[2, 2] - lx[1, 2] - lx[2, 1])
        if (is.na(conf.level)) {
            res <- or
        } else {
            sigma2lor <- sum(1/x)
            ci <- or * exp(c(1, -1) * qnorm((1 - conf.level)/2) * 
                sqrt(sigma2lor))
            res <- c(`odds ratio` = or, lwr.ci = ci[1], upr.ci = ci[2])
        }
    }, mle = {
        if (is.na(conf.level)) {
            res <- unname(fisher.test(x, conf.int = FALSE)$estimate)
        } else {
            res <- fisher.test(x, conf.level = conf.level)
            res <- c(res$estimate, lwr.ci = res$conf.int[1], 
                upr.ci = res$conf.int[2])
        }
    }, midp = {
        a1 <- x[1, 1]
        a0 <- x[1, 2]
        b1 <- x[2, 1]
        b0 <- x[2, 2]
        or <- 1
        mue <- function(a1, a0, b1, b0, or) {
            mm <- matrix(c(a1, a0, b1, b0), 2, 2, byrow = TRUE)
            fisher.test(mm, or = or, alternative = "l")$p - fisher.test(x = x, 
                or = or, alternative = "g")$p
        }
        midp <- function(a1, a0, b1, b0, or = 1) {
            mm <- matrix(c(a1, a0, b1, b0), 2, 2, byrow = TRUE)
            lteqtoa1 <- fisher.test(mm, or = or, alternative = "l")$p.val
            gteqtoa1 <- fisher.test(mm, or = or, alternative = "g")$p.val
            0.5 * (lteqtoa1 - gteqtoa1 + 1)
        }
        EST <- uniroot(function(or) {
            mue(a1, a0, b1, b0, or)
        }, interval = interval)$root
        if (is.na(conf.level)) {
            res <- EST
        } else {
            alpha <- 1 - conf.level
            LCL <- uniroot(function(or) {
                1 - midp(a1, a0, b1, b0, or) - alpha/2
            }, interval = interval)$root
            UCL <- 1/uniroot(function(or) {
                midp(a1, a0, b1, b0, or = 1/or) - alpha/2
            }, interval = interval)$root
            res <- c(`odds ratio` = EST, lwr.ci = LCL, upr.ci = UCL)
        }
    })
    return(res)
}
<bytecode: 0x110276500>
<environment: namespace:DescTools>
getAnywhere(OddsRatio.default)
A single object matching 'OddsRatio.default' was found
It was found in the following places
  registered S3 method for OddsRatio from namespace DescTools
  namespace:DescTools
with value

function (x, conf.level = NULL, y = NULL, method = c("wald", 
    "mle", "midp"), interval = c(0, 1000), ...) 
{
    if (!is.null(y)) 
        x <- table(x, y, ...)
    if (is.null(conf.level)) 
        conf.level <- NA
    p <- (d <- dim(x))[1L]
    if (!is.numeric(x) || length(d) != 2L || p != d[2L] || p != 
        2L) 
        stop("'x' is not a 2x2 numeric matrix")
    switch(match.arg(arg = method, choices = c("wald", "mle", 
        "midp")), wald = {
        if (any(x == 0)) x <- x + 0.5
        lx <- log(x)
        or <- exp(lx[1, 1] + lx[2, 2] - lx[1, 2] - lx[2, 1])
        if (is.na(conf.level)) {
            res <- or
        } else {
            sigma2lor <- sum(1/x)
            ci <- or * exp(c(1, -1) * qnorm((1 - conf.level)/2) * 
                sqrt(sigma2lor))
            res <- c(`odds ratio` = or, lwr.ci = ci[1], upr.ci = ci[2])
        }
    }, mle = {
        if (is.na(conf.level)) {
            res <- unname(fisher.test(x, conf.int = FALSE)$estimate)
        } else {
            res <- fisher.test(x, conf.level = conf.level)
            res <- c(res$estimate, lwr.ci = res$conf.int[1], 
                upr.ci = res$conf.int[2])
        }
    }, midp = {
        a1 <- x[1, 1]
        a0 <- x[1, 2]
        b1 <- x[2, 1]
        b0 <- x[2, 2]
        or <- 1
        mue <- function(a1, a0, b1, b0, or) {
            mm <- matrix(c(a1, a0, b1, b0), 2, 2, byrow = TRUE)
            fisher.test(mm, or = or, alternative = "l")$p - fisher.test(x = x, 
                or = or, alternative = "g")$p
        }
        midp <- function(a1, a0, b1, b0, or = 1) {
            mm <- matrix(c(a1, a0, b1, b0), 2, 2, byrow = TRUE)
            lteqtoa1 <- fisher.test(mm, or = or, alternative = "l")$p.val
            gteqtoa1 <- fisher.test(mm, or = or, alternative = "g")$p.val
            0.5 * (lteqtoa1 - gteqtoa1 + 1)
        }
        EST <- uniroot(function(or) {
            mue(a1, a0, b1, b0, or)
        }, interval = interval)$root
        if (is.na(conf.level)) {
            res <- EST
        } else {
            alpha <- 1 - conf.level
            LCL <- uniroot(function(or) {
                1 - midp(a1, a0, b1, b0, or) - alpha/2
            }, interval = interval)$root
            UCL <- 1/uniroot(function(or) {
                midp(a1, a0, b1, b0, or = 1/or) - alpha/2
            }, interval = interval)$root
            res <- c(`odds ratio` = EST, lwr.ci = LCL, upr.ci = UCL)
        }
    })
    return(res)
}
<bytecode: 0x110276500>
<environment: namespace:DescTools>

7.9 unname

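Cramer’s Vをchisq.test()の結果を利用して計算する. Cramer’s Vは $V = \sqrt{\chi^2 / \{n \cdot \min(r - 1, c - 1)\}}$ で求められる($n$は総度数,$r$と$c$は行数と列数). その際,unname()によってカイ2乗統計量についている名前(names属性)を取り除く.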

occupationalStatus
      destination
origin   1   2   3   4   5   6   7   8
     1  50  19  26   8   7  11   6   2
     2  16  40  34  18  11  20   8   3
     3  12  35  65  66  35  88  23  21
     4  11  20  58 110  40 183  64  32
     5   2   8  12  23  25  46  28  12
     6  12  28 102 162  90 554 230 177
     7   0   6  19  40  21 158 143  71
     8   0   3  14  32  15 126  91 106
chisq.test(occupationalStatus)
Warning in chisq.test(occupationalStatus): Chi-squared approximation may be
incorrect

    Pearson's Chi-squared test

data:  occupationalStatus
X-squared = 1416, df = 49, p-value < 2.2e-16
X2 <- chisq.test(occupationalStatus)$statistic |> unname()
Warning in chisq.test(occupationalStatus): Chi-squared approximation may be
incorrect
X2
[1] 1416.04
my_CramerV <- sqrt(X2/min(dim(occupationalStatus)-1)/sum(occupationalStatus))
my_CramerV
[1] 0.2404799
DescTools::CramerV(occupationalStatus)
[1] 0.2404799