Day 11 of NTRESS 6000

Today we worked on data import, export, and conversion between data types!

first things first, we need to load tidyverse

library(tidyverse)

# Load the coronavirus data straight from the RamiKrispin/coronavirus
# GitHub repository.
coronavirus <- read_csv(
  file = "https://raw.githubusercontent.com/RamiKrispin/coronavirus/master/csv/coronavirus.csv"
)



We have been working with the coronavirus dataset loaded from the URL above.

# Total the positive case counts per date and case type, then draw them as
# stacked columns coloured by type.
coronavirus |>
  filter(cases > 0) |>
  group_by(date, type) |>
  # Drop the grouping explicitly: otherwise summarise() keeps the result
  # grouped by `date` and prints a message about it.
  summarise(cases = sum(cases), .groups = "drop") |>
  ggplot() +
  geom_col(aes(x = date, y = cases, fill = type))



this is what our data looks like

# Peek at the first six rows of the raw table.
head(coronavirus, n = 6)
## # A tibble: 6 × 15
##   date       province country   lat  long type  cases   uid iso2  iso3  code3 combined_key
##   <date>     <chr>    <chr>   <dbl> <dbl> <chr> <dbl> <dbl> <chr> <chr> <dbl> <chr>       
## 1 2020-01-22 Alberta  Canada   53.9 -117. conf…     0 12401 CA    CAN     124 Alberta, Ca…
## 2 2020-01-23 Alberta  Canada   53.9 -117. conf…     0 12401 CA    CAN     124 Alberta, Ca…
## 3 2020-01-24 Alberta  Canada   53.9 -117. conf…     0 12401 CA    CAN     124 Alberta, Ca…
## 4 2020-01-25 Alberta  Canada   53.9 -117. conf…     0 12401 CA    CAN     124 Alberta, Ca…
## 5 2020-01-26 Alberta  Canada   53.9 -117. conf…     0 12401 CA    CAN     124 Alberta, Ca…
## 6 2020-01-27 Alberta  Canada   53.9 -117. conf…     0 12401 CA    CAN     124 Alberta, Ca…
## # ℹ 3 more variables: population <dbl>, continent_name <chr>, continent_code <chr>


Let's select only the date, country, province, type, and cases columns from this raw dataset.
We then pipe that data frame into `filter()` to keep only the cases reported on January 3rd, 2021.

# Keep the five columns of interest, then only the rows dated 3 January 2021.
coronavirus |>
  select(date, country, province, type, cases) |>
  # `date` is a Date column, so compare it against a Date value instead of
  # relying on the character string being coerced.
  filter(date == as.Date("2021-01-03"))

okay i got too lazy with marking up this file so deal with some dirty code below!!

# Download the tidy Lord of the Rings table (columns Film, Race, Gender, Words).
lotr <- read_csv(
  file = "https://raw.githubusercontent.com/jennybc/lotr-tidy/master/data/lotr_tidy.csv"
)
## Rows: 18 Columns: 4
## ── Column specification ──────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Film, Race, Gender
## dbl (1): Words
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Save the table as a CSV in the working directory ...
lotr |> write_csv("lotr_tidy.csv")

# ... and read that same file back in.
lotr <- read_csv(file = "lotr_tidy.csv")
## Rows: 18 Columns: 4
## ── Column specification ──────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Film, Race, Gender
## dbl (1): Words
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Re-read the file but skip its first two lines. This discards the header line
# and the first data row, so the next row is taken as the column names and only
# 16 rows remain (see the column specification printed below).
lotr <- read_csv(file = "lotr_tidy.csv", skip = 2)
## Rows: 16 Columns: 4
## ── Column specification ──────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): The Fellowship Of The Ring, Hobbit, Female
## dbl (1): 14
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(readxl) ## install.packages("readxl")


# Read the "oii" worksheet of the local Excel workbook into a tibble.
lotr_xlsx <- read_xlsx(path = "LOTR.xlsx", sheet = "oii")

# Print it to check the import.
lotr_xlsx
## # A tibble: 18 × 4
##    Film                       Race   Gender Words
##    <chr>                      <chr>  <chr>  <dbl>
##  1 The Fellowship Of The Ring Elf    Female  1229
##  2 The Fellowship Of The Ring Hobbit Female    14
##  3 The Fellowship Of The Ring Man    Female     0
##  4 The Two Towers             Elf    Female   331
##  5 The Two Towers             Hobbit Female     0
##  6 The Two Towers             Man    Female   401
##  7 The Return Of The King     Elf    Female   183
##  8 The Return Of The King     Hobbit Female     2
##  9 The Return Of The King     Man    Female   268
## 10 The Fellowship Of The Ring Elf    Male     971
## 11 The Fellowship Of The Ring Hobbit Male    3644
## 12 The Fellowship Of The Ring Man    Male    1995
## 13 The Two Towers             Elf    Male     513
## 14 The Two Towers             Hobbit Male    2463
## 15 The Two Towers             Man    Male    3589
## 16 The Return Of The King     Elf    Male     510
## 17 The Return Of The King     Hobbit Male    2673
## 18 The Return Of The King     Man    Male    2459
library(googlesheets4) #install.packages("googlesheets4")

# Turn off Google authentication so read_sheet() makes an unauthenticated
# request (presumably this sheet is shared publicly; confirm if access fails).
gs4_deauth()
lotr_gs <- read_sheet(
  ss = "https://docs.google.com/spreadsheets/d/1X98JobRtA3JGBFacs_JSjiX-4DPQ0vZYtNl_ozqF6IE/edit#gid=754443596"
)
## ✔ Reading from "LOTR".
## ✔ Range 'tidy'.
# The parse_*() helpers turn text into numbers, which is useful when a column
# was not read in as numeric. Here the locale tells parse_double() that "," is
# the decimal mark, so "1,23" is parsed as 1.23.
parse_double(
  "1,23",
  locale = locale(decimal_mark = ",")
)
## [1] 1.23
# `mess` was never defined in these notes (running this chunk originally failed
# with "object 'mess' not found"), so create a small example table whose
# `price` column is text containing currency symbols and thousands separators.
mess <- tibble(price = c("$1,234.50", "USD 20", "7.99"))

# parse_number() pulls the number out of each string, giving a numeric column.
mess |>
  mutate(price = parse_number(price))
# Read the students CSV, treating empty cells and the text "N/A" as missing.
students <- read_csv(
  file = "https://pos.it/r4ds-students-csv",
  na = c("", "N/A")
)
## Rows: 6 Columns: 5
## ── Column specification ──────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Full Name, favourite.food, mealPlan, AGE
## dbl (1): Student ID
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Give the two columns whose names contain spaces snake_case names. The result
# is only printed, not saved back to `students`.
rename(
  students,
  student_id = `Student ID`,
  full_name = `Full Name`
)
## # A tibble: 6 × 5
##   student_id full_name        favourite.food     mealPlan            AGE  
##        <dbl> <chr>            <chr>              <chr>               <chr>
## 1          1 Sunil Huffmann   Strawberry yoghurt Lunch only          4    
## 2          2 Barclay Lynn     French fries       Lunch only          5    
## 3          3 Jayendra Lyne    <NA>               Breakfast and lunch 7    
## 4          4 Leon Rossini     Anchovies          Lunch only          <NA> 
## 5          5 Chidiegwu Dunkel Pizza              Breakfast and lunch five 
## 6          6 Güvenç Attila    Ice cream          Lunch only          6
library(janitor) #install.packages("janitor")

# Standardise the column names with janitor, then turn `age` into a number.
students |>
  clean_names(case = "lower_upper") |>
  mutate(
    # One age was typed as the word "five". Swap in "5" as text (if_else()
    # keeps both branches character, so nothing is silently coerced), then
    # parse the whole column as numbers.
    age = parse_number(if_else(age == "five", "5", age))
  )
## # A tibble: 6 × 5
##   studentID fullNAME         favouriteFOOD      mealPLAN              age
##       <dbl> <chr>            <chr>              <chr>               <dbl>
## 1         1 Sunil Huffmann   Strawberry yoghurt Lunch only              4
## 2         2 Barclay Lynn     French fries       Lunch only              5
## 3         3 Jayendra Lyne    <NA>               Breakfast and lunch     7
## 4         4 Leon Rossini     Anchovies          Lunch only             NA
## 5         5 Chidiegwu Dunkel Pizza              Breakfast and lunch     5
## 6         6 Güvenç Attila    Ice cream          Lunch only              6