Day 11 of NTRESS 6000
today we were working on data import, export, and conversion between data types!
first things first, we need to load tidyverse
library(tidyverse)
# Read the coronavirus CSV straight from the RamiKrispin/coronavirus GitHub
# repository.
coronavirus <- read_csv(
  "https://raw.githubusercontent.com/RamiKrispin/coronavirus/master/csv/coronavirus.csv"
)
We have been working with this coronavirus dataset, which is read from the RamiKrispin/coronavirus GitHub repository above.
# Total the positive case counts per day and case type, then draw them as
# columns filled by type.
coronavirus |>
  filter(cases > 0) |>
  group_by(date, type) |>
  # .groups = "drop" returns an ungrouped tibble and silences the regrouping
  # message that summarise() otherwise prints.
  summarise(cases = sum(cases), .groups = "drop") |>
  ggplot() +
  geom_col(aes(x = date, y = cases, fill = type))
this is what our data looks like
# Preview the first rows of the raw data.
coronavirus |>
  head()
## # A tibble: 6 × 15
## date province country lat long type cases uid iso2 iso3 code3 combined_key
## <date> <chr> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr> <chr> <dbl> <chr>
## 1 2020-01-22 Alberta Canada 53.9 -117. conf… 0 12401 CA CAN 124 Alberta, Ca…
## 2 2020-01-23 Alberta Canada 53.9 -117. conf… 0 12401 CA CAN 124 Alberta, Ca…
## 3 2020-01-24 Alberta Canada 53.9 -117. conf… 0 12401 CA CAN 124 Alberta, Ca…
## 4 2020-01-25 Alberta Canada 53.9 -117. conf… 0 12401 CA CAN 124 Alberta, Ca…
## 5 2020-01-26 Alberta Canada 53.9 -117. conf… 0 12401 CA CAN 124 Alberta, Ca…
## 6 2020-01-27 Alberta Canada 53.9 -117. conf… 0 12401 CA CAN 124 Alberta, Ca…
## # ℹ 3 more variables: population <dbl>, continent_name <chr>, continent_code <chr>
Let's trim this raw dataset with `select()` to keep only the `date`, `country`, `province`, `type`, and `cases` columns.
We then pipe that data frame into `filter()` to keep only the cases on January 3rd, 2021.
# Keep five columns, then only the rows dated 2021-01-03.
coronavirus |>
  select(date, country, province, type, cases) |>
  filter(date == as.Date("2021-01-03"))
Okay, I got too lazy to keep marking up this file, so deal with some dirty code below!!
# Download the tidy Lord of the Rings data (Film, Race, Gender, Words).
lotr <- read_csv(
  "https://raw.githubusercontent.com/jennybc/lotr-tidy/master/data/lotr_tidy.csv"
)
## Rows: 18 Columns: 4
## ── Column specification ──────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Film, Race, Gender
## dbl (1): Words
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Save a local copy of the data, then read that copy back in.
lotr |>
  write_csv("lotr_tidy.csv")
lotr <- read_csv(file = "lotr_tidy.csv")
## Rows: 18 Columns: 4
## ── Column specification ──────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Film, Race, Gender
## dbl (1): Words
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# skip = 2 drops the first two lines of the file (the header and the first
# record), so the next record ends up being used as the column names.
# Note that this overwrites `lotr`.
lotr <- read_csv(file = "lotr_tidy.csv", skip = 2)
## Rows: 16 Columns: 4
## ── Column specification ──────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): The Fellowship Of The Ring, Hobbit, Female
## dbl (1): 14
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(readxl) ## install.packages("readxl")
# Read the "oii" worksheet from the local Excel workbook and print it.
lotr_xlsx <- read_xlsx(path = "LOTR.xlsx", sheet = "oii")
lotr_xlsx
## # A tibble: 18 × 4
## Film Race Gender Words
## <chr> <chr> <chr> <dbl>
## 1 The Fellowship Of The Ring Elf Female 1229
## 2 The Fellowship Of The Ring Hobbit Female 14
## 3 The Fellowship Of The Ring Man Female 0
## 4 The Two Towers Elf Female 331
## 5 The Two Towers Hobbit Female 0
## 6 The Two Towers Man Female 401
## 7 The Return Of The King Elf Female 183
## 8 The Return Of The King Hobbit Female 2
## 9 The Return Of The King Man Female 268
## 10 The Fellowship Of The Ring Elf Male 971
## 11 The Fellowship Of The Ring Hobbit Male 3644
## 12 The Fellowship Of The Ring Man Male 1995
## 13 The Two Towers Elf Male 513
## 14 The Two Towers Hobbit Male 2463
## 15 The Two Towers Man Male 3589
## 16 The Return Of The King Elf Male 510
## 17 The Return Of The King Hobbit Male 2673
## 18 The Return Of The King Man Male 2459
library(googlesheets4) #install.packages("googlesheets4")
# Put googlesheets4 into its de-authorized state, then read the sheet by URL.
gs4_deauth()
lotr_gs <- read_sheet(
  ss = "https://docs.google.com/spreadsheets/d/1X98JobRtA3JGBFacs_JSjiX-4DPQ0vZYtNl_ozqF6IE/edit#gid=754443596"
)
## ✔ Reading from "LOTR".
## ✔ Range 'tidy'.
# Parse text as a number; helpful when your data isn't read in as numeric.
# The locale here says that "," is the decimal mark.
parse_double(
  "1,23",
  locale = locale(decimal_mark = ",")
)
## [1] 1.23
# `mess` was never created in this file, so the pipeline below failed with
# "object 'mess' not found". Define a small stand-in tibble whose `price`
# column is text with currency symbols, grouping marks, and a percent sign.
mess <- tibble(price = c("$1,234", "USD 3,513", "59%"))
# parse_number() pulls the number out of each string and drops the rest.
mess |>
  mutate(
    price = parse_number(price)
  )
## # A tibble: 3 × 1
## price
## <dbl>
## 1 1234
## 2 3513
## 3 59
# Read the students data, treating empty strings and "N/A" as missing.
students <- read_csv(
  file = "https://pos.it/r4ds-students-csv",
  na = c("", "N/A")
)
## Rows: 6 Columns: 5
## ── Column specification ──────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Full Name, favourite.food, mealPlan, AGE
## dbl (1): Student ID
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Give the two columns whose names contain spaces snake_case names.
students |>
  rename(student_id = `Student ID`, full_name = `Full Name`)
## # A tibble: 6 × 5
## student_id full_name favourite.food mealPlan AGE
## <dbl> <chr> <chr> <chr> <chr>
## 1 1 Sunil Huffmann Strawberry yoghurt Lunch only 4
## 2 2 Barclay Lynn French fries Lunch only 5
## 3 3 Jayendra Lyne <NA> Breakfast and lunch 7
## 4 4 Leon Rossini Anchovies Lunch only <NA>
## 5 5 Chidiegwu Dunkel Pizza Breakfast and lunch five
## 6 6 Güvenç Attila Ice cream Lunch only 6
library(janitor) #install.packages("janitor")
# Standardise the column names, then turn the age column into numbers.
students |>
  clean_names(case = "lower_upper") |>
  mutate(
    # Swap the spelled-out "five" for the text "5" so both branches of
    # if_else() are character (no silent numeric-to-character coercion),
    # then parse the whole column as a number.
    age = parse_number(if_else(age == "five", "5", age))
  )
## # A tibble: 6 × 5
## studentID fullNAME favouriteFOOD mealPLAN age
## <dbl> <chr> <chr> <chr> <dbl>
## 1 1 Sunil Huffmann Strawberry yoghurt Lunch only 4
## 2 2 Barclay Lynn French fries Lunch only 5
## 3 3 Jayendra Lyne <NA> Breakfast and lunch 7
## 4 4 Leon Rossini Anchovies Lunch only NA
## 5 5 Chidiegwu Dunkel Pizza Breakfast and lunch 5
## 6 6 Güvenç Attila Ice cream Lunch only 6