I don't even know what day I'm on for NTRESS6000.
Today we are talking about data wrangling.
This is the R package I'll be using: coronavirus.
- I've done a lot on data wrangling over the past day; below is my working document from the day:
library(tidyverse)
library(skimr) # install.packages("skimr")
# Load the coronavirus data from the CSV hosted in the RamiKrispin/coronavirus
# GitHub repository.
coronavirus <- read_csv(
  "https://raw.githubusercontent.com/RamiKrispin/coronavirus/refs/heads/master/csv/coronavirus.csv"
)

# View() opens a data viewer window, so only call it in an interactive session;
# this keeps the script runnable through Rscript or when knitting.
if (interactive()) {
  View(coronavirus)
}

# Peek at the first two rows, then print a skimr summary of the data frame.
head(coronavirus, 2)
skim(coronavirus)
# Keep only the rows whose case count is greater than zero.
filter(coronavirus, cases > 0)

# Every row that is NOT from the US.
filter(coronavirus, country != "US")

# Every row that IS from the US: store the subset, then print it.
coronavirus_US <- filter(coronavirus, country == "US")
coronavirus_US

# Two countries, written with the OR operator.
coronavirus_US_canada <- filter(
  coronavirus,
  country == "US" | country == "Canada"
)

# For a longer list of countries, match against a vector with %in%.
coronavirus_US_canada_MX <- filter(
  coronavirus,
  country %in% c("US", "Canada", "Mexico")
)
# US rows restricted to death counts. Inside filter(), combining conditions
# with `&` or separating them with commas is interchangeable.
filter(coronavirus, country == "US" & type == "death")
US_deaths <- filter(coronavirus, country == "US", type == "death")

# Pull out the case counts for the US death rows and add them up.
death_count <- US_deaths[["cases"]]
sum(death_count)

# Death rows dated 2021-09-17 for France, Germany, and Denmark.
filter(
  coronavirus,
  date == "2021-09-17",
  type == "death",
  country %in% c("France", "Germany", "Denmark")
)
# select() keeps the named columns, in the order they are written.
select(coronavirus, country, type, date, cases)

# Negating a column name keeps every column except that one.
select(coronavirus, !country)
# The pipe operator is `|>` (keyboard shortcut: Cmd + Shift + M). It passes the
# left-hand side in as the first argument of the call on its right.
coronavirus |>
  head()

coronavirus_us_pipe <- coronavirus |>
  filter(country == "US")

# Piping keeps a multi-step workflow linear: one step per line.
coronavirus |>
  filter(country == "Denmark") |>
  select(date, country, type, cases)

# Death rows for the US, Canada, and Mexico, trimmed to the columns used for
# plotting further down.
recap <- coronavirus |>
  filter(
    type == "death",
    country %in% c("US", "Canada", "Mexico")
  ) |>
  select(country, date, cases)
# Earlier draft, no longer needed:
# ggplot() +
#   geom_line(aes(x = date, y = cases, color = country))

# Experiment: facet the line chart so each country gets its own panel.
recap |>
  ggplot(mapping = aes(x = date, y = cases, color = country)) +
  geom_line() +
  facet_grid(~country) +
  theme_bw()