I don't even know what day I'm on for NTRESS6000.

Today we are talking about data wrangling.

This is the R package I'll be using: coronavirus.

  • I've done a lot over the past day on data wrangling; below is my working document from the day:

library(tidyverse)
library(skimr) # install.packages("skimr")

# Load the coronavirus dataset from the CSV hosted on GitHub
coronavirus <- read_csv("https://raw.githubusercontent.com/RamiKrispin/coronavirus/refs/heads/master/csv/coronavirus.csv")

# Open the spreadsheet-style viewer only in an interactive session:
# View() needs a data viewer and can abort the script when it is run
# non-interactively (e.g. via Rscript or while knitting).
if (interactive()) {
  View(coronavirus)
}

# Peek at the first two rows
head(coronavirus, 2)

# Column-by-column summary from skimr
skim(coronavirus)

# Keep only the rows with a positive case count
filter(coronavirus, cases > 0)

# Every row whose country is anything other than the US
filter(coronavirus, country != "US")

# Only the rows whose country is the US
filter(coronavirus, country == "US")

# Same US-only filter, stored for later use
coronavirus_US <- filter(coronavirus, country == "US")

# Two countries: join the conditions with | (or)
coronavirus_US_canada <- filter(
  coronavirus,
  country == "US" | country == "Canada"
)

# Several countries at once: %in% tests membership in a vector
coronavirus_US_canada_MX <- filter(
  coronavirus,
  country %in% c("US", "Canada", "Mexico")
)

# US rows restricted to death counts. Joining the conditions with & or
# separating them with a comma is interchangeable inside filter().
filter(coronavirus, country == "US" & type == "death")
US_deaths <- filter(coronavirus, country == "US", type == "death")

# Total of the US death counts: extract the cases column as a vector, then sum
death_count <- US_deaths[["cases"]]
sum(death_count)

# Death counts on 2021-09-17 for France, Germany, and Denmark
filter(
  coronavirus,
  date == "2021-09-17",
  type == "death",
  country %in% c("France", "Germany", "Denmark")
)

# select() returns the columns in the order they are listed
select(coronavirus, country, type, date, cases)

# Drop the country column and keep everything else
select(coronavirus, -country)

# Pipe operator: |>  (keyboard shortcut: Cmd + Shift + M)

coronavirus |>
  head()

coronavirus_us_pipe <- coronavirus |>
  filter(country == "US")

# Piping keeps the workflow linear: each step feeds the next one

coronavirus |>
  filter(country == "Denmark") |>
  select(date, country, type, cases)

# Death counts for the US, Canada, and Mexico, trimmed to three columns
recap <- coronavirus |>
  filter(
    type == "death",
    country %in% c("US", "Canada", "Mexico")
  ) |>
  select(country, date, cases)

# Earlier plotting draft, no longer needed:
# ggplot() +
#   geom_line(aes(x = date, y = cases, color = country))


# Experimenting with faceting: one panel per country
ggplot(recap, aes(x = date, y = cases, color = country)) +
  geom_line() +
  facet_grid(~country) +
  theme_bw()