Insights from Data with BigQuery: Challenge Lab

Photo by Campaign Creators on Unsplash

Activate Cloud Shell

gcloud auth list
gcloud config list project

Query 1: Total Confirmed Cases

SELECT sum(cumulative_confirmed) as total_cases_worldwide FROM `bigquery-public-data.covid19_open_data.covid19_open_data` where date='2020-04-15'

Query 2: Worst Affected Areas

with deaths_by_states as ( SELECT subregion1_name as state, sum(cumulative_deceased) as death_count FROM `bigquery-public-data.covid19_open_data.covid19_open_data` where country_name="United States of America" and date='2020-04-10' and subregion1_name is NOT NULL group by subregion1_name ) select count(*) as count_of_states from deaths_by_states where death_count > 100

Query 3: Identifying Hotspots

SELECT subregion1_name as state, sum(cumulative_confirmed) as total_confirmed_cases
FROM `bigquery-public-data.covid19_open_data.covid19_open_data` WHERE country_code="US" AND date='2020-04-10' AND subregion1_name is NOT NULL
GROUP BY subregion1_name ORDER BY total_confirmed_cases DESC ) WHERE total_confirmed_cases > 1000

Query 4: Fatality Ratio

1) SELECT sum(cumulative_confirmed) as total_confirmed_cases, sum(cumulative_deceased) as total_deaths, (sum(cumulative_deceased)/sum(cumulative_confirmed))*100 as case_fatality_ratio FROM `bigquery-public-data.covid19_open_data.covid19_open_data` where country_name="Italy" and date='2020-04-30'2) SELECT sum(cumulative_confirmed) as total_confirmed_cases, sum(cumulative_deceased) as total_deaths, (sum(cumulative_deceased)/sum(cumulative_confirmed))*100 as case_fatality_ratio FROM `bigquery-public-data.covid19_open_data.covid19_open_data` where country_name="Italy" AND date BETWEEN '2020-04-01'and '2020-04-30'

Query 5: Identifying specific day

SELECT dateFROM `bigquery-public-data.covid19_open_data.covid19_open_data` where country_name="Italy" and cumulative_deceased>10000 order by date asc limit 1

Query 6: Finding days with zero net new cases

WITH india_cases_by_date AS ( SELECT date, SUM( cumulative_confirmed ) AS cases FROM `bigquery-public-data.covid19_open_data.covid19_open_data` WHERE country_name ="India" AND date between '2020-02-21' and '2020-03-15' GROUP BY date ORDER BY date ASC ) , india_previous_day_comparison AS (SELECT date, cases, LAG(cases) OVER(ORDER BY date) AS previous_day, cases - LAG(cases) OVER(ORDER BY date) AS net_new_cases FROM india_cases_by_date ) select count(*) from india_previous_day_comparison where net_new_cases=0

Query 7: Doubling rate

WITH us_cases_by_date AS (SELECTdate,SUM(cumulative_confirmed) AS casesFROM`bigquery-public-data.covid19_open_data.covid19_open_data`WHEREcountry_name="United States of America"AND date between '2020-03-22' and '2020-04-20'GROUP BYdateORDER BYdate ASC), us_previous_day_comparison AS(SELECTdate,cases,LAG(cases) OVER(ORDER BY date) AS previous_day,cases - LAG(cases) OVER(ORDER BY date) AS net_new_cases,(cases - LAG(cases) OVER(ORDER BY date))*100/LAG(cases) OVER(ORDER BY date) AS percentage_increaseFROM us_cases_by_date)select Date, cases as Confirmed_Cases_On_Day, previous_day as Confirmed_Cases_Previous_Day, percentage_increase as Percentage_Increase_In_Casesfrom us_previous_day_comparisonwhere percentage_increase > 10

Query 8: Recovery rate

WITH cases_by_country AS ( SELECT country_name AS country, sum(cumulative_confirmed) AS cases, sum(cumulative_recovered) AS recovered_cases FROM bigquery-public-data.covid19_open_data.covid19_open_data WHERE date = '2020-05-10' GROUP BY country_name ) , recovered_rate AS(SELECT country, cases, recovered_cases, (recovered_cases * 100)/cases AS recovery_rate FROM cases_by_country ) SELECT country, cases AS confirmed_cases, recovered_cases, recovery_rate FROM recovered_rate WHERE cases > 50000 ORDER BY recovery_rate desc LIMIT 10

Query 9: CDGR — Cumulative Daily Growth Rate

WITH france_cases AS ( SELECT date, SUM(cumulative_confirmed) AS total_cases FROM `bigquery-public-data.covid19_open_data.covid19_open_data` WHERE country_name="France" AND date IN ('2020-01-24', '2020-05-10') GROUP BY date ORDER BY date) , summary as ( SELECT total_cases AS first_day_cases, LEAD(total_cases) OVER(ORDER BY date) AS last_day_cases, DATE_DIFF(LEAD(date) OVER(ORDER BY date),date, day) AS days_diff FROM france_cases LIMIT 1 ) select first_day_cases, last_day_cases, days_diff, POW((last_day_cases/first_day_cases),(1/days_diff))-1 as cdgr from summary

Create a Datastudio report

SELECT date, SUM(cumulative_confirmed) AS country_cases, SUM(cumulative_deceased) AS country_deaths FROM `bigquery-public-data.covid19_open_data.covid19_open_data` WHERE date BETWEEN '2020-03-15' AND '2020-04-30' AND country_name ="United States of America" GROUP BY date




Reach out to me at

Love podcasts or audiobooks? Learn on the go with our new app.

Recommended from Medium

3. The Red Village “Unofficial Rarity Guide”

Forex Trading Strategies and Techniques!

R — Basic Statistics

Data Science Is Helping Towards Sustainability. Here’s how!

Power Query Quickbytes# 2: Summary Statistics using Data Preview

4 Simple Ways to Import Word and PDF Files into Python when Pandas Fails

Algorithms Illuminated (2)

Different Plot Types on Matplotlib — Fill Between

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store
Ajil T U

Ajil T U

Reach out to me at

More from Medium

Data Cleansing made Simple using SimpleData Management

Company Analysis — Keep

Big Data

what is dbt