Insights from Data with BigQuery: Challenge Lab

Photo by Campaign Creators on Unsplash

Activate Cloud Shell

gcloud auth list
gcloud config list project

Query 1: Total Confirmed Cases

SELECT sum(cumulative_confirmed) as total_cases_worldwide FROM `bigquery-public-data.covid19_open_data.covid19_open_data` where date='2020-04-15'

Query 2: Worst Affected Areas

with deaths_by_states as ( SELECT subregion1_name as state, sum(cumulative_deceased) as death_count FROM `bigquery-public-data.covid19_open_data.covid19_open_data` where country_name="United States of America" and date='2020-04-10' and subregion1_name is NOT NULL group by subregion1_name ) select count(*) as count_of_states from deaths_by_states where death_count > 100

Query 3: Identifying Hotspots

SELECT * FROM ( 
SELECT subregion1_name as state, sum(cumulative_confirmed) as total_confirmed_cases
FROM `bigquery-public-data.covid19_open_data.covid19_open_data` WHERE country_code="US" AND date='2020-04-10' AND subregion1_name is NOT NULL
GROUP BY subregion1_name ORDER BY total_confirmed_cases DESC ) WHERE total_confirmed_cases > 1000

Query 4: Fatality Ratio

1) SELECT sum(cumulative_confirmed) as total_confirmed_cases, sum(cumulative_deceased) as total_deaths, (sum(cumulative_deceased)/sum(cumulative_confirmed))*100 as case_fatality_ratio FROM `bigquery-public-data.covid19_open_data.covid19_open_data` where country_name="Italy" and date='2020-04-30'2) SELECT sum(cumulative_confirmed) as total_confirmed_cases, sum(cumulative_deceased) as total_deaths, (sum(cumulative_deceased)/sum(cumulative_confirmed))*100 as case_fatality_ratio FROM `bigquery-public-data.covid19_open_data.covid19_open_data` where country_name="Italy" AND date BETWEEN '2020-04-01'and '2020-04-30'

Query 5: Identifying specific day

SELECT dateFROM `bigquery-public-data.covid19_open_data.covid19_open_data` where country_name="Italy" and cumulative_deceased>10000 order by date asc limit 1

Query 6: Finding days with zero net new cases

WITH india_cases_by_date AS ( SELECT date, SUM( cumulative_confirmed ) AS cases FROM `bigquery-public-data.covid19_open_data.covid19_open_data` WHERE country_name ="India" AND date between '2020-02-21' and '2020-03-15' GROUP BY date ORDER BY date ASC ) , india_previous_day_comparison AS (SELECT date, cases, LAG(cases) OVER(ORDER BY date) AS previous_day, cases - LAG(cases) OVER(ORDER BY date) AS net_new_cases FROM india_cases_by_date ) select count(*) from india_previous_day_comparison where net_new_cases=0

Query 7: Doubling rate

WITH us_cases_by_date AS (SELECTdate,SUM(cumulative_confirmed) AS casesFROM`bigquery-public-data.covid19_open_data.covid19_open_data`WHEREcountry_name="United States of America"AND date between '2020-03-22' and '2020-04-20'GROUP BYdateORDER BYdate ASC), us_previous_day_comparison AS(SELECTdate,cases,LAG(cases) OVER(ORDER BY date) AS previous_day,cases - LAG(cases) OVER(ORDER BY date) AS net_new_cases,(cases - LAG(cases) OVER(ORDER BY date))*100/LAG(cases) OVER(ORDER BY date) AS percentage_increaseFROM us_cases_by_date)select Date, cases as Confirmed_Cases_On_Day, previous_day as Confirmed_Cases_Previous_Day, percentage_increase as Percentage_Increase_In_Casesfrom us_previous_day_comparisonwhere percentage_increase > 10

Query 8: Recovery rate

WITH cases_by_country AS ( SELECT country_name AS country, sum(cumulative_confirmed) AS cases, sum(cumulative_recovered) AS recovered_cases FROM bigquery-public-data.covid19_open_data.covid19_open_data WHERE date = '2020-05-10' GROUP BY country_name ) , recovered_rate AS(SELECT country, cases, recovered_cases, (recovered_cases * 100)/cases AS recovery_rate FROM cases_by_country ) SELECT country, cases AS confirmed_cases, recovered_cases, recovery_rate FROM recovered_rate WHERE cases > 50000 ORDER BY recovery_rate desc LIMIT 10

Query 9: CDGR — Cumulative Daily Growth Rate

WITH france_cases AS ( SELECT date, SUM(cumulative_confirmed) AS total_cases FROM `bigquery-public-data.covid19_open_data.covid19_open_data` WHERE country_name="France" AND date IN ('2020-01-24', '2020-05-10') GROUP BY date ORDER BY date) , summary as ( SELECT total_cases AS first_day_cases, LEAD(total_cases) OVER(ORDER BY date) AS last_day_cases, DATE_DIFF(LEAD(date) OVER(ORDER BY date),date, day) AS days_diff FROM france_cases LIMIT 1 ) select first_day_cases, last_day_cases, days_diff, POW((last_day_cases/first_day_cases),(1/days_diff))-1 as cdgr from summary

Create a Datastudio report

SELECT date, SUM(cumulative_confirmed) AS country_cases, SUM(cumulative_deceased) AS country_deaths FROM `bigquery-public-data.covid19_open_data.covid19_open_data` WHERE date BETWEEN '2020-03-15' AND '2020-04-30' AND country_name ="United States of America" GROUP BY date

--

--

--

Reach out to me at https://twitter.com/itsajil

Love podcasts or audiobooks? Learn on the go with our new app.

Recommended from Medium

3. The Red Village “Unofficial Rarity Guide”

Forex Trading Strategies and Techniques!

R — Basic Statistics

Data Science Is Helping Towards Sustainability. Here’s how!

Power Query Quickbytes# 2: Summary Statistics using Data Preview

4 Simple Ways to Import Word and PDF Files into Python when Pandas Fails

Algorithms Illuminated (2)

Different Plot Types on Matplotlib — Fill Between

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store
Ajil T U

Ajil T U

Reach out to me at https://twitter.com/itsajil

More from Medium

Data Cleansing made Simple using SimpleData Management

Company Analysis — Keep

Big Data

what is dbt