-
Notifications
You must be signed in to change notification settings - Fork 72
/
SQL - Data Exploration.sql
165 lines (115 loc) · 5.45 KB
/
SQL - Data Exploration.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
/*
Covid 19 Data Exploration
Skills used: Joins, CTE's, Temp Tables, Windows Functions, Aggregate Functions, Creating Views, Converting Data Types
*/
--------------------------------------------------------------------------------------------------------------------------
select *
from coviddeaths_csv
where continent is not null
order by 3, 4
select location , total_cases , new_cases , total_deaths , population
from coviddeaths_csv
order by 1 ,2
--------------------------------------------------------------------------------------------------------------------------
/*Looking at Total Cases vs Total Deaths*/
select location , `date` ,total_cases , total_deaths ,
(total_cases/population)*100 as death_percentage
from coviddeaths_csv
where location = 'Afghanistan'
order by 1 ,2
--------------------------------------------------------------------------------------------------------------------------
/*Looking at Total Cases vs Population*/
select location , `date` , population ,total_cases , total_deaths , (total_cases/population)*100 as death_percentage
from coviddeaths_csv
where location = 'Africa'
order by 1 ,2
--------------------------------------------------------------------------------------------------------------------------
/*Looking at Countries with Highest Infection rate compared to Population*/
select location , population , max(total_cases) ,
max(total_cases/population)*100 as percent_population_infected
from coviddeaths_csv
group by location, population
order by percent_population_infected desc
--------------------------------------------------------------------------------------------------------------------------
/*Showing Countries with Highest Death Count per Population*/
select location , sum(total_deaths) as total_death_count
from coviddeaths_csv
where continent is not null
group by location
order by total_death_count desc
--------------------------------------------------------------------------------------------------------------------------
/*BREAKING THINGS DOWN BY CONTINENT
Showing contintents with the highest death count per population*/
Select continent, MAX(cast(Total_deaths as int)) as total_death_count
From coviddeaths_csv
Where continent is not null
Group by continent
order by total_death_count desc
--------------------------------------------------------------------------------------------------------------------------
/*GLOBAL NUMBERS*/
select `date` , sum(new_cases) as total_cases , sum(new_deaths) total_deaths ,
sum(new_deaths)/sum(new_cases)*100 as new_death_percentage
from coviddeaths_csv
where continent is not null
group by `date`
--------------------------------------------------------------------------------------------------------------------------
/*Looking at Total Population vs Vaccination*/
select cd.continent , cd.location , cd.`date` , cd.population, cv.new_vaccinations
from coviddeaths_csv cd
join covidvacinations_csv cv
on cd.location = cv.location
and cd.`date` = cv.`date`
where cv.new_vaccinations is not null
--------------------------------------------------------------------------------------------------------------------------
/*Using CTE*/
with PopsVsVacc (Continent, Location, Date, New_Vaccination, RollingPeopleVaccinated)
as
(
select cd.continent , cd.location , cd.`date` , cv.new_vaccinations,
sum(cast (cv.new_vaccinations as int)) OVER
(partition by cd.location order by cd.location, cd.`date`)
as rolling_people_vaccinated,
(rolling_people_vaccinated/population)*100
from coviddeaths_csv cd
join covidvacinations_csv cv
on cd.location = cv.location
and cd.`date` = cv.`date`
where cv.new_vaccinations is not null
)
select *, (RollingPeopleVaccinated/Population)*100 as vaccination_percentage
from PopsVsVacc
--------------------------------------------------------------------------------------------------------------------------
/*TEMP TABLE*/
drop table if exists percentage_population_vaccinated
create temporary table percentage_population_vaccinated
(
Continent varchar(255),
Location varchar(255),
Date datetime,
Population numeric,
New_vaccinations numeric,
RollingPeopleVaccinated numeric
)
insert into percentage_population_vaccinated
select cd.continent , cd.location , cd.`date` , cv.new_vaccinations,
SUM(cv.new_vaccinations) OVER(partition by cv.location order by cv.location ,cv.`date` desc)
as rolling_people_vaccinated
from coviddeaths_csv cd
join covidvacinations_csv cv
on cd.location = cv.location
and cd.`date` = cv.`date`
where cv.new_vaccinations is not null
select *, (RollingPeopleVaccinated/Population)*100 as vaccination_percentage
from percentage_population_vaccinated
--------------------------------------------------------------------------------------------------------------------------
/*Creating View to store data for later visualization*/
create view percentage_population_vaccinated as
select cd.continent , cd.location , cd.`date` , cv.new_vaccinations,
SUM(cv.new_vaccinations) OVER(partition by cv.location order by cv.location ,cv.`date` desc)
as rolling_people_vaccinated
from coviddeaths_csv cd
join covidvacinations_csv cv
on cd.location = cv.location
and cd.`date` = cv.`date`
where cv.new_vaccinations is not null
--------------------------------------------------------------------------------------------------------------------------