Skip to content

Commit d81f5b1

Browse files
authored
SQL Data Cleaning
1 parent dc48865 commit d81f5b1

File tree

1 file changed

+23
-0
lines changed

1 file changed

+23
-0
lines changed

warehouse Data Cleaning.sql

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
-- data cleaning
2+
-- check for missing values
3+
4+
-- List every row of project that has a NULL in at least one of the
-- nine columns tested below (one predicate per line for easy auditing).
select *
from project
where year is null
   or month is null
   or supplier is null
   or itemcode is null
   or itemdescription is null
   or itemtype is null
   or retailsales is null
   or retailtransfers is null
   or warehousesales is null;
9+
10+
-- check for duplicates
11+
12+
-- Find fully identical rows: group on all nine columns and keep only the
-- value combinations that occur more than once. Each duplicated combination
-- is returned as a single row.
select
    year,
    month,
    supplier,
    itemcode,
    itemdescription,
    itemtype,
    retailsales,
    retailtransfers,
    warehousesales
from project
group by
    year,
    month,
    supplier,
    itemcode,
    itemdescription,
    itemtype,
    retailsales,
    retailtransfers,
    warehousesales
having count(*) > 1;
16+
17+
/* removing rows with missing values
18+
since they make up less than 1% of the whole dataset */
19+
20+
-- Delete every row of project that has a NULL in any of the nine columns
-- below. The predicate is the same one used by the missing-values check
-- earlier in this script, so that query previews the rows removed here.
delete from project
where year is null
   or month is null
   or supplier is null
   or itemcode is null
   or itemdescription is null
   or itemtype is null
   or retailsales is null
   or retailtransfers is null
   or warehousesales is null;

0 commit comments

Comments
 (0)