This repository is used for keeping track with my progress for Data Engineering Zoomcamp ( https://github.com/DataTalksClub/data-engineering-zoomcamp ) and for uploading the homeworks and the final project of this course.
Homework 1 -> January 24
-- Q1 -> 24.3.1
-- Q2 -> db:5432
SQL -- Q3 -> 8007 SELECT COUNT(*) FROM green WHERE trip_distance <= 1 AND lpep_pickup_datetime BETWEEN DATE '2025-11-01' AND DATE '2025-12-01'; -- Q4 -> 2025-11-14 SELECT lpep_pickup_datetime FROM green WHERE trip_distance<=100 AND trip_distance = (SELECT MAX(trip_distance) FROM green WHERE trip_distance <= 100);
-- Q5 -> Flushing
SELECT z."Zone" FROM green g JOIN zones z ON g."PULocationID" = z."LocationID" WHERE g.total_amount = ( SELECT MAX(total_amount) FROM green WHERE lpep_pickup_datetime >= DATE '2025-11-18' AND lpep_pickup_datetime < DATE '2025-11-19' ); ;
-- Q6 -> Yorkville West
SELECT DISTINCT z_do."Zone" FROM green g JOIN zones z_pu ON g."PULocationID" = z_pu."LocationID" JOIN zones z_do ON g."DOLocationID" = z_do."LocationID" WHERE z_pu."Zone" = 'East Harlem North AND g."tip_amount" = ( SELECT MAX(g2."tip_amount") FROM green g2 JOIN zones z2 ON g2."PULocationID" = z2."LocationID" WHERE z2."Zone" = 'East Harlem North' );
Terraform -- Q7 terraform init, terraform apply -auto-approve, terraform destroy
Homework2 - FEB 7
-- Question 1. Counting records
SELECT COUNT(*) FROM terraform-484617.HW_03.yellow_taxies
Question 2. Data read estimation
SELECT COUNT() FROM terraform-484617.HW_03.yellow_taxies -> 0B
SELECT COUNT() FROM terraform-484617.HW_03.materialized_yellow_taxi -> 155.12MB
Question 3. Understanding columnar storage
SELECT PULocationID FROM terraform-484617.HW_03.materialized_yellow_taxi -> 155.12 MB
SELECT PULocationID, DOLocationID FROM terraform-484617.HW_03.materialized_yellow_taxi -> 310.24 MB
Question 4. Counting zero fare trips
SELECT COUNT(*) FROM terraform-484617.HW_03.materialized_yellow_taxi WHERE fare_amount = 0 -> 8333
Question 5. Partitioning and clustering -> Partition by tpep_dropoff_datetime and Cluster on VendorID
Question 6. Partition benefits
creating the partitioned table
CREATE TABLE terraform-484617.HW_03.partitioned_yellow_taxis
PARTITION BY DATE(tpep_dropoff_datetime)
AS
SELECT *
FROM terraform-484617.HW_03.yellow_taxies
SELECT DISTINCT VendorID FROM terraform-484617.HW_03.materialized_yellow_taxi WHERE tpep_dropoff_datetime BETWEEN '2024-03-01' AND '2024-03-15' -> 310.24 MB
SELECT DISTINCT VendorID FROM terraform-484617.HW_03.partitioned_yellow_taxis WHERE tpep_dropoff_datetime BETWEEN '2024-03-01' AND '2024-03-15' -> 26.84 MB