Since the schema has changed, you need to revise the regular population script as well. You need the script to also populate the product_count_fact table right after the product_dim table gets populated. Listing 23.5 shows the revised regular population script.
Listing 21.5: The revised daily DW regular population script
/**********************************************************************/ /* */ /* dw_regular_21.sql */ /* */ /**********************************************************************/ USE dw; /* CUSTOMER_DIM POPULATION */ TRUNCATE customer_stg; LOAD DATA INFILE 'customer.csv' INTO TABLE customer_stg FIELDS TERMINATED BY ', ' OPTIONALLY ENCLOSED BY "" LINES TERMINATED BY '\r\n' IGNORE 1 LINES ( customer_number , customer_name , customer_street_address , customer_zip_code , customer_city , customer_state , shipping_address , shipping_zip_code , shipping_city , shipping_state ) ; /* SCD 2ON ADDRESSES */ UPDATE customer_dim a , customer_stg b SET a.expiry_date = SUBDATE (CURRENT_DATE, 1) WHERE a.customer_number = b.customer_number AND ( a.customer_street_address <> b.customer_street_address OR a.customer_city <> b.customer_city OR a.customer_zip_code <> b.customer_zip_code OR a.customer_state <> b.customer_state OR a.shipping_address <> b.shipping_address OR a.shipping_city <> b.shipping_city OR a.shipping_zip_code <> b.shipping_zip_code OR a.shipping_state <> b.shipping_state OR a.shipping_address IS NULL OR a.shipping_city IS NULL OR a.shipping_zip_code IS NULL OR a.shipping_state IS NULL) AND expiry_date = '9999-12-31' ; INSERT INTO customer_dim SELECT NULL , b.customer_number , b.customer_name , b.customer_street_address , b.customer_zip_code , b.customer_city , b.customer_state , b.shipping_address , b.shipping_zip_code , b.shipping_city , b.shipping_state , CURRENT_DATE , '9999-12-31' FROM customer_dim a , customer_stg b WHERE a.customer_number = b.customer_number AND ( a.customer_street_address <> b.customer_street_address OR a.customer_city <> b.customer_city OR a.customer_zip_code <> b.customer_zip_code OR a.customer_state <> b.customer_state OR a.shipping_address <> b.shipping_address OR a.shipping_city <> b.shipping_city OR a.shipping_zip_code <> b.shipping_zip_code OR a.shipping_state <> b.shipping_state OR a.shipping_address IS NULL OR a.shipping_city IS NULL OR a.shipping_zip_code IS NULL OR a.shipping_state IS NULL) AND EXISTS ( SELECT * FROM customer_dim x WHERE b.customer_number = x.customer_number AND a.expiry_date = SUBDATE (CURRENT_DATE, 1)) AND NOT EXISTS ( SELECT * FROM customer_dim y WHERE b.customer_number = y.customer_number AND y.expiry_date = '9999-12-31') ; /* END OF SCD2 */ /* SCD 1 ON NAME */ UPDATE customer_dim a, customer_stg b SET a.customer_name = b.customer_name WHERE a.customer_number = b.customer_number AND a.expiry_date = '9999-12-31' AND a.customer_name <> b.customer_name ; /* ADD NEW CUSTOMER */ INSERT INTO customer_dim SELECT NULL , customer_number , customer_name , customer_street_address , customer_zip_code , customer_city , customer_state , shipping_address , shipping_zip_code , shipping_city , shipping_state , CURRENT_DATE , '9999-12-31' FROM customer_stg WHERE customer_number NOT IN( SELECT a.customer_number FROM customer_dim a , customer_stg b WHERE b.customer_number = a.customer_number ) ; /* RE-BUILD PA CUSTOMER DIMENSION */ TRUNCATE pa_customer_dim; INSERT INTO pa_customer_dim SELECT customer_sk , customer_number , customer_name , customer_street_address , customer_zip_code , customer_city , customer_state , shipping_address , shipping_zip_code , shipping_city , shipping_state , effective_date , expiry_date FROM customer_dim WHERE customer state = 'PA' ; /* END OF CUSTOMER_DIM POPULATION */ /* PRODUCT_DIM POPULATION */ TRUNCATE product_stg ; LOAD DATA INFILE 'product.txt' INTO TABLE product_stg FIELDS TERMINATED BY '' OPTIONALLY ENCLOSED BY '' LINES TERMINATED BY '\r\n' IGNORE 1 LINES ( product_code , product_name , product_category ) ; /* SCD2ON PRODUCT NAME AND GROUP */ UPDATE product_dim a , product_stg b SET expiry_date = SUBDATE (CURRENT_DATE, 1) WHERE a.product_code = b.product_code Revising the DW Regular Population Script AND ( a.product_name <> b.product_name OR a.product_category <> b.product_category ) AND expiry_date = '9999-12-31' ; INSERT INTO product_dim SELECT NULL , b.product_code , b.product_name , b.product_category , CURRENT_DATE , '9999-12-31' FROM product_dim a , product_stg b WHERE a.product_code = b.product_code AND ( a.product_name <> b.product_name OR a.product_category <> b.product_category ) AND EXISTS ( SELECT * FROM product_dim x WHERE b.product_code = x.product_code AND a.expiry_date - SUBDATE (CURRENT_DATE, 1)) AND NOT EXISTS ( SELECT * FROM product_dim y WHERE b.product_code = y.product_code AND y.expiry_date = '9999-12-31') ; /* END OF SCD 2 */ /* ADD NEW PRODUCT */ INSERT INTO product_dim SELECT NULL , product_code , product_name , product_category , CURRENT_DATE , '9999-12-31' FROM product_stg WHERE product_code NOT IN( SELECT y.product_code FROM product_dim x, product_stg y WHERE x.product_code = y.product_code ) ; /* END OF PRODUCT_DIM POPULATION */ /* PRODUCT_COUNT_FACT POPULATION */ TRUNCATE product_count_fact ; INSERT INTO product_count_fact (product_sk, product_launch_date_sk) SELECT a.product_sk , b.date_sk FROM product_dim a , date_dim b WHERE a.effective_date = b.date GROUP BY product_code HAVING COUNT (product_code) = 1 ; /* for products that have been updated by SCD2 */ INSERT INTO product_count_fact (product_sk, product_launch_date_sk) SELECT a.product_sk , b.date_sk FROM product_dim a , date_dim b WHERE a.effective_date = b.date GROUP BY product_code HAVING COUNT (product_code) > 1 ; /* END OF PRODUCT_COUNT_FACT POPULATION */ /* insert new orders */ INSERT INTO sales_order_fact SELECT b.customer_sk , c.product_sk , f.sales_order_attribute_sk , d.order_date_sk , NULL , NULL , NULL , NULL , a.order_number , e.request_delivery_date_sk , order_amount , quantity , NULL , NULL , NULL , NULL FROM source.sales_order a , customer_dim b , product_dim c , order_date_dim d , request_delivery_date_dim e , sales_order_attribute_dim f WHERE order_status = 'N' AND entry_date = CURRENT_DATE AND a.customer_number = b.customer_number AND a.status_date >= b.effective_date AND a.status_date <= b.expiry_date AND a.product_code = c.product_code AND a.status_date >= c.effective_date AND a.status_date <= c.expiry_date AND a.status_date = d.order_date AND a.request_delivery_date = e.request_delivery_date AND a.verification_ind = f.verification_ind AND a.credit_check_flag = f.credit_check_flag AND a.new_customer_ind = f.new_customer_ind AND a.web_order_flag = f.web_order_flag AND a.status_date >= f.effective_date AND a.status_date <= f.expiry_date ; UPDATE sales_order_fact a, source.sales_order b, allocate_date_dim c SET a.allocate_date_sk = c.allocate_date_sk , a.allocate_quantity = b.quantity WHERE order_status = 'A' AND b.entry_date = CURRENT_DATE AND b.order_number = a.order_number AND c.allocate date = b.status date ; UPDATE sales_order_fact a, source.sales_order b, packing_date_dim d SET a.packing_date_sk = d.packing_date_sk , a.packing_quantity = b.quantity WHERE order_status = 'P' AND b.entry_date = CURRENT_DATE AND b.order_number = a.order_number AND d.packing_date = b.status_date ; UPDATE sales_order_fact a, source.sales_order b, ship_date_dim e SET a.ship_date_sk = e.ship_date_sk , a.ship_quantity = b.quantity WHERE order_status = 'S' AND b.entry_date = CURRENT_DATE AND b.order_number = a.order_number AND e.ship_date = b.status_date ; UPDATE sales_order_fact a, source.sales_order b, receive_date_dim f SET a.receive_date_sk = f.receive_date_sk , a.receive_quantity = b.quantity WHERE order_status = 'R' AND b.entry_date = CURRENT_DATE AND b.order_number = a.order_number AND f.receive date = b.status date ; /* end of script */
Before you can test the revised regular population script, you need to do two things. First, you need to prepare a product text file. Use the previous product text file, but change the name of Product code 1 to ‘Regular Hard Disk Drive’ and add a new product ‘High End Hard Disk Drive’ (product code 5), as shown here:
PRODUCT CODE, PRODUCT NAME, PRODUCT GROUP 1 Regular Hard Disk Drive Storage 2 Floppy Drive Storage 3 Flat Panel Monitor 4 Keyboard Peripheral 5 High End Hard Disk Drive Storage
Second, you need to set your MySQL date to March 20, 2007 (a date later than March 16, 2007, which is the last date you ran the regular population script in Chapter 18). Now run the dw_regular_21.sql script:
mysql> \. c:\mysql\scripts\dw_regular_21.sql
You’ll see the following message on your console.
Database changed Query OK, 9 rows affected (0.10 sec) Query OK, 9 rows affected (0.14 sec) Records: 9 Deleted: 0 Skipped: 0 Warnings: 0 Query OK, 0 rows affected (0.00 sec) Rows matched: 0 Changed: 0 Warnings: 0 Query OK, 0 rows affected (0.00 sec) Records: 0 Duplicates: 0 Warnings: 0 Query OK, 0 rows affected (0.01 sec) Rows matched: 0 Changed: 0 Warnings: 0 Query OK, 0 rows affected (0.00 sec) Records: 0 Duplicates: 0 Warnings: 0 Query OK, 19 rows affected (0.04 sec) Query OK, 19 rows affected (0.11 sec) Records: 19 Duplicates: 0 Warnings: 0 Query OK, 4 rows affected (0.09 sec) Query OK, 5 rows affected (0.06 sec) Records: 5 Deleted: 0 Skipped: 0 Warnings: 0 Query OK, 1 row affected (0.04 sec) Rows matched: 1 Changed: 1 Warnings: 0 Query OK, 1 row affected (0.06 sec) Records: 1 Duplicates: 0 Warnings: 0 Query OK, 1 row affected (0.05 sec) Records: 1 Duplicates: 0 Warnings: 0 Query OK, 2 rows affected (0.10 sec) Records: 2 Duplicates: 0 Warnings: 0 Query OK, 1 row affected (0.08 sec) Rows matched: 1 Changed: 1 Warnings: 0 Query OK, 0 rows affected (0.00 sec) Records: 0 Duplicates: 0 Warnings: 0 Query OK, 0 rows affected (0.01 sec) Rows matched: 0 Changed: 0 Warnings: 0 Query OK, 0 rows affected (0.00 sec) Rows matched: 0 Changed: 0 Warnings: 0 Query OK, 0 rows affected (0.01 sec) Rows matched: 0 Changed: 0 Warnings: 0 Query OK, 0 rows affected (0.01 sec) Rows matched: 0 Changed: 0 Warnings: 0
You can confirm that the regular population is correct by querying the product_count_fact table:
mysql> SELECT c.product_sk psk, c.product_code pc, b.product_launch_date_sk plsk, -> b.product_launch_date pld -> FROM product_count_fact a, product_launch_date_dim b, -> product_dim c -> WHERE a.product_launch_date_sk = b.product_launch_date_sk -> AND a.product_sk = c.product_sk -> ORDER BY product_code, product_launch_date;
Here is the correct result.
+-----+------+------+------------+ | psk | pc | plsk | pld | +-----+------+------+------------+ | 1 | 1 | 1 | 2005-03-01 | | 2 | 2 | 1 | 2005-03-01 | | 3 | 3 | 1 | 2005-03-01 | | 5 | 4 | 731 | 2007-03-01 | | 7 | 5 | 750 | 2007-03-20 | +-----+------+------+------------+ 5 rows in set (0.01 sec)
Note | The launch date of the new product code (March 20, 200) was added. The change on the name of Product code 1 did not impact the launch date. |