@@ -724,3 +724,283 @@ def query_get_null_keyword_count(
724
724
return round ((result [0 ] / result [1 ]) * 100 , 2 ) if result [1 ] > 0 else 0
725
725
726
726
return result [0 ] if result else 0
727
+
728
+ def query_timestamp_metric (
729
+ self ,
730
+ table : str ,
731
+ field : str ,
732
+ operation : str ,
733
+ predefined_regex : str ,
734
+ filters : str = None ,
735
+ ) -> Union [float , int ]:
736
+ """
737
+ :param table: Table name
738
+ :param field: Column name
739
+ :param operation: Metric operation ("count" or "percent")
740
+ :param predefined_regex: regex pattern
741
+ :param filters: filter condition
742
+ :return: Tuple containing valid count and total count (or percentage)
743
+ """
744
+
745
+ qualified_table_name = self .qualified_table_name (table )
746
+
747
+ timestamp_iso_regex = r"^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])T([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9](?:\.\d{1,3})?(Z|[+-](0[0-9]|1[0-4]):[0-5][0-9])?$"
748
+
749
+ if predefined_regex == "timestamp_iso" :
750
+ regex_condition = f"{ field } ~ '{ timestamp_iso_regex } '"
751
+ else :
752
+ raise ValueError (f"Unknown predefined regex pattern: { predefined_regex } " )
753
+
754
+ filters_clause = f"WHERE { filters } " if filters else ""
755
+
756
+ query = f"""
757
+ WITH extracted_timestamps AS (
758
+ SELECT
759
+ { field } ,
760
+ SUBSTRING({ field } FROM '^(\d{{4}})') AS year, -- Extract year
761
+ SUBSTRING({ field } FROM '^\d{{4}}-(\d{{2}})') AS month, -- Extract month
762
+ SUBSTRING({ field } FROM '^\d{{4}}-\d{{2}}-(\d{{2}})') AS day, -- Extract day
763
+ SUBSTRING({ field } FROM 'T(\d{{2}})') AS hour, -- Extract hour
764
+ SUBSTRING({ field } FROM 'T\d{{2}}:(\d{{2}})') AS minute, -- Extract minute
765
+ SUBSTRING({ field } FROM 'T\d{{2}}:\d{{2}}:(\d{{2}})') AS second, -- Extract second
766
+ SUBSTRING({ field } FROM '([+-]\d{{2}}:\d{{2}}|Z)$') AS timezone -- Extract timezone
767
+ FROM { qualified_table_name }
768
+ { filters_clause }
769
+ ),
770
+ validated_timestamps AS (
771
+ SELECT
772
+ { field } ,
773
+ CASE
774
+ WHEN
775
+ -- Validate each component with its specific rules
776
+ year ~ '^\d{{4}}$' AND
777
+ month ~ '^(0[1-9]|1[0-2])$' AND
778
+ day ~ '^((0[1-9]|[12][0-9])|(30|31))$' AND
779
+ hour ~ '^([01][0-9]|2[0-3])$' AND
780
+ minute ~ '^[0-5][0-9]$' AND
781
+ second ~ '^[0-5][0-9]$' AND
782
+ (timezone IS NULL OR timezone ~ '^(Z|[+-](0[0-9]|1[0-4]):[0-5][0-9])$') AND
783
+ -- Additional check for days in months (e.g., February)
784
+ (
785
+ (month IN ('01', '03', '05', '07', '08', '10', '12') AND day BETWEEN '01' AND '31') OR
786
+ (month IN ('04', '06', '09', '11') AND day BETWEEN '01' AND '30') OR
787
+ (month = '02' AND day BETWEEN '01' AND
788
+ CASE
789
+ -- Handle leap years
790
+ WHEN (year::int % 400 = 0 OR (year::int % 100 != 0 AND year::int % 4 = 0)) THEN '29'
791
+ ELSE '28'
792
+ END
793
+ )
794
+ )
795
+ THEN 1
796
+ ELSE 0
797
+ END AS is_valid
798
+ FROM extracted_timestamps
799
+ )
800
+ SELECT COUNT(*) AS valid_count, COUNT(*) AS total_count
801
+ FROM validated_timestamps
802
+ WHERE is_valid = 1;
803
+ """
804
+
805
+ try :
806
+ valid_count = self .fetchone (query )[0 ]
807
+ total_count_query = (
808
+ f"SELECT COUNT(*) FROM { qualified_table_name } { filters_clause } "
809
+ )
810
+ total_count = self .fetchone (total_count_query )[0 ]
811
+
812
+ if operation == "count" :
813
+ return valid_count , total_count
814
+ elif operation == "percent" :
815
+ return valid_count , total_count
816
+ else :
817
+ raise ValueError (f"Unknown operation: { operation } " )
818
+
819
+ except Exception as e :
820
+ print (f"Error occurred: { e } " )
821
+ return 0 , 0
822
+
823
+ def query_timestamp_not_in_future_metric (
824
+ self ,
825
+ table : str ,
826
+ field : str ,
827
+ operation : str ,
828
+ predefined_regex : str ,
829
+ filters : str = None ,
830
+ ) -> Union [float , int ]:
831
+ """
832
+ :param table: Table name
833
+ :param field: Column name
834
+ :param operation: Metric operation ("count" or "percent")
835
+ :param predefined_regex: regex pattern
836
+ :param filters: filter condition
837
+ :return: Tuple containing count of valid timestamps not in the future and total count
838
+ """
839
+
840
+ qualified_table_name = self .qualified_table_name (table )
841
+
842
+ timestamp_iso_regex = r"^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])T([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9](?:\.\d{1,3})?(Z|[+-](0[0-9]|1[0-4]):[0-5][0-9])?$"
843
+
844
+ if predefined_regex == "timestamp_iso" :
845
+ regex_condition = f"{ field } ~ '{ timestamp_iso_regex } '"
846
+ else :
847
+ raise ValueError (f"Unknown predefined regex pattern: { predefined_regex } " )
848
+
849
+ filters_clause = f"WHERE { filters } " if filters else ""
850
+
851
+ query = f"""
852
+ WITH extracted_timestamps AS (
853
+ SELECT
854
+ { field } ,
855
+ SUBSTRING({ field } FROM '^(\d{{4}})') AS year, -- Extract year
856
+ SUBSTRING({ field } FROM '^\d{{4}}-(\d{{2}})') AS month, -- Extract month
857
+ SUBSTRING({ field } FROM '^\d{{4}}-\d{{2}}-(\d{{2}})') AS day, -- Extract day
858
+ SUBSTRING({ field } FROM 'T(\d{{2}})') AS hour, -- Extract hour
859
+ SUBSTRING({ field } FROM 'T\d{{2}}:(\d{{2}})') AS minute, -- Extract minute
860
+ SUBSTRING({ field } FROM 'T\d{{2}}:\d{{2}}:(\d{{2}})') AS second, -- Extract second
861
+ SUBSTRING({ field } FROM '([+-]\d{{2}}:\d{{2}}|Z)$') AS timezone -- Extract timezone
862
+ FROM { qualified_table_name }
863
+ { filters_clause }
864
+ ),
865
+ validated_timestamps AS (
866
+ SELECT
867
+ { field } ,
868
+ CASE
869
+ WHEN
870
+ year ~ '^\d{{4}}$' AND
871
+ month ~ '^(0[1-9]|1[0-2])$' AND
872
+ day ~ '^((0[1-9]|[12][0-9])|(30|31))$' AND
873
+ hour ~ '^([01][0-9]|2[0-3])$' AND
874
+ minute ~ '^[0-5][0-9]$' AND
875
+ second ~ '^[0-5][0-9]$' AND
876
+ (timezone IS NULL OR timezone ~ '^(Z|[+-](0[0-9]|1[0-4]):[0-5][0-9])$') AND
877
+ (
878
+ (month IN ('01', '03', '05', '07', '08', '10', '12') AND day BETWEEN '01' AND '31') OR
879
+ (month IN ('04', '06', '09', '11') AND day BETWEEN '01' AND '30') OR
880
+ (month = '02' AND day BETWEEN '01' AND
881
+ CASE
882
+ WHEN (year::int % 400 = 0 OR (year::int % 100 != 0 AND year::int % 4 = 0)) THEN '29'
883
+ ELSE '28'
884
+ END
885
+ )
886
+ )
887
+ THEN 1
888
+ ELSE 0
889
+ END AS is_valid
890
+ FROM extracted_timestamps
891
+ ),
892
+ timestamps_not_in_future AS (
893
+ SELECT *
894
+ FROM validated_timestamps
895
+ WHERE is_valid = 1 AND ({ field } ~ '{ timestamp_iso_regex } ') AND { field } ::timestamp <= CURRENT_TIMESTAMP
896
+ )
897
+ SELECT COUNT(*) AS valid_count, (SELECT COUNT(*) FROM { qualified_table_name } { filters_clause } ) AS total_count
898
+ FROM timestamps_not_in_future;
899
+ """
900
+ try :
901
+ valid_count = self .fetchone (query )[0 ]
902
+ total_count_query = (
903
+ f"SELECT COUNT(*) FROM { qualified_table_name } { filters_clause } "
904
+ )
905
+ total_count = self .fetchone (total_count_query )[0 ]
906
+
907
+ if operation == "count" :
908
+ return valid_count , total_count
909
+ elif operation == "percent" :
910
+ return valid_count , total_count
911
+ else :
912
+ raise ValueError (f"Unknown operation: { operation } " )
913
+
914
+ except Exception as e :
915
+ print (f"Error occurred: { e } " )
916
+ return 0 , 0
917
+
918
+ def query_timestamp_date_not_in_future_metric (
919
+ self ,
920
+ table : str ,
921
+ field : str ,
922
+ operation : str ,
923
+ predefined_regex : str ,
924
+ filters : str = None ,
925
+ ) -> Union [float , int ]:
926
+ """
927
+ :param table: Table name
928
+ :param field: Column name
929
+ :param operation: Metric operation ("count" or "percent")
930
+ :param predefined_regex: The regex pattern to use (e.g., "timestamp_iso")
931
+ :param filters: Optional filter condition
932
+ :return: Tuple containing count of valid dates not in the future and total count
933
+ """
934
+
935
+ qualified_table_name = self .qualified_table_name (table )
936
+
937
+ timestamp_iso_regex = r"^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])T([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9](?:\.\d{1,3})?(Z|[+-](0[0-9]|1[0-4]):[0-5][0-9])?$"
938
+
939
+ if predefined_regex == "timestamp_iso" :
940
+ regex_condition = f"{ field } ~ '{ timestamp_iso_regex } '"
941
+ else :
942
+ raise ValueError (f"Unknown predefined regex pattern: { predefined_regex } " )
943
+
944
+ filters_clause = f"WHERE { filters } " if filters else ""
945
+
946
+ query = f"""
947
+ WITH extracted_timestamps AS (
948
+ SELECT
949
+ { field } ,
950
+ SUBSTRING({ field } FROM '^(\d{{4}})') AS year, -- Extract year
951
+ SUBSTRING({ field } FROM '^\d{{4}}-(\d{{2}})') AS month, -- Extract month
952
+ SUBSTRING({ field } FROM '^\d{{4}}-\d{{2}}-(\d{{2}})') AS day -- Extract day
953
+ FROM { qualified_table_name }
954
+ { filters_clause }
955
+ ),
956
+ validated_dates AS (
957
+ SELECT
958
+ { field } ,
959
+ CASE
960
+ WHEN
961
+ year ~ '^\d{{4}}$' AND
962
+ month ~ '^(0[1-9]|1[0-2])$' AND
963
+ day ~ '^((0[1-9]|[12][0-9])|(30|31))$' AND
964
+ (
965
+ (month IN ('01', '03', '05', '07', '08', '10', '12') AND day BETWEEN '01' AND '31') OR
966
+ (month IN ('04', '06', '09', '11') AND day BETWEEN '01' AND '30') OR
967
+ (month = '02' AND day BETWEEN '01' AND
968
+ CASE
969
+ WHEN (year::int % 400 = 0 OR (year::int % 100 != 0 AND year::int % 4 = 0)) THEN '29'
970
+ ELSE '28'
971
+ END
972
+ )
973
+ )
974
+ THEN 1
975
+ ELSE 0
976
+ END AS is_valid
977
+ FROM extracted_timestamps
978
+ ),
979
+ dates_not_in_future AS (
980
+ SELECT *
981
+ FROM validated_dates
982
+ WHERE is_valid = 1
983
+ AND ({ field } ~ '{ timestamp_iso_regex } ')
984
+ AND ({ field } )::date <= CURRENT_DATE -- Compare only the date part against the current date
985
+ )
986
+ SELECT COUNT(*) AS valid_count, (SELECT COUNT(*) FROM { qualified_table_name } { filters_clause } ) AS total_count
987
+ FROM dates_not_in_future;
988
+ """
989
+
990
+ try :
991
+ valid_count = self .fetchone (query )[0 ]
992
+ total_count_query = (
993
+ f"SELECT COUNT(*) FROM { qualified_table_name } { filters_clause } "
994
+ )
995
+ total_count = self .fetchone (total_count_query )[0 ]
996
+
997
+ if operation == "count" :
998
+ return valid_count , total_count
999
+ elif operation == "percent" :
1000
+ return valid_count , total_count
1001
+ else :
1002
+ raise ValueError (f"Unknown operation: { operation } " )
1003
+
1004
+ except Exception as e :
1005
+ print (f"Error occurred: { e } " )
1006
+ return 0 , 0
0 commit comments