|
|
Archives of the TeradataForum
Message Posted: Wed, 05 Nov 2003 @ 19:51:01 GMT
Subj: | | Re: Data profiling |
|
From: | | Jim Downey |
You can use a stored procedure to do this, but I have used the following code in the past:
**** this is the DDL for the table that holds the results:
CREATE SET TABLE development.statistics ,NO FALLBACK ,
NO BEFORE JOURNAL,
NO AFTER JOURNAL
(
Statistic_Dt DATE FORMAT 'yyyy-mm-dd',
Statistic_Tm FLOAT,
Statistic_Type_Ds VARCHAR(50) CHARACTER SET UNICODE NOT CASESPECIFIC,
Database_Nm VARCHAR(50) CHARACTER SET UNICODE NOT CASESPECIFIC,
Table_Nm VARCHAR(50) CHARACTER SET UNICODE NOT CASESPECIFIC,
Column_Nm VARCHAR(50) CHARACTER SET UNICODE NOT CASESPECIFIC,
Column_Value_Ds VARCHAR(50) CHARACTER SET LATIN NOT CASESPECIFIC,
Statistic_Ct INTEGER)
PRIMARY INDEX ( Statistic_Tm );
*** This is the code that writes the value and the number of rows with that value:
*** Change the where clause so you generate only code for the databases, tables you are interested in.
*** Run this code and then run the code it generates.
/*
STATISTIC TYPE = VALUE_ROW_CT
DESCRIPTION:
THIS COUNTS THE ROWS FOR EACH VALUE IN EACH COLUMN.
*/
sel
(
'/'||'* VALUE_ROW_CT - '||
upper(databasename) ||
' - ' ||
upper(tablename) ||
' - ' ||
upper(columnname) ||
'*'||'/' ||
'Locking table '||
trim(upper(databasename)) ||
'.' ||
trim(upper(tablename)) ||
' For Access '||
'Insert into development.statistics'||
'( '||
' Statistic_Dt '||
', Statistic_Tm '||
', Statistic_Type_Ds '||
', Database_Nm '||
', Table_Nm '||
', Column_Nm '||
', Column_Value_Ds '||
', Statistic_Ct '||
') '||
'SEL DATE '||
', TIME '||
', ''VALUE_ROW_CT'' '||
',' ||
''''||trim(upper(databasename))||'''' ||
',' ||
''''||trim(upper(tablename))||'''' ||
',' ||
''''||trim(upper(columnname))||'''' ||
', CASE '||
' WHEN '||
trim(upper(columnname)) ||
' IS NULL '||
' THEN '''' '||
' WHEN TRIM( '||
trim(upper(columnname)) ||
')='''' '||
' THEN '''' '||
' WHEN VALUE_CT=1 '||
' THEN '''' '||
' WHEN RANK_VALUE_CT<101 '||
' THEN TRIM( '||
trim(upper(columnname)) ||
') '||
' ELSE '''' END '||
', SUM(VALUE_CT) '||
'FROM '||
'( '||
'SEL '||
trim(upper(columnname)) ||
' '||
', VALUE_CT '||
', RANK(VALUE_CT, '||
trim(upper(columnname)) ||
') '||
' AS RANK_VALUE_CT '||
'FROM '||
'( '||
'SEL '||
trim(upper(columnname)) ||
' '||
', COUNT(*) AS VALUE_CT '||
'FROM '||
trim(upper(databasename)) ||
'.' ||
trim(upper(tablename)) ||
' '||
'GROUP BY 1 '||
') T1 '||
') T2 '||
'GROUP BY 1,2,3,4,5,6,7 '||
'; '
) as XQL_VALUE_ROW_CT
from dbc.columns
where (databasename,tablename) in
(
sel databasename
, tablename
from dbc.tables
where databasename='XXXXXXXX'
and tablekind='T'
)
order by databasename
, tablename
, columnname
;
| |