Home Page for the TeradataForum
 

Archives of the TeradataForum

Message Posted: Wed, 05 Nov 2003 @ 19:51:01 GMT


     
  <Prev Next>   <<First <Prev
Next>
Last>>
 


Subj:   Re: Data profiling
 
From:   Jim Downey

You can use a stored procedure to do this, but I have used the following code in the past:

**** this is the DDL for the table that holds the results:
CREATE SET TABLE development.statistics ,NO FALLBACK ,
     NO BEFORE JOURNAL,
     NO AFTER JOURNAL
     (
      Statistic_Dt DATE FORMAT 'yyyy-mm-dd',
      Statistic_Tm FLOAT,
      Statistic_Type_Ds VARCHAR(50) CHARACTER SET UNICODE NOT CASESPECIFIC,
      Database_Nm VARCHAR(50) CHARACTER SET UNICODE NOT CASESPECIFIC,
      Table_Nm VARCHAR(50) CHARACTER SET UNICODE NOT CASESPECIFIC,
      Column_Nm VARCHAR(50) CHARACTER SET UNICODE NOT CASESPECIFIC,
      Column_Value_Ds VARCHAR(50) CHARACTER SET LATIN NOT CASESPECIFIC,
      Statistic_Ct INTEGER)
PRIMARY INDEX ( Statistic_Tm );

*** This is the code that writes the value and the number of rows with that value:

*** Change the where clause so you generate only code for the databases, tables you are interested in.

*** Run this code and then run the code it generates.

/*
STATISTIC TYPE = VALUE_ROW_CT
DESCRIPTION:
THIS COUNTS THE ROWS FOR EACH VALUE IN EACH COLUMN.
*/

sel
(
'/'||'* VALUE_ROW_CT -                     '||
upper(databasename)                         ||
' - '                                       ||
upper(tablename)                            ||
' - '                                       ||
upper(columnname)                           ||
'*'||'/'                          ||
'Locking table                             '||
trim(upper(databasename))                   ||
'.'                                         ||
trim(upper(tablename))                      ||
' For Access                               '||
'Insert into development.statistics'||
'(                                         '||
'       Statistic_Dt                       '||
',      Statistic_Tm                       '||
',      Statistic_Type_Ds                  '||
',      Database_Nm                        '||
',      Table_Nm                           '||
',      Column_Nm                          '||
',      Column_Value_Ds                    '||
',      Statistic_Ct                       '||
')                                         '||
'SEL    DATE                               '||
',      TIME                               '||
',      ''VALUE_ROW_CT''                   '||
','                                         ||
''''||trim(upper(databasename))||''''       ||
','                                         ||
''''||trim(upper(tablename))||''''          ||
','                                         ||
''''||trim(upper(columnname))||''''         ||
',      CASE                               '||
'       WHEN                               '||
trim(upper(columnname))                     ||
'       IS NULL                            '||
'       THEN ''''                    '||
'       WHEN TRIM(                         '||
trim(upper(columnname))                     ||
')=''''                                    '||
'       THEN ''''                   '||
'       WHEN VALUE_CT=1                    '||
'       THEN ''''         '||
'       WHEN RANK_VALUE_CT<101             '||
'       THEN TRIM(                         '||
trim(upper(columnname))                     ||
')                                         '||
'       ELSE '''' END           '||
',      SUM(VALUE_CT)                      '||
'FROM                                      '||
'(                                         '||
'SEL                                       '||
trim(upper(columnname))                     ||
'                                          '||
',      VALUE_CT                           '||
',      RANK(VALUE_CT,                     '||
trim(upper(columnname))                     ||
')                                         '||
'       AS RANK_VALUE_CT                   '||
'FROM                                      '||
'(                                         '||
'SEL                                       '||
trim(upper(columnname))                     ||
'                                          '||
',      COUNT(*) AS VALUE_CT               '||
'FROM                                      '||
trim(upper(databasename))                   ||
'.'                                         ||
trim(upper(tablename))                      ||
'                                          '||
'GROUP BY       1                          '||
') T1                                      '||
') T2                                      '||
'GROUP BY 1,2,3,4,5,6,7                    '||
';                                         '
) as XQL_VALUE_ROW_CT
from    dbc.columns
where   (databasename,tablename) in
(
sel     databasename
,       tablename
from    dbc.tables
where   databasename='XXXXXXXX'
and     tablekind='T'
)
order by databasename
,       tablename
,       columnname
;


     
  <Prev Next>   <<First <Prev
Next>
Last>>
 
 
 
 
 
 
 
 
 
  
  Top Home Privacy Feedback  
 
 
Copyright for the TeradataForum (TDATA-L), Manta BlueSky    
Copyright 2016 - All Rights Reserved    
Last Modified: 15 Jun 2023