Home Page for the TeradataForum
 
https:

Archives of the TeradataForum

Message Posted: Thu, 25 May 2007 @ 00:12:38 GMT


     
  <Prev Next>   <<First <Prev
Next>
Last>>
 


Subj:   Re: Information about collect statistics already done for tables in particular Database
 
From:   fachtna.c.donovan

Hi John,

There are a few tables that you have a look at for stats

     dbc.tvfields
     dbc.ColumnStats
     dbc.Indexes
     dbc.IndexStats
     dbc.MultiColumnStats

There is a field on each of these tables called fieldStatistics or IndexStatistics if Stats have been collect on the column/index. If you want to interrogate dbc.tvfields I got the code below a few years ago on this forum from Dieter Noeth which works on V2R5.1 (Fair play Dieter!)


Fachtna O'Donovan
Data Warehouse Developer
AIB Bank
Ireland

This technique may be used to extract additional information from the binary statistics:

- Row count

- Number of distinct values

- Number of NULLs

- Modal Frequency, i.e. maximum number of rows / value


     /** Additional info on collected statistics, including Date, Time, Rowcount...
     2003-03-12 dn initial version
     **/
     SELECT
     DatabaseName,
     TableName,
     ColumnName,

     /** stats collected on:
     'C' --> Column
     'I' --> Index
     'M' --> Multiple columns (V2R5)
     **/
     StatsType,

     /** collect stats date **/
     CollectDate,

     /** collect stats time **/
     CollectTime,

     /** V2R5: sample size used for collect stats**/
     CASE SampleSize WHEN 0 THEN 100 ELSE SampleSize END AS SampleSize,


     /** Row Count
     Estimated when SampleSize < 100
     **/
     (-1**(NumRowsw1 / 32768))
     * (2**((NumRowsw1 / 16 mod 2048) - 1023))
     * (1 + ((NumRowsw1 mod 16) * 2**-4) + (NumRowsw2 * 2**-20)
     + (NumRowsw3 * 2**-36) + (NumRowsw4 * 2**-52))  (decimal(18,0))
     AS NumRows,

     /** Distinct Values
     Estimated when SampleSize < 100
     **/
     (-1**(NumValuesw1 / 32768))
     * (2**((NumValuesw1 / 16 mod 2048) - 1023))
     * (1 + ((NumValuesw1 mod 16) * 2**-4) + (NumValuesw2 * 2**-20)
     + (NumValuesw3 * 2**-36) + (NumValuesw4 * 2**-52))
     (decimal(18,0))
     AS NumValues,

     /** Number of NULLs
     Estimated when SampleSize < 100
     **/
     (-1**(NumNullsw1 / 32768))
     * (2**((NumNullsw1 / 16 mod 2048) - 1023))
     * (1 + ((NumNullsw1 mod 16) * 2**-4) + (NumNullsw2 * 2**-20)
     + (NumNullsw3 * 2**-36) + (NumNullsw4 * 2**-52))
     (decimal(18,0))
     AS NumNulls,

     /** Maximum number of rows / value,
     Estimated when SampleSize < 100
     **/
     (-1**(ModeFreqw1 / 32768))
     * (2**((ModeFreqw1 / 16 mod 2048) - 1023))
     * (1 + ((ModeFreqw1 mod 16) * 2**-4) + (ModeFreqw2 * 2**-20)
     + (ModeFreqw3 * 2**-36) + (ModeFreqw4 * 2**-52))
     (decimal(18,0))
     AS ModeFreq
     FROM
     (
     SELECT
     DatabaseName,
     TableName,
     ColumnName,
     Stats,
     StatsType,
     (
     (HASHBUCKET
     (SUBSTR(Stats, 2, 1) ||
     SUBSTR(Stats, 1, 1) (BYTE(4))
     ) - 1900
     ) * 10000
     +
     (HASHBUCKET
     ('00'xb || SUBSTR(Stats, 3, 1) (BYTE(4))
     )
     ) * 100
     +
     (HASHBUCKET
     (
     '00'xb || SUBSTR(Stats, 4, 1) (BYTE(4))
     )
     )
     ) (DATE)
     AS CollectDate,

     (CAST(
     (HASHBUCKET
     (CAST('00'xb || SUBSTR(Stats, 5, 1) AS BYTE(4))
     ) (FORMAT '99:')
     ) ||
     (HASHBUCKET
     (CAST('00'xb || SUBSTR(Stats, 6, 1) AS BYTE(4))
     ) (FORMAT '99:')
     ) ||
     (HASHBUCKET
     (CAST('00'xb || SUBSTR(Stats, 7, 1) AS BYTE(4))
     ) (FORMAT '99.')
     ) ||
     (HASHBUCKET
     (CAST('00'xb || SUBSTR(Stats, 8, 1) AS BYTE(4))
     ) (FORMAT '99')
     ) AS TIME(2))
     ) AS CollectTime,

     HASHBUCKET
     ('00'xb || SUBSTR(Stats, 12, 1) (BYTE(4))) AS SampleSize,

     HASHBUCKET(SUBSTR(Stats, 12 + 8, 1)
     || SUBSTR(Stats, 12 + 7, 1) (BYTE(4))) AS NumNullsw1,
     HASHBUCKET(SUBSTR(Stats, 12 + 6, 1)
     || SUBSTR(Stats, 12 + 5, 1) (BYTE(4))) AS NumNullsw2,
     HASHBUCKET(SUBSTR(Stats, 12 + 4, 1)
     || SUBSTR(Stats, 12 + 3, 1) (BYTE(4))) AS NumNullsw3,
     HASHBUCKET(SUBSTR(Stats, 12 + 2, 1)
     || SUBSTR(Stats, 12 + 1, 1) (BYTE(4))) AS NumNullsw4,

     HASHBUCKET(SUBSTR(Stats, 40 + Offset + 8, 1)
     || SUBSTR(Stats, 40 + Offset + 7, 1) (BYTE(4))) AS
     ModeFreqw1,
     HASHBUCKET(SUBSTR(Stats, 40 + Offset + 6, 1)
     || SUBSTR(Stats, 40 + Offset + 5, 1) (BYTE(4))) AS
     ModeFreqw2,
     HASHBUCKET(SUBSTR(Stats, 40 + Offset + 4, 1)
     || SUBSTR(Stats, 40 + Offset + 3, 1) (BYTE(4))) AS
     ModeFreqw3,
     HASHBUCKET(SUBSTR(Stats, 40 + Offset + 2, 1)
     || SUBSTR(Stats, 40 + Offset + 1, 1) (BYTE(4))) AS
     ModeFreqw4,

     HASHBUCKET(SUBSTR(Stats, 48 + Offset + 8, 1)
     || SUBSTR(Stats, 48 + Offset + 7, 1) (BYTE(4))) AS
     NumValuesw1,
     HASHBUCKET(SUBSTR(Stats, 48 + Offset + 6, 1)
     || SUBSTR(Stats, 48 + Offset + 5, 1) (BYTE(4))) AS
     NumValuesw2,
     HASHBUCKET(SUBSTR(Stats, 48 + Offset + 4, 1)
     || SUBSTR(Stats, 48 + Offset + 3, 1) (BYTE(4))) AS
     NumValuesw3,
     HASHBUCKET(SUBSTR(Stats, 48 + Offset + 2, 1)
     || SUBSTR(Stats, 48 + Offset + 1, 1) (BYTE(4))) AS
     NumValuesw4,

     HASHBUCKET(SUBSTR(Stats, 56 + Offset + 8, 1)
     || SUBSTR(Stats, 56 + Offset + 7, 1) (BYTE(4))) AS
     NumRowsw1,
     HASHBUCKET(SUBSTR(Stats, 56 + Offset + 6, 1)
     || SUBSTR(Stats, 56 + Offset + 5, 1) (BYTE(4))) AS
     NumRowsw2,
     HASHBUCKET(SUBSTR(Stats, 56 + Offset + 4, 1)
     || SUBSTR(Stats, 56 + Offset + 3, 1) (BYTE(4))) AS
     NumRowsw3,
     HASHBUCKET(SUBSTR(Stats, 56 + Offset + 2, 1)
     || SUBSTR(Stats, 56 + Offset + 1, 1) (BYTE(4))) AS
     NumRowsw4

     FROM
     (
     SELECT
     d.databasename AS DatabaseName,
     t.tvmname AS TableName,

     /** Concat up to 16 columnnames into a comma delimited list **/
     MAX(CASE WHEN i.FieldPosition = 1 THEN TRIM(c.FieldName) ELSE ''
     END) ||
     MAX(CASE WHEN i.FieldPosition = 2 THEN ',' || TRIM(c.FieldName)
     ELSE '' END) ||
     MAX(CASE WHEN i.FieldPosition = 3 THEN ',' || TRIM(c.FieldName)
     ELSE '' END) ||
     MAX(CASE WHEN i.FieldPosition = 4 THEN ',' || TRIM(c.FieldName)
     ELSE '' END) ||
     MAX(CASE WHEN i.FieldPosition = 5 THEN ',' || TRIM(c.FieldName)
     ELSE '' END) ||
     MAX(CASE WHEN i.FieldPosition = 6 THEN ',' || TRIM(c.FieldName)
     ELSE '' END) ||
     MAX(CASE WHEN i.FieldPosition = 7 THEN ',' || TRIM(c.FieldName)
     ELSE '' END) ||
     MAX(CASE WHEN i.FieldPosition = 8 THEN ',' || TRIM(c.FieldName)
     ELSE '' END) ||
     MAX(CASE WHEN i.FieldPosition = 9 THEN ',' || TRIM(c.FieldName)
     ELSE '' END) ||
     MAX(CASE WHEN i.FieldPosition = 10 THEN ',' || TRIM(c.FieldName)
     ELSE '' END) ||
     MAX(CASE WHEN i.FieldPosition = 11 THEN ',' || TRIM(c.FieldName)
     ELSE '' END) ||
     MAX(CASE WHEN i.FieldPosition = 12 THEN ',' || TRIM(c.FieldName)
     ELSE '' END) ||
     MAX(CASE WHEN i.FieldPosition = 13 THEN ',' || TRIM(c.FieldName)
     ELSE '' END) ||
     MAX(CASE WHEN i.FieldPosition = 14 THEN ',' || TRIM(c.FieldName)
     ELSE '' END) ||
     MAX(CASE WHEN i.FieldPosition = 15 THEN ',' || TRIM(c.FieldName)
     ELSE '' END) ||
     MAX(CASE WHEN i.FieldPosition = 16 THEN ',' || TRIM(c.FieldName)
     ELSE '' END) ||
     MAX(CASE WHEN i.FieldPosition > 16  THEN ',...' ELSE '' END)
     AS ColumnName,

     CASE WHEN IndexType = 'M' THEN 'M' ELSE 'I' END AS StatsType,

     /** Floats are stored after the data and data may be 16 or 32
     BYTEs **/
     /** depending on BYTE[23]**/
     MAX(CASE
     WHEN SUBSTR(i.indexStatistics, 23, 1) = '00'XB THEN 16
     ELSE 0
     END)
     AS Offset,

     MAX(SUBSTR(i.indexstatistics, 1, 80)) AS Stats
     FROM
     dbc.dbase d
     JOIN dbc.tvm t
     ON d.databaseid = t.databaseid
     JOIN dbc.tvfields c
     ON t.tvmid = c.tableid
     JOIN dbc.Indexes i
     ON c.tableid = i.tableid
     AND c.fieldid = i.fieldid
     GROUP BY
     DatabaseName,
     TableName,
     StatsType,
     i.IndexNumber

     UNION ALL

     SELECT
     d.databasename AS DatabaseName,
     t.tvmname AS TableName,
     c.fieldname AS ColumnName,
     'C' AS StatsType,

     /** Floats are stored after the data and data may be 16 or 32
     BYTEs **/
     /** depending on BYTE[23]**/
     CASE
     WHEN SUBSTR(c.fieldStatistics, 23, 1) = '00'XB THEN 16
     ELSE 0
     END AS Offset,

     SUBSTR(c.fieldstatistics, 1, 80) AS Stats
     FROM
     dbc.dbase d
     JOIN dbc.tvm t
     ON d.databaseid = t.databaseid
     JOIN dbc.tvfields c
     ON t.tvmid = c.tableid
     ) dt
     ) dt
     WHERE Stats IS NOT NULL
     ORDER BY
     DatabaseName,
     TableName,
     ColumnName
     ;

     /*** Modified view dbc.ColumnStats.
     dbc.IndexStats and dbc.MultiColumnStats may be modified in a
     similar way ***/

     SELECT
     DatabaseName,
     TableName,
     ColumnName,
     ColumnType,
     ColumnLength,
     ColumnFormat,
     DecimalTotalDigits,
     DecimalFractionalDigits,
     Stats AS FieldStatistics,
     SeqNumber,

     /** collect stats date **/
     CollectDate,

     /** collect stats time **/
     CollectTime,

     /** V2R5: sample size used for collect stats**/
     CASE SampleSize WHEN 0 THEN 100 ELSE SampleSize END AS SampleSize,


     /** Row Count
     Estimated when SampleSize < 100
     **/
     (-1**(NumRowsw1 / 32768))
     * (2**((NumRowsw1 / 16 mod 2048) - 1023))
     * (1 + ((NumRowsw1 mod 16) * 2**-4) + (NumRowsw2 * 2**-20)
     + (NumRowsw3 * 2**-36) + (NumRowsw4 * 2**-52))  (decimal(18,0))
     AS NumRows,

     /** Distinct Values
     Estimated when SampleSize < 100
     **/
     (-1**(NumValuesw1 / 32768))
     * (2**((NumValuesw1 / 16 mod 2048) - 1023))
     * (1 + ((NumValuesw1 mod 16) * 2**-4) + (NumValuesw2 * 2**-20)
     + (NumValuesw3 * 2**-36) + (NumValuesw4 * 2**-52))
     (decimal(18,0))
     AS NumValues,

     /** Number of NULLs
     Estimated when SampleSize < 100
     **/
     (-1**(NumNullsw1 / 32768))
     * (2**((NumNullsw1 / 16 mod 2048) - 1023))
     * (1 + ((NumNullsw1 mod 16) * 2**-4) + (NumNullsw2 * 2**-20)
     + (NumNullsw3 * 2**-36) + (NumNullsw4 * 2**-52))
     (decimal(18,0))
     AS NumNulls,

     /** Maximum number of rows / value,
     Estimated when SampleSize < 100
     **/
     (-1**(ModeFreqw1 / 32768))
     * (2**((ModeFreqw1 / 16 mod 2048) - 1023))
     * (1 + ((ModeFreqw1 mod 16) * 2**-4) + (ModeFreqw2 * 2**-20)
     + (ModeFreqw3 * 2**-36) + (ModeFreqw4 * 2**-52))
     (decimal(18,0))
     AS ModeFreq
     FROM
     (
     SELECT
     DatabaseName,
     TableName,
     ColumnName,
     ColumnType,
     ColumnLength,
     ColumnFormat,
     DecimalTotalDigits,
     DecimalFractionalDigits,
     Stats,
     SeqNumber,
     (
     (HASHBUCKET
     (SUBSTR(Stats, 2, 1) ||
     SUBSTR(Stats, 1, 1) (BYTE(4))
     ) - 1900
     ) * 10000
     +
     (HASHBUCKET
     ('00'xb || SUBSTR(Stats, 3, 1) (BYTE(4))
     )
     ) * 100
     +
     (HASHBUCKET
     (
     '00'xb || SUBSTR(Stats, 4, 1) (BYTE(4))
     )
     )
     ) (DATE)
     AS CollectDate,

     (CAST(
     (HASHBUCKET
     (CAST('00'xb || SUBSTR(Stats, 5, 1) AS BYTE(4))
     ) (FORMAT '99:')
     ) ||
     (HASHBUCKET
     (CAST('00'xb || SUBSTR(Stats, 6, 1) AS BYTE(4))
     ) (FORMAT '99:')
     ) ||
     (HASHBUCKET
     (CAST('00'xb || SUBSTR(Stats, 7, 1) AS BYTE(4))
     ) (FORMAT '99.')
     ) ||
     (HASHBUCKET
     (CAST('00'xb || SUBSTR(Stats, 8, 1) AS BYTE(4))
     ) (FORMAT '99')
     ) AS TIME(2))
     ) AS CollectTime,

     HASHBUCKET
     ('00'xb || SUBSTR(Stats, 12, 1) (BYTE(4))) AS SampleSize,

     HASHBUCKET(SUBSTR(Stats, 12 + 8, 1)
     || SUBSTR(Stats, 12 + 7, 1) (BYTE(4))) AS NumNullsw1,
     HASHBUCKET(SUBSTR(Stats, 12 + 6, 1)
     || SUBSTR(Stats, 12 + 5, 1) (BYTE(4))) AS NumNullsw2,
     HASHBUCKET(SUBSTR(Stats, 12 + 4, 1)
     || SUBSTR(Stats, 12 + 3, 1) (BYTE(4))) AS NumNullsw3,
     HASHBUCKET(SUBSTR(Stats, 12 + 2, 1)
     || SUBSTR(Stats, 12 + 1, 1) (BYTE(4))) AS NumNullsw4,

     HASHBUCKET(SUBSTR(Stats, 40 + Offset + 8, 1)
     || SUBSTR(Stats, 40 + Offset + 7, 1) (BYTE(4))) AS
     ModeFreqw1,
     HASHBUCKET(SUBSTR(Stats, 40 + Offset + 6, 1)
     || SUBSTR(Stats, 40 + Offset + 5, 1) (BYTE(4))) AS
     ModeFreqw2,
     HASHBUCKET(SUBSTR(Stats, 40 + Offset + 4, 1)
     || SUBSTR(Stats, 40 + Offset + 3, 1) (BYTE(4))) AS
     ModeFreqw3,
     HASHBUCKET(SUBSTR(Stats, 40 + Offset + 2, 1)
     || SUBSTR(Stats, 40 + Offset + 1, 1) (BYTE(4))) AS
     ModeFreqw4,

     HASHBUCKET(SUBSTR(Stats, 48 + Offset + 8, 1)
     || SUBSTR(Stats, 48 + Offset + 7, 1) (BYTE(4))) AS
     NumValuesw1,
     HASHBUCKET(SUBSTR(Stats, 48 + Offset + 6, 1)
     || SUBSTR(Stats, 48 + Offset + 5, 1) (BYTE(4))) AS
     NumValuesw2,
     HASHBUCKET(SUBSTR(Stats, 48 + Offset + 4, 1)
     || SUBSTR(Stats, 48 + Offset + 3, 1) (BYTE(4))) AS
     NumValuesw3,
     HASHBUCKET(SUBSTR(Stats, 48 + Offset + 2, 1)
     || SUBSTR(Stats, 48 + Offset + 1, 1) (BYTE(4))) AS
     NumValuesw4,

     HASHBUCKET(SUBSTR(Stats, 56 + Offset + 8, 1)
     || SUBSTR(Stats, 56 + Offset + 7, 1) (BYTE(4))) AS
     NumRowsw1,
     HASHBUCKET(SUBSTR(Stats, 56 + Offset + 6, 1)
     || SUBSTR(Stats, 56 + Offset + 5, 1) (BYTE(4))) AS
     NumRowsw2,
     HASHBUCKET(SUBSTR(Stats, 56 + Offset + 4, 1)
     || SUBSTR(Stats, 56 + Offset + 3, 1) (BYTE(4))) AS
     NumRowsw3,
     HASHBUCKET(SUBSTR(Stats, 56 + Offset + 2, 1)
     || SUBSTR(Stats, 56 + Offset + 1, 1) (BYTE(4))) AS
     NumRowsw4

     FROM
     (
     SELECT
     d.databasename AS DatabaseName,
     t.tvmname AS TableName,
     c.fieldname AS ColumnName,
     c.FieldType AS ColumnType,
     c.MaxLength AS ColumnLength,
     c.FieldFormat AS ColumnFormat,
     c.TotalDigits AS DecimalTotalDigits,
     c.ImpliedPoint AS DecimalFractionalDigits,
     c.FieldStatistics AS Stats,
     c.FieldID AS SeqNumber,

     /** Floats are stored after the data and data may be 16 or 32
     BYTEs **/
     /** depending on BYTE[23]**/
     CASE
     WHEN SUBSTR(c.fieldStatistics, 23, 1) = '00'XB THEN 16
     ELSE 0
     END AS Offset

     FROM
     dbc.dbase d
     JOIN dbc.tvm t
     ON d.databaseid = t.databaseid
     JOIN dbc.tvfields c
     ON t.tvmid = c.tableid
     ) dt
     ) dt;


     
  <Prev Next>   <<First <Prev
Next>
Last>>
 
 
 
 
 
 
 
 
 
  
  Top Home Privacy Feedback  
 
 
Copyright for the TeradataForum (TDATA-L), Manta BlueSky    
Copyright 2016 - All Rights Reserved    
Last Modified: 28 Jun 2020