Tôi đã xây dựng một quy trình được lưu trữ để phân tích bảng nguồn hoặc truy vấn với một số đặc điểm cho mỗi cột trong đó độ dài tối thiểu (min_len) và độ dài tối đa (max_len).
CREATE PROCEDURE [dbo].[sp_analysetable] (
@tableName varchar(8000),
@deep bit = 0
) AS
/*
sp_analysetable 'company'
sp_analysetable 'select * from company where name is not null'
*/
DECLARE @intErrorCode INT, @errorMSG VARCHAR(500), @tmpQ NVARCHAR(2000), @column_name VARCHAR(50), @isQuery bit
SET @intErrorCode=0
IF OBJECT_ID('tempdb..##tmpTableToAnalyse') IS NOT NULL BEGIN
DROP TABLE ##tmpTableToAnalyse
END
IF OBJECT_ID('tempdb..##tmpColumns') IS NOT NULL BEGIN
DROP TABLE ##tmpColumns
END
if CHARINDEX('from', @tableName)>0
set @isQuery=1
IF @intErrorCode=0 BEGIN
if @isQuery=1 begin
--set @tableName = 'USE '+@db+';'+replace(@tableName, 'from', 'into ##tmpTableToAnalyse from')
--replace only first occurance. Now multiple froms may exists, but first from will be replaced with into .. from
set @tableName=Stuff(@tableName, CharIndex('from', @tableName), Len('from'), 'into ##tmpTableToAnalyse from')
exec(@tableName)
IF OBJECT_ID('tempdb..##tmpTableToAnalyse') IS NULL BEGIN
set @intErrorCode=1
SET @errorMSG='Error generating temporary table from query.'
end
else begin
set @tableName='##tmpTableToAnalyse'
end
end
end
IF @intErrorCode=0 BEGIN
SET @tmpQ='USE '+DB_NAME()+';'+CHAR(13)+CHAR(10)+'
select
c.column_name as [column],
cast(sp.value as varchar(1000)) as description,
tc_fk.constraint_type,
kcu_pk.table_name as fk_table,
kcu_pk.column_name as fk_column,
c.ordinal_position as pos,
c.column_default as [default],
c.is_nullable as [null],
c.data_type,
c.character_maximum_length as length,
c.numeric_precision as [precision],
c.numeric_precision_radix as radix,
cast(null as bit) as [is_unique],
cast(null as int) as min_len,
cast(null as int) as max_len,
cast(null as int) as nulls,
cast(null as int) as blanks,
cast(null as int) as numerics,
cast(null as int) as distincts,
cast(null as varchar(500)) as distinct_values,
cast(null as varchar(50)) as remarks
into ##tmpColumns'
if @isQuery=1 begin
SET @tmpQ=@tmpQ+' from tempdb.information_schema.columns c, (select null as value) sp'
end
else begin
SET @tmpQ=@tmpQ+'
from information_schema.columns c
left join sysobjects so on so.name=c.table_name and so.xtype=''U''
left join syscolumns sc on sc.name=c.column_name and sc.id =so.id
left join sys.extended_properties sp on sp.minor_id = sc.colid AND sp.major_id = sc.id and sp.name=''MS_Description''
left join information_schema.key_column_usage kcu_fk on kcu_fk.table_name = c.table_name and c.column_name = kcu_fk.column_name
left join information_schema.table_constraints tc_fk on kcu_fk.table_name = tc_fk.table_name and kcu_fk.constraint_name = tc_fk.constraint_name
left join information_schema.referential_constraints rc on rc.constraint_name = kcu_fk.constraint_name
left join information_schema.table_constraints tc_pk on rc.unique_constraint_name = tc_pk.constraint_name
left join information_schema.key_column_usage kcu_pk on tc_pk.constraint_name = kcu_pk.constraint_name
'
end
SET @tmpQ=@tmpQ+' where c.table_name = '''+@tableName+''''
exec(@tmpQ)
end
IF @intErrorCode=0 AND @deep = 1 BEGIN
DECLARE
@count_rows int,
@count_distinct int,
@count_nulls int,
@count_blanks int,
@count_numerics int,
@min_len int,
@max_len int,
@distinct_values varchar(500)
DECLARE curTmp CURSOR LOCAL FAST_FORWARD FOR
select [column] from ##tmpColumns;
OPEN curTmp
FETCH NEXT FROM curTmp INTO @column_name
WHILE @@FETCH_STATUS = 0 and @intErrorCode=0 BEGIN
set @tmpQ = 'USE '+DB_NAME()+'; SELECT'+
' @count_rows=count(0), '+char(13)+char(10)+
' @count_distinct=count(distinct ['+@column_name+']),'+char(13)+char(10)+
' @count_nulls=sum(case when ['+@column_name+'] is null then 1 else 0 end),'+char(13)+char(10)+
' @count_blanks=sum(case when ltrim(['+@column_name+'])='''' then 1 else 0 end),'+char(13)+char(10)+
' @count_numerics=sum(isnumeric(['+@column_name+'])),'+char(13)+char(10)+
' @min_len=min(len(['+@column_name+'])),'+char(13)+char(10)+
' @max_len=max(len(['+@column_name+']))'+char(13)+char(10)+
' from ['+@tableName+']'
exec sp_executesql @tmpQ,
N'@count_rows int OUTPUT,
@count_distinct int OUTPUT,
@count_nulls int OUTPUT,
@count_blanks int OUTPUT,
@count_numerics int OUTPUT,
@min_len int OUTPUT,
@max_len int OUTPUT',
@count_rows OUTPUT,
@count_distinct OUTPUT,
@count_nulls OUTPUT,
@count_blanks OUTPUT,
@count_numerics OUTPUT,
@min_len OUTPUT,
@max_len OUTPUT
IF (@count_distinct>10) BEGIN
SET @distinct_values='Many ('+cast(@count_distinct as varchar)+')'
END ELSE BEGIN
set @distinct_values=null
set @tmpQ = N'USE '+DB_NAME()+';'+
' select @distinct_values=COALESCE(@distinct_values+'',''+cast(['+@column_name+'] as varchar), cast(['+@column_name+'] as varchar))'+char(13)+char(10)+
' from ('+char(13)+char(10)+
' select distinct ['+@column_name+'] from ['+@tableName+'] where ['+@column_name+'] is not null) a'+char(13)+char(10)
exec sp_executesql @tmpQ,
N'@distinct_values varchar(500) OUTPUT',
@distinct_values OUTPUT
END
UPDATE ##tmpColumns SET
is_unique =case when @count_rows=@count_distinct then 1 else 0 end,
distincts =@count_distinct,
nulls =@count_nulls,
blanks =@count_blanks,
numerics =@count_numerics,
min_len =@min_len,
max_len =@max_len,
distinct_values=@distinct_values,
remarks =
case when @count_rows=@count_nulls then 'all null,' else '' end+
case when @count_rows=@count_distinct then 'unique,' else '' end+
case when @count_distinct=0 then 'empty,' else '' end+
case when @min_len=@max_len then 'same length,' else '' end+
case when @count_rows=@count_numerics then 'all numeric,' else '' end
WHERE [column]=@column_name
FETCH NEXT FROM curTmp INTO @column_name
END
CLOSE curTmp DEALLOCATE curTmp
END
IF @intErrorCode=0 BEGIN
select * from ##tmpColumns order by pos
end
IF @intErrorCode=0 BEGIN --Clean up temporary tables
IF OBJECT_ID('tempdb..##tmpTableToAnalyse') IS NOT NULL BEGIN
DROP TABLE ##tmpTableToAnalyse
END
IF OBJECT_ID('tempdb..##tmpColumns') IS NOT NULL BEGIN
DROP TABLE ##tmpColumns
END
end
IF @intErrorCode<>0 BEGIN
RAISERROR(@errorMSG, 12, 1)
END
RETURN @intErrorCode
Tôi lưu trữ thủ tục này trong cơ sở dữ liệu chủ để tôi có thể sử dụng nó trong mọi cơ sở dữ liệu như vậy:
sp_analysetable 'table_name', 1
// deep=1 for doing value analyses
Và đầu ra là:
column description constraint_type fk_table fk_column pos default null data_type length precision radix is_unique min_len max_len nulls blanks numerics distincts distinct_values remarks
id_individual NULL PRIMARY KEY NULL NULL 1 NULL NO int NULL 10 10 1 1 2 0 0 70 70 Many (70) unique,all numeric,
id_brand NULL NULL NULL NULL 2 NULL NO int NULL 10 10 0 1 1 0 0 70 2 2,3 same length,all numeric,
guid NULL NULL NULL NULL 3 (newid()) NO uniqueidentifier NULL NULL NULL 1 36 36 0 0 0 70 Many (70) unique,same length,
customer_id NULL NULL NULL NULL 4 NULL YES varchar 50 NULL NULL 0 NULL NULL 70 0 0 0 NULL all null,empty,
email NULL NULL NULL NULL 5 NULL YES varchar 100 NULL NULL 0 4 36 0 0 0 31 Many (31)
mobile NULL NULL NULL NULL 6 NULL YES varchar 50 NULL NULL 0 NULL NULL 70 0 0 0 NULL all null,empty,
initials NULL NULL NULL NULL 7 NULL YES varchar 50 NULL NULL 0 NULL NULL 70 0 0 0 NULL all null,empty,
title_short NULL NULL NULL NULL 8 NULL YES varchar 50 NULL NULL 0 NULL NULL 70 0 0 0 NULL all null,empty,
title_long NULL NULL NULL NULL 9 NULL YES varchar 50 NULL NULL 0 NULL NULL 70 0 0 0 NULL all null,empty,
firstname NULL NULL NULL NULL 10 NULL YES varchar 50 NULL NULL 0 NULL NULL 70 0 0 0 NULL all null,empty,
lastname NULL NULL NULL NULL 11 NULL YES varchar 50 NULL NULL 0 NULL NULL 70 0 0 0 NULL all null,empty,
address NULL NULL NULL NULL 12 NULL YES varchar 100 NULL NULL 0 NULL NULL 70 0 0 0 NULL all null,empty,
pc NULL NULL NULL NULL 13 NULL YES varchar 10 NULL NULL 0 NULL NULL 70 0 0 0 NULL all null,empty,
kixcode NULL NULL NULL NULL 14 NULL YES varchar 20 NULL NULL 0 NULL NULL 70 0 0 0 NULL all null,empty,
date_created NULL NULL NULL NULL 15 (getdate()) NO datetime NULL NULL NULL 1 19 19 0 0 0 70 Many (70) unique,same length,
created_by NULL NULL NULL NULL 16 (user_name()) NO varchar 50 NULL NULL 0 13 13 0 0 0 1 loyalz-public same length,
id_location_created NULL FOREIGN KEY location id_location 17 NULL YES int NULL 10 10 0 1 1 0 0 70 2 1,2 same length,all numeric,
id_individual_type NULL FOREIGN KEY individual_type id_individual_type 18 NULL YES int NULL 10 10 0 NULL NULL 70 0 0 0 NULL all null,empty,
optin NULL NULL NULL NULL 19 NULL YES int NULL 10 10 0 1 1 39 0 31 2 0,1 same length,