My previous post explored a stored procedure that extended procedure_analyse with the intent of helping DBAs optimize table structure.
Here's an improved version. I've followed Arjen Lentz's suggestion and added support for the max_elements and max_memory parameters.
I also added a new Indexed column to the output, which is an ENUM('No','Yes','Overindexed'). Yes and No are self-explanatory, while Overindexed means the column is present as the left-most part of more than one index. This is useless, it just presents a performance penalty for MySQL (it needs to update more indexes) and if, for instance, you have columns A and B, and you have KEY(A) and KEY (A,B), mysql can use the second index to search for A alone too.
Here's the updated version:
/* extended procedure analyse (C) 2009 Fernando Ipar mail(at)fernandoipar.com GPLv2 */ drop procedure if exists extended_procedure_analyse; delimiter // create procedure extended_procedure_analyse(databaseName varchar(64), tableName varchar(64), max_elements int, max_memory int) begin drop temporary table if exists procedure_analyse_output; drop temporary table if exists tmp_pao; create temporary table procedure_analyse_output ( Field_name varchar(64), Min_value int, Max_value int, Min_length int, Max_length int, Empties_or_zeros int, Nulls int, Avg_value_or_avg_length float, Std float, Optimal_fieldtype text, Actual_fieldtype text, Indexed enum ('No','Yes','Overindexed') default 'No' ); set @table = concat(databaseName,'.',tableName); set @dbName = databaseName; set @tbName = tableName; set @maxEle = max_elements; set @maxMem = max_memory; set @qry = concat('insert into procedure_analyse_output (Field_name,Min_value,Max_value,Min_length,Max_length,Empties_or_zeros,Nulls,Avg_Value_or_avg_length,Std,Optimal_fieldtype) select * from ', @table,' procedure analyse(',@maxEle,',',@maxMem,')'); prepare myStmt from @qry; execute myStmt; update procedure_analyse_output set Field_name = replace(Field_name, CONCAT(databaseName,'.',tableName,'.'),''); prepare myStmt from 'update procedure_analyse_output pao, information_schema.columns c set pao.Actual_Fieldtype = c.column_type where table_schema = ? and table_name = ? and column_name = pao.Field_name'; execute myStmt using @dbName,@tbName; set @qry = concat('select count(*) as `Total_number_of_rows` from ',@table); prepare myStmt from @qry; execute myStmt; set @qry = concat('update procedure_analyse_output pao set Indexed = "Yes\" where exists (select 1 from information_schema.statistics where table_schema = ? and table_name = ? and column_name = pao.Field_name)'); prepare myStmt from @qry; execute myStmt using @dbName,@tbName; create temporary table tmp_pao as select * from procedure_analyse_output; prepare myStmt from 'update tmp_pao set Indexed = "Overindexed" where exists (select Field_name,count(*) from procedure_analyse_output pao inner join information_schema.statistics s on pao.Field_name = s.column_name where table_schema = ? and table_name = ? and seq_in_index = 1 and pao.Field_name = tmp_pao.Field_name group by Field_name having count(*) > 1)'; execute myStmt using @dbName,@tbName; select * from tmp_pao; drop temporary table procedure_analyse_output; drop temporary table tmp_pao; end; // delimiter ;
And here's a sample output:
mysql> call extended_procedure_analyse('test','Account',4,100)\G *************************** 1. row *************************** Total_number_of_rows: 1 1 row in set (0.04 sec) *************************** 1. row *************************** Field_name: InternalAID Min_value: 2147483647 Max_value: 2147483647 Min_length: 18 Max_length: 18 Empties_or_zeros: 0 Nulls: 0 Avg_value_or_avg_length: 1.65632e+17 Std: 0 Optimal_fieldtype: BIGINT(18) UNSIGNED NOT NULL Actual_fieldtype: bigint(20) Indexed: Yes *************************** 2. row *************************** Field_name: accountID Min_value: 12 Max_value: 12 Min_length: 2 Max_length: 2 Empties_or_zeros: 0 Nulls: 0 Avg_value_or_avg_length: 2 Std: NULL Optimal_fieldtype: ENUM('12') NOT NULL Actual_fieldtype: varchar(255) Indexed: Overindexed *************************** 3. row *************************** Field_name: acctBalance Min_value: 2147483647 Max_value: 2147483647 Min_length: 11 Max_length: 11 Empties_or_zeros: 0 Nulls: 0 Avg_value_or_avg_length: 1.00051e+20 Std: 0 Optimal_fieldtype: BIGINT(11) UNSIGNED NOT NULL Actual_fieldtype: double Indexed: Yes *************************** 4. row *************************** Field_name: ownerID Min_value: 2147483647 Max_value: 2147483647 Min_length: 19 Max_length: 19 Empties_or_zeros: 0 Nulls: 0 Avg_value_or_avg_length: 6.57541e+18 Std: 0 Optimal_fieldtype: BIGINT(19) UNSIGNED NOT NULL Actual_fieldtype: bigint(20) Indexed: No 4 rows in set (0.24 sec) Query OK, 0 rows affected, 8 warnings (0.24 sec)
There's a hg repository to handle the project. We're working with @fedesilva to create a standalone java app that will present this and more info in a friendly manner to sysadmins, and to handle the creation and destruction of the stored procedure automatically.