SQL Server - MSTVF

Multi-Statement Table Valued Functions (MSTVF)

MSTVFs can be useful, when the number of rows returned is about 100 or less.
Since SQL Server cannot estimate the returned rowcount correctly, it defaults
to a row estimate of 1 or 100 depending on the SQL Server version. Even if the MSTVF
returns a lot of rows, SQL Server opts for serial execution rather than parallel,
due to this underestimation problem.

Let's look at a simple example (SQL Server 2019 running on Compat Level 130)
I am not using interleaved execution or any new fancy features for this demo, which
are supposed to help improve performance.

Step 1 = Change Cost Threshold For Parallelism (CTFP)
WARNING - DO NOT RUN THIS CTFP configuration change ON ANY SERVER without knowing the 
consequences ****. What we're doing is reducing the Query Cost, at which SQL Server 
will decide to go for parallel execution. This is done for this demo, to ensure that 
SQL Server will go parallel at a lower cost than usual

EXEC sp_configure 'show advanced options', 1;

GO

RECONFIGURE

GO

EXEC sp_configure 'cost threshold for parallelism', 10;

GO

RECONFIGURE

GO


Step 2 - Drop Tables If Exists (if you've run this once before)

BEGIN

IF OBJECT_ID(N'dbo.MSTVF_OrderDetails', N'U')   IS NOT NULL DROP TABLE dbo.MSTVF_OrderDetails;

IF OBJECT_ID(N'dbo.MSTVF_Orders', N'U')     IS NOT NULL DROP TABLE dbo.MSTVF_Orders;

IF OBJECT_ID(N'dbo.MSTVF_Users', N'U')      IS NOT NULL DROP TABLE dbo.MSTVF_Users;

IF OBJECT_ID(N'dbo.MSTVF_Products', N'U')   IS NOT NULL DROP TABLE dbo.MSTVF_Products;

IF OBJECT_ID(N'dbo.MSTVF_Manufacturers', N'U')  IS NOT NULL DROP TABLE dbo.MSTVF_Manufacturers;

END


Step 3 = Create Tables

CREATE TABLE dbo.MSTVF_Manufacturers

(

    ManufacturerId              int not null identity PRIMARY KEY,

    ManufacturerName        varchar(100) not null

)


CREATE TABLE dbo.MSTVF_Products

(

    ProductId                       int not null identity PRIMARY KEY,

    ProductName                 varchar(200) not null,

    ManufacturerId              int not null,

    PricePerUnitUSD          money,

    UnitsInStock                  int not null

)

        

CREATE TABLE dbo.MSTVF_Users

(

    UserId                          int not null identity PRIMARY KEY,

    FirstName                   varchar(200) not null,

    LastName                    varchar(200),

    JoinDate                     date not null,

    ShippingAddress        varchar(1000)

)

        

CREATE TABLE dbo.MSTVF_Orders

(

    OrderId             int not null identity PRIMARY KEY,

    UserId              int not null,

    OrderTime           datetimeoffset not null,

    ProductCount        int not null,

    TotalPriceUSD       money,

    DiscountCode        varchar(100),

    MustShipByDate      datetimeoffset,

    ProcessingStatus    bit,

    ShipStatus          bit,

    ReturnStatus        bit

)

        

CREATE TABLE dbo.MSTVF_OrderDetails

(

    OrderDetailId       int not null identity PRIMARY KEY,

    OrderId             int not null,

    ProductId           int not null,

    Units               int not null,

    TotalPriceUSD       money,

    DiscountAmountUSD   money

)


Step 4 = Populate tables

SET NOCOUNT ON;

    

BEGIN

    -- Manufacturers (1000 rows)

    DECLARE @int_RowCount   integer         = 0,

            @tms_Start      datetimeoffset  = getDate();


    WHILE @int_RowCount < 1000 BEGIN

        INSERT INTO dbo.MSTVF_Manufacturers

                    (ManufacturerName)

        SELECT      TOP 100 Name

        FROM        sys.objects


        SET @int_RowCount = @int_RowCount + @@ROWCOUNT;

    END;


    PRINT CAST(@int_RowCount AS varchar(9)) + ' Manufacturers inserted in ' + 

        CONVERT(varchar(50), DATEDIFF(millisecond, @tms_Start, getDate())) + ' ms';

END

GO


BEGIN

    -- Products (10,000 rows)

    DECLARE @int_RowCount   integer         = 0,

            @tms_Start      datetimeoffset  = getDate();


    WHILE @int_RowCount < 10000 BEGIN

        INSERT INTO dbo.MSTVF_Products

                    (ProductName, ManufacturerId, PricePerUnitUSD, UnitsInStock)

        SELECT      TOP 1000 ManufacturerName, ManufacturerId, FLOOR(RAND()*(500)+1), 

                    FLOOR(RAND()*(60)+1)

        FROM        dbo.MSTVF_Manufacturers


        SET @int_RowCount = @int_RowCount + @@ROWCOUNT;

    END;


    PRINT CAST(@int_RowCount AS varchar(9)) + ' Products inserted in ' + 

            CONVERT(varchar(50), DATEDIFF(millisecond, @tms_Start, getDate())) + ' ms';

END

GO


BEGIN

    -- Users (10,000 rows)

    DECLARE @int_RowCount   integer         = 0,

            @tms_Start      datetimeoffset  = getDate();


    WHILE @int_RowCount < 10000 BEGIN

        INSERT INTO dbo.MSTVF_Users

                    (FirstName, LastName, JoinDate, ShippingAddress)

        SELECT      TOP 1000 ManufacturerName, REVERSE(ManufacturerName) AS LastName, 

                    GetDate(), NEWID()

        FROM        dbo.MSTVF_Manufacturers


        SET @int_RowCount = @int_RowCount + @@ROWCOUNT;

    END;


    PRINT CAST(@int_RowCount AS varchar(9)) + ' Users inserted in ' + 

        CONVERT(varchar(50), DATEDIFF(millisecond, @tms_Start, getDate())) + ' ms';

END

GO


BEGIN

    -- Orders (1 million rows)

    DECLARE @int_RowCount   integer         = 0,

            @tms_Start      datetimeoffset  = getDate();


    WHILE @int_RowCount < 1000000 BEGIN

        INSERT INTO dbo.MSTVF_Orders

                    (UserId, OrderTime, ProductCount, TotalPriceUSD, DiscountCode,

                    MustShipByDate, ProcessingStatus, ShipStatus, ReturnStatus)

        SELECT      TOP 10000 UserId, GetDate(), FLOOR(RAND()*(5)+1), 

                    FLOOR(RAND()*(100)+1), 'DC', GetDate(), 1, 0, 0

        FROM        dbo.MSTVF_Users


        SET @int_RowCount = @int_RowCount + @@ROWCOUNT;

    END;


    PRINT CAST(@int_RowCount AS varchar(9)) + ' Orders inserted in ' + 

        CONVERT(varchar(50), DATEDIFF(millisecond, @tms_Start, getDate())) + ' ms';

END

GO


BEGIN

    -- Order Details (3 million rows)

    DECLARE @int_RowCount   integer         = 0,

            @tms_Start      datetimeoffset  = getDate(),

            @int_i          integer         = 1;


    WHILE @int_i < 4 BEGIN

        INSERT INTO dbo.MSTVF_OrderDetails

                    (OrderId, ProductId, Units, TotalPriceUSD, DiscountAmountUSD)

        SELECT      OrderId, 

                    CASE 

                        WHEN OrderId BETWEEN       1 AND   10000 THEN OrderId

                        WHEN OrderId BETWEEN   10001 AND   99999 THEN ROUND(OrderId/10,  0)

                        WHEN OrderId BETWEEN  100000 AND  999999 THEN ROUND(OrderId/100,  0)

                        WHEN OrderId BETWEEN 1000000 AND 9999999 THEN ROUND(OrderId/1000, 0)

                    END AS ProductId,

                    5, 5, 5

        FROM        dbo.MSTVF_Orders


        SET @int_RowCount += @@ROWCOUNT;

        SET @int_i += 1;            

    END


    PRINT CAST(@int_RowCount AS varchar(9)) + ' Order Details inserted in ' + 

        CONVERT(varchar(50), DATEDIFF(millisecond, @tms_Start, getDate())) + ' ms';

END


Step 5 = Check rowcounts

SELECT     'MSTVF_Manufacturers' AS TableName, MIN(ManufacturerId) AS MinId, 

            MAX(ManufacturerId) AS MaxId, COUNT(*) AS RowCounts 

FROM        dbo.MSTVF_Manufacturers UNION ALL

SELECT     'MSTVF_Products' AS TableName, MIN(ProductId) AS MinId, 

            MAX(ProductId) AS MaxId, COUNT(*) AS RowCounts 

FROM        dbo.MSTVF_Products UNION ALL

SELECT     'MSTVF_Users' AS TableName, MIN(UserId) AS MinId, 

            MAX(UserId) AS MaxId, COUNT(*) AS RowCounts 

FROM        dbo.MSTVF_Users UNION ALL

SELECT     'MSTVF_Orders' AS TableName, MIN(OrderId) AS MinId, 

            MAX(OrderId) AS MaxId, COUNT(*) AS RowCounts 

FROM        dbo.MSTVF_Orders UNION ALL

SELECT     'MSTVF_OrderDetails' AS TableName, MIN(OrderDetailId) AS MinId, 

            MAX(OrderDetailId) AS MaxId, COUNT(*) AS RowCounts 

FROM        dbo.MSTVF_OrderDetails;


Step 6 = Add Non-Clustered Indexes to help with Foreign Keys and Queries

BEGIN

    CREATE NONCLUSTERED INDEX Prods_MfrId ON dbo.MSTVF_Products (ManufacturerId);

    CREATE NONCLUSTERED INDEX Ord_UserId ON dbo.MSTVF_Orders (UserId);

    CREATE NONCLUSTERED INDEX OrdDetails_OrderId ON dbo.MSTVF_OrderDetails (OrderId);

    CREATE NONCLUSTERED INDEX MSTVFOrdDet_ProdId_Includes ON 

                              dbo.MSTVF_OrderDetails (ProductId) INCLUDE (OrderId);

END


Step 7 = Add Foreign Keys (optional for this demo)

BEGIN

    ALTER TABLE dbo.MSTVF_Products 

    ADD CONSTRAINT  FK_Prod_Mfr FOREIGN KEY (ManufacturerId) 

    REFERENCES  dbo.MSTVF_Manufacturers (ManufacturerId)

    ON UPDATE   CASCADE


    ALTER TABLE dbo.MSTVF_Orders 

    ADD CONSTRAINT  FK_Ord_User FOREIGN KEY (UserId) 

    REFERENCES  dbo.MSTVF_Users (UserId)

    ON UPDATE   CASCADE


    ALTER TABLE dbo.MSTVF_OrderDetails 

    ADD CONSTRAINT  FK_OrdDet_Order FOREIGN KEY (OrderId) 

    REFERENCES  dbo.MSTVF_Orders (OrderId)

    ON UPDATE   CASCADE


    ALTER TABLE dbo.MSTVF_OrderDetails 

    ADD CONSTRAINT  FK_OrdDet_Product FOREIGN KEY (ProductId) 

    REFERENCES  dbo.MSTVF_Products (ProductId)

    ON UPDATE   CASCADE

END;


Step 8 = Create a MSTVF

CREATE OR ALTER FUNCTION dbo.fn_GetOrderDetails

(

    @int_StartProductId int,

    @int_EndProductId int

)

RETURNS @tab_Output TABLE (OrderDetailId int, OrderId int, ProductId int)

AS

BEGIN

    -- Do Some Stuff to make it a MSTVF

    DECLARE @int_X int;

    

    SET @int_X = 0;

    SET @int_X = @int_X + 1;


    -- Get data into Table Var

    INSERT INTO @tab_Output

    SELECT      OrderDetailId, OrderId, ProductId

    FROM        dbo.MSTVF_OrderDetails

    WHERE       (ProductId BETWEEN @int_StartProductId AND @int_EndProductId);


    RETURN;


END;


Step 9 = Turn on Actual Execution Plan and Statistics IO ON

SET STATISTICS IO ON


Step 10 = Try Direct SQL (TURN ON ACTUAL EXECUTION PLAN)

BEGIN

    DBCC FREEPROCCACHE WITH no_infomsgs

    DBCC DROPCLEANBUFFERS WITH no_infomsgs

                

    DECLARE @tms_Start          datetimeoffset = getdate(),

            @int_StartProductId int = 1000,

            @int_EndProductId   int = 1300;


    SELECT      OD.OrderDetailId, O.OrderId, U.UserId, U.FirstName, U.LastName, O.OrderTime,

                P.ProductId, P.ProductName,

                M.ManufacturerId, M.ManufacturerName

    FROM        dbo.MSTVF_OrderDetails OD 

    JOIN        dbo.MSTVF_Orders O ON OD.OrderId = O.OrderId 

    JOIN        dbo.MSTVF_Users U ON O.UserId = U.UserId  

    JOIN        dbo.MSTVF_Products P ON OD.ProductId = P.ProductId 

    JOIN        dbo.MSTVF_Manufacturers M ON P.ManufacturerId = M.ManufacturerId

    WHERE       OD.ProductId BETWEEN @int_StartProductId AND @int_EndProductId

    ORDER BY    O.OrderId, OrderDetailId;


    PRINT 'Milliseconds Taken for Direct SQL = ' + 

        CONVERT(varchar(50), DATEDIFF(millisecond, @tms_Start, GetDate()));

END


What just happened?
    Table 'MSTVF_Users'. logical reads 373
    Table 'MSTVF_Manufacturers'. logical reads 19
    Table 'MSTVF_Products'. logical reads 10
    Table 'MSTVF_OrderDetails'. logical reads 238
    Table 'MSTVF_Orders'. logical reads 8932
    Milliseconds Taken for Direct SQL = 1263

Degree of Parallelism = Max DOP (4 in my case since I am running this on my laptop)


Step 11 = Try the MSTVF

BEGIN

    DBCC FREEPROCCACHE WITH no_infomsgs

    DBCC DROPCLEANBUFFERS WITH no_infomsgs

        

    DECLARE @tms_Start          datetimeoffset = getdate(),

            @int_StartProductId int = 1000,

            @int_EndProductId   int = 1300;


    SELECT      OD.OrderDetailId, 

                O.OrderId, O.OrderTime,

                U.FirstName, U.LastName, 

                P.ProductId, P.ProductName,

                M.ManufacturerId, M.ManufacturerName

    FROM        dbo.fn_GetOrderDetails (@int_StartProductId, @int_EndProductId) OD 

    JOIN        dbo.MSTVF_Orders O ON OD.OrderId = O.OrderId 

    JOIN        dbo.MSTVF_Users U ON O.UserId = U.UserId 

    JOIN        dbo.MSTVF_Products P ON OD.ProductId = P.ProductId 

    JOIN        dbo.MSTVF_Manufacturers M ON P.ManufacturerId = M.ManufacturerId

    ORDER BY    O.OrderId, OD.OrderDetailId;


    PRINT 'Milliseconds Taken for Direct SQL = ' + 

        CONVERT(varchar(50), DATEDIFF(millisecond, @tms_Start, GetDate()));

END


What just happened? A Lot of Logical Reads happened!
    Table 'MSTVF_Users'. logical reads 207171
    Table 'MSTVF_Orders'. logical reads 307575
    Table 'MSTVF_Manufacturers'. logical reads 200466
    Table 'MSTVF_Products'. logical reads 207159
    Milliseconds Taken for MSTVF = 1264

Degree of Parallelism = 1 😓


Comments

Popular posts from this blog

SQL Server - Scalar UDFs vs Inline

C# App to compare 2 SQL Server Stored Procedure Outputs

Database 101 - The Art of Gathering Requirements