Execute Fisher’s exact test with T-SQL

Fisher’s probability is function depend on cut-off value. In this article, you could draw scatter plot ROC curve and get Fisher’s probability with query below. In ROC curve, you would plot false positive rate on horizontal axis and sensitivity on vertical axis. You could plot cut-off value on horizontal axis and Fisher’s probability on vertical axis. You could get best cut-off value with minimum probability in line graph if minimum probability would be smaller than 0.05.

CREATE TABLE [dbo].[T_DATA]
    (    ID       nchar(8)         NOT NULL, PRIMARY KEY
    ,    Test     decimal(4, 2)    NOT NULL
    ,    Outcome  nchar(1)         NOT NULL
    );
GO
ALTER TABLE T_DATA ADD CONSTRAINT CK_Outcome CHECK (Outcome = '0' OR Outcome = '1');
GO

Run the code below to create function, converting from factorial to sum of logarithm.

CREATE FUNCTION [dbo].[LOG_FACT](
    @SrcNumber FLOAT
)
RETURNS FLOAT
BEGIN
    DECLARE @DesNumber   FLOAT
    SET @DesNumber = LOG(1)
    WHILE @SrcNumber > 0
    BEGIN
        SET @DesNumber = @DesNumber + LOG(@SrcNumber)
        SET @SrcNumber = @SrcNumber - 1
    END
    RETURN @DesNumber
END
GO

Run the code below to create stored procedure that creates cross table from T_DATA.

CREATE PROCEDURE [dbo].[sp_Cut_by_Test]
    @CutOff  decimal(4, 2)
AS
BEGIN
WITH Cross_Table AS
(
  SELECT COUNT(*) AS 'N'
    ,    SUM(CASE WHEN T_DATA.Test <= @CutOff AND T_DATA.Outcome = '1' THEN 1 ELSE 0 END) AS 'a'
    ,    SUM(CASE WHEN T_DATA.Test <= @CutOff AND T_DATA.Outcome = '0' THEN 1 ELSE 0 END) AS 'b'
    ,    SUM(CASE WHEN T_DATA.Test >  @CutOff AND T_DATA.Outcome = '1' THEN 1 ELSE 0 END) AS 'c'
    ,    SUM(CASE WHEN T_DATA.Test >  @CutOff AND T_DATA.Outcome = '0' THEN 1 ELSE 0 END) AS 'd'
    ,    SUM(CASE WHEN                            T_DATA.Outcome = '1' THEN 1 ELSE 0 END) AS 'a+c'
    ,    SUM(CASE WHEN                            T_DATA.Outcome = '0' THEN 1 ELSE 0 END) AS 'b+d'
    ,    SUM(CASE WHEN T_DATA.Test <= @CutOff                          THEN 1 ELSE 0 END) AS 'a+b'
    ,    SUM(CASE WHEN T_DATA.Test >  @CutOff                          THEN 1 ELSE 0 END) AS 'c+d'
    FROM T_DATA
)
  SELECT @CutOff
    ,    Cross_Table.[N]
    ,    Cross_Table.[a]
    ,    Cross_Table.[b]
    ,    Cross_Table.
    ,    Cross_Table.[d]
    ,    Cross_Table.[a+c]
    ,    Cross_Table.[b+d]
    ,    Cross_Table.[a+b]
    ,    Cross_Table.
    ,    Cross_Table.[a]/Cross_Table.[a+c] AS 'Sensitivity'
    ,    Cross_Table.[d]/Cross_Table.[b+d] AS 'Specificity'
    ,    1 - Cross_Table.[d]/Cross_Table.[b+d] AS 'FalsePositive'
    FROM Cross_Table;
END
GO

Run the code below to create stored procedure that calculates Fisher’s probability. @Start means minimum value of test, @End means maximum value of test and @Step means step value of test from @Start to @End. For example, @Start 2.0, @End 4.0 and @Step 0.1, respectively.

CREATE PROCEDURE [dbo].[FisherExactTest]
    (    @Start  decimal(4, 2)
    ,    @End    decimal(4, 2)
    ,    @Step   decimal(4, 2)
    )
AS
BEGIN
    CREATE TABLE #Result
    (    [CutOff]    decimal(4, 2)    NOT NULL
    ,    N    int    NOT NULL
    ,    a    int    NOT NULL
    ,    b    int    NOT NULL
    ,    c    int    NOT NULL
    ,    d    int    NOT NULL
    ,    [a+c]    int    NOT NULL
    ,    [b+d]    int    NOT NULL
    ,    [a+b]    int    NOT NULL
    ,        int    NOT NULL
    ,    Sensitivity    FLOAT    NOT NULL
    ,    Specificity    FLOAT    NOT NULL
    ,    FalsePositive    FLOAT    NOT NULL
    )
    DECLARE @CutOff  decimal(4, 2)
    SET @CutOff = @Start
    WHILE @CutOff <= @End
    BEGIN
       INSERT INTO #Result EXEC sp_Cut_by_Test @CutOff
       SET @CutOff = @CutOff + @Step
    END
  SELECT CutOff
    ,    N
    ,    a
    ,    b
    ,    c
    ,    d
    ,    [a+c]
    ,    [b+d]
    ,    [a+b]
    ,    [c+d]
    ,    Sensitivity
    ,    Specificity
    ,    FalsePositive
    ,    EXP(dbo.LOG_FACT([a+b])
         +   dbo.LOG_FACT([c+d])
         +   dbo.LOG_FACT([a+c])
         +   dbo.LOG_FACT([b+d])
         -   dbo.LOG_FACT(N)
         -   dbo.LOG_FACT(CASE WHEN a = 0 THEN 1 ELSE a END)
         -   dbo.LOG_FACT(CASE WHEN b = 0 THEN 1 ELSE b END)
         -   dbo.LOG_FACT(CASE WHEN c = 0 THEN 1 ELSE c END)
         -   dbo.LOG_FACT(CASE WHEN d = 0 THEN 1 ELSE d END)) AS [FisherExact_Test]
    FROM #Result
   ORDER BY CutOff
END

References;
How to calculate Fisher’s exact test with logarithm?
How to calculate four numbers from marginal total in cross tabulation?
Stored procedure to calculate factorial with natural logarithm

T-SQLでFisherの直接確率検定を行う

 Fisher の確率はカットオフ値の関数であると以前書きました.今回は下記のクエリで ROC 曲線を描き,Fisherの直接確率検定を行います.ROC 曲線では横軸に偽陽性率を取り,縦軸に感度を取ります.カットオフ値を横軸に取り,Fisher の確率を縦軸にグラフを描くと,グラフの最小値に該当するカットオフ値が求めるものとなります.ただし,p < 0.05 であることが条件です.

CREATE TABLE [dbo].[T_DATA]
    (    ID       nchar(8)         NOT NULL, PRIMARY KEY
    ,    Test     decimal(4, 2)    NOT NULL
    ,    Outcome  nchar(1)         NOT NULL
    );
GO
ALTER TABLE T_DATA ADD CONSTRAINT CK_Outcome CHECK (Outcome = '0' OR Outcome = '1');
GO

 下記の関数は階乗を対数の和に変換します.

CREATE FUNCTION [dbo].[LOG_FACT](
    @SrcNumber FLOAT
)
RETURNS FLOAT
BEGIN
    DECLARE @DesNumber   FLOAT
    SET @DesNumber = LOG(1)
    WHILE @SrcNumber > 0
    BEGIN
        SET @DesNumber = @DesNumber + LOG(@SrcNumber)
        SET @SrcNumber = @SrcNumber - 1
    END
    RETURN @DesNumber
END
GO

 下記のストアドプロシージャはカットオフ値を変数化して T_DATA からクロス表を作成します.

CREATE PROCEDURE [dbo].[sp_Cut_by_Test]
    @CutOff  decimal(4, 2)
AS
BEGIN
WITH Cross_Table AS
(
  SELECT COUNT(*) AS 'N'
    ,    SUM(CASE WHEN T_DATA.Test <= @CutOff AND T_DATA.Outcome = '1' THEN 1 ELSE 0 END) AS 'a'
    ,    SUM(CASE WHEN T_DATA.Test <= @CutOff AND T_DATA.Outcome = '0' THEN 1 ELSE 0 END) AS 'b'
    ,    SUM(CASE WHEN T_DATA.Test >  @CutOff AND T_DATA.Outcome = '1' THEN 1 ELSE 0 END) AS 'c'
    ,    SUM(CASE WHEN T_DATA.Test >  @CutOff AND T_DATA.Outcome = '0' THEN 1 ELSE 0 END) AS 'd'
    ,    SUM(CASE WHEN                            T_DATA.Outcome = '1' THEN 1 ELSE 0 END) AS 'a+c'
    ,    SUM(CASE WHEN                            T_DATA.Outcome = '0' THEN 1 ELSE 0 END) AS 'b+d'
    ,    SUM(CASE WHEN T_DATA.Test <= @CutOff                          THEN 1 ELSE 0 END) AS 'a+b'
    ,    SUM(CASE WHEN T_DATA.Test >  @CutOff                          THEN 1 ELSE 0 END) AS 'c+d'
    FROM T_DATA
)
  SELECT @CutOff
    ,    Cross_Table.[N]
    ,    Cross_Table.[a]
    ,    Cross_Table.[b]
    ,    Cross_Table.
    ,    Cross_Table.[d]
    ,    Cross_Table.[a+c]
    ,    Cross_Table.[b+d]
    ,    Cross_Table.[a+b]
    ,    Cross_Table.
    ,    Cross_Table.[a]/Cross_Table.[a+c] AS 'Sensitivity'
    ,    Cross_Table.[d]/Cross_Table.[b+d] AS 'Specificity'
    ,    1 - Cross_Table.[d]/Cross_Table.[b+d] AS 'FalsePositive'
    FROM Cross_Table;
END
GO

 下記のストアドプロシージャはクロス表から Fisher の確率を求めます.@Start とは test の最小値であり, @End は test の最大値, @Step は @Start から @End までの増分のことです.例えばそれぞれ @Start を 2.0 とし,@End を 4.0 とし,@Step を 0.1 とするなどです.

CREATE PROCEDURE [dbo].[FisherExactTest]
    (    @Start  decimal(4, 2)
    ,    @End    decimal(4, 2)
    ,    @Step   decimal(4, 2)
    )
AS
BEGIN
    CREATE TABLE #Result
    (    [CutOff]    decimal(4, 2)    NOT NULL
    ,    N    int    NOT NULL
    ,    a    int    NOT NULL
    ,    b    int    NOT NULL
    ,    c    int    NOT NULL
    ,    d    int    NOT NULL
    ,    [a+c]    int    NOT NULL
    ,    [b+d]    int    NOT NULL
    ,    [a+b]    int    NOT NULL
    ,        int    NOT NULL
    ,    Sensitivity    FLOAT    NOT NULL
    ,    Specificity    FLOAT    NOT NULL
    ,    FalsePositive    FLOAT    NOT NULL
    )
    DECLARE @CutOff  decimal(4, 2)
    SET @CutOff = @Start
    WHILE @CutOff <= @End
    BEGIN
       INSERT INTO #Result EXEC sp_Cut_by_Test @CutOff
       SET @CutOff = @CutOff + @Step
    END
  SELECT CutOff
    ,    N
    ,    a
    ,    b
    ,    c
    ,    d
    ,    [a+c]
    ,    [b+d]
    ,    [a+b]
    ,    [c+d]
    ,    Sensitivity
    ,    Specificity
    ,    FalsePositive
    ,    EXP(dbo.LOG_FACT([a+b])
         +   dbo.LOG_FACT([c+d])
         +   dbo.LOG_FACT([a+c])
         +   dbo.LOG_FACT([b+d])
         -   dbo.LOG_FACT(N)
         -   dbo.LOG_FACT(CASE WHEN a = 0 THEN 1 ELSE a END)
         -   dbo.LOG_FACT(CASE WHEN b = 0 THEN 1 ELSE b END)
         -   dbo.LOG_FACT(CASE WHEN c = 0 THEN 1 ELSE c END)
         -   dbo.LOG_FACT(CASE WHEN d = 0 THEN 1 ELSE d END)) AS [FisherExact_Test]
    FROM #Result
   ORDER BY CutOff
END

参照記事
対数を用いてFisherの直接確率検定を計算するには
周辺度数からクロス表を作成するには
対数により階乗を計算するストアドプロシージャを作成する