﻿:Namespace Operators
⍝ === VARIABLES ===

⍝ TablEj: 27-row by 7-column numeric lookup table; the inner dfn sets ⎕IO to 0 and ⎕FR to 1287 while building it.
⍝ NOTE(review): row 0 looks like a header of probability levels and the remaining rows look keyed by
⍝ n=5..30 in column 0, with ¯1 marking "no entry"; the last column of those rows equals 1+n×(n+1)÷2.
⍝ The published source of the table is not stated here - TODO confirm.
TablEj←27 7⍴({⎕io←0⋄0 0 0.005 0.01 0.025 0.05 1 5,(4/¯1),1 16 6,(3/¯1),1 2 22 7 ¯1 ¯1 0 2 4 29 8 ¯1 0 2 4 6 37 9 ¯1 2 3 6 8 46 10 ¯1 3 5 8 11 56 11 ¯1 5 7 11 14 67 12 ¯1 7 10 14 17 79 13 ¯1 10 13 17 21 92 14 ¯1 13 16 21 26 106 15 ¯1 16 20 25 30 121 16 ¯1 19 24 30 36 137 17 ¯1 23 28 35 41 154 18 ¯1 28 33 40 47 172 19 ¯1 32 38 46 54 191 20 ¯1 37 43 52 60 211 21 ¯1 43 49 59 68 232 22 ¯1 49 56 66 75 254 23 ¯1 55 62 73 83 277 24 ¯1 61 69 81 92 301 25 ¯1 68 77 90 101 326 26 ¯1 76 85 98 110 352 27 ¯1 84 93 107 120 379 28 ¯1 92 102 117 130 407 29 ¯1 100 111 127 141 436 30 ¯1 109 120 137 152 466⊣⎕fr←⍵}1287)


⍝ === End of variables definition ===

⍝ Namespace-level settings: index origin 1, migration level 1, ⎕WX 3.
⍝ Functions below that need other values set ⎕IO/⎕ML themselves.
(⎕IO ⎕ML ⎕WX)←1 1 3

 bayes←{
      ⍝⍟ Copyright (C) 2018 by Stephen M. Mansour
      ⍝∇ Calculate posterior probability given prior and conditional probabilities
      ⍝∊
      ⍝⍺ EVENTS: List of events - show joint and posterior in table
      ⍝⍺         If the left argument also contains 0J1 the table is returned as formatted text
      ⍝⍵ PRIOR: Prior Probabilities
      ⍝⍵ COND:  Conditional Probabilities
      ⍝← POST:  Posterior probabilities or Table
      ⍝⍕ POST ← [0|EVENTS] bayes PRIOR COND
      ⍝⍎ 0.553 0.447 ← bayes (.03 .97)(.8 .02)
      ⍝⍎          'CANCER,NO CANCER' bayes (.03 .97)(.8 .02)
      ⍝⍎  CANCER     0.03 0.8  0.024  0.5529953917
      ⍝⍎  NO CANCER  0.97 0.02 0.0194 0.4470046083
     ⍺←0 ⋄ ⎕ML←3 ⋄ ⎕IO←0                       ⍝ Set system variables
     Posterior←↑(×÷+.×)/⍵                      ⍝ (prior×cond)÷(prior+.×cond)
     ⍺≡0:Posterior                             ⍝ If monadic, done
     Event←toNestedVector ⍺~0J1                ⍝ Parse event list (0J1 is a flag, not an event)
     Joint←↑×/⍵                                ⍝ Calculate joint probability
     ~0J1∊⍺:Event,⍉⊃⍵,Joint Posterior ⍝ No 0J1 flag: return nested table with event names
     Hdr←'      Prior    Conditional    Joint    Posterior'
     Hdr,[¯0.5]←'      P(A)        P(B|A)      P(A∩B)      P(A|B)'
          ⍝ Hdr,[¯0.5]¨←'-'                         ⍝ Construct header
     D←48⍴5 7/' -'                             ⍝ Include Dashes (48 = header width)
     Tbl←'F12.5'⎕FMT{⍵⍪+⌿⍵}⍉⊃⍵,Joint Posterior ⍝ Build table with a column-total row
     Tbl[¯1+≢Tbl;14+⍳14]←'Marginal P(B):'      ⍝ Overwrite part of the total row with the marginal label
     Z←⊃1⌽'Total' ' ' 'Event' ' ',Event        ⍝ Row titles
     Z[2;]←'-'                                 ⍝ Insert Dashes under headers
     Z,←' ',Hdr⍪D⍪Tbl                          ⍝ Append Row, Column Headers
     (¯1↓Z)⍪Z[2;]⍪¯1↑Z                         ⍝ Insert dashes above totals
 }

 clopperPearson←{
     ⍝∇ Clopper-Pearson (exact) confidence interval for a binomial proportion
     ⍝∊ Reference: Table A4 p. 523 Conover, Practical Non-Parametric Statistics, 3rd Ed.
     ⍝⍺ confidence level (default 0.95)
     ⍝⍵ sampleSize
     ⍝⍵ Events
     ⍝⍵ [populationSize]  (unpacked but not used)
     ⍝← confidence interval: lower and upper bound
     ⍺←0.95 ⋄ n x N←3↑⍵
     str←'binom.test(⍵,⍵,conf.level=⍵)' ⍝ Build R expression
     rAv:(4⊃(##.∆r.x str x n ⍺).Value).Value ⍝ Use R when rAv is set
     A←0.5×1-⍺                               ⍝ Tail area on each side
     ⍝ A beta distribution needs both shape parameters positive, so the boundary
     ⍝ cases take the conventional limits: 0 when no events, 1 when all events.
     L←{x=0:0 ⋄ (x,n-x-1)beta critVal>A}⍬    ⍝ n-x-1 is n-(x-1), i.e. shapes x and n-x+1
     U←{x=n:1 ⋄ (x+1)(n-x)beta critVal<A}⍬   ⍝ Shapes x+1 and n-x
     L U
 }

 confMean←{⎕IO←0 ⋄ ⎕ML←3                   ⍝ Conf interval for mean
      ⍝∇ Confidence interval for one mean (f1) or for the difference of two means (f2)
      ⍝⍺ Confidence Level, Population Size (0 switches the finite-population correction off)
      ⍝⍵ Data Representation
      ⍝← Lower and upper confidence limits
     
     C N←⍺
     f1←{C N←⍺ ⋄ ⎕DIV←1                    ⍝ Mean estimate
         rAv:C{str←'t.test(⍵,conf.level=⍵)'⍝ build R expression
             x←{2=⍴⍴⍵:⊃(//↓⍉⌽⍵) ⋄ ⍵}⍵      ⍝ Input to R expression (matrix: replicate col 0 by col 1)
             u←(↑⍺){+##.∆r.x str ⍵ ⍺}¨⊆x   ⍝ Use R if requested
             ⊃(3⊃u.Value).Value}⍵         ⍝ Obtain value from frame
         m←mean ⍵                          ⍝ Sample Average
         v←var ⍵                           ⍝ Sample Variance
         n←count ⍵                         ⍝ Sample Sizes
         se←sqrt(1-(×N)×n÷N⌈1)×v÷n         ⍝ Standard Error w/optional finite correction factor
         df←n-1                            ⍝ Degrees of freedom
         t←df tDist criticalValue<(1-C)÷2  ⍝ Get 2-tail t critical value
         me←t×se                           ⍝ Margin of Error
         m(-,+)me}                         ⍝ LCL, UCL
     f2←{rAv:⍺{                            ⍝ Difference of means
             str←'t.test(⍵,⍵,conf.level=⍵)'⍝ Build R expression (not referenced; the call below builds its own text)
             x←{2=⍴⍴⍵:⊃(//↓⍉⌽⍵) ⋄ ⍵}¨⍵     ⍝ Convert freq to data
             s←')',⍨¨{'c(',('¯'⎕R'-')(' '⎕R',')⍕⍵}¨x ⍝ Format each sample as an R vector c(...)
             u←+##.∆r.x't.test(',(0⊃s),',',(1⊃s),',conf.level=',(⍕⍺),')'
             (3⊃u.Value).Value}⍵           ⍝ Use R if requested
         m←-/mean¨⍵                        ⍝ Difference of the sample means
         n←count¨⍵                         ⍝ Get sample size(s)
         v←(variance¨⍵)÷n                  ⍝ Get sampling variance(s)
         se←(+/v)*÷2                       ⍝ Standard Error(s)
         f3←{((+/⍵)*2)÷(⍵*2)+.÷⍺-1}        ⍝ Calculate degrees
         df←n f3 v                         ⍝   of freedom
         t←df tDist criticalValue<(1-⍺)÷2  ⍝ Get 2-tail t critical value
         me←t×se                           ⍝ Margin of Error
         m(-,+)me}                         ⍝ LCL, UCL
     twoSample ⍵:C f2 ⍵ ⋄ ⍺ f1 ⍵}

 confProp←{⎕IO←0 ⋄ ⎕ML←3 ⋄ C N←⍺
      ⍝∇ Confidence interval for one proportion (f1) or the difference of two proportions (f2)
      ⍝⍺ Confidence level, population size (0 is treated as an infinite population)
      ⍝⍵ Sample data; twoSample decides between the one- and two-sample case
      ⍝← Lower and upper confidence limits
     f1←{⍺←C N ⋄ ⎕DIV←1
         n←count ⍵                              ⍝ Sample size
         p←proportion ⍵                         ⍝ Sample proportion
         C=1:0 1 ⋄ C=0:p p                      ⍝ Extreme cases
         (n≤30)∧N=0:C confPropExact×\n p       ⍝ Small sample, use exact method
             ⍝(n≤30)∧N=0:C clopperPearson×\n p       ⍝ Small sample, use exact method
         rAv:C{n x←×\⍵                          ⍝ If R requested
             str←'prop.test(⍵,⍵,conf.level=⍵,correct = TRUE)'
             u←+##.∆r.x str x n ⍺               ⍝ Build R expression
             (5⊃u.Value).Value}n p              ⍝ Obtain values
         v←p-p*2                                ⍝ Variance
         C N n{C N n←⍺                          ⍝ Margin of Error
             v←⍵-⍵*2
               ⍝   N=0:p{z←normal critVal<(1-C)÷2     ⍝ Infinit Pop.; sampling with replacement
     ⍝                 p(-,+)z×(⍵÷n)*÷2}v             ⍝ Get 2-tailed conf int for z
             N=0:C 1 wilsonScoreInt×\n p       ⍝ Use Wilson Score w/cont correction
             A2←0.5×1(-,+)C
             t←(n-1)tDist critVal<↑A2           ⍝ Finite population sampling w/o replacement
             me←t×sqrt(1-n÷N)×v÷n-1             ⍝ Var(P-hat) Thompson, Sampling p. 58
             cb←p(-,+)me                        ⍝ Approximate conf bounds
             X←n×p,1-p                          ⍝ Counts of 1's and 0's
             (¯1∧.=××/cb∘.-0 1)∧X∧.>10:cb       ⍝ Bounds strictly inside (0,1) and both counts above 10: accept
             C confPropExact×\n p              ⍝ Otherwise use exact method (previously no result was returned here)
         }p
     }
     f2←{
         n←count¨⍵
                 ⍝ p←(+/¨⍵)÷n
         p←proportion¨⍵
         rAv:⍺{n x←×\⍵
             str←'prop.test(⍵,⍵,conf.level=⍵,correct = TRUE)'
             u←+##.∆r.x str x n ⍺
             (5⊃u.Value).Value}n p
         v←p-p*2
         se←0.5*⍨+/v÷n
         z←normal criticalValue<(1-⍺)÷2
         me←z×se                 ⍝ Margin of Error
         (-/p)(-,+)me            ⍝ LCL, UCL around the difference of proportions
     }
         ⍝  (2=≢⍵)∧1=⍴⍴⍵:C f2 ⍵ ⋄ ⍺ f1 ⍵}
     twoSample ⍵:C f2 ⍵ ⋄ ⍺ f1 ⍵}

 confPropExact←{
     ⍝Calculate confidence interval for small sample sizes
     ⍝∊ Reference: Table A4 p. 523 Conover, Practical Non-Parametric Statistics, 3rd Ed.
     ⍝⍺ confidence level (default 0.95)
     ⍝⍵ sampleSize
     ⍝⍵ Events
     ⍝⍵ [populationSize]  (not read: only the first two items of ⍵ are used)
     ⍝← confidence interval
         ⍝ C B←2↑⍺ ⋄ n N←2↑⍵ ⋄ ⎕IO←0
     ⍺←0.95 ⋄ n x←2↑⍵
     str←'binom.test(⍵,⍵,conf.level=⍵)' ⍝ Build R expression
     rAv:(4⊃(##.∆r.x str x n ⍺).Value).Value
     x>n÷2:⌽1-⍺ ∇-\n x                 ⍝ Always use p < 0.5: solve for n-x events, then mirror
     A←0.5×1-⍺                         ⍝ Tail area on each side
     f←{A-n ⍵ binomial prob≤x}         ⍝ Zero at the upper limit
     g←{A-n ⍵ binomial prob≥x}         ⍝ Zero at the lower limit
     p←0.1⌈0.9⌊x÷n                     ⍝ Sample proportion clamped to [0.1,0.9]
     z←normal critVal<A
     B←0.005⌈0.995⌊p(-,+)z×sqrt p×(1-p)÷n ⍝ Normal-approximation bounds, clamped, used as starting points
     ⍝ NOTE(review): SecAlg is presumably a secant-method root finder taking two
     ⍝ starting points followed by their function values - confirm against its definition.
     U←{(n≥28)∧x=0:f SecAlg BB,f¨BB←0.16 0.18 ⍝ Special starting points for this case
         f SecAlg ⍵,f¨⍵}B
     L←{x=0:0                          ⍝ No events: lower limit is 0
         x=1:g SecAlg BB,g¨BB←0.001 0.002
         A≥0.025:g SecAlg ⍵,g¨⍵
         BB←0.05 0.09                  ⍝ Alternative starting points for the listed (x,n) cases
         (x=4)∧n∊10 11:g SecAlg BB,g¨BB
         (x=5)∧n between 16 30:g SecAlg BB,g¨BB
         (x=6)∧n=30:g SecAlg BB,g¨BB
         g SecAlg ⍵,g¨⍵}B
     L U
     
 }

 confTotal←{
     ⍝∇ Confidence interval for a population total
     ⍝⍺  Confidence level, population size
     ⍝⍵  Raw data or summary data
     ⍝←  Lower and upper confidence limits for the total
     level popSize←⍺
     size←count ⍵                                     ⍝ Sample size
     estimate←popSize×mean ⍵                          ⍝ Estimated total
     totVar←popSize×(popSize-size)×(var ⍵)÷size       ⍝ Variance of the estimated total
     tCrit←(size-1)tDist criticalValue<0.5×1-level    ⍝ Two-tailed t critical value
     halfWidth←tCrit×sqrt totVar                      ⍝ Margin of error
     (estimate-halfWidth),estimate+halfWidth
 }

 confVar←{
     ⍝∇ Confidence interval for a variance, or for the ratio of two variances
     ⍝⍺ Confidence level, population size (the population size is unpacked but not used)
     ⍝⍵ One sample, or two samples as recognised by twoSample
     ⍝← Lower and upper confidence limits
     ⎕IO←0 ⋄ ⎕ML←3
     level popSize←⍺
     single←{                                    ⍝ One sample: chi-square interval
         s2←var ⍵                                ⍝ Sample variance
         size←count ⍵                            ⍝ Sample size
         tailAreas←{⍵,1-⍵}(1-⍺)÷2                ⍝ Tail probabilities for the two limits
         dof←size-1                              ⍝ Degrees of freedom
         crit←dof chiSquare criticalValue<tailAreas
         dof×s2÷crit
     }
     ratio←{                                     ⍝ Two samples: F interval for V1÷V2
         s2←÷/var¨⍵                              ⍝ Ratio of sample variances
         size←count¨⍵                            ⍝ Sample sizes
         tailAreas←{⍵,1-⍵}(1-⍺)÷2
         dof←size-1
         s2÷dof fDist criticalValue<tailAreas
     }
     twoSample ⍵:level ratio ⍵
     level single ⍵
 }

 corrHo←{ ⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝ Ho:⍴=R0
      ⍝∇ Test a hypothesised correlation coefficient
      ⍝   Reference: Montgomery pp 48-49
      ⍝⍺ First item 0 selects a two-tailed test, anything else one tail
      ⍝⍵ XX R0: the pair of samples and the hypothesised correlation
      ⍝← Namespace with SampleSize, Estimate, DegreesOfFreedom, TestStatistic, P, dist, ...
     ⎕ML←3 ⋄ ⎕IO←0
     XX R0←⍵
     tails←2*0=↑⍺ ⋄ R←↑corr/XX            ⍝ Sample correlation
         ⍝ t←(r-R0)÷se←sqrt(1-r*2)÷df←n-2       ⍝ Test statistic
     NS←⎕NS''
     NS.SampleSize←N←≢↑XX                  ⍝ Sample size
     NS.Estimate←R
     NS.DegreesOfFreedom←DF←N-2
     NS.TestStatistic←DF{R R0←⍵
         0=R0:R×sqrt ⍺÷1-R*2               ⍝ t statistic when Ho:⍴=0
         (-/¯7○⍵)×sqrt ⍺-1}R R0            ⍝ Fisher z: (atanh R - atanh R0)×sqrt N-3; returned as the result
                                           ⍝ (the former trailing assignment left the dfn without a result)
     NS.dist←tDist
     NS.P←tails×DF tDist prob>|NS.TestStatistic             ⍝ p-Value
     NS.ParameterList←⍬
     NS._Parameter←'⍴'
     R0=0:NS
     NS.P←tails×normal prob>|NS.TestStatistic               ⍝ Non-zero R0: statistic is referred to the normal
     NS.dist←normal
     NS}

 goodnessOfFitR←{
      ⍝⍟ Copyright (c) 2018 by Stephen M. Mansour
      ⍝∇ Perform goodness of fit using R
      ⍝∊ goodnessOfFit
      ⍝⍺ [Parameters]:   Distribution parameters (default 0 1)
      ⍝⍺⍺ fD:  Distribution function:  normal, uniform or multinomial
      ⍝⍵  Xv:  Numeric variable
      ⍝⍵  Av:  Categorical variable
      ⍝⍵  W:   2-column (possibly nested) matrix (Frequency Distribution)
      ⍝←  NS:   Namespace containing the following variables
      ⍝         DegreesOfFreedom  Factors  P  Table  TestStatistic  Type
      ⍝⍕ NS ← fD goodnessOfFit Av|Xv|W
      ⍝⍎ normal goodnessOfFit D.Height
      ⍝⍎ uniform goodnessOfFit D.State
      ⍝ NOTE(review): the body is a dfn and references ⍺ only (no ⍺⍺), although the
      ⍝ header above describes a distribution operand - confirm the calling convention.
     ⎕IO←0 ⋄ ⎕ML←3 ⋄ 0≡↑0⍴⍵:'Nonce Error'                   ⍝ Set system variables; return an error text when the prototype of ⍵ is numeric
     v←⍺{(1=⍴⍴⍵)∧⍺≡⍬:{str←'XSQ <- chisq.test(table(⍵))' ⍝ Count data
             _←##.∆r.x str(⍵⍳⍵) ⋄ ∪⍵}⍵                  ⍝
         1=⍴⍴⍵:⍺{XX←frequency ⍵                         ⍝ Raw data
             i←⍺[;0]⍳XX[;0]                             ⍝
             X←XX[i;1]                                  ⍝
             P←⍺[;1]                                    ⍝
             str←'XSQ <- chisq.test(⍵,p = ⍵)'           ⍝
             _←##.∆r.x str X(P÷+/P)                     ⍝ Probabilities are normalised to sum to 1
             ⍺[;0]}⍵                                    ⍝
         2=↑⌽⍴⍵:{str←'XSQ <- chisq.test(c('             ⍝ Frequency data
             X←⍕1↓,',',⍵[;0],'=',⍵[;,1]                 ⍝  Uniform
             _←##.∆r.x str,X,'))' ⋄ ⍵[;0]}⍵             ⍝   goodness of fit
         3=↑⌽⍴⍵:{str←'XSQ <- chisq.test(⍵,p = ⍵)'       ⍝ 3-columns data
             X P←↓⍉1 1↓⍵                                ⍝  Multinomial
             _←##.∆r.x str X(P÷+/P)                     ⍝   goodness of
             ⍵[;0]~'*'}⍵}⍵                              ⍝   fit
     ns←⎕NS''                                           ⍝ Populate
     ns.TestStatistic←##.∆r.x'XSQ$statistic[[1]]'       ⍝  namespace
     ns.P←##.∆r.x'XSQ$p.value'                          ⍝   with
     ns.DegreesOfFreedom←##.∆r.x'XSQ$parameter[[1]]'    ⍝    assigned
     u←{##.∆r.g'XSQ$',⍵}¨'observed' 'expected' 'residuals' ⍝ properties
     u←{1=≡⍵:⍵.Value ⋄ ⍵}u                              ⍝
     x←{o e r←,¨⍵ ⋄ o,e,(o-e),[0.5]r*2}u                ⍝ Calc table
     ns.Table←{⍵⍪(-1⊃⍴⍵)↑' ' 'Total',{⍵×1E¯10<|⍵}+⌿¯4↑[1]⍵}v,x ⍝ Append a totals row; tiny sums are shown as 0
     ns.Type←'GoodnessOfFit'                            ⍝
     ns.Factors←'CAT1' 'CAT2'                           ⍝ Dummyvalues
     ns._Distribution←(1=≢∪x[;1])⊃'Multinomial' 'Uniform' ⍝ Uniform when all expected counts are equal
     ns}

 ind←{
     ⍝ Short name for independent; a missing left argument is passed on as ⍬
     ⍺←⍬
     ⍺ independent ⍵
 }

 independent←{
      ⍝⍟ Copyright (C) 2018 by Stephen M. Mansour
      ⍝∇ Test of Independence
      ⍝∊
      ⍝⍺ VARIABLE1: Categorical variable
      ⍝⍵ VARIABLE2: Categorical variable
      ⍝⍵ DataBase:  NameSpace
      ⍝  VarNames:  Simple variable
      ⍝⍵ ContingencyTable
      ⍝← NAMESPACE containing the following variables
      ⍝  DegreesOfFreedom  Factors  P  Table  TestStatistic  Type
      ⍝⍕ [Variable1] independent Variable2|ContingencyTable|NameSpace VarNames
      ⍝⍎ D.Sex independent D.Party
      ⍝⍎ independent D 'Sex Party'
      ⍝⍎ independent frequency D.Sex D.Party
     ⎕ML←3 ⋄ ⎕IO←0 ⋄ ⍺←⍬ ⋄ ⎕DIV←1      ⍝ Set system variables
     ⍵≡0:¯1                            ⍝ Exit if improper arg
     d nms←⍺{nms←'Factor1' 'Factor2'   ⍝ Separate data from names
         ⍺≢⍬:(frequency ⍺ ⍵)nms        ⍝ If left arg, build contingency table
         ns←↑⍵                         ⍝ First item of right arg
         b←9≠⎕NC'ns'                   ⍝ 1 if the first item is NOT a namespace
         b∧(,2)≡⍴⍵:(frequency ⍵)nms    ⍝ Two variables: build contingency table
         b:⍵ nms                       ⍝ Otherwise ⍵ already is a contingency table
         nms←' 'toNestedVector 1⊃⍵     ⍝ Namespace case: partition namelist
         tbl←frequency⍎⍕'ns.(,'nms,')' ⍝   Build contingency table
         tbl nms}⍵                     ⍝ (unreachable statements that followed this result were removed)
     rAv:nms independentR d            ⍝ If requested call R
     obs←1 1↓d                         ⍝ Remove header from observed counts
     exp←(+/∘.×+⌿÷+/∘,)obs             ⍝ Expected: row sums ∘.× column sums ÷ grand total
     r←obs-exp                         ⍝ Get differences
     v←⊃,↑∘.{⍺ ⍵}/1↓¨(d[;0])(d[0;])    ⍝ Every (row label, column label) pair
     X2←+/,(2*⍨obs-exp)÷exp            ⍝ Chi-Square test Statistic
     df←1×.-⍴obs                       ⍝ Degrees of Freedom: (rows-1)×(columns-1)
     e2←(r*2)÷exp                      ⍝ Squared errors
     tbl←v,,[⍳2]obs,exp,r,[1.5]e2      ⍝ Build explanatory table
     tbl⍪←(-1⊃⍴tbl)↑' ' 'Total',{⍵×⎕CT<|⍵}+⌿¯4↑[1]tbl ⍝ Totals row; sums within ⎕CT of 0 shown as 0
     tbl←(nms,'Observed' 'Expected' 'Difference' 'ChiSquare')⍪tbl
     p←df chiSquare prob>X2            ⍝ p-Value
     ns←⎕NS''                          ⍝ Create namespace for output
     ns.(TestStatistic P)←X2 p         ⍝ Insert variables
     ns.DegreesOfFreedom←df            ⍝    "      "
     ns.Table←tbl                      ⍝    "      "
     ns.Type←'Independent'             ⍝    "      "
     ns._Distribution←'Independent'    ⍝    "      "
     ns.Factors←nms                    ⍝    "      "
     ns                                ⍝ Result
 }

 independentR←{
     ⍝∇ Chi-square test of independence carried out through the R interface
     ⍝⍺ Two factor names (used as R dimnames and stored as Factors)
     ⍝⍵ Contingency table with row labels in column 0 and column labels in row 0
     ⍝← Namespace with TestStatistic, P, DegreesOfFreedom, Table, Factors, Type, _Distribution
     ⎕IO←0 ⋄ ⎕ML←3
     fn←{0=↑0⍴⍵:⍕⍵ ⋄ q,⍵,q←'"'}                      ⍝ Numbers are formatted, text is wrapped in double quotes
     rn cn←{∊{⍺,','⍵}/fn¨⍵}¨1↓¨(⍵[;0])(⍵[0;])       ⍝ Comma-separated row and column labels
     rows←↓1 1↓⍵                                     ⍝ Counts, one vector per row
     str←'M <- as.table(rbind(',1↓∊(≢rows)⍴⊂',⍵'     ⍝ One ⍵ placeholder per row
     _←+##.∆r.x(⊂str,'))'),rows
     str←'dimnames(M) <- list(',(0⊃⍺),' = c(',rn,')'
     str,←',',(1⊃⍺),' = c(',cn,'))'
     _←+##.∆r.x str
     _←+##.∆r.x'XSQ <- chisq.test(M)'
     ns←⎕NS''
     ns.TestStatistic←##.∆r.x'XSQ$statistic[[1]]'
     ns.P←##.∆r.x'XSQ$p.value'
     ns.DegreesOfFreedom←##.∆r.x'XSQ$parameter[[1]]'
     u←{##.∆r.g'XSQ$',⍵}¨'observed' 'expected' 'residuals'
     x←{o e r←,¨⍵ ⋄ o,e,(o-e),[0.5]r*2}u.Value       ⍝ Columns: observed, expected, difference, squared residual
     v←⊃,↑∘.{⍺ ⍵}/1↓¨(⍵[;0])(⍵[0;])                  ⍝ Every (row label, column label) pair
     ns.Table←{⍵⍪(-1⊃⍴⍵)↑' ' 'Total',{⍵×1E¯10<|⍵}+⌿¯4↑[1]⍵}v,x ⍝ Append a totals row; tiny sums are shown as 0
     ns.Factors←⍺
     ns.Type←'Independent'
     ns._Distribution←'Independent'
     ns
 }

 interceptHo←{
     ⍝∇ t-test for the intercept of a simple linear regression, Ho:β₀=Beta0
     ⍝⍺ First item 0 selects a two-tailed test, anything else one tail
     ⍝⍵ (Y X) Beta0: response and predictor, then the hypothesised intercept
     ⍝← Namespace with Estimate, SampleSize, StandardError, TestStatistic, P, ...
     ⎕ML←3 ⋄ ⎕IO←0
     data hyp←⍵
     resp pred←data
     tails←2*0=↑⍺
     size←≢resp                                     ⍝ Sample size
     dof←size-2                                     ⍝ Degrees of freedom
     coef←resp⌹1,⍪pred                              ⍝ Least squares intercept and slope
     fitted←(⍪pred)⊥⌽coef                           ⍝ Fitted values
     mse←dof÷⍨(resp-fitted)+.*2                     ⍝ Mean squared error
     sxx←(pred+.*2)-(×⍨+/pred)÷≢pred                ⍝ Corrected sum of squares of the predictor
     stdErr←sqrt mse×+/÷size,sxx÷(mean pred)*2      ⍝ Standard error of the intercept
     tStat←(coef[0]-hyp)÷stdErr                     ⍝ Test statistic
     pVal←tails×dof tDist prob>|tStat               ⍝ p-Value
     out←⎕NS''
     out.Estimate←coef[0]
     out.SampleSize←size
     out.StandardError←stdErr
     out.TestStatistic←tStat
     out.P←pVal
     out.DegreesOfFreedom←dof
     out.ParameterList←⍬
     out._Parameter←'β₀'
     out.dist←tDist
     out
 }

 isconnected←{
     ⍝ 1 if evaluating '2+2' through ⍵.x completes, 0 if any error is signalled
     0::0
     _←⍵.x'2+2'
     1
 }

 meanHo←{       ⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝ One or two-sample t-test
     ⍝⍺ r s: relation code (0 selects two tails) and two-sample flag
     ⍝⍵ Two samples when s is 1; otherwise a sample followed by the hypothesised mean
     ⍝← Namespace with Estimate, SampleSize, StandardDeviation, StandardError, TestStatistic, P, ...
     ⎕IO←0 ⋄ ⎕ML←3
     r s←⍺ ⋄ tails←2*r=0 ⋄ x←⍵            ⍝
     x_←{s:mean¨⍵ ⋄ (mean↑⍵),1↓⍵}x        ⍝ Mean(s); one-sample: sample mean followed by the remaining item(s)
     n←(-~s)↓{9=⎕NC'⍵':⍵.count ⋄ ↑⍴⍵}¨x   ⍝ Sample Size(s); the last item is dropped in the one-sample case
     v←{s:var¨⍵ ⋄ var↑⍵}x                 ⍝ Sample Variance(s)
     se←(v+.÷n)*÷2                        ⍝ Standard Error
     g←{((+/⍵)*2)÷(⍵*2)+.÷⍺-1}            ⍝ Calculate Degrees
     df←g{⍵:⌊n ⍺⍺ v÷n ⋄ ↑n-1}s            ⍝ Degrees of Freedom: g applied to v÷n, floored, for two samples; else n-1
     t←(-/x_)÷se                          ⍝ Test Statistic
     t←r{⍺=0:|⍵ ⋄ ⍵×⍺}t                   ⍝ Two-tailed: magnitude; otherwise multiplied by r
     p←df tDist probability>t             ⍝ p-Value
         ⍝ dd←(1 s∘/¨x_ n(v*0.5)),se t(p×tails)df
     ns←⎕NS''
     ns.(Estimate SampleSize StandardDeviation)←1 s∘/¨x_ n(v*0.5)
     ns.StandardError←se                  ⍝ Standard Error
     ns.TestStatistic←t
     ns.P←p×tails                         ⍝ p-Value
     ns.DegreesOfFreedom←df
     ns.ParameterList←df
     ns._Parameter←'µ'
     ns.dist←tDist
     ns
 }

 medianHo←{       ⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝
      ⍝∇ Median test: mannWhitney when the last item of ⍵ has more than one element, otherwise a sign test
      ⍝⍺ r s: relation code (0 selects two tails) and the flag used when filling Estimate/SampleSize
      ⍝⍵ Two items whose difference is tested
      ⍝← Namespace with Estimate, SampleSize, StandardError, TestStatistic, P, ...
     ⎕IO←0 ⋄ ⎕ML←3
     r s←⍺
     1<↑⌽N←≢¨⍵:Z←↑r mannWhitney/(>/N)⌽⍵
     D←↑-/⍵                               ⍝ Get differences
     FR←+/¯1 1∘.=×D                       ⍝ Frequency of - and +
     X N←(⌊/,+/)FR                        ⍝ Less frequent sign count; number of non-zero differences
     Z←N{⍺>25:|(1-⍺-2×⍵)÷⍺*÷2 ⋄ ⍵}X       ⍝ Test Statistic (normal approximation above 25)
        ⍝  CV←N 0.5 binomial criticalValue>⍺   ⍝ Critical Value
     P←r{N X Z←⍵ ⋄ Tails←2*r=0
         N>25:Tails×normal prob>Z
         P←Tails×N 0.5 binomial prob<X
         P+N 0.5 binomial prob=X}N X Z    ⍝ Return the value: the former trailing assignment
                                          ⍝ left the dfn without a result when N≤25
     se←0.5×N*÷2                          ⍝ Standard Error?
        ⍝  (1 s∘/¨(median,count)↑⍵),⍬ se Z P ⍬}
     ns←⎕NS''
     ns.(Estimate SampleSize)←1 s∘/¨(median,count)↑⍵
         ⍝ns.(Estimate SampleSize)←1 s∘/¨(median↑⍵)N
     ns.StandardError←se
     ns.TestStatistic←(N>25)⊃X Z
     ns.P←P
     ns.DegreesOfFreedom←(N≤25)/N
     ns.StandardDeviation←⍬
     ns._Parameter←'η'
     ns
 }

 monadicHypothesis←{
       ⍝∇ Special case hypotheses
       ⍝∇ Equal variances and/or three or more groups
       ⍝⍺ r c i: relation code, claim flag and parameter code (i selects the test in f below)
       ⍝⍵ Groups: a 2-column matrix of names and values, or a list of samples
       ⍝← Namespace of results (returned directly when the selected test already yields one)
     ⎕IO←0 ⋄ ⎕ML←3
     r c i←⍺ ⋄ s←≢⍵                           ⍝ Number of groups
          ⍝w←{2=⍴⍴⍵:⍵[;0]stats¨↓0 1↓⍵ ⋄ ⍵}⍵            ⍝ Group
     n w←{2=⍴⍴⍵:↓⍉⍵ ⋄ ('G',¨1↓⎕D↑⍨1+≢⍵)⍵}⍵              ⍝ Group names, values (default names G1, G2, ...)
     
     meanHoEqV←{⍝Two-sample t-test; equal variances
            ⍝  r←⍺ ⋄ tails←2*r=0 ⋄ x←⍵              ⍝
         r←⍺ ⋄ tails←2*r∊3 4 ⋄ x←⍵
         m←mean¨x
         n←{9=⎕NC'⍵':⍵.count ⋄ ≢⍵}¨x
         v←var¨x
         pv←(v+.×n-1)÷df←+/n,-2                 ⍝ Pooled variance; df is the total count less 2
         se←sqrt pv+.÷n                         ⍝ Standard Error
         t←m-.÷se                               ⍝ Test Statistic
             ⍝t←r{⍺=0:|⍵ ⋄ ⍵×⍺}t                     ⍝ Test Statistic
         t←|⍣(r=0)⊢t                            ⍝ Magnitude only when r is 0
         p←df tDist probability>t               ⍝ p-Value
         m n(v*0.5),se t(p×tails)df}            ⍝ Seven items: estimate, size, sdev, se, t, p, df
     f←{(r s)←⍺
         (i=3)∧s=2:r meanHoEqV ⍵                ⍝ Determine
         (i=3)∧s>2:anova ⍵                      ⍝   type
         (i=¯1)∧s=2:independent/⍵
         (i=¯1)∧s>2:uniform goodnessOfFit ⍵     ⍝     of
         (i=2)∧s=2:↑r mannWhitney/(>/count¨⍵)⌽⍵ ⍝      test
         (i=2)∧s>2:kruskalWallis ⍵
     
             ⍝ 'Nonce Error' ⎕signal 0               ⍝       of
         i=0:⍺ corrHo ⍵}                        ⍝ NOTE(review): no result when none of the guards match
     dd←r s f w                                 ⍝ Apply the test
     9=⎕NC'dd':dd                               ⍝ The test already returned a namespace
     ns←⎕NS''                                 ⍝ Create namespace for output
     ns.(Estimate SampleSize StandardDeviation StandardError)←4↑dd
     ns.(TestStatistic P DegreesOfFreedom)←4↓dd ⍝ Put variables into namespace
     ns._Populations←s+1                      ⍝ One or two populations?
         ⍝ns._Relation←(¯1 0 1⍳r)⊃'<≠>'            ⍝ One or two tails?
     ns._Relation←r⊃'*><≠≠<>'
     ns._Parameter←(¯1 0 2 3 7⍳i)⊃'p⍴ηµ',⌽,\'σ²'  ⍝ Parameter of interest
     ns._Claim←c                              ⍝ Claim is H1 if <, > or ≠, otherwise H0
     ns._HypothesizedValue←0                  ⍝ Assumed value
     ns._Paired←0
     ns.Type←'Hypothesis' ⋄ ns}

 pointBiserial←{
     ⍝∇ For each question, correlate the item score with the total of the remaining items
     ⍝⍵  Boolean Matrix:  Students x Questions
     ⍝←  One correlation per question
     ⎕IO←0 ⋄ ⎕ML←3
     M←⍉(+/⍵)-⍤0 1⊢⍵ ⋄ B←⍉⍵          ⍝ M: each student's total less each item, one row per question; B: item scores, same layout
     f←{((≢⍵)×⍺+.×⍵)-⍺×⍥(+/)⍵}⍤1    ⍝ Row by row: n×Σxy - (Σx)×(Σy)
         ⍝ f←{((≢⍵)×⍺+.×⍵)-×/+/⊃⍺ ⍵}⍤1
     (M f B)÷(M×⍥(f⍨)B)*÷2           ⍝ Divide by the square root of (M f M)×(B f B)
 }

 proportionHo←{  ⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝ One or two sample Z-test
     ⍝⍺ r s: relation code (0 selects two tails) and two-sample flag
     ⍝⍵ Two samples when s is 1; otherwise a sample followed by the hypothesised proportion
     ⍝← Namespace with Estimate, SampleSize, StandardError, TestStatistic, P, ...
     ⎕ML←3 ⋄ ⎕IO←0 ⋄ ⎕DIV←1
     r s←⍺ ⋄ tails←2*r=0
     b←(-~s)↓b1 b2←⍵                      ⍝ Keep only the sample(s)
     p0←s⊃b2 0                            ⍝ Hypothesized Value (0 for two samples)
     x←{9=⎕NC'⍵':⍵.events ⋄ +/⍵}¨b        ⍝ Number of successes
     n←{9=⎕NC'⍵':⍵.count ⋄ ↑⍴⍵}¨b         ⍝ Sample Sizes
     f←{~s:(p0×1-p0)÷↑⍵                   ⍝ One sample: variance under the hypothesised value
         (+/÷⍵)×{⍵×1-⍵}÷/+/⊃⍺ ⍵}          ⍝ Two samples: variance from the pooled proportion
     se←(x f n)*÷2                        ⍝ Standard Error
     z←(p0-⍨x-.÷n)÷se                     ⍝ Get Z-statistic
     z←r{⍺=0:|⍵ ⋄ ⍺×⍵}z                   ⍝ Two-tailed: magnitude; otherwise multiplied by r
     p←normal probability<z
         ⍝ (1 s∘/¨(x÷n)n),⍬ se z(tails×1-p)⍬}
     ns←⎕NS''
     ns.(Estimate SampleSize)←(1 s∘/¨(x÷n)n)
     ns.StandardError←se                  ⍝ Standard Error
     ns.TestStatistic←z
     ns.P←tails×1-p                       ⍝ p-Value
     ns.DegreesOfFreedom←⍬
     ns.ParameterList←0 1
     ns.dist←normal
     ns._Parameter←'p'
     ns
 }

∇ rel
⍝ NOTE(review): empty function body; presumably a placeholder - confirm whether anything still refers to it
∇

 slopeHo←{
     ⍝∇ t-test for the slope of a simple linear regression, Ho:β₁=Beta1
     ⍝⍺ First item 0 selects a two-tailed test, anything else one tail
     ⍝⍵ (Y X) Beta1: response and predictor, then the hypothesised slope
     ⍝← Namespace with Estimate, SampleSize, StandardError, TestStatistic, P, ...
     ⎕ML←3 ⋄ ⎕IO←0
     data hyp←⍵
     resp pred←data
     tails←2*0=↑⍺
     size←≢resp                                   ⍝ Sample size
     dof←size-2                                   ⍝ Degrees of freedom
     coef←resp⌹1,⍪pred                            ⍝ Least squares intercept and slope
     fitted←(⍪pred)⊥⌽coef                         ⍝ Fitted values
     resSd←sqrt dof÷⍨(resp-fitted)+.*2            ⍝ Residual standard deviation
     stdErr←resSd÷(sdev pred)×sqrt size-1         ⍝ Standard error of the slope
     tStat←(coef[1]-hyp)÷stdErr                   ⍝ Test statistic
     pVal←tails×dof tDist prob>|tStat             ⍝ p-Value
     out←⎕NS''
     out.Estimate←coef[1]
     out.SampleSize←size
     out.StandardError←stdErr
     out.TestStatistic←tStat
     out.P←pVal
     out.DegreesOfFreedom←dof
     out.ParameterList←⍬
     out._Parameter←'β₁'
     out.dist←tDist
     out
 }

 ugoodnessOfFitR←{
      ⍝∇ Goodness of fit through the R interface (variant of goodnessOfFitR that stores the p-value as pValue)
      ⍝⍺ ⍬ for count data, or a 2-column matrix of categories and probabilities
      ⍝⍵ Vector of raw data, or a 2- or 3-column frequency matrix
      ⍝← Namespace with TestStatistic, pValue, DegreesOfFreedom, Table, Type, Factors, _Distribution
     ⎕IO←0 ⋄ ⎕ML←3      ⍝ The body indexes from 0 and uses ↑ as "first", exactly like goodnessOfFitR;
                        ⍝ without these settings it would run under the namespace values 1 1
     v←⍺{(1=⍴⍴⍵)∧⍺≡⍬:{str←'XSQ <- chisq.test(table(⍵))'  ⍝ Count data
             _←##.∆r.x str(⍵⍳⍵) ⋄ ∪⍵}⍵
         1=⍴⍴⍵:⍺{XX←frequency ⍵                         ⍝ Raw data
             i←⍺[;0]⍳XX[;0]
             X←XX[i;1]
             P←⍺[;1]
             str←'XSQ <- chisq.test(⍵,p = ⍵)'
             _←##.∆r.x str X(P÷+/P)                     ⍝ Probabilities are normalised to sum to 1
             ⍺[;0]}⍵
         2=↑⌽⍴⍵:{str←'XSQ <- chisq.test(c('             ⍝ Frequency data
             X←⍕1↓,',',⍵[;0],'=',⍵[;,1]
             _←##.∆r.x str,X,'))' ⋄ ⍵[;0]}⍵
         3=↑⌽⍴⍵:{str←'XSQ <- chisq.test(⍵,p = ⍵)'       ⍝ 3-column data
             X P←↓⍉1 1↓⍵
             _←##.∆r.x str X(P÷+/P)
             ⍵[;0]~'*'}⍵}⍵
     ns←⎕NS''
     ns.TestStatistic←##.∆r.x'XSQ$statistic[[1]]'
     ns.pValue←##.∆r.x'XSQ$p.value'
     ns.DegreesOfFreedom←##.∆r.x'XSQ$parameter[[1]]'
     u←{##.∆r.g'XSQ$',⍵}¨'observed' 'expected' 'residuals'
     u←{1=≡⍵:⍵.Value ⋄ ⍵}u
     x←{o e r←,¨⍵ ⋄ o,e,(o-e),[0.5]r*2}u                ⍝ Columns: observed, expected, difference, squared residual
     ns.Table←{⍵⍪(-1⊃⍴⍵)↑' ' 'Total',{⍵×1E¯10<|⍵}+⌿¯4↑[1]⍵}v,x ⍝ Append a totals row; tiny sums are shown as 0
     ns.Type←'GoodnessOfFit'
     ns.Factors←'CAT1' 'CAT2' ⍝ Dummyvalues
     ns._Distribution←(1=≢∪x[;1])⊃'Multinomial' 'Uniform'
     ns}

 varianceHo←{   ⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝⍝ Chi-Square or F-test
     ⍝⍺ r s: relation code (0 selects two tails) and two-sample flag
     ⍝⍵ Two samples when s is 1; otherwise a sample followed by the hypothesised variance
     ⍝← Namespace with TestStatistic, P, Estimate, SampleSize, DegreesOfFreedom, dist, ...
     ⎕IO←0 ⋄ ⎕ML←3
     r s←⍺ ⋄ tails←2*r=0 ⋄ d1 d2←⍵        ⍝
     d←(-~s)↓d1 d2←⍵                      ⍝ Keep only the sample(s); d2 stays available to TestX2
     v←var¨d                              ⍝ Sample Variance
     n←{9=⎕NC'⍵':⍵.count ⋄ ↑⍴⍵}¨d         ⍝ Sample Sizes
     df←n-1                               ⍝ Degrees of Freedom
     TestX2←{x2←v×⍵÷d2                    ⍝ Test Statistic: variance × df ÷ hypothesised value
         p←↑⍵ chiSquare probability>¨x2   ⍝
         x2 p}                            ⍝ Chi-Square for 1 sample
     TestF←{ts←÷/v                        ⍝ Test Statistic: ratio of the variances
         p←⍵ fDist probability>ts         ⍝ p-Value
         ts p}                            ⍝ F-test for 2 samples
     dd←{1=⍴⍵:TestX2 ⍵ ⋄ TestF ⍵}df       ⍝ One df value means one sample
     ns←⎕NS''
     ns.TestStatistic←↑dd
     ns.P←tails{⍺=1:⍵ ⋄ 2×⌊/⍵,1-⍵}1⊃dd    ⍝ Two tails: twice the smaller tail area
     ns.Estimate←v
     ns.SampleSize←n
     ns.StandardDeviation←⍬
     ns.StandardError←⍬
     ns.DegreesOfFreedom←df
     ns.ParameterList←df
     ns.dist←chiSquare
     ns._Parameter←'σ²'
     1=≢df:ns
     ns.dist←fDist                        ⍝ Two samples: the reference distribution is F
     ns
 }

 wilsonScoreInt←{
      ⍝∊ Written by Steve Mansour
      ⍝∇ Wilson Score Interval
      ⍝⍺  Confidence level (default 0.95)
      ⍝⍺  Continuity Correction flag (default 1)
      ⍝⍵  N       I    Sample Size
      ⍝⍵  Events
      ⍝←  Confidence interval: lower and upper bound
     ⍺←0.95 1 ⋄ ⎕ML←1 ⋄ CL CC←2↑⍺,1 ⋄ n x←⍵ ⍝ Default conf is 95%; cont correction=1
     str←'prop.test(⍵,⍵,conf.level=⍵,correct=⍵)'      ⍝ Build R expression
     rAv:(6⊃(##.∆r.x str x n CL CC).Value).Value
     p←x÷n                                  ⍝ Sample Proportion
     z←normal criticalValue<0.5×1-CL        ⍝ CriticalValue
     z2←z*2                                 ⍝ z-squared
     CC:{n p←⍵
         A←z2+2×n×p                         ⍝ Continuity correction
         B←1+z×sqrt(z2-÷n)+(4×n×p-p*2)(-,+)(2-4×p) ⍝ Two values: one for the lower, one for the upper bound
         C←2×n+z2
         0⌈1⌊C÷⍨A+B×¯1 1}n p                ⍝ Clamp both bounds to [0,1]
     x←÷/p 1+z2÷n×2 1                       ⍝ Point Estimate (x is reused here as the interval centre)
     me←(z÷1+z2÷n)×sqrt(z2÷4×n*2)+(p×1-p)÷n ⍝ Margin of error
     0⌈x(-,+)me                             ⍝ Confidence Interval
 }

 _discreteprob←{
⍝⍟   Copyright 2020 by Stephen M. Mansour
⍝∇   Probability that a discrete variable satisfies a relation, from raw data or a frequency table
⍝⍺⍺  Vector of raw data, or 2-column matrix: values in column 0, (relative) frequencies in column 1
⍝⍵⍵  Relational function, e.g.  >  =
⍝⍵   Comparison value
⍝←   Share of the items (raw data) or of the total frequency (table) for which the relation holds
⍝⍕   P←Xv|FD _discreteprob fR Y
⍝⍎   0.5←1 2 3 4 _discreteprob > 2
⍝⍎   (8÷38)←(5 2⍴0 2 1 17 2 11 3 7 4 1) _discreteprob > 2
     ⎕IO←0 ⋄ ⎕ML←3
     source←⍺⍺
     1=⍴⍴source:{(≢⍵/source)÷≢source}source ⍵⍵ ⍵   ⍝ Raw data: selected items ÷ all items
     chosen←source[;0]⍵⍵ ⍵                          ⍝ Table: which values satisfy the relation
     weights←source[;1]                             ⍝ Their frequencies
     (chosen+.×weights)÷+/weights                   ⍝ Selected frequency ÷ total frequency
 }


 _indprob←{
⍝⍟   Copyright 2020 by Stephen M. Mansour
⍝∇   Calculate probabilities of independent events using logic rules
⍝⍺   First probability
⍝⍺⍺  f Logical function: e.g. (^ and) (∨ or)
⍝⍵   Second probability
⍝⍕   P←.5 f _indprob .3
⍝⍎   P←.2 (∧ _indprob) .5        ⍝ Marginal prob
     ⎕IO←0 ⋄ ⎕ML←3 ⋄ ⍺←0 1       ⍝ Set defaults
     2=⎕NC'⍺⍺':⍺⍺                ⍝ Array operand: return it unchanged
     ⍺≡0 1:(⍺⍺ 1 0)+.×⍵,1-⍵      ⍝ Monadic probability: weight ⍺⍺ 1 0 by ⍵ and 1-⍵
     T←⍺(1-⍺)∘.×⍵,1-⍵            ⍝ Contingency table: products of the marginal probabilities
     T←'*' 'A1' 'A2','B1' 'B2'⍪T ⍝ Add pseudo "labels"
     'A1'(⍺⍺ _prob T)'B1'        ⍝ Get probability
 }


 _prob←{
⍝⍟   Copyright 2020 by Stephen M. Mansour
⍝∇   Calculate probabilities using logic rules
⍝⍺   A Variable 1 Name (Optional)
⍝⍺⍺  f Logical function:  (~ not) (^ and) (∨ or) (| conditional)
⍝⍵⍵  T Contingency or probability table, vector or 2-vector of vectors
⍝⍵   B Variable 2 Name
⍝⍕   P←'A' (f prob T) 'B'
⍝⍎   Table←frequency D.Sex D.Party        ⍝ Contingency table
⍝⍎   P←(⊢ prob Table)'Female'             ⍝ Marginal prob
⍝⍎   P←(~ prob Table)'Republican'         ⍝ Complement rule
⍝⍎   P←'Male' (∧ prob Table)'Republican'  ⍝ Multiplication rule
⍝⍎   P←'Female' (∨ prob Table)'Democrat'  ⍝ Addition rule
⍝⍎   P←'Republican' (| prob Table)'Male'  ⍝ Conditional rule
⍝    NOTE(review): the examples above use the name prob; this operator is defined as _prob.
     ⎕IO←0 ⋄ ⎕ML←3 ⋄ ⎕DIV←1 ⋄ ⍺←0 1       ⍝ Set defaults
     2∊≡¨⍺ ⍵:⍺ ∇¨⍥⊆⍵                      ⍝ Scalar extension
     1=⍴⍴⍵⍵:⍺(⍺⍺ ∇∇(frequency ⍵⍵))⍵       ⍝ Create table from data
     '*'≠↑↑⍵⍵:⍺(⍺⍺ ∇∇('*' '⍙⍙'⍪⍵⍵))⍵      ⍝ If freq dist make contingency
     2=⎕NC'⍺⍺':⍺⍺                         ⍝ If 0 or 1, make result
     R←,¨1↓⍵⍵[;0] ⋄ C←,¨1↓⍵⍵[0;]          ⍝ Row, column names
     P←{⍵÷+/,⍵}1 1↓⍵⍵                     ⍝ Probabilities
     B←R C∊¨⊂,¨⍺ ⍵ ⋄ i←(⊂,⍵)∊R            ⍝ Row, column categories
     G←+/¨B                               ⍝ Categories in same group?
     OP←0 1 0 1 ⍺⍺ 0 0 1 1                ⍝ Test operand: its truth table identifies the function
     (0=≢OP)∧⍺≡0 1:⍵(~⊢)∇∇ ⍵⍵⊢⍵           ⍝ monadic ~
     0 1≡⍺:∇⍨⍵                            ⍝ Supply left arg if missing
     Cond←⍺⍺{2≠2⊥⍵:0 ⋄ 2 ⍺⍺ 1}OP          ⍝ Conditional probability?
     Cond∧G≡1 1:⍺{U←∨/¨B1←R C∊¨⊂⊂,⍵       ⍝ Where is right arg?
         X←,B1{(⍵⊃⍺)/[⍵]P}1⊃U             ⍝
         X{(⍺+.×⍵)÷+/⍺}(U[0]⊃R C)∊⊂,⍺}⍵   ⍝ p(A|B) Conditional prob
     Alt←⍺⍺{0 5∊⍨2⊥⍵:+/1 2 ⍺⍺ 2 3 ⋄ 3}OP  ⍝ Check for alternate symbols
     Alt=2:⍺∧∇∇ ⍵⍵⊢⍵                      ⍝ Substitute ∧ for ∩
     Alt=1:⍺>∇∇ ⍵⍵⊢⍵                      ⍝ Substitute > for dyadic ~
     Alt=6:⍺∨∇∇ ⍵⍵⊢⍵                      ⍝ Substitute ∨ for ∪
     (1∊G)∧⍺≢⍵:(⍉⍣i⊢P)+.×⍥,↑∘.⍺⍺/⌽⍣i⊢B    ⍝ Subset
     G≡0 0:⍺ ⍺⍺ ∇∇(⍵⍵⍪(⊂⍵),0⍴⍨⍴C)⊢⍵       ⍝ Add if right arg not in table
     M←1⌽(⌽∘⍳∘≢⌽⊢,(0 ¯1+≢)⍴0∘⍴)+/[i]P     ⍝ Put marginal probs into diagonal matrix
     L←↑(×G)/R C                          ⍝ Labels
     T←('*',L,¨'∆')⍪L,M                   ⍝ Build Table - Make col names unique
     ⍺ ⍺⍺ ∇∇ T⊢⍵,'∆'                      ⍝ Recalculate
 }


 across←{
 ⍝⍟ Copyright (C) 2024 by Stephen M. Mansour
 ⍝∇ Apply a summary function to each row of a matrix
 ⍝∊ Operator
 ⍝⍺ [X]      X Optional left argument passed to the summary function
 ⍝⍺⍺ fS fS   Summary function
 ⍝⍵ Xm Xm  numeric matrix
 ⍝← Zv Zv  One result per row of the right argument
 ⍝⍕ Zv ← [N] fS across Xm
 ⍝⍎ 6 15 ← sum across  2 3 matrix 1 2 3 4 5 6
 ⍝⍎ 2 5 ← mean across  2 3 matrix 1 2 3 4 5 6
 ⍝⍎ 1 1 ← sdev across  2 3 matrix 1 2 3 4 5 6
 ⍝⍎ 4.5 9.5 ← 3 quartile across 2 5 matrix 1 to 10
     ⍺←⊢              ⍝ No left argument: the operand is applied monadically
     ⍺ ⍺⍺⍤1⊢⍵         ⍝ Rank 1: one application per row
 }


 bootstrap←{
 ⍝⍟ Copyright (C) 2018 by Stephen M. Mansour
 ⍝∇ Bootstrap resampling
 ⍝∊ Operators
 ⍝⍺ N: Number of resamples drawn with replacement (default 1000)
 ⍝⍺⍺ fS: Summary Function, applied to each resample
 ⍝⍵ Xv:  Sample data
 ⍝← Vector of Sample Statistics, one per resample
 ⍝⍕ Sample ← N fS bootstrap Xv
 ⍝⍎ 100 mean bootstrap normal randomVariable 10
 ⍝⍎ 100 median bootstrap 1 exponential randomVariable 25
     ⍺←1000                   ⍝ Default number of resamples
     size←≢⍵                  ⍝ Each resample has the size of the original sample
     picks←?⍺ size⍴size       ⍝ Random indices, one row per resample
     ⍺⍺⍤1⊢⍵[picks]            ⍝ Summarise every row
 }


 confInt←{
 ⍝⍟ Copyright (C) 2018, 2023 by Stephen M. Mansour
 ⍝∇ Calculate a confidence interval for a parameter or response variable
 ⍝∊ Operator
 ⍝⍺ [Level←0.95] Ps Confidence level - default .95
 ⍝⍺ [N←0]  N   Population Size (required for sum); infinite if not specified
 ⍝⍺ [0J1]      Flag: return the result formatted by formatConfInt
 ⍝⍺⍺ fS  fS  Summary Function
 ⍝⍺⍺ fM  fm  Multivariate function; typically linear:  f(X) =  b0 + b1*x1 + ... + bn*xn
 ⍝⍵  Sample1   Xv Sample Data
 ⍝⍵  [Sample2] Xv Second Sample Data
 ⍝⍵_  [Groups]  Cv Grouping Data
 ⍝⍵  FreqDist Xm  2-column matrix: [midpoints|frequencies]
 ⍝⍵  SummaryDat W  Summary Data including count, mean, sdev
 ⍝← Interval:  Xv Lower and Upper Bounds
 ⍝⍕ Interval ← [.95|Level][N] fS|fM confInt Sample1 [Sample2|Groups]|FreqDist|SummaryData
 ⍝⍎ mean confInt Height          ⍝ 95% Confidence Interval for Mean Height
 ⍝⍎ 0.77344 80715←0.9 proportion confInt Sex eq 'M' ⍝ 90% Confidence interval for Male Proportion
 ⍝⍎ 11.81 24.496←0.99 var confInt Height       ⍝ 99% Confidence interval Height variance
 ⍝⍎ mean confInt Height splitBy Sex            ⍝ Difference between Male and Female
 ⍝⍎ 67.581 69.419 ← mean confInt stats 49 68.5 3.2 ⍝ Summary Data
 ⍝⍎ MODEL←Height regress ShoeSize ⍝ Regression Model
 ⍝⍎ 68.378 69.274←MODEL.f confInt 10           ⍝ Estimate Height Confidence Interval for size 10 Shoe
     ⍺←0.95 ⋄ ⎕ML←3 ⋄ ⎕IO←0                    ⍝ Default conf level is 95%
     'Linear'≡⍺⍺'Type':⍺ 1 ⍺⍺ ⍵                ⍝ If regression, conf int for y
     'Association'≡⍺⍺'Type':⍺ ⍺⍺{⍺≠⍥≢↑⊆⍵:⍺ ⍺⍺ ⍵  ⍝ If bivariate function
         0.95 ⍺⍺ ⍺ ⍵}⍵                         ⍝    check left argument
     x←⊆⍣(V←2>↑⍴⍴⍵)⊢⍵                          ⍝ Enclose if simple numeric vector
     V∧2<≢x:'No more than two right arguments permitted' ⍝ Returned as text, not signalled
     x←{(9∊⎕NC¨'⍵')∨0 1≢11=⎕DR¨⍵:⍵             ⍝ If 2nd arg boolean
         ↑{(⍵/⍺)((~⍵)/⍺)}/⍵}x                  ⍝ Partition into two groups
     C←80 160 320                              ⍝ Character codes (⎕DR values of character data)
     x←{↑⊂groupBy/⍵}⍣(C∊⍨⎕DR⊃↑⌽⍵)⊢x            ⍝ Group the data?
     A←⍺~0J1                                   ⍝ Left argument without the formatting flag
     CL←{0=≢⍵:0.95 ⋄ ↑⍣(1=≢⍵)⊢⍵}(645=⎕DR¨A)/A  ⍝ Find CL where 0<CL<1
     M←{0=≢⍵:0 ⋄ W←↑⍣(1=≢⍵)⊢⍵                  ⍝ Disclose singletons
         toNestedVector W}A~CL,⊂CL              ⍝ Find population size or method(s)
     NL x←{~C∊⍨⎕DR↑⍵:' '⍵                      ⍝ If grouped data,
         w←(1<≢¨0⌷[1]⌽⍵)⌿⍵                     ⍝ Eliminate 1-item groups
         2=↑⍴⍉w:↓⍉w                            ⍝ One set of groups
         NL x←↓⍉⊃¨w⊂⍨2-C∊⍨⎕DR¨w[0;]
         (toDelimitedList¨NL)(,¨x)}x           ⍝ Multiple sets of groups
     0J1∊⍺:(CL M NL)⍺⍺ formatConfInt x         ⍝ Format using "show" operator
     M←↑¨⎕C M
    ⍝(twoSample x)∧1=≢NL:⊃(CL,¨M)⍺⍺¨⊂x         ⍝ Two-sample case
     (twoSample x)∧1=≢NL:⊃(CL∘.,M)⍺⍺¨⊂x       ⍝ Two-sample case
     K←≢¨CL M x                                ⍝ Item counts of levels, methods and data
     K∧.=1:(CL,M)⍺⍺↑x                          ⍝ Simple case - 2-vector result
    ⍝ ⊃(CL,¨M)∘.⍺⍺ x
     2=⍴⍴x:⊃(CL∘.,M)∘.⍺⍺⊂x                     ⍝ Frequency distribution
     ⊃(CL∘.,M)∘.⍺⍺ x                           ⍝ Raw data or namespace
 }


 critVal←{
 ⍝∇ Short alias for the criticalValue operator
 ⍝⍺  [Parms] Distribution parameters (default 0 1)
 ⍝⍺⍺ Left operand, passed unchanged to criticalValue
 ⍝⍵⍵ Right operand, passed unchanged to criticalValue
 ⍝⍵  Right argument, passed unchanged to criticalValue
     ⍺←0 1                          ⍝ Same default as criticalValue
     ⍺(⍺⍺ criticalValue ⍵⍵)⍵        ⍝ Delegate
 }


 criticalValue←{
 ⍝⍟ Copyright (c) 2018, 2024 by Stephen M. Mansour
 ⍝∇ Boundary for a specified proportion of values; Inverse of Probability Operator
 ⍝∊ Operator
 ⍝⍺ [Parms]        Xv   Distribution parameters (default 0 1)
 ⍝⍺⍺ fD            fD
 ⍝⍵⍵ fR            fR
 ⍝⍵  Prob          P   Probability
 ⍝←  Value         X
 ⍝⍕ X ←  [Parms|_0_1] fD criticalValue fR P
 ⍝⍎ 1.645 ← normal criticalValue > .95           ⍝ Lower tail standard normal (greater than 95% of all values)
 ⍝⍎ 15.086 ← 5 chiSquare criticalValue < 0.01    ⍝ Upper tail chi-Square (less than 1% of all values)
 ⍝⍎ ¯2.1788 2.1788 ← 12 tDist criticalValue outside 0.95 ⍝ 2-tail Student t
 ⍝⍎  9.0135 ← 5 3 fDist critVal < 0.05           ⍝ F Critical value less than 5% of all values
 ⍝ The relation ⍵⍵ is identified by applying it to small test arguments (see b below);
 ⍝ the probability is then converted accordingly before ⍺⍺ is called.
     ⎕ML←3 ⋄ ⎕IO←0 ⋄ ⍺←0 1                       ⍝ Default parameters (0,1)
     1<|≡⍵:⍺∘∇ pervasive ⍵                       ⍝ Allow nested input
     ~'P'check ⍵:⎕SIGNAL 11                      ⍝ Must be probability
     ⍝ ~'Q'check ⍵:⎕SIGNAL 11                      ⍝ Must be probability
     a←⍺'q'                                      ⍝ Left argument for ⍺⍺: parameters with 'q' (presumably selects the inverse/quantile form - confirm in the distribution functions)
     b←0 1 ⍵⍵ 1 0                                ⍝ Test relation
     b←b∨1≡2 ⍵⍵ 1 3                              ⍝ If between, same as ≠
     p←b{≠/⍺:|⍵-↑⍺ ⋄ w←⍪⍣(×↑⍴⍴⍵)⊢⍵
         q⌽(⊢,1∘-)2÷⍨w+q←~↑⍺}⍵                   ⍝ Find probability
     2=⎕NC'⍺⍺':⍺⍺{                               ⍝ If numeric left operand
         v q←↓⍉frequency ⍺⍺                      ⍝ Values and counts from frequency
⍝         v[+/⍵∘.≥+\q÷+/q]}p                     ⍝
         b←⍵∘.⍵⍵⍨+\q÷+/q                         ⍝ Compare cumulative relative frequencies with ⍵ using ⍵⍵
         v[+/b=0 ⍵⍵ 1]}⍵⍵ p                      ⍝ Flip if < or ≤
     d←('Discrete'≡⍺⍺'Type')∧0 1=.⍵⍵ 1 1         ⍝ If discrete and > or ≤
     p←p+d×2×⎕CT                                 ⍝ Adjust p
     2>≡p:9○a ⍺⍺ p                               ⍝ Simple right argument
     9○a∘⍺⍺¨p                                    ⍝ Enclosed or nested argument
 }


 decision←{
⍝∇ Choose the best action from a payoff table and return the analysis in a namespace
⍝⍺ probabilities (optional); default is equal weight for every state of nature
⍝⍵ Actions
⍝⍵ Payoffs for state of nature 1
⍝⍵   "      "    "    "    "   2
⍝⍵   "      "    "    "    "   n
⍝⍺⍺ max or min
⍝⍵⍵ decision criterion (function): max, min, mean, sdev, var, cv, rrr
⍝← Namespace with Action, OptimalValue, Values, EOL and EVPI
    ⍝ ##.Util.Init
     ⎕IO←0 ⋄ ⎕ML←3
     b←0≠↑0⍴↑⍵             ⍝ Is 1st item decision? (NOTE: b is not used below)
     a←toNestedVector↑⍵    ⍝ Action
     s←⊂[0]⊃1↓⍵            ⍝ Payoffs
     ⍺←(≢1↓⍵)⍴1            ⍝ Default: one equal weight per payoff row
     p←⍺÷+/⍺               ⍝ probabilities
     x←⍵⍵∘{⍵,[0.5]p}¨s     ⍝ Criterion applied to each [payoffs|p] pairing
     i←x⍳⍺⍺ x              ⍝ Position of the value selected by ⍺⍺
     ns←⎕NS''              ⍝ Result namespace
     ns.Action←i⊃a         ⍝ Selected action
     ns.OptimalValue←i⊃x  ⍝ Criterion value of the selected action
     ns.Values←x           ⍝ Criterion value of every action
     ⍝ns.EVwPI←p+.×⍺⍺¨1↓⍵       ⍝
     ⍝ns.EVwPI←⍵⍵(⍺⍺¨1↓⍵),[0.5]p    ⍝
    ⍝ns.(EVPI←|EVwPI-OptimalValue)
     ns.EOL←mean¨(s-⍨⌈/s),[0.5]¨⊂p  ⍝ Mean of (best payoff - payoff) paired with p, per action
     ns.EVPI←⌊/ns.EOL      ⍝ Smallest EOL
     ns                    ⍝

 }


 down←{
 ⍝⍟ Copyright (C) 2024 by Stephen M. Mansour
 ⍝∇ Apply a summary function to the columns of a matrix
 ⍝∊ Operator
 ⍝⍺ N X Optional left argument to summary function
 ⍝⍺⍺ fS fS Any Summary Function
 ⍝⍵  Xm Xm  numeric matrix
 ⍝← Zv  Zv  Vector whose length is number of columns in right argument
 ⍝⍕ Zv ← [N] fS down Xm
 ⍝⍎ 5 7 9 ← sum down  2 3 matrix 1 2 3 4 5 6
 ⍝⍎ 2.5 3.5 4.5 ← mean down  2 3 matrix 1 2 3 4 5 6
 ⍝⍎ 2 5 3←max down  2 3 matrix 1 5 3 2 4 3
     ⍺←⊢               ⍝ No left argument: ⍺⍺ is applied monadically
     ⍉⍺ ⍺⍺⍤1⊢⍉⍵       ⍝ Transpose so columns become rows, apply to each row, transpose back
 }


 each←{
 ⍝∇ Named form of the Each primitive: apply ⍺⍺ to every item
 ⍝⍺  Optional left argument, paired item-by-item with ⍵
 ⍝⍺⍺ Function to apply
 ⍝⍵  Items
     ⍺←⊢           ⍝ No left argument: apply ⍺⍺ monadically
     ⍺ ⍺⍺¨⍵
 }


 goodnessOfFit←{
 ⍝⍟ Copyright (c) 2018, 2022 by Stephen M. Mansour
 ⍝∇ Determine whether a sample fits a particular distribution
 ⍝∇ Discrete Distributions use ChiSquare Test.
 ⍝∇ Continuous Distributions use Kolmogorov Test.
 ⍝∇ Non-specified Parameters use Lilliefors Test.
 ⍝∇ Two Samples use Smirnov Test.
 ⍝∊ Operators
 ⍝⍺  [Categories] N    [Number of categories]
 ⍝⍺⍺ fD         fD  Distribution function
 ⍝⍺⍺ RelFreq    Cm  (Relative) frequency distribution
 ⍝⍺⍺ DataSet    Xv  Numeric Data
 ⍝⍵⍵ fR         fR
 ⍝⍵  NumData    Xv   Numeric variable
 ⍝⍵  CharData   Cv   Categorical variable
 ⍝⍵  FreqDist   Nm   Frequency Distribution [Values Counts]
 ⍝←  NameSpace  W    Namespace containing the following variables:   DegreesOfFreedom  Factors  P  Table  TestStatistic  Type
 ⍝⍕ NameSpace ← fD|RelFreq|DataSet goodnessOfFit fR NumData|CharData|FreqDist
 ⍝⍎ report normal goodnessOfFit = Height        ⍝ Lilliefors Test
 ⍝⍎ report exponential goodnessOfFit = Family   ⍝ Lilliefors Test
 ⍝⍎ report uniform goodnessOfFit = Family       ⍝ ChiSquare Test
 ⍝⍎ report poisson goodnessOfFit = Family       ⍝ Poisson Test
 ⍝⍎ report 68 3 normal goodnessOfFit < Height   ⍝ Kolmogorov Test
 ⍝⍎ report Weight goodnessOfFit > 2 × Height    ⍝ Smirnov Test
 ⍝ rAv is not defined in this operator; it is read from the enclosing scope
 ⍝ and, when true, routes the call to goodnessOfFitR.
     ⍺←⍬ ⋄ ⎕ML←3 ⋄ ⎕IO←0                            ⍝ Set system variables
  ⍝   c←'Continuous'≡⍺⍺'Type'                       ⍝ Is distribution continuous?
     rAv∧2=⎕NC'⍺⍺':⍺⍺ goodnessOfFitR ⍵              ⍝ If requested use R
     rAv:⍺ goodnessOfFitR ⍵                         ⍝
     ns←⎕NS''                                       ⍝ Create namespace for output
     g←⍺⍺{A C←⍺⍺{11::1 ⋄ ⍵('Continuous'≡⍺⍺'Type')}⍺ ⍝ Decide which type of
       ⍝  C:A ⍺⍺ npgof ⍵⍵ ⍵                         ⍝ Goodness-of-fit test
         C∧1=⍴⍴⍵:A ⍺⍺ npgof ⍵⍵ ⍵                    ⍝ Goodness-of-fit test
         C∧2=⍴⍴⍵:A ⍺⍺ x2gof ⍵                       ⍝ Do chi-square if grouped data
         2=⎕NC'⍺⍺':⍺⍺{1=⍴⍴⍺⍺:⍺⍺ smirnovGOF ⍵⍵ ⍵     ⍝ If left operand vector, smirnov
             (↓⍉⍺⍺)mgof ⍵ 0}⍵⍵ ⍵                    ⍝ If matrix; multinomial
         p≠⌈p←1 ⍺⍺ 1:⍺ pgof ⍵ ⋄ ⍺ mgof ⍵ p}⍵⍵       ⍝ Determine distribution
     d pm p x2 df tbl←⍺ g ⍵                         ⍝ Six-item result of the selected test
     ns.(TestStatistic P Parameters)←x2 p pm        ⍝ Put variables into namespace
     ns.DegreesOfFreedom←df                         ⍝   "      "
     h←⊂('-'∊∊tbl[1;0])⊃'Value' 'From/To'           ⍝ First column heading: 'From/To' if tbl[1;0] contains '-'
     h←(h,'Observed' 'Expected' 'Difference' 'ChiSquare') ⍝,[¯0.5]¨'-'
     ns.Table←h⍪⍣(×≢df)⊢tbl                         ⍝ Assign table
     ns.Factors←'CAT1' 'CAT2'                       ⍝ Dummy values
     ns.Type←'GoodnessOfFit'                        ⍝ Set Type
     ns.rel←⍵⍵                                      ⍝ Relation
     b←2=⎕NC'⍺⍺'                                    ⍝ Is left operand an array?
     ns.SampleSize←b(⍺⍺{⍺:count¨⍺⍺ ⍵ ⋄ count ⍵})⍵   ⍝ Counts for both data sets if so, else count of ⍵
     ns._Distribution←d                             ⍝
     ns}


 groupBy←{
⍝⍟ Copyright (c) 2018, by Stephen Mansour
⍝∇ Group a variable
⍝⍺  Numeric Field
⍝⍺⍺ Summary function(s)
⍝⍵  Grouping field (may be character or numeric)
⍝←  Matrix Em [Key|[...[KeyM]]|Value1|[...[ValueN]]]
⍝⍕  Matrix←
⍝⍎       Height mean groupBy State
⍝⍕  PA  69.57142857
⍝⍕  NJ  70
⍝⍕  NY  70
⍝⍎       Height (mean,sdev) groupBy State
⍝⍕  PA  69.57142857 3.10145895
⍝⍕  NJ  70          4.281744193
⍝⍕  NY  70          3.651483717
⍝⍎       Height (mean,median,mode) groupBy State Sex
⍝⍕  PA  M  70.33333333 70.5 70
⍝⍕  NJ  F  67.66666667 68   65
⍝⍕  NY  M  70          70   68
⍝⍕  NJ  M  71.75       72   74
⍝⍕  PA  F  65          65   65
⍝⍕      Height groupBy Sex
⍝⍕    M 40 71.5  2.87
⍝⍕    F 11 65.09 3.11
⍝ This operator runs with ⎕ML←1, unlike most others in this namespace.
⍝ The two error guards below stay active for the whole body, so their order relative
⍝ to the other statements matters.
     ⎕ML←1 ⋄ ⎕IO←0
     5::⍺⍺⊂∇∇ ⍵                             ⍝ Length error
     0=⎕NC'⍺':⍵{(deleteExcessBlanks↑⍺)⍵}⌸⍺⍺ ⍝ Partition Left operand
     0::⍵ ⍺⍺{⍺,⍺⍺ ⍵}⌸⍺                      ⍝ Fall back
     a←toNestedVector ⍺                     ⍝ Left argument as a nested vector
     op←{w←⍉↑toNestedVector¨⍵               ⍝ Summary operator
         w ⍺⍺{⍺,⍺⍺ ⍵}⌸⍺}                    ⍝ Key: one row per unique key, followed by ⍺⍺ of its values
     0≠⊃0⍴⊃a:a⊆∘⍺⍺ op ⍵                    ⍝ Enclose character
     a≠⍥≢⍵:a ⍺⍺ op ⍵                        ⍝ Multiple Grouping
     w←toNestedVector ⍵                     ⍝ Grouping field as a nested vector
     w ⍺⍺{⍺,⍺⍺ ⍵}⌸a                         ⍝ Single Grouping
 }


 hazardRate←{
 ⍝∇ Hazard Rate or Failure Rate
 ⍝⍺  S: Parameters of failure distribution
 ⍝⍺⍺ Distribution
 ⍝⍵  T: Time
 ⍝⍕  Uv ← Sv fD hazardRate Tv
 ⍝⍎  2 2 2 ← 2 exponential hazardRate 3 4 5
 ⍝⍎  0.44627 2.5 1 ← 5 2 weibull hazardRate  1.3 2 4.5
 ⍝⍎  0.3168 ← 1.2 1.5 lognormal hazardRate 2.3
 ⍝ h(t)=f(t)/R(t): the distribution function value divided by the probability of exceeding t
     (⍺ ⍺⍺ ⍵)÷⍺(⍺⍺ prob>)⍵
 }


 hyp←{
 ⍝∇ Short alias for the hypothesis operator
 ⍝⍺  [Sample1] Optional; when omitted, hypothesis is called monadically
 ⍝⍺⍺ Left operand, passed unchanged to hypothesis
 ⍝⍵⍵ Right operand, passed unchanged to hypothesis
 ⍝⍵  Right argument, passed unchanged to hypothesis
 ⍝⍕ Model ← [Sample1] fS hyp fR Value|Sample2
     ⍺←⊢                        ⍝ hypothesis tests 0=⎕NC'⍺' itself, so forward a missing ⍺ as a monadic call
     ⍺(⍺⍺ hypothesis ⍵⍵)⍵       ⍝ Parenthesised like prob and critVal
 }


 hypothesis←{
 ⍝⍟ Copyright (c) 2018 by Stephen M. Mansour
 ⍝∇ Perform a hypothesis test
 ⍝∊ Operators
 ⍝⍺ Sample1  Xv Sample Data
 ⍝⍺⍺ fS  fS mean, proportion, var, sdev
 ⍝⍵⍵ fR  fR Relational Function
 ⍝⍵ Value  Xs   Hypothesized Value
 ⍝⍵ Sample2 Xv  Sample2Data
 ⍝← Model   W   Contains the following  DegreesOfFreedom  Estimate  P-Value  SampleSize  StandardDeviation    StandardError  TestStatistic  Type
 ⍝⍕ Model ← Sample1 fS hypothesis  fR Value|Sample2
 ⍝⍎ Height mean hypothesis = 68               ⍝ One sample t-test
 ⍝⍎ (Sex eq 'F') proportion hypothesis > 0.5  ⍝ One sample proportion
 ⍝⍎ MaleHeight mean hypothesis > FemaleHeight ⍝ Two-sample t test
 ⍝⍎ mean hypothesis = Height splitBy Sex
 ⍝⍎ mean hypothesis > Height splitBy Sex eq 'M'
 ⍝⍎ (After-Before) mean hypothesis > 0        ⍝ Paired t-test
 ⍝⍎ Height var hypothesis < 9                 ⍝ Chi-Square test
 ⍝⍎ MaleHeight var hypothesis = FemaleHeight  ⍝ F-test
 ⍝ The operands are identified by probing them with fixed test data:
 ⍝   r encodes the results of ⍵⍵ on three test pairs as a base-2 number;
 ⍝   i is the result of ⍺⍺ on 1 2 6 (or 20 plus its result on 1 2 3 vs 4 6 8 for
 ⍝   'Association' functions) and selects the test routine in f below.
    ⍝ 0=⎕NC'⍺':∇/⍵                            ⍝ Is left argument missing?
     ⎕ML←3 ⋄ ⎕IO←0 ⋄ ⍝x1 x2←⍺ ⍵               ⍝ Set system variables
     r←2⊥0 0 1 ⍵⍵ 0 1 0                       ⍝ Identify relation
     c←r<4                                    ⍝ Identify claim 0=null, 1=alternative
     R←(¯1+r⌊7-r)⊃1 ¯1 0                      ⍝ Lower, upper or two tails?
     i←⍺⍺{⍵:20+1 2 3 ⍺⍺ 4 6 8                 ⍝ Identify bivariate function
         ⍺⍺ 1 2 6}'Association'≡⍺⍺'Type'      ⍝ Identify summary function
     ⍝    +/,⍺⍺ 1 2 6}
     0=⎕NC'⍺':r c i monadicHypothesis ⍵       ⍝ Equal Variances; three or more groups
     i=12:⍵⍵ wilcoxon ⍺-⍵                     ⍝ paired median
     i=13:⍺ ⍵⍵{ns←(⍺-⍵)mean hypothesis ⍺⍺ 0   ⍝ paired mean
         ns._Paired←1 ⋄ ns}⍵
     x1 x2←⍺ ⍵
     s←↑(⍴⍴x2)∨9=⎕NC'x2'                      ⍝ One or two samples?
     f←{
         i=3:⍺ meanHo ⍵                       ⍝ Determine
         i=¯1:⍺ proportionHo ⍵                ⍝   type
         i=2:⍺ medianHo ⍵
         7∊i*1 2:⍺ varianceHo ⍵               ⍝       of
         i=21:⍺ corrHo ⍵                      ⍝         test
         i=20.5:⍺ slopeHo ⍵
         i=19:⍺ interceptHo ⍵
         i=20:⍺ mseHo ⍵
         i=22:⍺ covHo ⍵}
     SD←7=i*2                                 ⍝ Is std deviation?
    ⍝ y←{9=⎕NC'⍵':⍵ ⋄ ⍵*2*(s=0)∧i≠⌊i}x2       ⍝ Square if sdev
     y←SD{9=⎕NC'⍵':⍵ ⋄ ⍵*2*⍺∧s=0}x2          ⍝ Square if sdev
     ns←R s f x1 y                            ⍝ Apply the test
     ns.Estimate*←÷2*SD                       ⍝ Adjust for sdev
     ns←SD{~⍺:⍵ ⋄ ⍵._Parameter←'σ' ⋄ ⍵}ns     ⍝  "      "   "
     ns._Populations←s+1                      ⍝ One or two populations?
     ns._Relation←(¯1 0 1⍳R)⊃'<≠>'            ⍝ One or two tails?
  ⍝  ns._Parameter←(¯1 21 2 3 7⍳i)⊃'p⍴ηµ',⌽,\'σ²'  ⍝ Parameter of interest
     ns._Claim←c                              ⍝ Claim is H1 if <, > or ≠, otherwise H0
     ns._HypothesizedValue←s⊃x2 0             ⍝ Assumed value
     ns._Paired←{0::0 ⋄ ⍵._Paired}ns          ⍝ Set paired to 0 if it doesn't exist
     ns.Type←'Hypothesis' ⋄ ns}


 paired←{
 ⍝∇ Paired samples  Operator
 ⍝⍺⍺ Summary function
 ⍝⍵  Argument passed to the summary function
 ⍝←  Result of ⍺⍺ ⍵ increased by 10
 ⍝ hypothesis probes its operand with 1 2 6 and treats the results 12 and 13
 ⍝ as "paired median" and "paired mean"; the added 10 produces those codes.
     ⎕ML←3 ⋄ ⎕IO←0
     10+⍺⍺ ⍵
 }


 parameters←{
 ⍝∇ Estimate distribution parameters from data
 ⍝⍺⍺  Distribution Function
 ⍝⍵   Data from Distribution
 ⍝←   Parameter Estimates, or the text 'Nonce Error' if anything fails
 ⍝⍕  Sv←fD parameters Xv
 ⍝⍎  2 1←normal parameters 1 2 3
 ⍝⍎  0.5←exponential parameters 1 2 3
     0::'Nonce Error'              ⍝ Any error yields this text as the result
     est←0 ⍺⍺'Parameters'          ⍝ Ask the distribution for its estimator namespace
     est.f ⍵                       ⍝ Apply the estimator to the data
 }


 predInt←{
 ⍝⍟ Copyright (C) 2018, 2023 by Stephen M. Mansour
 ⍝∇ Calculate a prediction interval for a parameter
 ⍝∊ Regression
 ⍝⍺ [Level]: Confidence level - default .95
 ⍝⍺⍺ fL: Linear Function  f(X) =  b0 + b1*x1 + ... + bn*xn
 ⍝⍵ Xv:   numeric vector
 ⍝← Interval:   2-item vector
 ⍝⍕ Interval ← [.95|P] fL predInt X[A]|Cm|NS
 ⍝⍎ MODEL.f predInt 5
 ⍝⍎ .99 MODEL.f predInt 5
     ⎕ML←3 ⋄ ⎕IO←0
     ⍺←0.95                    ⍝ Default 95%
     ⍺ 2 ⍺⍺ ⍵                  ⍝ Call linear function with (level 2); confInt passes 1 in the same position
 }


∇ z←{y}(f prime)x
⍝ Numerical derivative of f at x by central differences with step halving.
⍝ y  Optional left argument; when supplied it is bound to f (f←y∘f)
⍝ f  Function to differentiate
⍝ x  Point(s) at which to estimate the derivative
⍝ z  Estimate of df/dx
⍝ Earlier forward-difference version, kept for reference:
⍝ ((⍺⍺ ⍵+h)-⍺⍺ ⍵)÷h←1E¯10}
 :If 0≠⎕NC'y' ⋄ f←y∘f ⋄ :EndIf
 ⍝ Inner dfn: ⍺ is (previous estimate)(step); the initial step is |0.00001×⍵, using 1 in place of ⍵ where ⍵=0
 z←f{⍺←0(|0.00001×⍵+⍵=0)
     d0 h←⍺ ⋄ ⎕IO←0
   ⍝  d←(⍺⍺ ⍵(+,[-0.5]-)⍤0⊣h)-.÷2×h
     y←⍺⍺ ⍵(+,[-0.5]-)⍤0⊣h ⍝ Find y values
     d←(-/y)÷2×h           ⍝ Estimate dy/dx
     d∧.=d0:d              ⍝ If equal done
     d(h÷2)∇ ⍵}x           ⍝ Otherwise recurse with half the step (no iteration limit)
∇

 prob←{
 ⍝∇ Short alias for the probability operator
 ⍝⍺  [Parms] Left argument (default 0 1), passed unchanged to probability
 ⍝⍺⍺ Left operand, passed unchanged to probability
 ⍝⍵⍵ Right operand, passed unchanged to probability
 ⍝⍵  Right argument, passed unchanged to probability
     ⍺←0 1                        ⍝ Same default as probability
     ⍺(⍺⍺ probability ⍵⍵)⍵        ⍝ Delegate
 }


 probability←{
 ⍝⍟ Copyright (c) 2018, 2020 by Stephen M. Mansour
 ⍝∇ Calculate probability of a distribution or from a contingency table
 ⍝∊ Operator
 ⍝⍺ [Parameters]   Xv   Distribution parameters (default 0 1)
 ⍝⍺  EventA        Cv   Name of Event A
 ⍝⍺  ProbA         Ps   Probability of Event A
 ⍝⍺⍺ fD            fD
 ⍝⍺⍺ fL            fL
 ⍝⍵⍵ fR            fR  or independent
 ⍝⍵⍵ Table         Xm  Contingency Table
 ⍝⍵  Value         X   Critical Value
 ⍝⍵  EventB        Cv  Name of Event B
 ⍝⍵  ProbB         Ps  Probability of Event B
 ⍝←  Probability   P
 ⍝⍕ P ←  [Parms|_0_1]EventA|ProbA (fD|fL probability fR|Table|independent) X|EventB|ProbB
 ⍝⍎ 0.1640625 ← 7 0.5    binomial probability = 2
 ⍝⍎ 0.76193 ← 5.2 poisson probability > 3
 ⍝⍎ 0.89435 ← normal probability < 1.25
 ⍝⍎ 0.58885 ← 68 3 normal probability between 66 71
 ⍝⍎ Table←frequency Sex Party               ⍝ Create Contingency Table
 ⍝⍎ 0.3158←'M' (∧ prob Table)'R'   ⍝ Multiplication rule:  Probability Male and Republican
 ⍝⍎ 0.44737←'F' (∨ prob Table)'D'   ⍝ Addition rule:       Probability Female or Democrat
 ⍝⍎ 0.41379←'R' (| prob Table)'M'   ⍝ Conditional rule     Probability Republican given Male
 ⍝⍎ 0.1←.2 (∧ prob independent) .5            ⍝ P(A and B) independent
 ⍝⍎ 0.6←.2 (∨ prob independent) .5            ⍝ P(A or B) independent
 ⍝ The first three guards route array operands and "independent" to helper operators;
 ⍝ the remainder handles a distribution function ⍺⍺ with a relation ⍵⍵.
 ⍝ The inner functions fc fd fk fi fb read b from this scope.
     ⍺←0 1 ⋄ ⎕ML←3 ⋄ ⎕IO←0                  ⍝ Default parameters (0,1)
     2=⎕NC'⍺⍺':(⍺⍺ _discreteprob ⍵⍵)⍵       ⍝ If variable left operand
     2=⎕NC'⍵⍵':⍺(⍺⍺ _prob ⍵⍵)⍵              ⍝ If variable right operand calculate
     ¯1≡1 ⍵⍵ 0:⍺(⍺⍺ _indprob)⍵              ⍝ Independent prob
     1<|≡⍵:⍺∘∇ pervasive ⍵                  ⍝ Pervasive right argument
    ⍝2=⎕NC'⍺⍺':⍺(⍺⍺ probability_ ⍵⍵)⍵       ⍝ ???
     c←(⍺ ⍺⍺'Type')≡'Continuous'            ⍝ Is distribution continuous?
     b←1 0 1 ⍵⍵ 0 1 1                       ⍝ Test relation
     b←,b×2≠⍴⍴b                             ⍝ If outside or between, make all 0's
    ⍝fc←⍺⍺{a←⍺,1                            ⍝ Continuous
     fc←⍺⍺{a←⍺'p'                           ⍝ Continuous
         2>≡⍵:|b[0]-(a ⍺⍺ ⍵)×≠/2↑b          ⍝ Simple case
         |b[0]-(a∘⍺⍺¨⍵)×≠/2↑b}              ⍝ Nested case
     fd←⍺⍺{e←(↑b)≠1↓b                       ⍝ Discrete
         d←⌊⍵+(⍵≠⌊⍵)∨∧/e                    ⍝ round up or down
        ⍝a←⍺,↑e                             ⍝ cumulative?
         a←⍺('dp'[↑e])                      ⍝ 'd' or 'p' chosen by first item of e
         =/2↑b:|b[0]-a ⍺⍺ ⍵                 ⍝ P(X=x) or P(X≠x)
         2>≡⍵:|b[0]-a ⍺⍺ d-↑e               ⍝ P(X<x) , P(X≤x) etc.
         |b[0]-a∘⍺⍺¨d-↑e}                   ⍝ Nested case
     fk←⍺⍺{                                 ⍝ Character
         |b[0]-⍺ ⍺⍺ ⍵}
     fi←⍺⍺{+/⍺ ⍺⍺ ⍵}                        ⍝ Pr(X∊x1,...,xn)
    ⍝fb←⍺⍺{a←⍺,1                            ⍝ Pr(X∊(a,b))
     fb←⍺⍺{a←⍺'p'                          ⍝ Pr(X∊(a,b))
         2=≢⍵:|-/a ⍺⍺ ⍵                      ⍝ If 2 values, include both
         1=≢⍵:1-2×1-a ⍺⍺ ⍵                  ⍝ If 1 value include negative
         ¯2-/a ⍺⍺ ⍵}                        ⍝ Otherwise pairwise differences of adjacent values
     f←⍵⍵{a b c←⍺                           ⍝ General probability
         ∧/b:a fi ⍵                         ⍝ X∊⍵
         0≠↑0⍴∊⍵:a fk ⍵                     ⍝ X is character
         c∧∧/~b:|(a fb ⍵)-1 ⍺⍺ 2 3          ⍝ X cont between a,b
         c:a fc ⍵                           ⍝ X cont
         1≠≢∪b:a fd ⍵                       ⍝ X discrete
         lu←(⌊↑⍵),⌈¯1+↑⌽⍵                   ⍝ lower, upper bounds
     ⍝   b[0]:a fb ¯1 1+lu                  ⍝ discrete include
         2 ⍺⍺ 1 3:a fb lu                   ⍝ discrete between
         1-a fb ¯1 1+lu}                    ⍝ discrete outside
     ⍺ b c f ⍵                              ⍝ Apply it
 }


 probability1←{
 ⍝⍟ Copyright (c) 2018 by Stephen M. Mansour
 ⍝∇ Calculate probability of a distribution
 ⍝∊ Operators
 ⍝⍺ [Parameters]:   Distribution parameters (default 0 1)
 ⍝⍺⍺ fD:  Distribution function
 ⍝⍵⍵ fR:  Relational function, e.g.   eq, ne, gt, lt, ge, le, in, between
 ⍝⍵  X:    Value
 ⍝← P:    Probability 0≤P≤1
 ⍝⍕ P ←  [Parms|0 1] fD probability fR X
 ⍝⍎ 0.1640625 ← 7 0.5    binomial probability = 2
 ⍝⍎ 0.76193 ← 5.2 poisson probability > 3
 ⍝⍎ 0.89435 ← normal probability < 1.25
 ⍝⍎ 0.58885 ← 68 3 normal probability between 66 71
 ⍝ The inner functions fc fd fk fi fb read b from this scope.
     ⍺←0 1 ⋄ ⎕ML←3 ⋄ ⎕IO←0                  ⍝ Default parameters (0,1)
     1<|≡⍵:⍺∘∇ pervasive ⍵                  ⍝ Pervasive right argument
    ⍝2=⎕NC'⍺⍺':⍺(⍺⍺ probability_ ⍵⍵)⍵       ⍝ ???
     c←(⍺ ⍺⍺'Type')≡'Continuous'            ⍝ Is distribution continuous?
     b←1 0 1 ⍵⍵ 0 1 1                       ⍝ Test relation
    ⍝ fc←⍺⍺{a←⍺,1                           ⍝ Continuous
⍝        2>≡⍵:|b[0]-(a ⍺⍺ ⍵)×≠/2↑b          ⍝ Simple case
⍝         |b[0]-(a∘⍺⍺¨⍵)×≠/2↑b}             ⍝
     fc←⍺⍺{g←⍺⍺ variant 1                   ⍝ Continuous
         2>≡⍵:|b[0]-(⍺ g ⍵)×≠/2↑b            ⍝ Simple case
         |b[0]-(⍺∘g¨⍵)×≠/2↑b}               ⍝ Nested case: same call as the simple case, item by item (was a∘⍺⍺ with a undefined here)
     fd←⍺⍺{e←(↑b)≠1↓b                       ⍝ Discrete
         d←⌊⍵+(⍵≠⌊⍵)∨∧/e                    ⍝ round up or down
         a←⍺,↑e                             ⍝ cumulative?
         =/2↑b:|b[0]-a ⍺⍺ ⍵                 ⍝ P(X=x) or P(X≠x)
         2>≡⍵:|b[0]-a ⍺⍺ d-↑e               ⍝ P(X<x) , P(X≤x) etc.
         |b[0]-a∘⍺⍺¨d-↑e}                   ⍝ Nested case
     fk←⍺⍺{                                 ⍝ Character
         |b[0]-⍺ ⍺⍺ ⍵}
     fi←⍺⍺{+/⍺ ⍺⍺ ⍵}                        ⍝ Pr(X∊x1,...,xn)
     fb←⍺⍺{a←⍺,1                            ⍝ Pr(X∊(a,b))
         2=≢⍵:|-/a ⍺⍺ ⍵                     ⍝ If 2 values, include both
         1=≢⍵:1-2×1-a ⍺⍺ ⍵}                 ⍝ If 1 value include negative
     f←⍵⍵{a b c←⍺                           ⍝ General probability
         ∧/b:a fi ⍵                         ⍝ X∊⍵
         0≠↑0⍴∊⍵:a fk ⍵                     ⍝ X is character
         c∧∧/~b:|(a fb ⍵)-1 ⍺⍺ 2 3          ⍝ X cont between a,b
         c:a fc ⍵                           ⍝ X cont
         1≠≢∪b:a fd ⍵                       ⍝ X discrete
         lu←(⌊↑⍵),⌈¯1+↑⌽⍵                   ⍝ lower, upper bounds
     ⍝   b[0]:a fb ¯1 1+lu                  ⍝ discrete include
         2 ⍺⍺ 1 3:a fb lu                   ⍝ discrete between
         1-a fb ¯1 1+lu}                    ⍝ discrete outside
     ⍺ b c f ⍵                              ⍝ Apply it
 }


 randVar←{
 ⍝∇ Short alias for the randomVariable operator
 ⍝⍺  [Parms] Distribution parameters (default 0 1)
 ⍝⍺⍺ Operand, passed unchanged to randomVariable
 ⍝⍵  Right argument, passed unchanged to randomVariable
     ⍺←0 1 ⋄ ⍺(⍺⍺ randomVariable)⍵
 }


 randomVariable←{
 ⍝⍟ Copyright (c) 2018, 2024 by Stephen M. Mansour
 ⍝∇ Simulate sampling from a specified distribution
 ⍝∊ Operator
 ⍝⍺ [Parms]        Xv   Distribution parameters (default 0 1)
 ⍝⍺⍺ Distribution  fD   Probability Distribution
 ⍝⍺⍺ FreqDist      Xm   Two-Column Matrix:  Values, Probabilities
 ⍝⍵  SampleSize    N    Number of samples to simulate, use 0 for scalar.
 ⍝←  SampleData    Xv   Random values from fD
 ⍝⍕  SampleData ←  [Parms|_0_1] fD|FreqDist randomVariable SampleSize|0
 ⍝⍎  2 1 5 4 2←7 .3 binomial randomVariable 5   ⍝ Binomial Random Variables
 ⍝⍎  ¯0.98264 1.07195 0.34821←normal randomVariable 3          ⍝ Standard Normal Random Variables
     ⎕IO←0 ⋄ ⎕ML←3
     ⍺←0 1                     ⍝ Default parameters
     2=⎕NC'⍺⍺':⍺⍺{             ⍝ If left operand is a variable
        ⍝ x p←↓⍉frequency ⍺    ⍝ Separate values, probabilities
         x p←{2≠⍴⍴⍵:⍵          ⍝ If two vectors, done
             ↓⍉frequency ⍵}⍺   ⍝ Else separate values, probabilities
         u←?⍵⍴0                ⍝ Generate random Variable
         x[+/u∘.≥+\p÷+/p]}⍵    ⍝ Select from distribution
     1<≡⍺:(↑,¨/⍺)∇¨⍵           ⍝ If nested parameters, make pervasive
     w←{(⊂⍵)∊0 '':⍬ ⋄ ⍵}⍵      ⍝ If 0, treat as scalar
     w ⍺⍺⍨⍺'r'                 ⍝ If simple parameter list just calculate: (⍺ 'r') ⍺⍺ w
 }


 sampleSize←{
 ⍝⍟ Copyright (C) 2018 by Stephen M. Mansour
 ⍝∇ Estimate a sample size
 ⍝∊ Operators
 ⍝⍺ [CONF←.95] P Confidence level
 ⍝⍺_ [P←.8]: Power of the test - default .80      (Beta - not yet implemented)
 ⍝⍺⍺ fS     fS  sum, mean, proportion, or variance
 ⍝⍵ ME      Ts  Margin of Error
 ⍝⍵ StdDev  Ss  Standard Deviation for mean
 ⍝⍵ Prop    P   Estimated Proportion
 ⍝⍵ [PopSize←0] Ms  Population Size; 0 =  infinite
 ⍝← Samplesize N
 ⍝⍕ N ← [.95|CONF] fS sampleSize ME StdDev|[Prop|0.5] [PopSize]
 ⍝⍎ 35←mean sampleSize 1 3
 ⍝⍎ 664←0.99 proportion sampleSize .05
 ⍝⍎ 70←0.9 sum sampleSize 20 .1 1000
 ⍝ The summary function is identified by applying it to 0 1 5 (see i below);
 ⍝ the guards at the end select the matching inner function.
 ⍝ If no guard matches, the text 'Domain Error' is returned as the result (not signalled).
     ⎕IO←0 ⋄ ⎕ML←3                       ⍝
     ⍺←0.95                              ⍝ Default is 95% confidence
    ⍝N←↑1↓⍺                              ⍝ Sample Size, 0 = infinite
    ⍝i←⍺⍺ ¯1 1                           ⍝ Mean, proportion or variance?
     i←⍺⍺ 0 1 5                          ⍝ Mean, proportion or variance?
     n_sum←{p←0.5×1-⍺                    ⍝ Sample size for total
         Z←normal criticalValue<p        ⍝ Calculate normal critical value
         E S M←3↑⍵                       ⍝ Margin of Error, Sdev,
         ⍝E V M←3↑⍵
         N0←(M×Z×S÷E)*2                  ⍝ Sample Size Estimate
         ⍝N0←V×(M×Z÷E)*2
         ⌈÷N0+⍥÷M}                       ⍝ Finite correction factor
     n_mean←{p←0.5×1-⍺ ⋄ M←↑⌽3↑⍵         ⍝ Sample size for mean
         z←normal criticalValue<p        ⍝   First pass use normal
        ⍝n←10⌈⌈(÷/z,⍵)*2                 ⍝   Estimate n from normal distribution
         n←2*⍨÷/z,2↑⍵                    ⍝   Estimate n from normal distribution
         ⍝ NOTE(review): the next line always returns (guard result, or ⌈n after the ⋄),
         ⍝ so the tDist refinement that follows it is never reached - confirm this is intended
         M>0:⌈÷M+⍥÷n ⋄ ⌈n              ⍝   Finite population factor
         ⍝⌈M(÷+⍥÷)⍣(×⊣)n                  ⍝   Include finite pop. factor?
         ⍵{t←(↑⍵-1)tDist criticalValue<p ⍝   Estimate using tDist
             2⌈⌈(÷/t,⍺)*2}n}             ⍝   Using n-1, estimate from t dist
     n_p←{E p M←⍵[0,1+{~⍺}\(⊢=⌈)1↓⍵]     ⍝ Sample Size for proportion
         p←p+0.5×p=0                     ⍝ Default p = 0.5
         z←normal criticalValue<0.5×1-⍺  ⍝   Use normal dist
         N←(p×1-p)×(z÷E)*2               ⍝ Estimate sample Size
         M=0:⌈N                          ⍝ If population infinite, done
         ⌈M×N÷M+N-1}                     ⍝ Otherwise use Hypergeometric formula
     n_var←{E s2←2↑⍵                     ⍝ Sample size for variance
         f←⍺∘{x2←(⍵-1)chiSquare criticalValue<1(×,-)0.5×1-⍺
             E+0.5×-/s2×(⍵-1)÷x2}        ⍝ Estimate error from sample size
         ⌈f SecAlg 3 100,f¨20 100}       ⍝   Backsolve
     i=6:⍺ n_sum ⍵                       ⍝ Sample size for total
     i=2:⍺ n_mean ⍵                      ⍝ Sample size for mean
     i=¯1:⍺ n_p 3↑⍵~0                    ⍝ Sample size for proportion
     i=7:⍺ n_var ⍵                       ⍝ Sample size for variance
     7=i*2:⍺ n_var ⍵*2                   ⍝ Sample size for stdDev
     'Domain Error'}


 theoretical←{
 ⍝⍟ Copyright (C) 2018 by Stephen M. Mansour
 ⍝∇ Calculate parameter  for a theoretical distribution
 ⍝∊ Operators
 ⍝⍺ [Parms←0&nbsp.1] Xv Parameter List for Distribution
 ⍝⍺⍺ fD fD
 ⍝⍵⍵ fS fS
 ⍝⍵  Dummy X   Dummy value unless fS requires a left argument
 ⍝←  Value X   Result of summary function applied to a population with distribution fD
 ⍝⍕  Value←[Parms|_0_1]fD theoretical fS Dummy
 ⍝⍎  2.1 ← 7 .3 binomial theoretical mean ''
 ⍝⍎  0.9295 ← 13 5 52 hyperGeometric theoretical sdev ''
 ⍝⍎  0.6745 ← normal theoretical quartile 3
 ⍝ The summary function ⍵⍵ is identified by its result on the fixed data 2 2 3 5 7
 ⍝ (e.g. 3.8 is the mean and 4.7 the variance of that data); the matching guard then
 ⍝ asks the distribution ⍺⍺ for the named quantity.
 ⍝ If no guard matches, the function ends without a result.
     ⎕IO←0 ⋄ ⎕ML←3 ⋄ ⍺←0 1
     (2=⎕NC'⍺⍺')∧⍵≠0:⍵ ⍵⍵ ⍺⍺               ⍝ If variable left arg, apply fn
     2=⎕NC'⍺⍺':↑⍵⍵ ⍺⍺                      ⍝ Variable left operand, ⍵ is 0: apply fn monadically
     f←⍺⍺{d←3 ⍵⍵ 2 2 3 5 7                 ⍝ Identify dyadic summary function by its result for left argument 3
         d=6:⍺ ⍺⍺ criticalValue≥⍵÷4        ⍝ Quartile
         d=2:⍺ ⍺⍺ criticalValue≥⍵÷100      ⍝ Percentile
         d=60:100×⍺ ⍺⍺ probability<⍵       ⍝ PercentileRank
     }⍵⍵
     ⍵≠0:⍺ f ⍵                             ⍝ Non-zero ⍵ is the left argument of the summary function
     d←⍵⍵ 2 2 3 5 7                        ⍝ Apply function to known data
     d=3.8:⍺ ⍺⍺'Mean'                      ⍝ Mean
     d=4.7:⍺ ⍺⍺'Variance'                  ⍝ Variance
    ⍝ 4.7=d*2:(⍺ ⍺⍺'Variance')*÷2           ⍝ Standard Deviation
     4.7=d*2:⍺⍺{z←⍵ ⍺⍺'Variance'           ⍝ Standard Deviation
         sqrt z}⍺
       ⍝d=3:⍺ ⍺⍺ criticalValue≤0.5            ⍝ Median
     d=3:⍺ ⍺⍺'Median'                      ⍝ Median
     d=2:⍺ ⍺⍺'Mode'                        ⍝ Mode
     d=4:-/⍺ ⍺⍺ criticalValue≤0.25 0.75    ⍝ iqr
     d<0:⍺ ⍺⍺'Kurtosis'                    ⍝ Kurtosis
     d<1:⍺ ⍺⍺'Skewness'                    ⍝ Skewness

 }


:EndNamespace 
