﻿:Namespace Summary
(⎕IO ⎕ML ⎕WX)←1 1 3

 corr←{⎕IO←0 ⋄ ⎕ML←3 ⋄ ⎕DIV←1
      ⍝⍟ Copyright 2018, 2025 by Stephen M. Mansour
      ⍝∇ Correlation
      ⍝∊ Summary Function (Dyadic); Measure of Association
      ⍝⍺ [Y]          Yv  Raw Data
      ⍝⍺ [Database]   W   Namespace containing Variables
      ⍝⍵ X            Xv  Raw Data
      ⍝⍵ NameList     Cv  List of Variable Names
      ⍝⍵ XX           Xv  X1 X2 ... Xn
      ⍝⍵ Database     W   Namespace
      ⍝← Value   Xs  Sample Correlation Coefficient
      ⍝← Matrix  Xm  Correlation Matrix
      ⍝← Table   Cm  Correlation Table with Labels
      ⍝⍕ [Y|Database] corr X|XX|NameList|Database
      ⍝⍎ Value←Y corr X                      ⍝ Produces Sample Correlation Coefficient
      ⍝⍎ Matrix←corr X1 X2 ... Xn            ⍝ Produces Correlation Matrix
      ⍝⍎ Matrix←corr Database.(X1 X2 ... Xn) ⍝ Produces Correlation Matrix
      ⍝⍎ Table←corr Database                 ⍝ Produces Correlation Table for all numeric variables
      ⍝⍎ Table←'X1 X2 ... Xn' corr Database  ⍝ Produces Correlation Table for X1 ... Xn
      ⍝⍎ Height corr Weight
      ⍝⍎ corr Height Weight ShoeSize
     ⍵≡'Type':'Association'             ⍝ Bivariate function
      ⍝ Monadic call: dispatch on the depth of ⍵, passing this function as operand
     0=⎕NC'⍺':∇{
         0=≡⍵:⍵ ⍺⍺⍨⍕2 variables ⍵       ⍝ If namespace make corr table with labels
         1=≡⍵:↑⍺⍺/2 toNestedVector ⍵    ⍝ If simple vector split into two
         2=≡⍵:∘.⍺⍺⍨⍵}⍵                  ⍝ If nested vector make corr matrix
      ⍝ Dyadic call with a namespace on the right: ⍺ is the blank-separated name list,
      ⍝ which is executed inside the namespace and correlated monadically
     (9=⎕NC'⍵'):⍕⍵ ∇{m←⍺⍺ ⍺⍎⍵           ⍝ If namespace
         m{(⍵,⍨⊂'corr'),⍵⍪⍺}(⍵≠' ')⊂⍵}⍺ ⍝ Add labels
      ⍝ A left argument with more than one item is Y (confidence level 0);
      ⍝ otherwise ⍺ is the confidence level and ⍵ is split into Y and X
     C Y X←⍺{1<≢⍺:0 ⍺ ⍵                 ⍝ Confidence Level
         ⍺,2 toNestedVector ⍵}⍵         ⍝ Dependent and Independent Vars
     B←∧/⎕NULL≠Y,⍪X                     ⍝ Check for missing data
     Y X←B∘/¨Y X                        ⍝ Remove incomplete data
     rAv∧C=0:+##.∆r.x'cor(⍵,⍵)'Y X      ⍝ Let R do it
     M←mean¨Y X ⋄ S←sdev¨Y X            ⍝ Find means and stdev's
     R←(+/×⌿⊃Y X-M)÷×/S,¯1+≢X           ⍝ Point estimate of correlation
     C=0:R                              ⍝ No confidence level: return the point estimate
     Z←normal critVal<0.5×1-C           ⍝ Normal critical value for the interval
     7○(¯7○R)(-,+)Z÷sqrt ¯3+≢X          ⍝ tanh of (arctanh R) -,+ Z÷sqrt n-3; Montgomery p. 49
 }

 count←{
      ⍝∇ Sample or Population Size
      ⍝∊ Summary Function; Measure of Quantity
      ⍝⍟ Copyright 2017 by Steve Mansour
      ⍝⍵ NumData     Xv   Sample or Population Data
      ⍝⍵ CharData    Ca   Comma Delimited, Nested or Matrix
      ⍝⍵ FreqDist    Xm2  Values followed by Counts or Probabilities
      ⍝⍵ SummaryData W    Namespace containing count, mean, sdev
      ⍝← Size        Ns   Number of items
      ⍝⍕  Ns ← count Xv|Cv|Cm|W
      ⍝⍎  5 ← count 2 5 1 3 2
     ⎕ML←3 ⋄ ⎕IO←0
     9=⎕NC'⍵':⍵.count                  ⍝ Summary namespace: return its count variable
     items←toNestedVector ⍵            ⍝ Character input becomes a nested vector
     1=≢⍴items:≢items                  ⍝ Vector: the tally is the size
     ~isFrequency items:'Rank Error'   ⍝ Anything else must be a frequency table
     freq←⍵[;1]                        ⍝ Second column: counts or probabilities
     ∧/freq=⌊freq:+/freq               ⍝ All whole numbers: total the counts
     (∧/freq≥0)∧1=+/freq:⌊/⍬           ⍝ Non-negative and summing to 1: infinite population
     'Domain Error'                    ⍝ Neither counts nor probabilities
 }

 cov←{
      ⍝⍟ Copyright 2018 by Stephen M. Mansour
      ⍝∇ Covariance
      ⍝∊ Summary Function (Dyadic); Measure of Association
      ⍝⍺ [Y]          Yv  Raw Data
      ⍝⍺ [Database]   W   Namespace containing Variables
      ⍝⍵ X            Xv  Raw Data
      ⍝⍵ NameList     Cv  List of Variable Names
      ⍝⍵ XX           Xv  X1 X2 ... Xn
      ⍝⍵ Database     W   Namespace
      ⍝← Value   Xs  Sample Covariance
      ⍝← Matrix  Xm  Covariance Matrix
      ⍝← Table   Cm  Covariance Table with Labels
      ⍝⍕ [Y|Database] cov X|XX|NameList|Database
      ⍝⍎ Value←Y cov X                      ⍝ Produces Sample Covariance
      ⍝⍎ Matrix←cov X1 X2 ... Xn            ⍝ Produces Covariance Matrix
      ⍝⍎ Table←Database cov 'X1 X2 ... Xn'  ⍝ Produces Covariance Table for X1 ... Xn
      ⍝⍎ Matrix←cov Database.(X1 X2 ... Xn) ⍝ Produces Covariance Matrix
      ⍝⍎ Table←cov Database                 ⍝ Produces Covariance Table for all numeric variables
      ⍝⍎ Height cov Weight
      ⍝⍎ cov Height Weight ShoeSize
     ⎕IO←0 ⋄ ⎕ML←3
         ⍝ 0=⎕NC'⍺':∘.∇⍨⍵                   ⍝ No left arg, make cov matrix
      ⍝ Monadic call: a non-namespace ⍵ gives the pairwise matrix; a namespace ⍵ is
      ⍝ re-submitted dyadically with the formatted result of `2 variables ⍵` as name list
     0=⎕NC'⍺':∇{9≠⎕NC'⍵':∘.⍺⍺⍨⍵
         ⍵ ⍺⍺⍕2 variables ⍵}⍵
      ⍝ Namespace on the left: execute the names in it, build the matrix, then label it
     9=⎕NC'⍺':⍕⍺ ∇{m←⍺⍺ ⍺.(⍎⍵)         ⍝ If namespace
         m{(⍵,⍨⊂'cov'),⍵⍪⍺}(⍵≠' ')⊂⍵}⍵ ⍝ Add labels
     0J1∊⍺:(1↓⍺)showCov ⍵              ⍝ Display covariance
     1∊≢¨⍺ ⍵:0                         ⍝ 0 if either argument has a single item
     rAv:+##.∆r.x'cov(⍵,⍵)'⍺ ⍵         ⍝ R calculation
     n←≢⍵                              ⍝ Sample Size
     z←(n×⍺+.×⍵)-×/+/⊃⍺ ⍵              ⍝ n × sum of products, minus product of the two sums
     z÷n×n-1                           ⍝ Covariance
 }

 cv←{
      ⍝ Coefficient of variation: sdev of ⍵ divided by mean of ⍵
     (sdev ⍵)÷mean ⍵
 }

 geometricMean←{
      ⍝ Geometric mean of raw data, or of a frequency table when isFrequency ⍵ holds
     weighted←{*(⍵+.×⍟⍺)÷+/⍵}        ⍝ ⍺ values, ⍵ weights: exp of the weighted mean log
     isFrequency ⍵:weighted/↓⍉⍵      ⍝ Frequency table: reduce across its columns
     logs←⍟⍵                         ⍝ Raw data: take logs,
     *(+/logs)÷≢logs                 ⍝ average them and exponentiate
 }

 harmonicMean←{
      ⍝ Harmonic mean of raw data, or of a frequency table when isFrequency ⍵ holds
     weighted←{(+/⍵)÷⍵+.÷⍺}          ⍝ ⍺ values, ⍵ weights: total weight over weighted reciprocals
     isFrequency ⍵:weighted/↓⍉⍵      ⍝ Frequency table: reduce across its columns
     (≢⍵)÷+/÷⍵                       ⍝ Raw data: tally over the sum of reciprocals
 }

 intercept←{
      ⍝⍟ Copyright 2025 by Stephen M. Mansour
      ⍝∇ Estimated intercept using least squares
      ⍝∊ Summary Function (Dyadic); Measure of Association
      ⍝⍺ Y            Yv  Raw Data
      ⍝⍵ X            Xv  Raw Data
      ⍝← Beta0        Zs  Intercept (Beta0)
      ⍝⍕ Zs ← Yv intercept Xv
      ⍝⍎ Beta0←Y intercept X            ⍝ Produces Sample Intercept
      ⍝⍎ [.95] intercept confInt Y X
      ⍝⍕
     ⎕ML←1 ⋄ ⎕IO←0 ⋄ ⍺←0           ⍝ Initialize
     ⍵≡'Type':'Association'        ⍝ Type query: measure of association
        ⍝  C Y X←⍺{1<≢⍺:0 ⍺ ⍵           ⍝ Confidence Level
     ⍝         ⍺,2 toNestedVector ⍵}⍵   ⍝ Dependent and Independent Vars
      ⍝ If ⍺ has the same tally as ⍵ (or as its first item when nested) it is Y and C is 0;
      ⍝ otherwise ⍺ is the confidence level and ⍵ is split into Y and X
     C Y X←⍺{⍺=⍥≢⊃⊆⍵:0 ⍺ ⍵         ⍝ Confidence Level
         (⊂⍺),2 toNestedVector ⍵}⍵    ⍝ Dependent and Independent Vars
     B M←Y⌹1,⍪X                    ⍝ Calculate Intercept Slope
     C≡0:B                         ⍝ If conf=0, done
     DF←¯2+≢X                      ⍝ Degrees of Freedom
     S←{(⍺+.×⍵)-(+/⍺)×(+/⍵)÷≢⍵}    ⍝ Calculate Sxx Sxy Syy
     MSE←((S⍨Y)-M×X S Y)÷DF        ⍝ Mean Square Error
     T←DF tDist critVal<0.5×1-C    ⍝ Student T distribution
     X2←2*⍨mean X                  ⍝ Square of the mean of X
         ⍝ B(-,+)T×sqrt MSE×(÷≢X)+X2÷S⍨X ⍝ Confidence Interval
     SE←sqrt MSE×(÷≢X)+X2÷S⍨X      ⍝ Standard Error
     B+(T×SE)∘.×¯1 1               ⍝ Confidence intervals
 }

 iqr←{
      ⍝⍟ Copyright (c) 2018, 2024 by Stephen Mansur
      ⍝∇ Interquartile range  |Q3-Q1|
      ⍝∊ Summary Function; Measure of Spread
      ⍝⍵ DataSet     Xv  Sample of Population Data
      ⍝⍵ FreqDist    Xm2 Values followed by Counts or Probabilities
      ⍝⍵ SummaryData W   Namespace containing count, mean, sdev
      ⍝← Diff Ts  Difference between third and first quartiles
      ⍝⍕ Diff ← iqr DataSet|FreqDist|SummaryData
      ⍝⍎ 5 ← iqr 2 8 5 1 3                              ⍝ Raw Data
      ⍝⍎ 1 ← iqr 4 2 matrix 0 20 1 30 2 40 3 10         ⍝ Frequency Distribution
      ⍝ Summary namespace: difference of the results obtained for P = 0.75 and 0.25
     9=⎕NC'⍵':-/{P←0.75 0.25
         2=⍵.⎕NC'events':⍵.events<P×⍵.count     ⍝ Namespace has an events variable
         ⍵.(mean sdev)normal critVal>P}⍵        ⍝ Else normal critical values from mean and sdev
     (1<⍴⍴⍵)∧~isFrequency ⍵:'Rank Error'           ⍝ If not frequency
     rAv:+##.∆r.x'IQR(⍵)'⍵                         ⍝ Call R if requested
     -/3 1 quartile ⍵   ⍝ Q3 - Q1                  ⍝ Else difference between Q3 and Q1
 }

 kurtosis←{
      ⍝⍟ Copyright 2018, 2024 by Stephen Mansour
      ⍝∇ Excess Kurtosis (Thickness of the tails)
      ⍝∊ Summary Function; Measure of Shape
      ⍝⍵ RawData     Xv  Sample or Population Data
      ⍝⍵ FreqDist    Xm2 Values followed by Counts or Probabilities
      ⍝← Z           Zs  Z>0 Leptokurtic; Z=0 Mesokurtic; Z<0 Platykurtic
      ⍝⍕ Zs ← kurtosis Rawdata|FreqDist
      ⍝⍎ ¯0.1518 ← kurtosis  3 4 5 2 3 4 5 6 4 7   ⍝ Sample
     ⎕DIV←1 ⋄ ⎕ML←3 ⋄ ⎕IO←0     ⍝ Set system variables
     9=⎕NC'⍵':'Undefined for stats'   ⍝ Undefined
        ⍝  x←{isFrequency ⍵:∊(//⌽⍵) ⋄ ⍵}⍵    ⍝ If frequency, replicate values
     w←⍵⌿⍨⎕NULL≠⊣/⍪⍵                  ⍝ Remove nulls (rows whose first column is ⎕NULL)
     n m s←(count,mean,sdev)w         ⍝ Size, mean and standard deviation
     4>n:⎕NULL                        ⍝ Sample size too small
      ⍝ Sum of fourth powers of deviations from the mean, weighted by column 1 if a frequency table
     d4←{~isFrequency ⍵:+/(⍵-m)*4
         ⍵[;1]+.×(⍵[;0]-m)*4 ⋄ }w
          ⍝ rAv:+#.∆r.x'kurtosis(⍵)'w       ⍝ If requested, call R
         ⍝ z←+/(s÷⍨x-m)*4                   ⍝ Get z-score
     z←(+/d4)÷s*4                     ⍝ Scale by the fourth power of sdev
     z←z×(n×n+1)÷n×.-1 2 3            ⍝ Times n(n+1) ÷ (n-1)(n-2)(n-3)
     z-3×(n-1 2 3)×.*2 ¯1 ¯1          ⍝ Calculate kurtosis: subtract 3(n-1)*2 ÷ (n-2)(n-3)
     
     
 }

 max←{
          ⍝⍟ Copyright (c) 2018, by Stephen Mansur
          ⍝∇ Find maximum of a data set or frequency distribution
          ⍝∊ Summary Function; Measure of Position
          ⍝⍺ [X]         Xv  Sample or Population Data
          ⍝⍵ Y           Yv  Sample or Population Data
          ⍝⍵ FreqDist    Xm2 Values followed by Counts or Probabilities
          ⍝⍵ SummaryData W   Namespace containing count, mean, sdev
          ⍝← Result      Zs  Numeric Scalar
          ⍝⍕ Result ←  max Y|Freqdist|SummaryData    ⍝ Monadic
          ⍝⍕ Result ←  X max Y        ⍝ Dyadic - larger of two values
          ⍝⍎ 8 ← max 2 8 5 1 3
          ⍝⍎ 5 2 4 ← 5 2 3 max 2 1 4
     ⎕ML←3 ⋄ ⎕IO←0 ⋄ ⍺←⍬              ⍝ Set system variables
      ⍝ Dyadic: item-wise maximum; positions where either argument is ⎕NULL give ⎕NULL
     ⍺≢⍬:⍺{~⎕NULL∊⍺,⍵:⍺⌈⍵
         b←∧/⎕NULL≠yx←⍺,⍪⍵            ⍝ Dyadic version
         (⎕NULL@((~b)/⍳⍴b))b\⌈/b⌿yx}⍵ ⍝ Maximum of complete pairs, ⎕NULL elsewhere
     9=⎕NC'⍵':{n←⍵.count              ⍝ Get count
         z←normal criticalValue>n÷n+1 ⍝ Estimate z-score
         ⍵.mean+z×⍵.sdev}⍵            ⍝ Estimate max
        ⍝  m←↑⌈⌿⍵                          ⍝ Get maximum
     b←∧/⎕NULL≠⍪⍵                     ⍝ Rows without nulls
     m←↑⌈⌿b⌿⍵                         ⍝ Exclude nulls
     1=⍴⍴⍵:m                          ⍝ If data Set, done
     ~isFrequency ⍵:'Rank Error'
          ⍝i←∪2-/⍵[⍋⍵;0]                   ⍝ Else get interval
     i←∪2-/{⍵[⍋⍵;0]}b⌿⍵               ⍝ Else get interval
     1<≢i:m                           ⍝ If irregular, done
     m-⌈i÷2                           ⍝ Else get upper boundary
 }

 mean←{
          ⍝⍟ Copyright (c) 2017, 2023 by Stephen Mansour
          ⍝∇ Calculate mean of a data set or frequency distribution
          ⍝∊ Summary Function; Measure of Center
          ⍝⍺_ 0j1 = Show calculations; 0 = only show answer
          ⍝⍵ RawData     Xv  Sample or Population Data
          ⍝⍵ FreqDist    Xm2 Values followed by Counts or Probabilities
          ⍝⍵ SummaryData W   Namespace containing count, mean, sdev
          ⍝← Avg Xs Average Value; Statistical Mean of Data
          ⍝⍕ Avg ← mean RawData|FreqDist|SummaryData
          ⍝⍎ 3.8 ← mean 2 8 5 1 3      ⍝ Raw Data
          ⍝⍎ 1.4 ← mean 4 2 matrix 0 20 1 30 2 40 3 10 ⍝ Frequency Distribution
          ⍝⍎ 1.4 ← mean 4 2 matrix 0 0.2 1 0.3 2 0.4 3 0.1 ⍝ Relative Frequency
     ⎕ML←3 ⋄ ⎕IO←0 ⋄ ⎕DIV←1 ⋄ ⍺←0              ⍝ Set up variables
     ⍵≡'Type':'Center'                         ⍝ Measure of center
     C M←2↑⎕C ⍺                                ⍝ Confidence level and method (case-folded, padded to 2 items)
     M∊'kn':C{z←normal critVal<0.5×1-⍺         ⍝ Known sigma, use normal
         ~twoSample ⍵:(mean ⍵)(-,+)z×sqrt(var÷count)⍵ ⍝ One-sample estimate
         n←count¨⍵ ⋄ x←mean¨⍵ ⋄ v←var¨⍵        ⍝ Two-sample estimate
         (-/x)(-,+)z×sqrt v+.÷n}⍵
     M='p':C{ok←{MTX←⊃(⍴∘⍴,≢)¨⍵                ⍝ Paired difference
             (1∧.=MTX[;0])∧=/MTX[;1]}⍵         ⍝ Check inputs
         ~ok:'Must be raw data of equal length.'
         n s←(≢,sdev)d←↑-/⍵                    ⍝ Sample size and std dev
         t←(n-1)tDist critVal<0.5×1-⍺          ⍝ Use Student t (sigma unknown)
         (mean d)(-,+)t×s÷sqrt n}⍵
     M='e':C{~twoSample ⍵:'Two samples required' ⍝ Equal Variances
         df←¯1+n←count¨⍵ ⋄ x←mean¨⍵ ⋄ v←var¨⍵
         sp←(v+.×df)÷+/df                      ⍝ Pooled variance
         se←sqrt sp×+/÷n                       ⍝ Standard error of the difference
         t←(+/df)tDist critVal<0.5×1-⍺
         (-/x)(-,+)t×se}⍵
     M='u':C 0 confMean ⍵                      ⍝ Method 'u': delegate to confMean with C 0
     C between 0 1:⍺ confMean ⍵                ⍝ Default method
     twoSample ⍵:-/∇¨⍵                         ⍝ If two samples, calculate mean difference
     9=⎕NC'⍵':⍵.mean                           ⍝ If namespace, get mean property
        ⍝  ~'X'check ⍵:⎕SIGNAL 11                   ⍝
     w←frequency⍣(↑(0J1=⍺)∧1=⍴⍴⍵)⊢⍵            ⍝ Convert to frequency?
     isFrequency w:⍺{⍺=0:(+/×/⍵)÷+/⍵[;1]       ⍝ If matrix, treat as frequency
         d←⍵[;,0],(⍵[;1]÷+/⍵[;1])              ⍝ Values, frequencies
         d←d,×/d ⋄                             ⍝ X×p
         FS←'I10,F10.5,F10.5'                  ⍝ Set widths to 10
         z←FS ⎕FMT d⍪+⌿d                       ⍝ Format data and totals
         h←∊¯10↑¨'x' 'P(x)' 'xP(x)'            ⍝ Build Heading
         u←30⍴1 9/' -'                         ⍝ Underline heading
         (h⍪u⍪(¯1↓z)⍪u)⍪¯30↑'Total:',10↓,¯1↑z  ⍝ Assemble table
     }w
     2=≡w:∇¨w~¨⎕NULL                           ⍝ Nested?
     rAv:+##.∆r.x'mean(⍵)'(⍵~⎕NULL)            ⍝ If available, use R
     (+/÷≢)w~⎕NULL                             ⍝ Use fork to calculate mean
     
 }

 median←{
          ⍝⍟ Copyright (c) 2017, 2023 by Stephen Mansur
          ⍝∇ Find the middle value of a series of numbers
          ⍝∊ Summary Function; Measure of Center
          ⍝⍵ RawData  Xv   Sample or Population Data
          ⍝⍵ FreqDist Xm2  Values followed by Counts or Probabilities
          ⍝← Middle   Xs   Middle or average of two middle values
          ⍝⍕ Middle ← median RawData|FreqDist
          ⍝⍎ 3 ← median 2 8 5 1 3
          ⍝⍎ 3.5 ← median 2 8 5 1 3 4
     ⎕ML←3 ⋄ ⎕IO←0 ⋄ ⍺←⍬ ⋄ 0∊⍴⍵:0          ⍝ Set up variables; empty input gives 0
     ⍵≡'Type':'Center'                     ⍝ Measure of center
     showMedian←{0::⍵                      ⍝ Error check
         n←≢d←⍵[⍋⍵]                        ⍝ Sort data and get sample size
         odd←2∘|                           ⍝ Odd or Even?
         c←⌈(n-1)÷2                        ⍝ Center of the data
         odd n:(c↑d)('(',d[c],')')((-c)↑d) ⍝ If odd, show middle value
         (c↑d)('|')(c↓d)                   ⍝ If even, put bar between values
     }
     confMed←{C N←⍺ ⋄ n←≢d←⍵[⍋⍵]           ⍝ Sort the data
         cp←N{⍺=0:⍵ 0.5 binomial prob≤⍳⍵+1 ⍝ Cumulative probabilities
             (⌊⍺÷2)⍵ ⍺ hyperGeometric prob≤⍳⍵+1}n
         p←{⍵,1-⍵}(1-C)÷2                  ⍝ Upper lower probs
         j←0 1+{⍵/⍳⍴⍵}2≠/≠⌿p∘.<cp          ⍝ Get conf int
         d[j⌊¯1+≢d]}                       ⍝ Clamp positions to the last index
     confMed1←{C N←⍺ ⋄ p←0.5×1-C ⋄ D←⍵[⍋⍵] ⍝ Conover, Non-Parametric
         W←(≢D)wilcoxonT critVal>p         ⍝ Statistics, p. 360 (Hodges-Lehmann)
         M←sort D,0.5×+pairwise D
         M[-⍨\W,≢M]}
     0J1∊⍺:showMedian ⍵~⎕NULL              ⍝ Show operator
           ⍝  ↑⍺ between 0 1:⍺ confMed1 ⍵
     C M←2↑⎕C ⍺                            ⍝ Confidence level and method (or second parameter)
     C between 0 1:C M{M='b':C 0 confMed ⍵ ⍝ Method 'b': confMed with second item 0
         M='h':C 0 confMed1 ⍵              ⍝ Method 'h': confMed1
         C M confMed ⍵}⍵                   ⍝ Otherwise M is passed to confMed as N
     isFrequency ⍵:{(x p)←↓⍉⍵[⍋⍵;]          ⍝ Frequency Dist?
         p∨.<0:'Domain Error'              ⍝ Validate probabilities
         b←(⊢∧.=⌊)⍵[;0]                    ⍝ If all integers
         b←b∧1∧.=¯2-/⍵[;0]                 ⍝ separated by 1 unit
         (x p)←(p≠0)∘/¨x p                 ⍝ Remove 0 frequencies
         c←(+\÷+/)p                        ⍝ Cumulative probabilities
         b:x[(c⍸0.5)+1,~0.5∊c]+.÷2         ⍝ Find position of midpoint
         i j←0 1+c⍸0.5                     ⍝ Indices of neighboring groups
         u←c[i]÷.-⍨0.5,c[j]                ⍝ Proportion within group
         x[i]+(-/x[j i])×u+0.5             ⍝ See Tobias p. 14
     }⍵
     rAv:+##.∆r.x'median(⍵)'(⍵~⎕NULL)      ⍝ Call R if available
     n←↑⍴x←{⍵[⍋⍵]}⍵~⎕NULL                  ⍝ Sort data and get sample size
     i←¯1+(n+1)÷2                          ⍝ Find median index
     x[(⌈i),⌊i]+.÷2                        ⍝ Average middle value(s)
 }

 min←{
          ⍝⍟ Copyright (c) 2018, by Stephen Mansur
          ⍝∇ Find minimum of a data set or frequency distribution
          ⍝∊ Summary Function; Measure of Position
          ⍝⍺ [X]         Xv  Sample or Population Data
          ⍝⍵ Y           Yv  Sample or Population Data
          ⍝⍵ FreqDist    Xm2 Values followed by Counts or Probabilities
          ⍝⍵ SummaryData W   Namespace containing count, mean, sdev
          ⍝← Result      Zs  Numeric Scalar
          ⍝⍕ Result ←  min Y|Freqdist|SummaryData    ⍝ Monadic
          ⍝⍕ Result ←  X min Y        ⍝ Dyadic - smaller of two values
          ⍝⍎ 1 ← min 2 8 5 1 3
          ⍝⍎ 2 1 3 ← 5 2 3 min 2 1 4
     ⎕ML←3 ⋄ ⎕IO←0 ⋄ ⍺←⍬         ⍝ Set system variables
      ⍝ Dyadic: item-wise minimum; positions where either argument is ⎕NULL give ⎕NULL
     ⍺≢⍬:⍺{~⎕NULL∊⍺,⍵:⍺⌊⍵
         b←∧/⎕NULL≠yx←⍺,⍪⍵       ⍝ Dyadic version
         (⎕NULL@((~b)/⍳⍴b))b\⌊/b⌿yx}⍵ ⍝ Minimum of complete pairs, ⎕NULL elsewhere
     9=⎕NC'⍵':{n←⍵.count         ⍝ Get count
         z←normal criticalValue>n÷n+1 ⍝ Estimate z-score
         ⍵.mean-z×⍵.sdev}⍵       ⍝ Estimate min
     b←∧/⎕NULL≠⍪⍵                ⍝ Rows without nulls
     m←↑⌊⌿b⌿⍵                    ⍝ Exclude nulls
     1=⍴⍴⍵:m                     ⍝ If data Set, done
     ~isFrequency ⍵:'Rank Error'
     i←∪2-/{⍵[⍋⍵;0]}b⌿⍵          ⍝ Else get interval
     1<≢i:m                      ⍝ If irregular, done
     m+⌈i÷2                      ⍝ Else get lower boundary
 }

 mode←{
          ⍝⍟ Copyright (c) 2017, by Stephen Mansur
          ⍝∇ Find the most frequently occurring value(s).
          ⍝∊ Summary Function; Measure of Center
          ⍝⍺ 0 - find first mode; 1 - find all modes
          ⍝⍵ RawData  Xv   Sample or Population Data
          ⍝⍵ CharData Ca   Comma Delimited, Nested or Matrix
          ⍝⍵ FreqDist Xm2  Values followed by Counts or Probabilities
          ⍝← MostFreq Xv   Most frequently occurring value(s).
          ⍝⍕ Z ← [_0|1] mode RawData|CharData|FreqDist
          ⍝⍎ 2 ← mode 2 5 1 2 2 3      ⍝ Most frequent value
          ⍝⍎ 3 ← 0 mode 2 3 3 5 5 7    ⍝ First most frequent value
          ⍝⍎ 3 5 ← 1 mode 2 3 3 5 5 7  ⍝ Bimodal
          ⍝⍎ ⍬ ← 1 mode 1 2 3 4 5      ⍝ No mode
          ⍝⍎ 'NJ' ←  mode 'NY,NJ,NJ,PA,NJ,PA' ⍝ Character Data
          ⍝⍎ 'NJ' 'PA' ← 1   mode 'NY,NJ,NJ,PA,NJ,PA,PA' ⍝ Bimodal character data
     ⎕ML←3 ⋄ ⎕IO←0 ⋄ ⍺←0          ⍝ Set up variables
     9=⎕NC'⍵':⎕SIGNAL 11          ⍝ Don't permit namespace input
         ⍝b←(2=⍴⍴w)∧(2=≡w)∨0≡↑0⍴w←1/⍵  ⍝ Frequency?
        ⍝ b←2∧.=(⍴,↑∘⌽)⍴w←1/⍵          ⍝ Frequency?
     b←isFrequency⊢w←1/⍵          ⍝ Frequency test on ⍵ (1/ turns a scalar into a vector)
     b←↑b∧(2=≡w)∨0≡↑0⍴w           ⍝ ... and either depth 2 or a numeric prototype
     b:⍺{(x p)←↓⍉⍵                ⍝ If frequency
         ⍺=0:↑x[p⍳⌈/p]            ⍝ Pick first mode
         z←(p=m←⌈/p)/x            ⍝ Get all modes
         ((,1)≢∪m,⌊/p~0)/z}⍵      ⍝ No mode if all unique
     w←toNestedVector ⍵           ⍝ If character, make nested vector
     d←{i,⍪+⌿⍵∘.=i←∪⍵}⍳⍨w         ⍝ Count occurrences (first-occurrence index, count)
     j←d[{(⍵=⌈/⍵)/⍳≢⍵}d[;1];0]    ⍝ Select greatest frequencies
     ⍺=0:↑w[j]                    ⍝ First mode
     (1<⌈/d[;1])/w[j]             ⍝ All modes (none if the greatest count is 1)
 }

 mse←{
      ⍝⍟ Copyright 2025 by Stephen M. Mansour
      ⍝∇ Estimated mean square error
      ⍝∊ Summary Function (Dyadic); Measure of Association
      ⍝⍺ Y            Yv  Raw Data
      ⍝⍵ X            Xv  Raw Data
      ⍝← Sigma2       Zs  Mean Square Error
      ⍝⍕ Zs ← Yv mse Xv
      ⍝⍎ Sigma2←Y mse X      ⍝ Produces Mean Square Error
      ⍝⍎ [.95] mse confInt Y X
      ⍝⍕
     ⎕ML←1 ⋄ ⎕IO←0 ⋄ ⍺←0           ⍝ Initialize
     ⍵≡'Type':'Association'        ⍝ Type query: measure of association
      ⍝ A left argument with more than one item is Y (confidence level 0);
      ⍝ otherwise ⍺ is the confidence level and ⍵ is split into Y and X
     C Y X←⍺{1<≢⍺:0 ⍺ ⍵            ⍝ Confidence Level
         ⍺,2 toNestedVector ⍵}⍵    ⍝ Dependent and Independent Vars
     M←1⊃Y⌹1,⍪X                    ⍝ Calculate Slope
     S←{(⍺+.×⍵)-(+/⍺)×(+/⍵)÷≢⍵}    ⍝ Calculate Sxx Sxy Syy
     DF←¯2+≢X                      ⍝ Degrees of Freedom
     MSE←((S⍨Y)-M×X S Y)÷DF        ⍝ Mean Square Error
     C=0:MSE                       ⍝ If conf=0, done
     P←0.5×1(+,-)C                 ⍝ Cumulative probs
     X2←DF chiSquare critVal>P     ⍝ ChiSquare distribution
     MSE×DF÷X2                     ⍝ Confidence Interval
 }

 pctRank←{
      ⍝⍟ Copyright (c) 2018, by Stephen Mansour
      ⍝∇ Inverse of percentile
      ⍝∊ Summary Function; Measure of Position
      ⍝⍺ Value    Xv  Value(s) whose percentile rank is wanted
      ⍝⍵ RawData  Xv  Sample or Population Data
      ⍝⍵ FreqDist Xm2 Values followed by Counts or Probabilities
      ⍝← Percent  Iv  Numeric Scalar
      ⍝⍕ Percent ← Value pctRank RawData|FreqDist
      ⍝⍎ 30 ← 44.5 pctRank 32 58 57 96 76 87 21 58 90 5  ⍝ Single value
      ⍝⍎ 30 70 ← 44.5 81.5 pctRank 32 58 57 96 76 87 21 58 90 5  ⍝ Several values
      ⍝ #.R_Available:+#.∆r.x'quantile(⍵,⍵)'⍵(⍺÷100)
        ⍝ Written by Steve Mansour
     ⎕IO←0 ⋄ ⎕ML←3 ⋄ ⎕DIV←1
     isFrequency ⍵:⍺{(x p)←↓⍉⍵  ⍝ Values, probs
         x,←(↑⌽x)--/¯2↑x        ⍝ Include upper bound
         x+←0.5×-/2↑x           ⍝ Midpoints to lower bounds
         p←(0,p)÷+/p            ⍝ probabilities
         a←(⌈/x)⌊(⌊/x)⌈⍺        ⍝ Bound values
         j←¯1++/a∘.≥x            ⍝ Find positions
         c←+\p                  ⍝ Cumulative probabilities
         r←(a-x[j])÷-/(x,⌈/x)[j∘.+1 0] ⍝ Fraction of the way through the interval
         ⌊0.5+100×c[j]+r×p[j]   ⍝ Round to a whole percent
     }⍵
     1<⍴⍴⍵:'Rank Error'
     r←(+/⍺∘.≥⍵)              ⍝ Rank
     ⌊0.5+100×r÷≢⍵}

 percent←{⎕ML←3 ⋄ ⎕IO←0
      ⍝ Convert the second column of a two-column table to percentages.
      ⍝ ⍵  Matrix whose column 1 (origin 0) holds frequencies in one of four forms
      ⍝ ←  Copy of ⍵ with that column rescaled to percent
      ⍝ The "ends at 1" test is kept separate from the Boolean sort flag: applying ∧
      ⍝ to a non-Boolean value computes LCM, which misclassified columns ending in 0.5, 0.25, ...
     z←⍵
     z[;1]←{1=+/⍵:100×⍵           ⍝ Relative frequency (sums to 1)
         s←⍵≡⍵[⍋⍵]                ⍝ Non-decreasing column: treat as cumulative
         s∧1=↑⌽⍵:100×⍵            ⍝ Relative Cumulative frequency (last item is 1)
         s:100×⍵÷↑⌽⍵              ⍝ Cumulative frequency: scale by the last item
         100×⍵÷+/⍵}z[;1]          ⍝ Frequency: scale by the total
     z}

 percentile←{
      ⍝⍟ Copyright (c) 2018, by Stephen Mansour
      ⍝∇ Find the Nth Percentile of a DataSet
      ⍝∊ Summary Function; Measure of Position
      ⍝⍺ N        Iv  Percentile (0<N<100)
      ⍝⍵ RawData  Xv  Sample or Population Data
      ⍝⍵ FreqDist Xm2 Values followed by Counts or Probabilities
      ⍝← Value    Zs  Numeric Scalar
      ⍝⍕ Result ← N percentile RawData|FreqDist
      ⍝⍎ 44.5 ← 30 percentile 32 58 57 96 76 87 21 58 90 5
      ⍝⍎ 44.5 81.5 ← 30 70 percentile 32 58 57 96 76 87 21 58 90 5
     ⍵≡'Type':'Position'             ⍝ Measure of position
     ∨/~⍺ between 0 100:⎕SIGNAL 11   ⍝ Domain error
     P←⍺÷100 ⋄ ⎕ML←3 ⋄ ⎕IO←0
      ⍝ #.R_Available:+#.∆r.x'quantile(⍵,⍵)'⍵ P
      ⍝ ptileconf: confidence interval; ⍺ is (confidence level, percentile), ⍵ the data
     ptileconf←{C P←⍺ ⋄ N←count ⍵                   ⍝ Conover, Non-Parametric Statistics
         I←C{N Q←⍵                                   ⍝ p. 143 ConfInt for a Quartile
             N≤20:¯1+N Q binomial critVal outside ⍺  ⍝ Small Samples
             Z←normal critVal<0.5×1-⍺                ⍝ Large Samples
             ⌊(N×Q)(-,+)Z×sqrt N×Q×1-Q
         }N,P÷100
         9=⎕NC'⍵':{b←(⊂'events')∊variables ⍵         ⍝ Summary namespace
             b:I≥⍵.events                            ⍝ Same name as tested above (names are case-sensitive)
             ⍵.(mean sdev)normal critVal≥I÷N}⍵
         2=⍴⍴⍵:⍵[1+(+\⍵[;1])⍸I;0]                    ⍝ Matrix: look up positions in cumulative counts
         X←⍵[⍋⍵] ⋄ X[0⌈I⌊¯1+⍴X]}                     ⍝ Sort and find bounds at position
     1>↑⍺:⍺ ptileconf ⍵                              ⍝ First item below 1 is a confidence level
     9=⎕NC'⍵':P{b←(⊂'events')∊variables ⍵            ⍝ Summary namespace
         b:⍺≥⍵.(events÷count)
         ⍵.(mean sdev)normal critVal>⍺}⍵
     isFrequency ⍵:P{(x p)←↓⍉⍵[⍋⍵;]                  ⍝ Frequency Dist?
         p∨.<0:'Domain Error'
         i←+/⍺∘.≥c←(+\p)÷+/p                         ⍝ Cum probs
         x[i⌊¯1+≢x]}⍵
     n←≢x←⍵[⍋⍵]                                      ⍝ Sort, get shape
     (n×P){i←⌊⍺ ⋄ i≠⍺:⍵[i]                           ⍝ If not integer, Round up
         0.5+.×⍵[i-0 1]}¨⊂x                          ⍝ Else average
 }

 percentileRank←{
       ⍝ Written by Steve Mansour
       ⍝ Percentile rank of the value(s) ⍺ within raw data or a frequency table ⍵.
       ⍝ The code is the same as pctRank in this namespace.
       ⍝ ⍺  Value(s) to rank
       ⍝ ⍵  Vector of raw data, or a table accepted by isFrequency
       ⍝ ←  Whole-number percent(s), or 'Rank Error' for other arrays of rank above 1
     ⎕IO←0 ⋄ ⎕ML←3 ⋄ ⎕DIV←1
     isFrequency ⍵:⍺{(x p)←↓⍉⍵  ⍝ Values, probs
         x,←(↑⌽x)--/¯2↑x        ⍝ Include upper bound
         x+←0.5×-/2↑x           ⍝ Midpoints to lower bounds
         p←(0,p)÷+/p            ⍝ probabilities
         a←(⌈/x)⌊(⌊/x)⌈⍺        ⍝ Bound values
         j←¯1++/a∘.≥x            ⍝ Find positions
         c←+\p                  ⍝ Cumulative probabilities
         r←(a-x[j])÷-/(x,⌈/x)[j∘.+1 0] ⍝ Fraction of the way through the interval
         ⌊0.5+100×c[j]+r×p[j]   ⍝ Round to a whole percent
     }⍵
       ⍝  r←(+/⍺∘.>⍵)+0.5×+/⍺∘.=⍵  ⍝ Rank
     1<⍴⍴⍵:'Rank Error'
     r←(+/⍺∘.≥⍵)              ⍝ Rank
     ⌊0.5+100×r÷≢⍵}

 product←{ ⍝⍟ Copyright (c) 2018, by Stephen Mansur
          ⍝∇ Calculate sample product of a data set or frequency distribution
          ⍝∊ Summary Function; Measure of Quantity
          ⍝⍵ DataSet Xv Sample of Population Data
          ⍝⍵ FrequencyDistribution Nm  2-column matrix: Values Counts
          ⍝⍵ SummaryData W Namespace containing count, mean, sdev
          ⍝← Product Xs   Numeric Scalar
          ⍝⍕ Ys ← product Xv|Xm|W
          ⍝⍎ 240 ← product 2 8 5 1 3
     11::(×/×⍵)×⌊/⍳0         ⍝ Represent infinity if domain error
      ⍝ Summary namespace: apply the fork to ⍵ so the result is a value, (mean ⍵)*count ⍵.
      ⍝ Without the argument the guard yielded a function, which a dfn cannot return.
     9=⎕NC'⍵':(mean*count)⍵  ⍝ Estimate from the mean raised to the count
     w←(∧/⎕NULL≠⍪⍵)⌿⍵        ⍝ Drop items/rows containing ⎕NULL
     isFrequency w:×.*/↓⍉w   ⍝ Frequency Distribution
     ×/w                     ⍝ Raw Data
 }

 proportion←{
          ⍝⍟ Copyright 2018, 2025 by Stephen M. Mansour
          ⍝∇ Sample proportion
          ⍝∊ Summary function
          ⍝⍺_ Confidence Level, (Optional, 0 = point estimate)
          ⍝⍺_ PopulationSize Is (Optional, 0 = infinite)
          ⍝⍺_ Method  (Optional, Clopper Pearson,
          ⍝⍵ BooleanValues  Bv True = 1, False = 0
          ⍝⍵ FreqDist    Xm2 Values followed by Counts or Probabilities
          ⍝⍵ SummaryData W   Namespace containing count, events
          ⍝← ProportionTrue Ps Proportion 0≤P≤1
          ⍝⍕ ProportionTrue ← [PopulationSize] proportion BooleanValues
          ⍝⍎ .4 ← proportion 1 0 1 0 0
          ⍝⍎ proportion STATE eq 'PA'
          ⍝⍎ proportion Height > 72
     ⎕IO←0 ⋄ ⎕ML←3 ⋄ ⍺←0
     ⍵≡'Type':'Center'                          ⍝ Measure of center
      ⍝ ptEst: point estimate; returns ¯1 when the input is not valid Boolean data
     ptEst←{twoSample ⍵:-/∇¨⍵                   ⍝ Difference of proportions
         9=⎕NC'⍵':{1≠×/×⍵-0 1:⍵ ⋄ ¯1}⍵.mean     ⍝ Get mean if summary
         2=⍴⍴⍵:{b←∧/(⍵[;0]∊0 1)                 ⍝ 1st col boolean?
             b←b∧(⍵[;1]≡⌊⍵[;1])∧⍵[;1]∧.≥0       ⍝ 2nd col pos integer?
             b:(+/×/⍵)÷+/⍵[;1] ⋄ ¯1}⍵           ⍝ Frequency dist
         ~∧/⍵∊0 1:¯1 ⋄ (+/⍵)÷↑⍴⍵}               ⍝ Boolean vector
     ⍺≡0:ptEst ⍵                                ⍝ Point estimate
          ⍝C M←{2=≢A←{⍵[⍋⍵]},⎕C↑¨⍵:A                  ⍝ Confidence Level
     ⍝         (¯1*645≠⎕DR A)↓0.95,A,0}⍺              ⍝   and Method
      ⍝ Pad to two items so a lone confidence level gets M=0 (infinite population,
      ⍝ default method) instead of being copied into M by scalar extension
     C M←2↑⎕C ⍺
     twoSample ⍵:C{n p←↓⍉⊃(count,mean)¨⍵        ⍝ Two-sample confint
         Z←normal critVal<0.5×1-⍺
         (-/p)(-,+)Z×sqrt+/p×(1-p)÷n}⍵
     n x←×\(count,mean)⍵                        ⍝ Sample Size, Events   bin
     f←{Z←normal critVal<0.5×1-C
         p←⍵÷⍺ ⋄ p(-,+)Z×sqrt p×(1-p)÷⍺}        ⍝ Normal approximation
     M='b':C clopperPearson n x                 ⍝ Uses beta distribution
     M='e':C confPropExact n x                  ⍝ Uses binomial distribution
     M='w':C 0 wilsonScoreInt n x               ⍝ Wilson score int without CC
     M='c':C 1 wilsonScoreInt n x               ⍝   "       "   "    With  CC
          ⍝M='n':n{Z←normal critVal<0.5×1-C
     ⍝         p←⍵÷⍺ ⋄ p(-,+)Z×sqrt p×(1-p)÷⍺}x       ⍝ Normal approximation
     M='n':n f x
     M≠0:{PP←⌈\(⍳M)n M hyperGeometric prob>⍵    ⍝ Finite Population
         M÷⍨PP⍸0.5×1(-,+)C}x                    ⍝ p. 59 Thompson, Sampling
     n f x                                      ⍝ Default = normal approx
          ⍝n>30:C 1 wilsonScoreInt n x                ⍝ Default method
     ⍝     C confPropExact n x
 }

 quartile←{
          ⍝⍟ Copyright (c) 2018, by Stephen Mansur
          ⍝∇ Find the Nth Quartile of a data set
          ⍝∊ Summary Function; Measure of Position
          ⍝⍺ [N]         Iv  Quartile (0=min,2=Median,4=max),omitted = All quartiles
          ⍝⍵ RawData     Xv  Sample or Population Data
          ⍝⍵ FreqDist    Xm2 Values followed by Counts or Probabilities
          ⍝← Result      Zs  Numeric Scalar
          ⍝⍕ Result ←  [0|1|2|3|4] quartile RawData|FreqDist
          ⍝⍎ 32 ← 1 quartile 32 58 57 96 76 87 21 58 90 5  ⍝ First Quartile (Q1)
          ⍝⍎ 87 ← 3 quartile 32 58 57 96 76 87 21 58 90 5  ⍝ Third Quartile (Q3)
          ⍝⍎ 5 32 58 87 96 ← quartile 32 58 57 96 76 87 21 58 90 5 ⍝ All quartiles
     ⎕ML←3 ⋄ ⎕IO←0
     ⍵≡'Type':'Position'             ⍝ Measure of position
     ⍺←⍳5                            ⍝ Default: quartiles 0 1 2 3 4
         ⍝ 1>↑⍺:(↑⍺)(25×1↓⍺)percentile confInt ⍵
      ⍝ First item accepted by `between 0 1`: treat it as a confidence level and hand
      ⍝ the remaining quartile numbers, as percentiles, to percentile confInt
     (↑⍺)between 0 1:(↑⍺)(25×1↓⍺)percentile confInt ⍵
     9=⎕NC'⍵':(25×⍺)percentile ⍵     ⍝ Summary namespace: delegate to percentile
     
      ⍝ showQuartile: split the data around the median, then split each half again
     showQuartile←{
             ⍝ showMedian¨showMedian ⍵
         l c r←showMedian ⍵
         (showMedian l)c(showMedian r)
     }
      ⍝ showMedian: sorted data as (lower)(marker)(upper); on any error returns ⍵ unchanged
     showMedian←{
         0::⍵
         n←≢d←⍵[⍋⍵]   ⍝ Sample Size
         odd←2∘|      ⍝ Odd or Even?
         c←⌈(n-1)÷2   ⍝ Center of the data
         odd n:(c↑d)('(',d[c],')')((-c)↑d)
         (c↑d)('|')(c↓d)
     }
     0J1∊⍺:showQuartile ⍵~⎕NULL      ⍝ Show operator
     1=≡⍺:⍺ ∇¨⊂⍵                     ⍝ Several quartile numbers: compute each in turn
     ⎕NULL∊⍵:⍺∘quartile tolerant ⍵   ⍝ Nulls present: go through tolerant (defined elsewhere)
     ⍺=0:↑⌊⌿⍵                        ⍝ Q0 is the minimum
     ⍺=4:↑⌈⌿⍵                        ⍝ Q4 is the maximum
     m←median ⍵
     ⍺=2:m                           ⍝ Q2 is the median
     2=⍴⍴⍵:(25×⍺)percentile ⍵        ⍝ Matrix: delegate to percentile
     n←≢x←⍵[⍋⍵]                      ⍝ Sort, get sample size
     ⍺=1:median(⌊n÷2)↑x              ⍝ Q1: median of the lower half
     ⍺=3:median(⌈n÷2)↓x              ⍝ Q3: median of the upper half
     ⎕SIGNAL 11                      ⍝ Any other quartile number is a domain error
 }

 range←{
          ⍝⍟ Copyright (c) 2024 by Stephen Mansur
          ⍝∇ Calculate range of a data set or frequency distribution
          ⍝∊ Summary Function; Measure of Spread
          ⍝⍵ RawData     Xv  Sample or Population Data
          ⍝⍵ FreqDist    Xm2 Values followed by Counts or Probabilities
          ⍝⍵ SummaryData W   Namespace containing count, mean, sdev
          ⍝← Diff Ts  Difference between largest and smallest value
          ⍝⍕ Diff ← range RawData|FreqDist|SummaryData
          ⍝⍎ 7 ← range 2 8 5 1 3                        ⍝ Raw Data
          ⍝⍎ 3 ← range 4 2 matrix 0 20 1 30 2 40 3 10   ⍝ Frequency Distribution
     9=⎕NC'⍵':∇{2=⍵.⎕NC'events':~⍵.(events∊0,count)⍝ If bernoulli, 1 or 0
         ⍺⍺ ⍵.(mean sdev)normal randVar ⍵.count}⍵  ⍝ Else assume normal
     (1<⍴⍴⍵)∧~isFrequency ⍵:'Rank Error'           ⍝ If not frequency
          ⍝(⌈/-⌊/)(⍪⍵)[;⎕IO]~⎕NULL}                     ⍝
      ⍝ A column of 1s is appended so raw data gets a unit count; the first two columns
      ⍝ are then (value, count). ⍳2 is evaluated in the index origin in effect outside
      ⍝ this function, since none is set here.
     (⌈/-⌊/){(⍵∧.≠⎕NULL,0)/⊣/⍵}(1,⍨⍪⍵)[;⍳2]        ⍝ Remove nulls, quan = 0
 }

 rankSum←{
     ⍝ Rank-sum statistic U for two samples, or for a single ordered Boolean sequence.
     ⍝⍺  Sample 1
     ⍝⍵  Sample 2   or  Boolean order
     ⍝←  U as computed below
     ⍺←⍬ ⋄ ⎕ML←3 ⋄ ⎕IO←0
     B←{11=⎕DR ⍵:⍵ ⋄ ⍵=↑⍵}⍵              ⍝ If not Boolean, use first item as 1, everything else as 0
     ⍝ NOTE(review): both inner functions below are applied to ⍵, not to B, and the first
     ⍝ assigns its own local B←⍵, so the Boolean conversion above is never used - confirm intent.
     U←⍺{⍬≡⍺:(+/B/1+⍳≢B)-×/0.5,0 1++/B←⍵ ⍝ Monadic: sum of 1-based positions of the 1s, less 0.5×k×k+1
         +/,(⍺∘.>⍵)+0.5×⍺∘.=⍵}⍵          ⍝ calculate ranks
     MN←⍺{⍬≡⍺:(+/⍵)×+/~⍵ ⋄ ⍺×⍥≢⍵}⍵       ⍝ Product of the two group sizes
     ⍝ NOTE(review): the bare expression U ends the function and is its result, so the
     ⍝ line after it (the smaller of U and |U-MN|) is never executed - confirm which is intended.
     U
     ⌊/|-\U,MN
 }

 runs←{
     ⍝⍟ (c) 2021 by Stephen M. Mansour
     ⍝∇ Calculate the number of runs in a data set
     ⍝⍵ Data Set
     ⍝← Runs, M= Category 1,N=Category 2  (three-item result)
     ⍝⍎ 4 ← runs 1 1 0 1 1 1 0 0
     ⎕IO←0 ⋄ ⎕ML←3
     X←toNestedVector ⍵         ⍝ If character or exactly two categories
     B←{(0≠↑↑0⍴⍵)∨2=≢∪⍵:⍵∊1↑⍵   ⍝ Make boolean: 1 where an item matches the first item
         M←median ⍵             ⍝ Else group by above or below median
         M<⍵~M}X                ⍝ Items equal to the median are dropped
     1≥≢B:2 1/(≢B)0             ⍝ If zero or one element, exit
     (1++/2≠/B)(+/B)(+/~B)      ⍝ Count the number of changes
 }

 sdev←{
      ⍝⍟ Copyright 2017 by Stephen M. Mansour
      ⍝∇ Standard Deviation
      ⍝∊ Summary Function; Measure of Spread
      ⍝⍺_ 0j1 = Show calculations
      ⍝⍺ [Population]  Bs 1=population, 0=sample, Omitted: Determine from data
      ⍝⍵ DataSet     Xv  Sample or Population Data
      ⍝⍵ FreqDist    Xm2 Values followed by Counts or Probabilities
      ⍝⍵ SummaryData W   Namespace containing count, mean, sdev
      ⍝← Result      Ss  Standard deviation (square root of the var result)
      ⍝⍕ Result ← [0|1|_¯1] sdev DataSet|FreqDist|SummaryData
      ⍝⍎ 2.775 ← sdev 2 8 5 1 3      ⍝ Raw Data
      ⍝⍎ 0.92113 ← sdev 4 2 matrix 0 20 1 30 2 40 3 10 ⍝ Frequency Distribution
      ⍝⍎ 0.91652 ← sdev 4 2 matrix 0 0.2 1 0.3 2 0.4 3 0.1 ⍝ Relative Frequency
     ⍺←¯1 ⋄ ⎕IO←0 ⋄ ⎕ML←1
     9=⎕NC'⍵':⍵.sdev×sqrt 1-(⍺=1)÷⍵.count   ⍝ Summary namespace: scale by sqrt(1-1÷count) when ⍺=1
     ⍵≡'Type':'Spread'                      ⍝ Type query
     ⊃⍺ between 0 1:sqrt ⍺ confVar ⍵        ⍝ Confidence level: square root of confVar result
     rAv:⍺{z←##.∆r.x'sd(⍵)'(⍵~⎕NULL)        ⍝ Let R compute sd without nulls
         z≡⎕NULL:0
         ⍺=1:z×sqrt÷/¯1 0+≢⍵ ⋄ z}⍵          ⍝ ⍺=1: rescale by sqrt((n-1)÷n)
          ⍝(⍺ var ⍵)*÷2}
     ⎕NULL≡z←⍺ var ⍵:z                      ⍝ Pass a ⎕NULL variance straight through
     z*÷2}
 skewness←{
      ⍝⍟ Copyright 2017, 2024 by Stephen Mansour
      ⍝∇ Sample Skewness https://www.itl.nist.gov/div898/handbook/eda/section3/eda35b.htm
      ⍝∊ Summary Function; Measure of Shape
      ⍝⍺ [Population]  Bs 1=population, 0=sample, Omitted: Determine from data
      ⍝⍵ RawData     Xv  Sample or Population Data
      ⍝⍵ FreqDist    Xm2 Values followed by Counts or Probabilities
      ⍝⍵ SummaryData W   Namespace containing count, mean, sdev
      ⍝← Z           Zs  Z>0 Skewed right; Z=0 Symmetric; Z<0 Skewed Left
      ⍝⍕ Zs ← skewness Rawdata|FreqDist
      ⍝⍎ 0.35954 ← skewness  3 4 5 2 3 4 5 6 4 7   ⍝ Sample
      ⍝⍎ 0.30319 ← 1 skewness  3 4 5 2 3 4 5 6 4 7 ⍝ Population
     ⎕DIV←1 ⋄ ⎕ML←3 ⋄ ⎕IO←0 ⋄ ⍺←¯1 ⍝ Set system variables
          ⍝ rAv:+##.∆r.x'skewness(⍵)'⍵  ⍝ If requested call R
     n m s←(count,mean,1∘sdev)⍵    ⍝ Get count, mean, sdev of data
     9=⎕NC'⍵':'Nonce Error'        ⍝ Summary namespaces are not supported
         ⍝ x←{isFrequency ⍵:∊(//⌽⍵) ⋄ ⍵}⍵
     isFrequency ⍵:⍺{U←+/⍵[;1]×3*⍨⍵[;0]-m   ⍝ Weighted sum of cubed deviations
         1∨.=⍺,+/⍵[;1]:U÷(+/⍵[;1])×s*3  ⍝ Population
         (U÷n)×(sqrt n×n-1)÷(n-2)×s*3}⍵ ⍝ Sample
         ⍝ z←+/(s÷⍨⍵-m)*3
      ⍝ Size check and divisor use the same null-free data as the sum below;
      ⍝ counting ⎕NULL items in n would understate the result
     w←⍵~⎕NULL                      ⍝ Drop missing values
     3>n←≢w:'Sample Size Too Small' ⍝ If Sample size < 3, get out
     z←-n÷⍨+/(s÷⍨m-w)*3             ⍝ Mean cubed standardized deviation
     ⍺=1:z                          ⍝ Population
     z×(sqrt n×n-1)÷n-2             ⍝ Sample adjustment
 }

 slope←{
      ⍝⍟ Copyright 2025 by Stephen M. Mansour
      ⍝∇ Least-squares estimate of the regression slope
      ⍝∊ Summary Function (Dyadic); Measure of Association
      ⍝⍺ Y            Yv  Raw Data
      ⍝⍵ X            Xv  Raw Data
      ⍝← Beta1        Zs  Slope (Beta1)
      ⍝⍕ Zs ← Yv slope Xv
      ⍝⍎ Beta1←Y slope X       ⍝ Produces Sample Slope
      ⍝⍎ [.95] slope confInt Y X
     ⎕ML←1 ⋄ ⎕IO←0 ⋄ ⍺←0               ⍝ Initialize
     ⍵≡'Type':'Association'            ⍝ Type query: measure of association
      ⍝ Decide what the arguments are: if ⍺ has as many items as the first
      ⍝ (or only) item of ⍵, ⍺ is Y and ⍵ is X with confidence 0; otherwise
      ⍝ ⍺ is the confidence level and ⍵ is split in two by toNestedVector.
     split←{⍺=⍥≢⊃⊆⍵:0 ⍺ ⍵ ⋄ (⊂⍺),2 toNestedVector ⍵}
     conf yv xv←⍺ split ⍵
     beta←1⊃yv⌹1,⍪xv                   ⍝ Second coefficient of the fit on (1,x)
     conf≡0:beta                       ⍝ No confidence level: point estimate only
     ss←{(⍺+.×⍵)-(+/⍺)×(+/⍵)÷≢⍵}       ⍝ Corrected sum of cross products
     dof←¯2+≢xv                        ⍝ Degrees of freedom
     tail←0.5×1-conf                   ⍝ Upper tail probability
     sxx←xv ss xv                      ⍝ Sxx
     mse←((yv ss yv)-beta×xv ss yv)÷dof ⍝ Mean square error: (Syy - beta×Sxy)÷dof
     tcrit←dof tDist critVal<tail      ⍝ Critical value from tDist
     half←tcrit×sqrt mse÷sxx           ⍝ Critical value × standard error
     beta+half∘.×¯1 1                  ⍝ Lower and upper limits
 }

 standardDeviation←{
      ⍝∇ Long-name alias for sdev (as variance is for var)
      ⍝⍺ [Population]  Same optional left argument as sdev; default ¯1
      ⍝⍵ Data          Anything sdev accepts
      ⍝← Result        Whatever sdev returns for the same arguments
      ⍝ Delegates to sdev instead of repeating its logic, so that ⎕NULL
      ⍝ results from var are passed through rather than raised to a power.
     ⍺←¯1
     ⍺ sdev ⍵
 }

 stats←{
          ⍝⍟ Copyright (c) 2018, by Stephen Mansour
          ⍝∇ Create summary stats object (count,mean,sdev)
          ⍝∊ Summary Functions
          ⍝⍺ Name Cv       Optional name; 'Namespace' is used in the display form if omitted
          ⍝⍵ Vector2 Nv    Numeric 2-vector: Count, Events|Prob (Events if integer, Prob if between 0 and 1)
          ⍝⍵ Vector3 Nv    count, mean, sdev
          ⍝⍵ VectorN Nv    Data set (note: a 6-item argument returns 'Nonce Error')
          ⍝⍵ Namespace W   Stats object (returned unchanged)
          ⍝← Stats object
          ⍝⍕ NS ← ['Name'] stats  Count Events|(Mean Sdev)
          ⍝⍎ stats 1000 646        ⍝ N=1000, Successes=646
          ⍝⍎ stats 1000 0.4        ⍝ N=1000, P=0.4
          ⍝⍎ stats 30 10.5 2.7     ⍝ N=30, mean=10.5 sdev=2.7
          ⍝⍎ 'Height' stats 30 68 3.2
          ⍝⍎ stats Height          ⍝ Raw Data
     ⎕IO←0 ⋄ ⎕ML←3 ⋄ ⍺←'' ⋄ ⎕PP←5                ⍝ Set system variables; default name is empty
         ⍝ 326≡⎕DR ⍵:∇{∆←⍺⍺ 1↓⍵ ⋄ ∆.Name←↑⍵ ⋄ ∆}⍵ ⍝ Insert name if 1st item Char
     b←0=≢⍺                                      ⍝ 1 if no name was supplied
     name←b⊃⍺'Namespace'                         ⍝ Supplied name, else 'Namespace'
     3=≢⍵:name{count mean sdev←3↑⍵                  ⍝ If 3-vector: count, mean, sdev
         var←sdev*2 ⋄ name←⍺                     ⍝ Calculate variance
         NS←⎕NS b↓'name' 'count' 'mean' 'sdev' 'var' ⍝ Copy variables ('name' only if supplied)
         DF←⍺,': [n=',(⍕count),'; xbar=',(⍕mean),'; s=',(⍕sdev),']'
         _←NS.⎕DF DF ⋄ NS}⍵   ⍝ Set display form; return namespace
     2=≢⍵:name{count←↑⍵                             ⍝ If 2-vector: count, events or probability
         mean←↑{¯1=×/×⍵-0 1:⍵                    ⍝ 0<⍵<1: already a proportion
                  ⍝(⍵>0)∧(⍵=⌈⍵)∧⍵≤⍺:⍵÷⍺ ⋄ ¯1}/⍵       ⍝ Calc mean
             (⍵≥0)∧(⍵=⌈⍵)∧⍵≤⍺:⍵÷⍺ ⋄ ¯1}/⍵        ⍝ Integer 0≤⍵≤count: events÷count; else ¯1
         var←{⍵×1-⍵}mean ⋄ name←⍺                ⍝ Var = p(1-p)
         events←⌊0.5+count×mean                  ⍝ Successes (rounded)
         sdev←var*÷2                             ⍝ sdev= sqrt var
         NS←⎕NS b↓'name' 'count' 'events' 'mean' 'sdev' 'var' ⍝ Copy variables ('name' only if supplied)
         DF←⍺,': [n=',(⍕count),'; x=',(⍕events),']'
         _←NS.⎕DF DF ⋄ NS}⍵                    ⍝ Set display form; return namespace
     6=≢⍵:⍺{'Nonce Error'}⍵                      ⍝ 6 items: returns the text 'Nonce Error'
     9=⎕NC'⍵':⍵                                  ⍝ If ns leave alone
     count mean var sdev←(count,mean,var,sdev)⍵  ⍝ Apply the four summary functions to the data
         ⍝ name←⍺
     NS←⎕NS b↓'name' 'count' 'mean' 'sdev' 'var' ⍝   and copy the results into a namespace
     DF←name,': [n=',(⍕count),'; xbar=',(⍕mean),'; s=',(⍕sdev),']'
     _←NS.⎕DF DF ⋄ ~∧/⍵∊0 1:NS ⋄                 ⍝ Data not all 0/1: done
     NS.events←+/⍵ ⋄ DF←name,': [n=',(⍕count),'; x=',(⍕NS.events),']'
     _←NS.⎕DF DF ⋄ NS                            ⍝ Include events if binary
 }

 stdErr←{
      ⍝∇ Standard error: sdev÷sqrt n, or sqrt(p(1-p)÷n) when the data is Boolean
      ⍝⍵ Data  Raw data
      ⍝← Standard error of the data
     n←≢⍵                          ⍝ Number of items
     11≠⎕DR ⍵:(sdev ⍵)÷n*÷2        ⍝ Not stored as Boolean: sdev over root n
     p←proportion ⍵                ⍝ Boolean: result of proportion
     sqrt(p×1-p)÷n                 ⍝ Root of p(1-p)÷n
 }

 sum←{
      ⍝⍟ Copyright (c) 2018, by Stephen Mansour
      ⍝∇ Calculate sample sum of a data set or frequency distribution
      ⍝∊ Summary Function; Measure of Quantity
      ⍝⍺_ Confidence Level
      ⍝⍵ DataSet Xv Sample or Population Data
      ⍝⍵ FrequencyDistribution Nm  2-column matrix: Values Counts
      ⍝⍵ SummaryData W Namespace containing count, mean, sdev
      ⍝← Total Xs Sum of data in right argument
      ⍝⍕ Ys ← sum Xv|Xm|W
      ⍝⍎ 19 ← sum 2 8 5 1 3
     ⎕ML←1 ⋄ ⎕IO←0 ⋄ ⍺←0
     ⍵≡'Type':'Quantity'            ⍝ Measure of quantity
     ⊃⍺ between 0 1:⍺ confTotal ⍵   ⍝ 1st Left arg∊(0,1) confint
     C N←¯2↑⍺                       ⍝ Last two items of ⍺, zero-padded; N=0 means "use sample size"
     9=⎕NC'⍵':⍵.mean×N+⍵.count×N=0  ⍝ Namespace: mean × N, where N defaults to the stored count
                                    ⍝   (previously N was still 0 here, giving a total of 0)
     w←(∧/⎕NULL≠⍪⍵)⌿⍵               ⍝ Drop items (rows) containing ⎕NULL
     M←count w                      ⍝ Sample Size
     N←N+M×N=0                      ⍝ If N=0, replace with sample size
     1≥⍴⍴w:(N÷M)×+⌿w                ⍝ Raw Data: total scaled by N÷M
     isFrequency w:(N÷M)×+.×/↓⍉w    ⍝ Frequency Distribution: sum of value×count, scaled
     'Rank Error'                   ⍝ Error Message
 }

 sumProduct←{
      ⍝⍟ Copyright (c) 2018, by Stephen Mansour
      ⍝∇ Inner product (+.×) of two lists, e.g. total cost from quantities and prices
      ⍝∊ Summary Function; Measure of Quantity
      ⍝⍺ Quantity Xv List of Quantities
      ⍝⍵ Price    Yv Corresponding Prices
      ⍝← TotalCost Zs Numeric Scalar
      ⍝⍕ Zs ← Quantity sumProduct Price
      ⍝⍎ 16.5 ← 2 5 3 sumProduct 1 2 1.5
     quantity←⍺ ⋄ price←⍵          ⍝ Name the two arguments
     quantity+.×price              ⍝ Sum of the item-by-item products
 }

 sumSquares←{
      ⍝⍟ Copyright (c) 2018, by Stephen Mansour
      ⍝∇ Sum of the squared values of a data set, frequency distribution or summary
      ⍝∊ Summary Function; Measure of Quantity
      ⍝⍵ DataSet Xv Sample or Population Data
      ⍝⍵ FrequencyDistribution Nm  2-column matrix: Values Counts
      ⍝⍵ SummaryData W Namespace containing count, mean, sdev
      ⍝← Numeric Scalar
      ⍝⍕ Ys ← sumSquares Xv|Xm|W
      ⍝⍎ 103 ← sumSquares 2 8 5 1 3
     ⎕IO←0                                     ⍝ Columns are indexed from 0 below
      ⍝ Summary namespace: (count-1)×sdev² + count×mean²
     fromSummary←{n←⍵.count ⋄ s←⍵.sdev ⋄ m←⍵.mean ⋄ ((n-1)×s*2)+n×m*2}
     9=⎕NC'⍵':fromSummary ⍵                    ⍝ Namespace
     2≠⍴⍴⍵:⍵+.*2                               ⍝ Not a matrix: raw data
     values←⍵[;0] ⋄ freqs←⍵[;1]                ⍝ Matrix: values and their counts
     freqs+.×values*2                          ⍝ Frequency Distribution
 }

 var←{
           ⍝⍟ Copyright 2017 by Stephen M. Mansour
           ⍝∇ Variance or Unalikeability
           ⍝∊ Summary Function; Measure of Spread
           ⍝⍺_ 0j1 = Show calculations
           ⍝⍺ [Population]  Bs 1=population, 0=sample, Omitted: Determine from data
           ⍝⍵ RawData     Xv  Sample or Population Data
           ⍝⍵ Categorical Ca  Categorical Data
           ⍝⍵ FreqDist    Xm2 Values followed by Counts or Probabilities
           ⍝⍵ SummaryData W   Namespace containing count, mean, sdev
           ⍝← Result      Ss  Variance or unalikeability if character
           ⍝⍕ Result ← [0|1] var RawData|Categorical|FreqDist|SummaryData
           ⍝⍎ 7.7 ← var 2 8 5 1 3              ⍝ Raw Data
           ⍝⍎ 0.84848 ← var 4 2 matrix 0 20 1 30 2 40 3 10 ⍝ Frequency Distribution
           ⍝⍎ 0.84 ← var 4 2 matrix 0 0.2 1 0.3 2 0.4 3 0.1 ⍝ Relative Frequency
           ⍝⍎ 0.59375 ← var 'NY,NY,PA,NY,PA,NJ,PA,PA'  ⍝ Unalikeability (See Kader and Perry 2007)
     ⍵≡'Type':'Spread'               ⍝ Spread function
     ⎕ML←3 ⋄ ⎕IO←0 ⋄ ⍺←¯1            ⍝ Set system variables; default ⍺ is ¯1
     (↑⍺~0J1)between 0 1:⍺ confVar ⍵ ⍝ If 0<⍺<1:Conf Int
     9=⎕NC'⍵':⍵.var×1-(⍺=1)÷⍵.count  ⍝ If namespace, extract variance (×(n-1)÷n when ⍺=1)
     b←isFrequency⊢w←1/⍵             ⍝ w: ⍵ with a scalar made into a vector; b: isFrequency result
     b←↑b∧(2=≡w)∨0≡↑0⍴w              ⍝ Keep b only if w has depth 2 or a numeric prototype
     w←toNestedVector⍣(~b)⊢w         ⍝ Not a frequency table: apply toNestedVector
     varChar←{⍺:1-+/2*⍨(⊢÷+/)⍵[;1]   ⍝ Variability for Categorical Variables: 1 - sum of squared proportions
         (+/,∘.≢⍨⍵)÷(≢⍵)*2}          ⍝   (Kader and Perry 2007): share of unlike pairs
     0J1∊⍺:(⍺~0J1)showCov w          ⍝ Display form?
     0≠↑0⍴↑w:b varChar w             ⍝ Character variability
     b:↑⍺{(⍺⍺=0)∧1≥n←+/⍵:⎕NULL       ⍝ Sample (⍺=0) with total weight ≤1: no result
         n≠1:+/((⍵+.×⍺*2)-n÷⍨2*⍨⍵+.×⍺)÷n-⍺⍺≠1 ⍝ Frequency: divisor n, or n-1 unless ⍺=1
         (⍵+.×⍺*2)-(⍵+.×⍺)*2}/↓⍉w    ⍝ Probability: E[x²]-E[x]²
     w←w~⎕NULL                       ⍝ Drop missing values
     rAv∧1=⍴⍴⍵:⍺{z←##.∆r.x'var(⍵)'⍵  ⍝ Do R calculation?
         ⍺=1:z×÷/¯1 0+≢⍵ ⋄ z}w       ⍝ Population: ×(n-1)÷n
     (0≥⍺)∧1≥n←≢w:⎕NULL              ⍝ ⍺≤0 with at most one item: no result
     (n-⍺≤0)÷⍨(w+.*2)-n÷⍨(+/w)*2     ⍝ unweighted variance; divisor n-1 when ⍺≤0, else n
 }

 variance←{
      ⍝∇ Long-name alias for var
      ⍝⍺ Optional left argument passed to var; default ¯1
      ⍝⍵ Right argument passed to var
      ⍝← Result of var
     ⍺←¯1
     ⍺ var ⍵
 }

 zScore←{
      ⍝⍟ Copyright (c) 2024, by Stephen Mansour
      ⍝∇ Standardize data items: distance from the mean in units of sdev
      ⍝∊ Summary Function; Measure of Position
      ⍝⍺ [Value]  Xv  Items to score; omitted: the right argument itself
      ⍝⍵ RawData  Yv  Sample or Population Data
      ⍝⍵ FreqDist Xm2 Values followed by Counts or Probabilities
      ⍝← Z        Zv  (X - mean) ÷ sdev
      ⍝⍕ Z ← [Value] zScore RawData|FreqDist
      ⍝⍎ ¯0.42331 ← 45 zScore 32 58 57 96 76 87 21 58 90 5
      ⍝⍎ ¯0.42331 0.71638 ← 45 80 zScore 32 58 57 96 76 87 21 58 90 5
     ⍺←⍵                             ⍝ Default: score the data against itself
     ⍵≡'Type':'Position'             ⍝ Measure of position
     spread←sdev ⍵                   ⍝ Standard deviation of the data
     centre←mean ⍵                   ⍝ Mean of the data
     (⍺-centre)÷spread
 }

 percentRank←{
⍝∇ Calculate PERCENTRANK as defined by Excel (monadic operator)
⍝⍺ Value      Items to rank; omitted: every item of the data set
⍝⍺⍺ 0=Include endpoints (.INC) , 1=Exclude endpoints (.EXC)
⍝⍵ DataSet
⍝← 0≤P≤100 ⍝ Percent in ⍵ ≤ values in ⍺
⍝⍎ W←1 2 3 6 6 6 7 8 9
⍝⍎ 0 12.5 25 37.5 37.5 37.5 75 87.5 100←0 percentRank W
⍝⍎ 10 20 30 40 40 40 70 80 90 ← 1 percentRank W
⍝⍎ 38.1 ← 5.43 (1 percentRank) W
⍝ NOTE(review): a Value below the smallest item of DataSet gives j=¯1, so v[j] looks likely to fail - confirm
     ⎕IO←0 ⋄ ⎕ML←3 ⋄ ⎕DIV←1 ⍝ Set system variables (⎕DIV←1: x÷0 gives 0)
     ⍺←⍵                     ⍝ Default left arg is all values on right
     n←≢⍵                    ⍝ Get count (Sample Size)
     v←{⍵[⍋⍵]}∪⍵             ⍝ Unique, sorted values
     fr←+/v∘.=⍵              ⍝ Frequency of each unique value
     j←v⍸⍺                   ⍝ Index in v of the largest value ≤ each ⍺
     bw←(¯2-/v),0            ⍝ bin widths: gap to the next unique value; 0 after the last
     r←(⍺-v[j])÷bw[j]        ⍝ remainder: fraction of the way from v[j] to the next value
     b←r>0                   ⍝ 1 where ⍺ lies strictly above v[j]
     i←r++/¨(~b),¨(j+b)↑¨⊂fr ⍝ combine position, remainder: b=0 → 1+count below v[j]; b=1 → r+count ≤ v[j]
     100×(i-~⍺⍺)÷n-1-2×⍺⍺    ⍝ One-liner for ⍺⍺=0,1: (i-1)÷(n-1) if ⍺⍺=0, i÷(n+1) if ⍺⍺=1
 }


:EndNamespace 
