Python statistics Module

Python's statistics module provides functions for statistical calculations without needing external libraries like NumPy or pandas. It is a good fit for quick analysis and smaller datasets.

Averages

Mean (Arithmetic Average)

from statistics import mean

# Arithmetic average: the sum of the values divided by how many there are.
data = list(range(1, 11))  # the integers 1 through 10
print(mean(data))  # 5.5

Median (Middle Value)

from statistics import median, median_high, median_low

odd = [1, 3, 5, 7, 9]      # odd count: a single middle value
even = [1, 2, 3, 4, 5, 6]  # even count: a middle pair (3, 4)

print(median(odd))  # 5

# With an even count the three variants treat the middle pair differently.
for pick in (median, median_low, median_high):
    print(pick(even))  # 3.5 (average), then 3 (lower), then 4 (higher)

Mode (Most Common)

from statistics import mode, multimode

# mode() reports the single most frequent value.
data = [1, 2, 2, 3, 3, 3, 4]
most_common = mode(data)
print(most_common)  # 3

# multimode() lists every value tied for most frequent, in first-seen order.
bimodal = [1, 1, 2, 2, 3]
tied = multimode(bimodal)
print(tied)  # [1, 2]

Geometric Mean

For multiplicative relationships (growth rates, ratios):

from statistics import geometric_mean

# Each entry is a growth factor: 1.1 is +10%, 1.2 is +20%, 0.9 is -10%, 1.15 is +15%.
returns = [1.1, 1.2, 0.9, 1.15]

# The geometric mean is the n-th root of the product of the factors.
avg_return = geometric_mean(returns)
print(f"Average return factor: {avg_return:.4f}")  # about 1.0811

Harmonic Mean

For averaging rates, such as speeds travelled over equal distances:

from statistics import harmonic_mean

# Two legs of equal distance, one driven at 60 mph and the other at 40 mph.
speeds = [60, 40]

# The slower leg takes longer, so the true average sits below the plain mean of 50.
avg_speed = harmonic_mean(speeds)
print(f"Average speed: {avg_speed:.1f} mph")  # 48.0

Measures of Spread

Variance

from statistics import variance, pvariance

data = [2, 4, 4, 4, 5, 5, 7, 9]

# Sample variance (divides by n - 1)
print(variance(data))   # 4.571...

# Population variance (divides by n). With all-integer input and an exact
# integer result, the value comes back as an int, so this prints 4, not 4.0.
print(pvariance(data))  # 4

Standard Deviation

from statistics import pstdev, stdev

data = [2, 4, 4, 4, 5, 5, 7, 9]

sample_sd = stdev(data)       # square root of the sample variance (n - 1)
population_sd = pstdev(data)  # square root of the population variance (n)

print(sample_sd)      # 2.138...
print(population_sd)  # 2.0

Quantiles

from statistics import quantiles

data = list(range(1, 101))  # 1 to 100

# Quartiles (default n=4, default method="exclusive")
q = quantiles(data)
print(q)  # [25.25, 50.5, 75.75]

# Deciles
d = quantiles(data, n=10)
print(d)  # 9 cut points

# Percentiles: n=100 yields 99 cut points, so index 49 is the 50th percentile
p = quantiles(data, n=100)
print(p[49])  # 50.5

Covariance and Correlation

from statistics import covariance, correlation
 
x = [1, 2, 3, 4, 5]
y = [2, 4, 5, 4, 5]
 
# How variables move together
cov = covariance(x, y)
print(f"Covariance: {cov}")
 
# Correlation (-1 to 1)
corr = correlation(x, y)
print(f"Correlation: {corr:.3f}")

Linear Regression

from statistics import linear_regression
 
x = [1, 2, 3, 4, 5]
y = [2, 4, 5, 4, 5]
 
slope, intercept = linear_regression(x, y)
print(f"y = {slope:.2f}x + {intercept:.2f}")
 
# Predict
predict_x = 6
predict_y = slope * predict_x + intercept
print(f"At x=6, y={predict_y:.2f}")

NormalDist Class

Work with normal distributions:

from statistics import NormalDist

# Option 1: build from explicit parameters (mean 100, standard deviation 15).
dist = NormalDist(mu=100, sigma=15)  # IQ distribution

# Option 2: estimate the parameters from observations. This rebinds `dist`,
# so every line below describes the fitted distribution, not the IQ one.
data = [98, 102, 101, 99, 100, 103, 97]
dist = NormalDist.from_samples(data)

# Fitted parameters
print(f"Mean: {dist.mean}")
print(f"Stdev: {dist.stdev}")
print(f"Variance: {dist.variance}")

# Height of the density curve at x = 100
print(f"PDF at 100: {dist.pdf(100):.4f}")

# P(X <= 100): cumulative probability up to and including 100
print(f"CDF at 100: {dist.cdf(100):.4f}")

# Inverse CDF: the x below which 85% of the distribution falls
print(f"85th percentile: {dist.inv_cdf(0.85):.2f}")

Working with NormalDist

from statistics import NormalDist

iq = NormalDist(100, 15)

# Share of scores above 130: the complement of P(X <= 130)
above_130 = 1 - iq.cdf(130)
print(f"Above 130: {above_130:.2%}")

# Score at which the top 1% begins (the 99th percentile)
top_1_percent = iq.inv_cdf(0.99)
print(f"Top 1%: {top_1_percent:.1f}")

# Shared area under the two density curves, between 0.0 and 1.0
test_a, test_b = NormalDist(100, 15), NormalDist(105, 12)
overlap = test_a.overlap(test_b)
print(f"Distribution overlap: {overlap:.2%}")

Practical Example: Analyzing Test Scores

from statistics import NormalDist, mean, median, quantiles, stdev

scores = [
    72, 85, 90, 78, 92, 88, 76, 95, 82, 89,
    91, 73, 87, 84, 79, 93, 81, 86, 77, 94,
]

# Central tendency and spread
print(f"Mean: {mean(scores):.1f}")
print(f"Median: {median(scores)}")
print(f"Std Dev: {stdev(scores):.2f}")

# Quartile cut points (three values split the data into four groups)
q = quantiles(scores)
print(f"Q1: {q[0]:.1f}, Q2: {q[1]:.1f}, Q3: {q[2]:.1f}")

# Model the scores as a normal distribution estimated from the sample
dist = NormalDist.from_samples(scores)

# P(score > 90) is the complement of the cumulative probability at 90
prob_above_90 = 1 - dist.cdf(90)
print(f"Probability above 90: {prob_above_90:.2%}")

Summary

The statistics module handles common statistical needs without external dependencies:

Averages: mean, median, mode, geometric_mean, harmonic_mean
Spread: variance, stdev, quantiles
Relationships: covariance, correlation, linear_regression
Distributions: NormalDist for normal distribution calculations

For larger datasets or advanced statistics, you'll want NumPy, pandas, or SciPy. But for quick calculations on smaller data, the standard library has you covered.

React to this post:

#Averages

#Mean (Arithmetic Average)

#Median (Middle Value)

#Mode (Most Common)

#Geometric Mean

#Harmonic Mean

#Measures of Spread

#Variance

#Standard Deviation

#Quantiles

#Covariance and Correlation

#Linear Regression

#NormalDist Class

#Working with NormalDist

#Practical Example: Analyzing Test Scores

#Summary

Keep Reading

Need help shipping fast?