Python's statistics module provides functions for statistical calculations without needing external libraries like NumPy or pandas. Perfect for quick analysis and smaller datasets.
Averages
Mean (Arithmetic Average)
from statistics import mean
data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
print(mean(data)) # 5.5
Median (Middle Value)
from statistics import median, median_low, median_high
odd = [1, 3, 5, 7, 9]
even = [1, 2, 3, 4, 5, 6]
print(median(odd)) # 5
print(median(even)) # 3.5 (average of middle two)
print(median_low(even)) # 3 (lower of middle two)
print(median_high(even)) # 4 (higher of middle two)
Mode (Most Common)
from statistics import mode, multimode
data = [1, 2, 2, 3, 3, 3, 4]
print(mode(data)) # 3
# Multiple modes
bimodal = [1, 1, 2, 2, 3]
print(multimode(bimodal)) # [1, 2]
Geometric Mean
For multiplicative relationships (growth rates, ratios):
from statistics import geometric_mean
returns = [1.1, 1.2, 0.9, 1.15] # 10%, 20%, -10%, 15%
avg_return = geometric_mean(returns)
print(f"Average return factor: {avg_return:.4f}")
Harmonic Mean
For rates and ratios:
from statistics import harmonic_mean
speeds = [60, 40] # mph for equal distances
avg_speed = harmonic_mean(speeds)
print(f"Average speed: {avg_speed:.1f} mph") # 48.0
Measures of Spread
Variance
from statistics import variance, pvariance
data = [2, 4, 4, 4, 5, 5, 7, 9]
# Sample variance (n-1)
print(variance(data)) # 4.571...
# Population variance (n)
print(pvariance(data)) # 4 (an int, because the data are ints and the result is exact)
Standard Deviation
from statistics import stdev, pstdev
data = [2, 4, 4, 4, 5, 5, 7, 9]
# Sample standard deviation
print(stdev(data)) # 2.138...
# Population standard deviation
print(pstdev(data)) # 2.0
Quantiles
from statistics import quantiles
data = list(range(1, 101)) # 1 to 100
# Quartiles (default n=4)
q = quantiles(data)
print(q) # [25.25, 50.5, 75.75] (default method='exclusive')
# Deciles
d = quantiles(data, n=10)
print(d) # 9 cut points
# Percentiles
p = quantiles(data, n=100)
print(p[49]) # 50th percentile
Covariance and Correlation
from statistics import covariance, correlation
x = [1, 2, 3, 4, 5]
y = [2, 4, 5, 4, 5]
# How variables move together
cov = covariance(x, y)
print(f"Covariance: {cov}")
# Correlation (-1 to 1)
corr = correlation(x, y)
print(f"Correlation: {corr:.3f}")
Linear Regression
from statistics import linear_regression
x = [1, 2, 3, 4, 5]
y = [2, 4, 5, 4, 5]
slope, intercept = linear_regression(x, y)
print(f"y = {slope:.2f}x + {intercept:.2f}")
# Predict
predict_x = 6
predict_y = slope * predict_x + intercept
print(f"At x=6, y={predict_y:.2f}")
NormalDist Class
Work with normal distributions:
from statistics import NormalDist
# Create from parameters
dist = NormalDist(mu=100, sigma=15) # IQ distribution
# From data
data = [98, 102, 101, 99, 100, 103, 97]
dist = NormalDist.from_samples(data)
# Properties
print(f"Mean: {dist.mean}")
print(f"Stdev: {dist.stdev}")
print(f"Variance: {dist.variance}")
# Probability density at a point
print(f"PDF at 100: {dist.pdf(100):.4f}")
# Cumulative distribution
print(f"CDF at 100: {dist.cdf(100):.4f}") # Prob of being <= 100
# Inverse CDF (quantile)
print(f"85th percentile: {dist.inv_cdf(0.85):.2f}")
Working with NormalDist
from statistics import NormalDist
iq = NormalDist(100, 15)
# What percentage scores above 130?
above_130 = 1 - iq.cdf(130)
print(f"Above 130: {above_130:.2%}")
# What score is the top 1%?
top_1_percent = iq.inv_cdf(0.99)
print(f"Top 1%: {top_1_percent:.1f}")
# Overlap between two distributions
test_a = NormalDist(100, 15)
test_b = NormalDist(105, 12)
overlap = test_a.overlap(test_b)
print(f"Distribution overlap: {overlap:.2%}")
Practical Example: Analyzing Test Scores
from statistics import (
mean, median, stdev, quantiles, NormalDist
)
scores = [72, 85, 90, 78, 92, 88, 76, 95, 82, 89,
91, 73, 87, 84, 79, 93, 81, 86, 77, 94]
print(f"Mean: {mean(scores):.1f}")
print(f"Median: {median(scores)}")
print(f"Std Dev: {stdev(scores):.2f}")
q = quantiles(scores)
print(f"Q1: {q[0]:.1f}, Q2: {q[1]:.1f}, Q3: {q[2]:.1f}")
# Fit normal distribution
dist = NormalDist.from_samples(scores)
# What's the probability of scoring above 90?
prob_above_90 = 1 - dist.cdf(90)
print(f"Probability above 90: {prob_above_90:.2%}")
Summary
The statistics module handles common statistical needs without external dependencies:
- Averages: mean, median, mode, geometric_mean, harmonic_mean
- Spread: variance, stdev, quantiles
- Relationships: covariance, correlation, linear_regression
- Distributions: NormalDist for normal distribution calculations
For larger datasets or advanced statistics, you'll want NumPy, pandas, or scipy. But for quick calculations on smaller data, the stdlib has you covered.
React to this post: