# Monte Carlo simulation: estimating pi, MC integration, and estimator studies
import random
import math
import time
import numpy as np
import pandas as pd
def MCpi1(n=500, seed=1, message=0):
    """Estimate pi by Monte Carlo sampling in the unit square.

    Draws n points uniformly in [0, 1] x [0, 1] and counts how many fall
    inside the quarter unit circle; inside/total estimates pi/4.

    Args:
        n: number of random points to draw.
        seed: seed passed to ``random.seed`` so runs are reproducible.
        message: when 1, print the hit count, n and the estimate.

    Returns:
        The estimate 4 * (#inside) / n of pi.
    """
    random.seed(seed)  # fix the random stream for reproducibility
    inside = 0
    for _ in range(n):
        # Draw one point (u, v) uniformly in the unit square.
        u = random.random()
        v = random.random()
        # Count it when it lies inside the quarter circle of radius 1.
        if math.sqrt(u ** 2 + v ** 2) <= 1.0:
            inside += 1
    # inside / n estimates pi / 4.
    p_hat = float(inside) / n
    pi_hat = 4 * p_hat
    if message == 1:
        print("m =", inside, "; n =", n, "; pi hat =", pi_hat)
    return pi_hat
# Time a small simulation (n = 500).
start = time.time()
MCpi1(500, message=1)
print("Simulation takes %f seconds." % (time.time()-start))
# Notebook output:
#   m = 380 ; n = 500 ; pi hat = 3.04
#   Simulation takes 0.000323 seconds.
# The estimate is inaccurate; generate a more accurate one by increasing n.
start = time.time()
MCpi1(10000000, message=1)
print("Simulation takes %f seconds." % (time.time()-start))
# Notebook output:
#   m = 7852532 ; n = 10000000 ; pi hat = 3.1410128
#   Simulation takes 4.385045 seconds.
def MCpi2(iter, n):
    """Run MCpi1 ``iter`` times with sample size ``n``.

    Replication i uses seed i, so the whole study is reproducible.

    Returns:
        A NumPy array holding the ``iter`` pi estimates.
    """
    estimates = np.zeros(iter)
    for rep in range(iter):
        estimates[rep] = MCpi1(n, seed=rep)
    return estimates
# Monte Carlo distributions of the pi estimator for several sample sizes.
pi_s = np.array([
    MCpi2(1000, 100),
    MCpi2(1000, 500),
    MCpi2(1000, 1000),
    MCpi2(1000, 10000),
])
dfMC = pd.DataFrame(pi_s.T,
                    columns=['n=100', 'n=500', 'n=1000', 'n=10000'])
# Summary statistics of the four sampling distributions (notebook display).
dfMC.describe()
# The for-loop above is not computationally efficient; vectorize it with
# NumPy/pandas instead.
start = time.time()
n = 10000000  # sample size
dfMC = pd.DataFrame(columns=['x', 'y', 'r', 'Location',
                             'CurrentPi'])
# x and y uniform on (-1, 1).
dfMC['x'] = 2*(np.random.rand(n)-0.5)
dfMC['y'] = 2*(np.random.rand(n)-0.5)
# Radius of each point, sqrt(x^2 + y^2).
dfMC['r'] = np.sqrt(dfMC['x']**2+dfMC['y']**2)
# Classify points as inside/outside the unit circle.
dfMC.loc[dfMC['r']<=1, 'Location'] = 'Inside'
dfMC.loc[dfMC['r']>1, 'Location'] = 'Outside'
# Running estimate of pi after each point.
dfMC['CurrentPi'] = 4*(dfMC['Location'] == 'Inside').cumsum()/(dfMC.index+1)
# Final estimate is the last running value.
pi_hat = np.array(dfMC['CurrentPi'])[-1]
print("pi hat=%f, Simulation takes %f seconds."
      % (pi_hat, time.time()-start))
# Notebook output:
#   pi hat=3.141376, Simulation takes 2.496126 seconds.
import matplotlib.pyplot as plt
import seaborn as sns
def MCpi3(n=500):
    """Estimate pi with n uniform points in [-1, 1]^2 and plot them.

    Args:
        n: number of points to draw.

    Returns:
        The DataFrame of points with radius, inside/outside label and
        running pi estimate.
    """
    dfMC = pd.DataFrame(columns=['x', 'y', 'r', 'Location',
                                 'CurrentPi'])
    # x and y uniform on (-1, 1).
    dfMC['x'] = 2*(np.random.rand(n)-0.5)
    dfMC['y'] = 2*(np.random.rand(n)-0.5)
    dfMC['r'] = np.sqrt(dfMC['x']**2+dfMC['y']**2)
    dfMC.loc[dfMC['r']<=1, 'Location'] = 'Inside'
    dfMC.loc[dfMC['r']>1, 'Location'] = 'Outside'
    # Running estimate of pi after each point.
    dfMC['CurrentPi'] = 4*(dfMC['Location'] == 'Inside').cumsum()/(dfMC.index+1)
    pi_hat = np.array(dfMC['CurrentPi'])[-1]
    # Plot the bounding square, the unit circle, and the sampled points.
    plt.figure(figsize=(5, 5))
    squareX = [1, -1, -1, 1, 1]
    squareY = [1, 1, -1, -1, 1]
    circleX = (np.cos(np.pi*np.arange(360+1)/180)*1)
    circleY = (np.sin(np.pi*np.arange(360+1)/180)*1)
    plt.plot(squareX, squareY, color='gray')
    plt.plot(circleX, circleY, color='k')
    # Scatter the points, styled by inside/outside.
    sns.scatterplot(x='x', y='y', data=dfMC,
                    style='Location', color='k')
    plt.title(r'Figure: Estimating $\pi$', fontsize=14)
    plt.legend(loc="upper left", fontsize=12)
    plt.xlabel(r'$x$', fontsize=12)
    plt.ylabel(r'$y$', fontsize=12)
    plt.show()
    # BUG FIX: the original printed the undefined name `pi` (NameError);
    # report the computed estimate pi_hat instead.
    print('\n' + f'Pi is approximately {pi_hat}\n')
    return dfMC
# Run the simulation and keep the generated points.
dfgenerate = MCpi3(n=500)
def fairs(n=10000):
    """Simulate n Bernoulli(1/3) trials by inverse transform.

    Returns:
        The sample proportion of successes (should be near 1/3).
    """
    draws = np.zeros(n)
    u = np.random.rand(n)
    # Success exactly when the uniform draw falls below 1/3.
    draws[u <= 1/3] = 1
    return draws.mean()
# Large run of the Bernoulli simulation (notebook display).
fairs(1000000)
# Notebook output: 0.333306
import itertools

def mcintgrate(integrand, sampler, measure=1.0, n=300):
    """Plain Monte Carlo integration.

    Averages ``integrand`` over ``n`` draws taken from the (possibly
    infinite) iterator ``sampler`` and scales by ``measure``, the volume
    of the sampling region.

    Returns:
        measure * mean(integrand(x) for the n draws).
    """
    acc = 0.0
    for point in itertools.islice(sampler, n):
        acc += integrand(point)
    return measure*acc/n
To apply this function, consider a simple example:
# The integrand f(x) = x^2.
def integrand(x):
    return x ** 2
# Sampling generator: an endless stream of Uniform(0, 1) draws.
def sampler():
    yield from iter(random.random, None)
# Estimate the integral of x^2 over [0, 1] (notebook display).
mcintgrate(integrand, sampler(), measure=1.0, n=int(1e+6))
# Notebook output: 0.3333118169170228
# Compare with deterministic quadrature (notebook display).
from scipy.integrate import quad
quad(integrand, 0, 1)
# Notebook output: (0.33333333333333337, 3.700743415417189e-15)
from scipy.stats import norm, multivariate_normal
from scipy.integrate import quad, nquad
# Integrand for Monte Carlo: 4-d standard normal density at the point x.
def integrand(x):
    point = np.array(x)
    return multivariate_normal.pdf(point, mean=np.array([0, 0, 0, 0]), cov=1)
# The same density, but taking scalar arguments as nquad expects.
def integrand_quad(x1, x2, x3, x4):
    point = np.array([x1, x2, x3, x4])
    return multivariate_normal.pdf(point,
                                   mean=np.array([0, 0, 0, 0]), cov=1)
# Sampling generator over the box [1, 3] x [0, 3]^3.
def sampler():
    while True:
        first = random.uniform(1, 3)
        rest = tuple(random.uniform(0, 3) for _ in range(3))
        yield (first,) + rest
# Monte Carlo estimate over the box (volume = 3^3 * 2), versus nquad.
res_mc = mcintgrate(integrand, sampler(), measure=math.pow(3,3)*2, n=int(1e+6))
print("MC integration estimate=", res_mc)
res_nq = nquad(integrand_quad, [[1, 3], [0, 3], [0, 3], [0, 3]])
print("nquad estimate=", res_nq[0])
# Notebook output:
#   MC integration estimate= 0.01941219719883395
#   nquad estimate= 0.019504339426371822
# Original integrand: standard normal density (returned as a length-1 array).
def integrand_quad(x):
    return norm.pdf(np.array([x]), 0, 1)
# Change of variables t -> t / (1 - t^2), mapping (-1, 1) onto the real line.
def transfers(t):
    return t / (1 - t ** 2)
# Jacobian weight of the transformation above.
def weights(t):
    numerator = 1 + t ** 2
    denominator = (1 - t ** 2) ** 2
    return numerator / denominator
# Integrand after the change of variables: weight * phi(transfers(x)).
def integrand_mc(x):
    return weights(x) * norm.pdf(np.array([transfers(x)]), 0, 1)
# Sampling generator: an endless stream of Uniform(-1, 1) draws.
def sampler():
    yield from iter(lambda: random.uniform(-1, 1), None)
# Integral of the normal density over the whole real line: quad vs MC.
print("SciPy's quad:", quad(integrand_quad, -np.inf, np.inf))
print("Monte Carlo method:",
      mcintgrate(integrand_mc, sampler(), measure=2.0, n=int(1e+6)))
# Notebook output:
#   SciPy's quad: (0.9999999999999998, 1.0178191320905743e-08)
#   Monte Carlo method: [0.9995573]
a = 0.5  # lower limit of the half-line integral computed below
# Change of variables t -> t / (1 - t), mapping [0, 1) onto [0, inf).
def transfers_half(t):
    return t / (1 - t)
# Jacobian weight 1 / (1 - t)^2 of the half-line transformation.
def weights_half(t):
    denom = (1 - t) ** 2
    return 1 / denom
# Integrand after the change of variables, shifted by the lower limit a.
def integrand_mc(x):
    return weights_half(x) * norm.pdf(np.array([a + transfers_half(x)]), 0, 1)
# Sampling generator: an endless stream of Uniform(0, 1) draws.
def sampler():
    yield from iter(lambda: random.uniform(0, 1), None)
# Integral of the normal density over [a, inf): quad vs MC.
print("SciPy's quad:", quad(integrand_quad, a, np.inf))
print("Monte Carlo method:",
      mcintgrate(integrand_mc, sampler(), measure=1.0, n=int(1e+6)))
# Notebook output:
#   SciPy's quad: (0.30853753872598694, 6.289465414715434e-10)
#   Monte Carlo method: [0.30820573]
import numpy as np
import scipy as sp
from scipy import stats
class MyMC:
    """Monte Carlo study of three location estimators — the mean, the 20%
    trimmed mean and the median — for N(-1, (5/3)^2) samples.

    Args:
        S: number of Monte Carlo replications.
        n: sample size per replication.
        seed: NumPy global seed, fixed for reproducibility.
    """

    def __init__(self, S, n, seed=100):
        np.random.seed(seed)
        self.save = np.zeros([S, 3])  # one row (T1, T2, T3) per replication
        self.n = n
        self.S = S

    # BUG FIX: the original defined MC_main twice; the first definition
    # (which only drew samples and discarded them) was dead code, silently
    # overwritten by the second. It has been removed.
    def MC_main(self):
        """Run the S replications and compute bias, SD and MSE estimates."""
        for s in range(self.S):
            Y = np.random.normal(loc=-1, scale=5/3, size=self.n)
            # Three estimators of the location parameter.
            self.T1 = Y.mean()
            self.T2 = sp.stats.trim_mean(Y, 0.20, axis=0)
            self.T3 = np.percentile(Y, 50)
            # Store this replication's estimates.
            self.save[s, :] = np.array([self.T1, self.T2, self.T3])
        # Monte Carlo summary statistics (true value is -1).
        # NOTE(review): np.array([self.save]).mean(axis=1) yields shape
        # (1, 3); kept as-is because downstream code indexes MSEhat[0, j].
        self.meanhat = np.array([self.save]).mean(axis=1)
        self.biashat = self.meanhat + 1
        self.SDhat = np.sqrt(np.sum(
            (self.save - self.meanhat) ** 2,
            axis=0) / (self.S - 1))
        self.MSEhat = self.SDhat ** 2 + self.biashat ** 2
# Run the study.
S = 1000
n = 20
Results = MyMC(S, n, seed=2022)
Results.MC_main()
print("Means", Results.meanhat)
print("Bias", Results.biashat)
print("SDs ", Results.SDhat)
print("MSEs", Results.MSEhat)
# Notebook output:
#   Means [[-0.99190512 -0.99421817 -0.99450071]]
#   Bias [[0.00809488 0.00578183 0.00549929]]
#   SDs  [0.37639327 0.39925436 0.44814659]
#   MSEs [[0.14173742 0.15943747 0.2008656 ]]
# Relative efficiencies of the estimators.
print("RE of T1 to T2=", Results.MSEhat[0, 1]/Results.MSEhat[0, 0])
print("RE of T1 to T3=", Results.MSEhat[0, 2]/Results.MSEhat[0, 0])
print("RE of T2 to T3=", Results.MSEhat[0, 2]/Results.MSEhat[0, 1])
# Notebook output:
#   RE of T1 to T2= 1.1248791713120874
#   RE of T1 to T3= 1.4171670639989176
#   RE of T2 to T3= 1.2598393677659603
# Standard errors of the three estimators from a single sample of size n.
Y = np.random.normal(loc=-1, scale=5/3, size=n)
T = np.array([Y.mean(),
              sp.stats.trim_mean(Y, 0.20, axis=0),
              np.percentile(Y, 50)]).reshape(3, 1)
# SE_j = sqrt( s^2_j / n ) where s^2_j uses deviations from estimator j.
SE = np.sqrt(((1/(n-1)) * np.diag(np.dot((Y-T), (Y-T).T)))/n)
print(SE)
# Notebook output: [0.3698925  0.37051157 0.36999013]
class MyMC:
    """Monte Carlo study of the mean, 20% trimmed mean and median for
    N(-1, (5/3)^2) samples, also tracking each replication's estimated
    standard errors.

    Args:
        S: number of replications.
        n: sample size per replication.
        seed: NumPy global seed for reproducibility.
    """

    def __init__(self, S, n, seed=2020):
        np.random.seed(seed)
        self.save = np.zeros([S, 3])  # (T1, T2, T3) per replication
        self.n = n
        self.S = S
        self.SE = np.zeros([S, 3])    # per-replication standard errors

    def MC_main(self):
        """Run the replications; compute bias, SD, MSE and average SE."""
        for rep in range(self.S):
            Y = np.random.normal(loc=-1, scale=5/3, size=self.n)
            # Three location estimators.
            self.T1 = Y.mean()
            self.T2 = sp.stats.trim_mean(Y, 0.20, axis=0)
            self.T3 = np.percentile(Y, 50)
            # Store this replication's estimates.
            self.save[rep, :] = np.array([self.T1, self.T2, self.T3])
            # Standard errors based on deviations from each estimator.
            T = np.array([self.T1, self.T2, self.T3]).reshape(3, 1)
            self.SE[rep, :] = np.sqrt(((1/(self.n-1))
                                       * np.diag(np.dot((Y-T),
                                                        (Y-T).T)))/self.n)
        # Monte Carlo summary statistics (true value is -1).
        self.meanhat = (self.save).mean(axis=0)
        self.biashat = self.meanhat + 1
        self.SDhat = np.sqrt(np.sum((self.save - self.meanhat)**2,
                                    axis=0)/(self.S-1))
        self.MSEhat = self.SDhat**2 + self.biashat**2
        self.SEhat = self.SE.mean(axis=0)
# Run the extended study.
S = 1000
n = 20
Results = MyMC(S, n, seed=2022)
Results.MC_main()
print("Means", Results.meanhat)
print("Bias", Results.biashat)
print("SDs ", Results.SDhat)
print("MSEs", Results.MSEhat)
print("Ave. SEs", Results.SEhat)
# Notebook output:
#   Means [-0.99190512 -0.99421817 -0.99450071]
#   Bias [0.00809488 0.00578183 0.00549929]
#   SDs  [0.37639327 0.39925436 0.44814659]
#   MSEs [0.14173742 0.15943747 0.2008656 ]
#   Ave. SEs [0.36775997 0.36899262 0.37211594]
# 95% t confidence interval for the mean from the single sample Y above.
t05 = sp.stats.t.ppf(0.975, df=n-1)
s2_T1 = (1/(n-1)) * np.sum((Y-Y.mean())**2)
SE = np.sqrt(s2_T1)/np.sqrt(n)
CI = np.array([Y.mean()-t05*SE, Y.mean()+t05*SE])
print(CI)
# Notebook output: [-1.85789217 -0.30950438]
# Does the interval cover the true value -1? (notebook display)
(CI[0] <= -1) & (CI[1] >= -1)
# Notebook output: True
# Empirical coverage probability of the t intervals across S replications.
S = 1000
n = 20
Results = MyMC(S, n, seed=2022)
Results.MC_main()
np.sum((Results.save - t05*Results.SE <= -1) &
       (Results.save + t05*Results.SE >= -1), axis=0)/S
# Notebook output: array([0.955, 0.938, 0.915])
import matplotlib.pyplot as plt
import seaborn as sns

# Visualize lower bounds, point estimates and upper bounds for T1.
plt.figure(figsize=(8, 5))
lower_bounds = (Results.save - t05*Results.SE)[:, 0]
upper_bounds = (Results.save + t05*Results.SE)[:, 0]
sns.histplot(lower_bounds, kde=True, linewidth=0, stat='density',
             color='gray', alpha=0.2, label="Lower Bounds")
sns.histplot(Results.save[:, 0], kde=True, linewidth=0, stat='density',
             color='gray', alpha=0.5, label="Point Estimates")
sns.histplot(upper_bounds, kde=True, linewidth=0, stat='density',
             color='gray', alpha=0.8, label="Upper Bounds")
# Vertical line at the true parameter value.
squareX = [-1, -1]
squareY = [0, 1.0]
plt.plot(squareX, squareY, color='k', label="True Value = -1")
plt.title('Figure: Visualization of the Coverage Probability',
          fontsize=14)
plt.xlabel(r'$x$', fontsize=12)
plt.ylabel(r'$y$', fontsize=12)
plt.legend(loc='best', fontsize=12, framealpha=0.5)
plt.show()

Two important factors to consider: size and power.
Revisit our previous example and conduct a test.
class MyMC2:
    """Monte Carlo study of the size and power of two-sided t-tests of
    H0: mean = -1 based on the mean, 20% trimmed mean and median.

    Size is estimated from samples drawn under H0 (loc = -1); power from
    samples drawn under the alternative (loc = -1.75).
    """

    def __init__(self, S, n, seed=2020):
        np.random.seed(seed)
        self.n = n
        self.S = S
        # Under H0 (for size).
        self.ttest1 = np.zeros([S, 3])
        self.save = np.zeros([S, 3])
        self.SE = np.zeros([S, 3])
        # Under the alternative (for power).
        self.ttest2 = np.zeros([S, 3])
        self.saveA = np.zeros([S, 3])
        self.SEA = np.zeros([S, 3])

    def MC_main(self):
        """Run the replications and estimate size and power."""
        for s in range(self.S):
            # --- Size: sample under H0 and test H0: mean = -1. ---
            Y = np.random.normal(loc=-1, scale=5/3,
                                 size=self.n)
            self.T1 = Y.mean()
            self.T2 = sp.stats.trim_mean(Y, 0.20, axis=0)
            self.T3 = np.percentile(Y, 50)
            T = np.array([self.T1, self.T2,
                          self.T3]).reshape(3, 1)
            self.save[s, :] = np.array([self.T1, self.T2, self.T3])
            sd = np.sqrt(np.diag(np.dot((Y-T), (Y-T).T))/(self.n-1))
            self.SE[s, :] = sd/np.sqrt(self.n)
            self.ttest1[s, :] = (self.save[s, :] - (-1))/self.SE[s, :]
            # --- Power: sample under the alternative, test the same H0. ---
            YA = np.random.normal(loc=-1.75, scale=5/3,
                                  size=self.n)
            self.T1A = YA.mean()
            self.T2A = sp.stats.trim_mean(YA, 0.20, axis=0)
            self.T3A = np.percentile(YA, 50)
            TA = np.array([self.T1A, self.T2A,
                           self.T3A]).reshape(3, 1)
            self.saveA[s, :] = np.array([self.T1A, self.T2A, self.T3A])
            # BUG FIX: the original first assigned an un-normalized SEA
            # (missing /sqrt(n)) and immediately overwrote it; the dead
            # assignment has been removed.
            sd = np.sqrt(np.diag(np.dot((YA-TA), (YA-TA).T))/(self.n-1))
            self.SEA[s, :] = sd/np.sqrt(self.n)
            # BUG FIX: the power t-statistic must be standardized by the
            # alternative sample's SE (SEA), not the H0 sample's SE.
            self.ttest2[s, :] = (self.saveA[s, :] - (-1))/self.SEA[s, :]
        # Rejection rates at the two-sided 5% level.
        # BUG FIX: use self.n / self.S rather than relying on module-level
        # globals n and S happening to match.
        t05 = sp.stats.t.ppf(0.975, df=self.n-1)
        self.size = np.sum(np.abs(self.ttest1) > t05, axis=0)/self.S
        self.power = np.sum(np.abs(self.ttest2) > t05, axis=0)/self.S
# Estimate size and power with a large number of replications.
S = 10000
n = 50
Results = MyMC2(S, n, seed=2020)
Results.MC_main()
print("Sizes of three estimators:", Results.size)
print("Powers of three estimators:", Results.power)
# Notebook output:
#   Sizes of three estimators: [0.0479 0.0627 0.1055]
#   Powers of three estimators: [0.8725 0.8605 0.8271]
How well do the Monte Carlo quantities approximate properties of the actual sampling distribution of the estimator or test statistic, and how "believable" are the results?
Use statistical principles.
Select the number of Monte Carlo replications:
# Check how SDhat stabilizes as the number of replications S grows.
n = 50
Results = MyMC(S=20, n=n, seed=2022)
Results.MC_main()
print(Results.SDhat)
Results = MyMC(S=50, n=n, seed=2022)
Results.MC_main()
print(Results.SDhat)
Results = MyMC(S=200, n=n, seed=2022)
Results.MC_main()
print(Results.SDhat)
Results = MyMC(S=300, n=n, seed=2022)
Results.MC_main()
print(Results.SDhat)
# Notebook output:
#   [0.23522009 0.19817077 0.2011049 ]
#   [0.22815268 0.22255643 0.23408389]
#   [0.2478695  0.25475324 0.28252248]
#   [0.24800794 0.25101807 0.28432023]