Miscellaneous

Edward Justyre

周二 02 五月 2017

Category: programming

Tags: python jupyter notebook

In [4]:

# from statsmodels.compat import lzip
import itertools as it
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import statsmodels.stats.api as sms
import matplotlib.pyplot as plt
%matplotlib inline
import decimal as de
import seaborn
q = '　　'    # 两个全角空格，用于会计分录的缩进对齐

E:\Programs10\Anaconda3\lib\site-packages\statsmodels\compat\pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools

In [6]:

# pretty matplotlib figs
# load libraries and set plot parameters
import PrettyTable as pt
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('pdf', 'png')
plt.rcParams['savefig.dpi'] = 75

plt.rcParams['figure.autolayout'] = False
plt.rcParams['figure.figsize'] = 10, 6
plt.rcParams['axes.labelsize'] = 18
plt.rcParams['axes.titlesize'] = 20
plt.rcParams['font.size'] = 16
plt.rcParams['lines.linewidth'] = 2.0
plt.rcParams['lines.markersize'] = 8
plt.rcParams['legend.fontsize'] = 14

plt.rcParams['text.usetex'] = True
plt.rcParams['font.family'] = "serif"
plt.rcParams['font.serif'] = "cm"
plt.rcParams['text.latex.preamble'] = r"\usepackage{subdepth}, \usepackage{type1cm}"

In [10]:

# pretty tables
data = np.array([[1,2,3],[2,3,4]])
pt.PrettyTable(data, [r"$\frac{a}{b}$", r"$b$", r"$c$"])
  # pt.PrettyTable takes a two-dimensional (!) numpy array and converts it to a table.

Out[10]:

$\frac{a}{b}$	$b$	$c$
1	2	3
2	3	4

In [12]:

# easy reading of learge text files
def loadtxt(filename, ncols=None, dtyp=None, commentchars=["#","@"]):
    global data
    if ncols==None:
        with open(filename) as f:
            l = f.readline()
            while l[0] in commentchars:
                print(l)
                l = f.readline()
            ncols = len(l.split())
            data = np.fromfile(filename, sep=" ", dtype=dtyp)
    return data.reshape((len(data)/ncols, ncols))

CASE 1: MISCELLANEOUS¶

Ch1 MAX DRAWDOWN¶

In [13]:

# Difference betw .iloc & .loc (.ix has been deprecated):
#  Cf. http://pandas-docs.github.io/pandas-docs-travis/indexing.html#ix-indexer-is-deprecated
# .loc is primarily label based, but may also be used with a boolean array.
# .iloc is primarily integer position based (from 0 to length-1 of the axis), but may also be used with a boolean array.

# Below is an example
dfd = pd.DataFrame({'A': [1, 2, 3],\
                     'B': [4, 5, 6]},\
                    index=list('abc'))
# Deprecated way
# dfd.ix[[0, 2], 'A']

# Now recommended way
print(dfd.loc[dfd.index[[0, 2]], 'A'])  # this gives a pd.Series
print(dfd.loc[dfd.index[[0, 2]], ['A']])  # this gives a pd.DataFrame
print(dfd.iloc[[0, 2], dfd.columns.get_loc('A')])  # only applicable for ONE col; this gives a pd.Series
print(dfd.iloc[[0, 2], dfd.columns.get_indexer(['A', 'B'])])  # applicable for multicols; gives a pd.DataFrame

a    1
c    3
Name: A, dtype: int64
   A
a  1
c  3
a    1
c    3
Name: A, dtype: int64
   A  B
a  1  4
c  3  6

In [14]:

# Various max drawdown funcs:

# Func1: max drawdown for one col of ret
def mddoneret(ret):  
    # ret is a pd.Series
    r = ret.add(1).cumprod()
    dd = r.div(r.cummax()).sub(1)
    mdd = dd.min()
    end = dd.idxmin()
    start = r.loc[:end].idxmax()
    return mdd, start, end

# Func2: max active drawdown for two cols of ret(pf & bn)
def mddtworet(p, b):  
    p = p.add(1).cumprod()  # portfolio
    b = b.add(1).cumprod()  # benchmark
    pmb = p - b
    cam = pmb.expanding(min_periods=1).apply(lambda x: x.argmax())  # expanding window of cumulative argmax
    p0 = pd.Series(p.loc[cam.values.astype(int)].values, index=p.index)
    b0 = pd.Series(b.loc[cam.values.astype(int)].values, index=b.index)
    dd = p/p0 - b/b0
    mdd = dd.min()
    end = dd.idxmin()
    start = cam.loc[end]
    return mdd, start, end

# Func3: rolling max drawdown for one col of `ret` and for length of `win`
# Inputs: ret is pfret, win is window length (exogenous parameter)
# Outputs: tes is a pd.DataFrame, whose cols 'mdd, start, end' are 'maxdrawdown, mdd begday, mdd finday' resp
def rolmddoneret(ret, win):
    tes = pd.DataFrame(index=pd.date_range(start='20160101', periods=len(ret), freq='D'))
    tes['pfret'] = ret.values
    tes['pfnav'] = tes.pfret.add(1).cumprod()
    for i in range(len(tes)-win+1):
        rolp = tes.pfnav.iloc[i:i+win]
        cam = rolp.expanding(min_periods=1).apply(lambda x: x.argmax())  # expanding window
        rolp0 = pd.Series(rolp.iloc[cam.values.astype(int)].values, index=rolp.index)
        roldd = rolp/rolp0 - 1
        tes.loc[tes.index[i+win-1], 'mdd'] = roldd.min()
        tes.loc[tes.index[i+win-1], 'end'] = roldd.idxmin()
        tes.loc[tes.index[i+win-1], 'start'] = cam.index[int(cam.loc[tes.loc[tes.index[i+win-1], 'end']])]
    return tes

In [15]:

# Func4: rolling max active drawdown for two cols of ret (pf & bn)
# Inputs: p is pfret, b is bnret, (p,b must be of same length), win is window length (exogenous parameter)
# Outputs: tes is a pd.DataFrame, whose cols 'mdd, start, end' are same as mddtworet's meaning
def rolmddtworet(p, b, win):
    # tes = pd.DataFrame(index=pd.date_range(start='20160101', periods=200, freq='D'))
    # tes = tes.assign(pfret=p.values, bnret=b.values)  # `nav` is the new colname. Note: df.assign is NOT inplace!
    tes = pd.DataFrame(index=pd.date_range(start='20160101', periods=len(p), freq='D'))
    tes['pfret'] = p.values
    tes['bnret'] = b.values
    tes['pfnav'] = tes.pfret.add(1).cumprod()
    tes['bnnav'] = tes.bnret.add(1).cumprod()
    tes['pmb'] = tes.pfnav - tes.bnnav

    for i in range(len(tes)-win+1):
        rolp = tes.pfnav.iloc[i:i+win]
        rolb = tes.bnnav.iloc[i:i+win]
        rolpmb = tes.pmb.iloc[i:i+win]
        cam = rolpmb.expanding(min_periods=1).apply(lambda x: x.argmax())  # expanding window
        rolp0 = pd.Series(rolp.iloc[cam.values.astype(int)].values, index=rolp.index)
        rolb0 = pd.Series(rolb.iloc[cam.values.astype(int)].values, index=rolb.index)
        roldd = rolp/rolp0 - rolb/rolb0
        tes.loc[tes.index[i+win-1], 'mdd'] = roldd.min()
        tes.loc[tes.index[i+win-1], 'end'] = roldd.idxmin()
        tes.loc[tes.index[i+win-1], 'start'] = cam.index[int(cam.loc[tes.loc[tes.index[i+win-1], 'end']])]

    return tes

In [19]:

%matplotlib inline
np.random.seed(314)
p = pd.Series(np.random.randn(200) / 100 + 0.001)  # as pfret
b = pd.Series(np.random.randn(200) / 100 + 0.001)  # as bnret
win = 70  # window length

# tworets: p & b

mdd, sd, ed = mddtworet(p, b)
f, axarr = plt.subplots(2, 2, figsize=[16, 10], sharex='col', sharey='row')

tes = rolmddtworet(p, b, win)
sd2 = tes.index[int(sd)]
ed2 = tes.index[int(ed)]

tes[['pfnav', 'bnnav']].plot(title='Cumulative Net Asset Value (Portfolio vs Benchmark)', ax=axarr[0,0])
axarr[0,0].axvspan(sd2, ed2, alpha=0.1, color='r')  # Draw a vertical span (rectangle) from sd to ed.
axarr[0,0].legend(['Portfolio','Benchmark'])

tes[['pmb', 'mdd']].plot(title='Cumulative Active Return, Rolling \& Static Max Drawdown', ax=axarr[1,0])
axarr[1,0].axvspan(sd2, ed2, alpha=1, color='0.618')  # alpha(0-1) for transparency. Cf. matplotlib.patches.Polygon for kwargs
  # Gray shades can be given as `color=` a string encoding a float in the 0-1 range
axarr[1,0].axhline(y=mdd, color='r', linestyle='--', label='mdd')  # add a horizontal line of mdd
axarr[1,0].legend(['Active','Rolling MaxDD','Static MaxDD'])  # other loc=: best, upper/lower/center right/left/center, center

# oneret: p

mdd, sd, ed = mddoneret(p)
tes = rolmddoneret(p, win)    
sd2 = tes.index[int(sd)]
ed2 = tes.index[int(ed)]

tes[['pfnav']].plot(title='Cumulative Net Asset Value (Portfolio)', ax=axarr[0,1])
axarr[0,1].axvspan(sd2, ed2, alpha=0.1, color='r')  # Draw a vertical span (rectangle) from sd to ed.
axarr[0,1].legend(['Portfolio'])

tes[['mdd']].plot(title='Rolling \& Static Max Drawdown', ax=axarr[1,1])
axarr[1,1].axvspan(sd2, ed2, alpha=1, color='0.618')  # alpha(0-1) for transparency. Cf. matplotlib.patches.Polygon for kwargs
axarr[1,1].axhline(y=mdd, color='r', linestyle='--', label='mdd')  # add a horizontal line of mdd
axarr[1,1].legend(['Rolling MaxDD','Static MaxDD'], loc='lower right')  # other loc=: best, upper/lower/center right/left/center, center

# Fine-tune figure; show x ticks for top plots and y ticks for right plots
plt.setp([a.get_xticklabels() for a in axarr[0, :]], visible=True);  # semicolon to suppress meaningless output
plt.setp([a.get_yticklabels() for a in axarr[:, 1]], visible=True);

# # Below deprecated
# keys = ['Portfolio', 'Benchmark']
# cum = pd.concat([p, b], axis=1, keys=keys).add(1).cumprod()  # navs; keys in pd.concat is used as cols
# cum['Active'] = cum.Portfolio - cum.Benchmark  # exret
# cum[['Portfolio', 'Benchmark']].plot(title='Cumulative Absolute', ax=axarr[0,0])
# axarr[0,0].axvspan(sd, ed, alpha=0.1, color='r')  # Draw a vertical span (rectangle) from sd to ed. 
# cum[['Active']].plot(title='Cumulative Active', ax=axarr[1,0])
# axarr[1,0].axvspan(sd, ed, alpha=1, color='0.618')  # alpha(0-1) for transparency. Cf. matplotlib.patches.Polygon for kwargs
#   # Gray shades can be given as `color=` a string encoding a float in the 0-1 range

Ch2 ITERATION¶

In [6]:

def previous_and_next(some_iterable):
    prevs, items, nexts = it.tee(some_iterable, 3)
    prevs = it.chain([None], prevs)
    nexts = it.chain(it.islice(nexts, 1, None), [None])
    return zip(prevs, items, nexts)
mylist = ['banana', 'orange', 'apple', 'kiwi', 'tomato']

for previous, item, nxt in previous_and_next(mylist):
    print ("Item is now", item, "next is", nxt, "previous is", previous)

Item is now banana next is orange previous is None
Item is now orange next is apple previous is banana
Item is now apple next is kiwi previous is orange
Item is now kiwi next is tomato previous is apple
Item is now tomato next is None previous is kiwi

Ch3 SOLVING EQUATIONS¶

In [7]:

# 求解线性方程组：x+3y+5z=10, 2x+5y+z=8, 2x+3y+8z=3;及将右侧三数换为(9,8,13)

A = np.mat('1,3,5; 2,5,1; 2,3,8')    # 构造系数矩阵 A
# B = np.mat('10,8,3; 9,8,13').T       # 构造矩阵(向量) b,注意尺寸(3*2)，否则不符合矩阵的乘法
# 上行与下行等价
B = np.array([[10,8,3],[9,8,13]]).T  # 构造二维数组 B
x = np.linalg.solve(A,B)             # 调用 solve 函数
print(x)  # x的每列是相应方程组的解。可用x.T[0],[1]的形式取出
print(type(x))

# 检验结果；下两行等价
print(A * x)
print(np.dot(A, x))

[[-9.28  1.  ]
 [ 5.16  1.  ]
 [ 0.76  1.  ]]

[[ 10.   9.]
 [  8.   8.]
 [  3.  13.]]
[[ 10.   9.]
 [  8.   8.]
 [  3.  13.]]

In [8]:

# 求解非线性方程组：x+y=4, x**2+y**2=8.
import scipy.optimize as so
def f(x):  # 预备求解形如f(x)=0的方程组
    x1, x2 = x[0], x[1]
    return [x1 + x2 - 4, x1**2 + x2**2 - 8]  # 返回误差
result = so.fsolve(f, [1,1])  # 第二参数为未知向量(x,y)的初始值
print ('the result is', result)
print ('the error is', f(result))

# 若方程组中未知数多，而各方程所含未知数少，Jacobi matrix稀疏，可传递Jacobi给fsolve以提高计算精度
def j(x):                         #定义雅可比矩阵
    x1,x2=x[0],x[1]
    return [[1,1],[2*x1,2*x2]]

result=so.fsolve(f,[1,1],fprime=j)   #带有雅可比矩阵的 fsolve 函数调用

print ('the result (Jacobi provided) is',result)
print ('the error (Jacobi provided) is',f(result))

the result is [ 1.99999994  2.00000006]
the error is [-8.8817841970012523e-16, 3.5527136788005009e-15]
the result (Jacobi provided) is [ 1.99999999  2.00000001]
the error (Jacobi provided) is [0.0, 0.0]

Ch4 DATABASE CONNECTION¶

In [1]:

# 推荐用此方法连接M$ SQL Server. Justyre 20180115
import pyodbc
import pandas as pd
# import getpass
# pwd = getpass.getpass(prompt='Enter the password: ')  # this is a password getter requiring manual input
pwd = '111111'
constr = pyodbc.connect(DRIVER='{SQL SERVER}', \
                        SERVER="192.168.1.126", DATABASE="WindDB", UID="wind", PWD=pwd, charset="UTF-8")
sqlcmd = '''select top 10 * from WindDB.dbo.ASHAREDESCRIPTION'''
mydata3 = pd.read_sql(sqlcmd, constr)
mydata3

Out[1]:

	OBJECT_ID	S_INFO_WINDCODE	S_INFO_CODE	S_INFO_NAME	S_INFO_COMPNAME	S_INFO_COMPNAMEENG	S_INFO_ISINCODE	S_INFO_EXCHMARKET	S_INFO_LISTBOARD	S_INFO_LISTDATE	S_INFO_DELISTDATE	S_INFO_SEDOLCODE	CRNCY_CODE	S_INFO_PINYIN	S_INFO_LISTBOARDNAME	IS_SHSC	OPDATE
0	{0000B5D4-B786-4B03-A7B0-50E3767ABF84}	600373.SH	600373	中文传媒	中文天地出版传媒股份有限公司	Chinese Universe Publishing and Media Co.,Ltd.	CNE0000019X4	SSE	434004000	20020304	None	None	CNY	zwcm	主板	1.0	2015-09-09 09:07:23
1	{001EC435-194A-2377-E050-C80A10010479}	A14629.SZ	A14629	中科创新(IPO终止)	武汉中科创新技术股份有限公司	Wuhan Zhongke Innovation Technology Co., Ltd.	None	SZSE	434001000	None	None	None	CNY	zkcx	创业板	0.0	2016-06-03 21:56:58
2	{001EE265-3F28-DD72-E050-C80A100105DE}	300557.SZ	300557	理工光科	武汉理工光科股份有限公司	Wuhan Ligong Guangke Co.Ltd.	CNE100002DC1	SZSE	434001000	20161101	None	None	CNY	lggk	创业板	0.0	2017-01-05 15:26:58
3	{0058F562-B872-4558-A53B-313201BF5C86}	000416.SZ	000416	民生控股	民生控股股份有限公司	Minsheng Holdings Co.,Ltd	CNE0000009P1	SZSE	434004000	19960719	None	None	CNY	mskg	主板	0.0	2015-09-09 09:08:04
4	{00910667-25C7-429A-AF39-515A56E11D9E}	002156.SZ	002156	通富微电	通富微电子股份有限公司	TongFu Microelectronics Co.,Ltd.	CNE1000006C3	SZSE	434003000	20070816	None	None	CNY	tfwd	中小企业板	2.0	2017-03-20 00:22:43
5	{00A97A43-6466-41D6-B7B7-017B6868D630}	600500.SH	600500	中化国际	中化国际(控股)股份有限公司	Sinochem International Corporation	CNE0000011R3	SSE	434004000	20000301	None	None	CNY	zhgj	主板	1.0	2015-09-09 09:08:19
6	{00C69CD0-0F33-4887-8483-AD4B0435B1BA}	002123.SZ	002123	梦网集团	梦网荣信科技集团股份有限公司	Montnets Rongxin Technology Group Co.,Ltd.	CNE100000049	SZSE	434003000	20070328	None	None	CNY	mwjt	中小企业板	2.0	2017-08-20 21:16:58
7	{00D57DEC-3304-49C1-A367-74EAD2AAEA1D}	000610.SZ	000610	西安旅游	西安旅游股份有限公司	Xi'An Tourism Co.,Ltd.	CNE000000D73	SZSE	434004000	19960926	None	None	CNY	xaly	主板	0.0	2015-09-09 09:07:54
8	{0111DE5C-5A2D-4938-914F-A9E871FB3EA5}	601699.SH	601699	潞安环能	山西潞安环保能源开发股份有限公司	Shanxi Lu'An Environmental Energy Development ...	CNE000001NT7	SSE	434004000	20060922	None	None	CNY	lahn	主板	1.0	2015-09-09 09:09:18
9	{0136BACD-2EF3-EC2D-E053-1001C80AB03C}	603336.SH	603336	宏辉果蔬	宏辉果蔬股份有限公司	Great-Sun Foods Co.,Ltd.	None	SSE	434004000	20161124	None	None	CNY	hhgs	主板	0.0	2016-12-30 15:27:03

Ch5 PANDAS DATETIME RESAMPLING¶

In [2]:

data=pd.DataFrame({'price':[949.975 ,941.370,949.975,941.370,949.975],'volume':[0.01,0.01,0.01,0.01,0.01]})
data['timer']=['2014-01-15 14:29:54','2014-01-15 14:29:59','2014-01-15 14:30:17','2014-01-15 14:30:24','2014-01-15 14:30:36']
data.dtypes
data.timer=pd.to_datetime(data.timer)
data.set_index('timer',inplace=True)
data
a1 = data.price.resample('30s').ohlc()
a1['volsum'] = data.volume.resample('30s').sum()
a1

Out[2]:

	open	high	low	close	volsum
timer
2014-01-15 14:29:30	949.975	949.975	941.370	941.370	0.02
2014-01-15 14:30:00	949.975	949.975	941.370	941.370	0.02
2014-01-15 14:30:30	949.975	949.975	949.975	949.975	0.01

CASE 2: LINEAR REGRESSION - ORDINARY LEAST SQUARES¶

In [9]:

dat = pd.read_csv('Guerry.csv',index_col=0)
dat

Out[9]:

	dept	Region	Department	Crime_pers	Crime_prop	Literacy	Donations	Infants	Suicides	MainCity	...	Crime_parents	Infanticide	Donation_clergy	Lottery	Desertion	Instruction	Prostitutes	Distance	Area	Pop1831
1	1	E	Ain	28870	15890	37	5098	33120	35039	2:Med	...	71	60	69	41	55	46	13	218.372	5762	346.03
2	2	N	Aisne	26226	5521	51	8901	14572	12831	2:Med	...	4	82	36	38	82	24	327	65.945	7369	513.00
3	3	C	Allier	26747	7925	13	10973	17044	114121	2:Med	...	46	42	76	66	16	85	34	161.927	7340	298.26
4	4	E	Basses-Alpes	12935	7289	46	2733	23018	14238	1:Sm	...	70	12	37	80	32	29	2	351.399	6925	155.90
5	5	E	Hautes-Alpes	17488	8174	69	6962	23076	16171	1:Sm	...	22	23	64	79	35	7	1	320.280	5549	129.10
6	7	S	Ardeche	9474	10263	27	3188	42117	52547	1:Sm	...	76	47	67	70	19	62	1	279.413	5529	340.73
7	8	N	Ardennes	35203	8847	67	6400	16106	26198	2:Med	...	53	85	49	31	62	9	83	105.694	5229	289.62
8	9	S	Ariege	6173	9597	18	3542	22916	123625	1:Sm	...	74	28	63	75	22	77	3	385.313	4890	253.12
9	10	E	Aube	19602	4086	59	3608	18642	10989	2:Med	...	77	54	9	28	86	15	207	83.244	6004	246.36
10	11	S	Aude	15647	10431	34	2582	20225	66498	2:Med	...	80	35	27	50	63	48	1	370.949	6139	270.13
11	12	S	Aveyron	8236	6731	31	3211	21981	116671	2:Med	...	51	5	23	81	10	44	4	296.089	8735	359.06
12	13	S	Bouches-du-Rhone	13409	5291	38	2314	9325	8107	3:Lg	...	45	74	55	3	23	43	25	362.568	5087	359.47
13	14	N	Calvados	17577	4500	52	27830	8983	31807	2:Med	...	57	56	11	13	12	22	194	117.487	5548	494.70
14	15	C	Cantal	18070	11645	31	4093	15335	87338	2:Med	...	79	83	66	82	1	51	20	245.849	5726	258.59
15	16	W	Charente	24964	13018	36	13602	19454	25720	2:Med	...	2	7	81	60	61	47	8	224.339	5956	362.53
16	17	W	Charente-Inferieure	18712	5357	39	13254	23999	16798	2:Med	...	3	38	72	35	74	42	27	238.538	6864	445.25
17	18	C	Cher	21934	10503	13	9561	23574	19497	2:Med	...	69	11	86	44	51	83	26	116.257	7235	256.06
18	19	C	Correze	15262	12949	12	14993	19330	47480	2:Med	...	86	16	82	84	2	86	3	227.899	5857	294.83
19	21	E	Cote-d'Or	32256	9159	60	2540	15599	16128	2:Med	...	49	27	18	33	78	13	206	136.109	8763	375.88
20	22	W	Cotes-du-Nord	28607	7050	16	10387	36098	75056	2:Med	...	6	69	15	72	47	80	16	225.161	6878	598.87
21	23	C	Creuse	37014	20235	23	10997	14363	77823	1:Sm	...	75	24	75	85	4	71	12	180.846	5565	265.38
22	24	W	Dordogne	21585	10237	18	4687	21375	36024	2:Med	...	64	18	79	77	44	78	3	253.776	9060	482.75
23	25	E	Doubs	11560	5914	73	3436	12512	40690	2:Med	...	38	25	6	18	73	2	65	202.065	5234	265.54
24	26	E	Drome	13396	7759	42	2829	16348	23816	2:Med	...	21	13	62	54	46	38	8	295.543	6530	299.56
25	27	N	Eure	14795	4774	51	11712	16039	13493	2:Med	...	39	45	45	47	27	23	179	61.863	6040	424.25
26	28	C	Eure-et-Loir	21368	4016	54	4553	14475	15015	2:Med	...	18	62	14	48	72	18	180	54.558	5880	278.82
27	29	W	Finistere	29872	6842	15	23945	28392	25143	2:Med	...	24	78	25	36	77	81	42	276.210	6733	524.40
28	30	S	Gard	13115	7990	40	3048	28726	18292	2:Med	...	15	39	59	20	40	40	5	323.004	5853	357.38
29	31	S	Haute-Garonne	18642	7204	31	2286	15378	56140	3:Lg	...	62	59	13	25	15	33	8	361.668	6257	427.86
30	32	S	Gers	18642	10486	38	2848	15250	61510	2:Med	...	43	13	32	74	30	44	1	343.569	6309	312.16
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
57	59	N	Nord	26740	6175	45	6092	8926	13851	3:Lg	...	14	81	38	7	64	30	308	106.335	5743	989.94
58	60	N	Oise	28180	6659	54	5501	18021	5994	2:Med	...	31	86	50	43	57	20	337	33.768	5860	397.73
59	61	N	Orne	28329	8248	45	9242	20852	34069	2:Med	...	29	50	31	57	25	33	117	97.554	6103	441.88
60	62	N	Pas-de-Calais	23101	4040	49	5740	10575	15400	2:Med	...	36	79	10	27	48	26	163	104.400	6671	655.22
61	63	C	Puy-de-Dome	17256	12141	19	5963	22948	78148	2:Med	...	42	63	61	53	8	76	62	205.218	7970	573.11
62	64	W	Basses-Pyrenees	16722	8533	47	3299	12393	65995	2:Med	...	34	72	60	34	7	28	12	387.935	7645	428.40
63	65	S	Hautes-Pyrenees	12223	9797	53	6001	12125	148039	2:Med	...	85	75	71	76	20	21	5	386.559	4464	233.03
64	66	S	Pyrenees-Orientales	6728	7632	31	11644	15167	37843	2:Med	...	67	84	77	11	18	52	5	403.445	4116	157.05
65	67	E	Bas-Rhin	12309	4920	62	14472	14356	18623	3:Lg	...	23	48	51	5	53	12	101	217.752	4755	540.21
66	68	E	Haut-Rhin	7343	4915	71	6001	14783	21233	2:Med	...	40	53	17	10	56	5	26	217.971	3525	424.26
67	69	E	Rhone	18793	4504	45	1983	3910	17003	3:Lg	...	37	33	21	2	14	31	104	213.032	3249	434.43
68	70	E	Haute-Saone	22339	7770	59	11701	11850	39714	1:Sm	...	25	68	57	65	83	14	99	176.135	5360	338.91
69	71	E	Saone-et-Loire	28391	10708	32	3710	20442	22184	2:Med	...	11	10	58	45	31	49	40	168.713	8575	523.97
70	72	C	Sarthe	33913	8294	30	3357	10779	29280	2:Med	...	41	57	19	49	75	35	79	108.294	6206	457.37
71	75	N	Seine	13945	1368	71	4204	2660	3632	3:Lg	...	60	67	53	1	33	6	4744	0.000	762	935.11
72	76	N	Seine-Inferieure	18355	2906	43	7245	7506	9523	3:Lg	...	28	61	74	9	36	35	546	75.658	6278	693.68
73	77	N	Seine-et-Marne	22201	5786	54	5303	16324	7315	2:Med	...	16	73	26	29	67	37	453	27.647	5915	323.89
74	78	N	Seine-et-Oise	12477	3879	56	4007	16303	3460	2:Med	...	10	30	24	6	42	17	874	16.888	5334	448.18
75	79	W	Deux-Sevres	18400	6863	41	16956	25461	24533	2:Med	...	30	4	85	71	84	39	6	188.474	5999	297.85
76	80	N	Somme	33592	7144	44	4964	12447	12836	2:Med	...	7	64	33	30	80	34	302	69.520	6170	543.70
77	81	S	Tarn	13019	6241	20	3449	29305	68980	2:Med	...	13	9	47	67	17	73	3	328.146	5758	333.84
78	82	S	Tarn-et-Garonne	14790	8680	25	4558	23771	48317	2:Med	...	66	41	52	64	39	64	4	313.090	3718	242.51
79	83	S	Var	13145	9572	23	2449	14800	13380	2:Med	...	55	49	40	26	52	69	6	389.512	5973	317.50
80	84	S	Vaucluse	13576	5731	37	1246	17239	19024	2:Med	...	61	76	54	8	41	45	2	337.215	3567	239.11
81	85	W	Vendee	20827	7566	28	14035	62486	67963	1:Sm	...	50	44	30	68	79	59	4	212.459	6720	330.36
82	86	W	Vienne	15010	4710	25	8922	35224	21851	2:Med	...	20	1	44	40	38	65	18	170.523	6990	282.73
83	87	C	Haute-Vienne	16256	6402	13	13817	19940	33497	2:Med	...	68	6	78	55	11	84	7	198.874	5520	285.13
84	88	E	Vosges	18835	9044	62	4040	14978	33029	2:Med	...	58	34	5	14	85	11	43	174.477	5874	397.99
85	89	C	Yonne	18006	6516	47	4276	16616	12789	2:Med	...	32	22	35	51	66	27	272	81.797	7427	352.49
86	200	NaN	Corse	2199	4589	49	37015	24743	37016	2:Med	...	81	2	84	83	9	25	1	539.213	8680	195.41

86 rows × 23 columns

In [10]:

# Fit regression model (using the natural log of one of the regressaors)
results = smf.ols('Lottery ~ Literacy + np.log(Pop1831)', data=dat).fit()

# Inspect the results
# print(results.summary())
results.summary()

# Explanation of output:

# TABLE 1
# Basics:
# No. Observations: Total number of observations used in regression, n=86.
# Df Residuals, Df Model: Total variance has df of n-1=85. 
#   Model has df of #(estimated coef)-1=3-1=2 (since intercept is included by default).
#   Thus, residual df = total df - model df = 85-2=83.

# Anova Table: analysis of variance.
#   Sum of Squares of Regression(ie. MOdel) (SSR) = \sum_{i=1}^n (Y_i^hat - Y_bar)^2（拟合值vs期望）; Mean SR=SSR/DfModel.
#   SS of Error(ie. Residuals) (SSE)              = \sum_{i=1}^n (Y_i - Y_i^hat)^2（原值vs拟合值）; Mean SE=SSE/DfResiduals.
#   SS of Total (SSTO)                            = \sum_{i=1}^n (Y_i - Y_bar)^2（原值vs期望）.

# Overall Model Fit:
# R-squared: Proportion of variance in `Lottery` which can be explained by the independent vars (ie. predictors).
# Adj R-squared: Penalizes addition of extraneous predictors to the model.
#   AdjRsq = 1 – ((1 – Rsq)((N – 1) /( N – k – 1)) where k is the number of predictors
# F-statistic: F(DfModel, DfResiduals) = MSR/MSE.
# Prob (F-statistic): Prob > F, used in testing the null hypothesis that all of the model coefficients are 0. Smaller, better.

# Model Quality for Model Selection:
# Log-Likelihood: The `likelihood` is the product of the density evaluated at the observations. Bigger, better.
  # Used to compare the fit of different coefs. Log-likelihood Lhat = Prob(x|thetahat, Model), thetahat is paraval that maxs L.
# AIC: Measure of relative quality of statmodels for a given set of data. Smaller, better.
  # AIC = 2 * #(estimated coef) - 2 * log(Lhat)
# BIC: Criterion for model selection. Smaller, better.
  # BIC = log(No. Observations) * #(estiamted coef) - 2 * log(Lhat)

# TABLE 2
# Parameter Estimates:
# coef: coefficients of predictors.
# stderr: standard errors associated with the coefficients.
# t: t-statistics used in testing whether a given coefficient is significantly different from zero.
# P>|t|: 2-tailed p-values used in testing the null hypothesis that the coefficient (parameter) is 0. Smaller, better.
# [0.025, 0.975]: the 95% confidence intervals for the coefficients.
#   The confidence intervals are related to the p-values such that 
#   the coefficient will not be statistically significant at alpha = .05 if the 95% confidence interval includes zero.

# TABLE 3  
# Omnibus: a set of tests like F-test
# Skewness: 0 for symmetric data, positive for right-skewed (right tail is longer than left tail).
# Kurtosis: 3 for std normdist, >3 for heavy-tailed distribution, <3 for light-tailed dist.
# Durbin-Watson: Detect autocorrelation in residuals. Always betw 0 and 4.
#   2 is no autocorr; <<2 (eg.<1) is evidence of positive serial corr; >>2 nega corr, ie. underesti of stat significance.
# Jarque-Bera: a goodness-of-fit test of whether sample data have the skewness and kurtosis matching a normal distribution.
#   Smaller, better. For normdist, JB=0.
# Condition number: measures how sensitive a function is to changes or errors in the output, 
#   and how much error in the output results from an error in the input. Lower, better.
#   Low: well-conditioned problem, High: ill-conditioned. Can be used as a diagnostic for multicollinearity.
#   If >30, regression has significant mulcoll.
#   Mulcoll exists if >=2 variables related to the high condition number have high proportions of variance explained.

Out[10]:

OLS Regression Results
Dep. Variable:	Lottery	R-squared:	0.348
Model:	OLS	Adj. R-squared:	0.333
Method:	Least Squares	F-statistic:	22.20
Date:	Sat, 10 Jun 2017	Prob (F-statistic):	1.90e-08
Time:	11:19:37	Log-Likelihood:	-379.82
No. Observations:	86	AIC:	765.6
Df Residuals:	83	BIC:	773.0
Df Model:	2
Covariance Type:	nonrobust

	coef	std err	t	P>\|t\|	[0.025	0.975]
Intercept	246.4341	35.233	6.995	0.000	176.358	316.510
Literacy	-0.4889	0.128	-3.832	0.000	-0.743	-0.235
np.log(Pop1831)	-31.3114	5.977	-5.239	0.000	-43.199	-19.424

Omnibus:	3.713	Durbin-Watson:	2.019
Prob(Omnibus):	0.156	Jarque-Bera (JB):	3.394
Skew:	-0.487	Prob(JB):	0.183
Kurtosis:	3.003	Cond. No.	702.

In [11]:

dir(results)

Out[11]:

['HC0_se',
 'HC1_se',
 'HC2_se',
 'HC3_se',
 '_HCCM',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_cache',
 '_data_attr',
 '_get_robustcov_results',
 '_is_nested',
 '_wexog_singular_values',
 'aic',
 'bic',
 'bse',
 'centered_tss',
 'compare_f_test',
 'compare_lm_test',
 'compare_lr_test',
 'condition_number',
 'conf_int',
 'conf_int_el',
 'cov_HC0',
 'cov_HC1',
 'cov_HC2',
 'cov_HC3',
 'cov_kwds',
 'cov_params',
 'cov_type',
 'df_model',
 'df_resid',
 'diagn',
 'eigenvals',
 'el_test',
 'ess',
 'f_pvalue',
 'f_test',
 'fittedvalues',
 'fvalue',
 'get_influence',
 'get_prediction',
 'get_robustcov_results',
 'initialize',
 'k_constant',
 'llf',
 'load',
 'model',
 'mse_model',
 'mse_resid',
 'mse_total',
 'nobs',
 'normalized_cov_params',
 'outlier_test',
 'params',
 'predict',
 'pvalues',
 'remove_data',
 'resid',
 'resid_pearson',
 'rsquared',
 'rsquared_adj',
 'save',
 'scale',
 'ssr',
 'summary',
 'summary2',
 't_test',
 'tvalues',
 'uncentered_tss',
 'use_t',
 'wald_test',
 'wald_test_terms',
 'wresid']

In [12]:

# Normality test: Jarque-Bera
name = ['Jarque-Bera', 'Chi^2 two-tail prob.', 'Skew', 'Kurtosis']
test = sms.jarque_bera(results.resid)
list(zip(name, test))

Out[12]:

[('Jarque-Bera', 3.3936080248431852),
 ('Chi^2 two-tail prob.', 0.18326831231663193),
 ('Skew', -0.4865803431122351),
 ('Kurtosis', 3.003417757881635)]

In [13]:

# Omni test
name = ['Chi^2', 'Two-tail probability']
test = sms.omni_normtest(results.resid)
list(zip(name, test))

Out[13]:

[('Chi^2', 3.7134378115972022), ('Two-tail probability', 0.15618424580304663)]

In [14]:

# Influence of each observation
from statsmodels.stats.outliers_influence import OLSInfluence
test_class = OLSInfluence(results)
test_class.dfbetas[:5,:]

Out[14]:

array([[-0.00301154,  0.00290872,  0.00118179],
       [-0.06425662,  0.04043093,  0.06281609],
       [ 0.01554894, -0.03556038, -0.00905336],
       [ 0.17899858,  0.04098207, -0.18062352],
       [ 0.29679073,  0.21249207, -0.3213655 ]])

In [15]:

dir(OLSInfluence)

Out[15]:

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_get_drop_vari',
 '_ols_xnoti',
 '_res_looo',
 'cooks_distance',
 'cov_ratio',
 'det_cov_params_not_obsi',
 'dfbetas',
 'dffits',
 'dffits_internal',
 'ess_press',
 'get_resid_studentized_external',
 'hat_diag_factor',
 'hat_matrix_diag',
 'influence',
 'params_not_obsi',
 'resid_press',
 'resid_std',
 'resid_studentized_external',
 'resid_studentized_internal',
 'resid_var',
 'sigma2_not_obsi',
 'summary_frame',
 'summary_table']

In [16]:

%matplotlib inline
from statsmodels.graphics.regressionplots import plot_leverage_resid2
fig, ax = plt.subplots(figsize=(8,6))
fig = plot_leverage_resid2(results, ax = ax)

In [17]:

# Multicollinearity test
np.linalg.cond(results.model.exog), results.condition_number

Out[17]:

(702.17921454900625, 702.17921454900625)

In [18]:

# Heteroskedasticity test
# Breusch-Pagan test
name = ['Lagrange multiplier statistic', 'p-value',
        'f-value', 'f p-value']
test = sms.het_breuschpagan(results.resid, results.model.exog)
list(zip(name, test))

Out[18]:

[('Lagrange multiplier statistic', 4.8932133740940147),
 ('p-value', 0.086586905023519636),
 ('f-value', 2.5037159462564671),
 ('f p-value', 0.087940287826727276)]

In [19]:

# Goldfeld-Quandt test
name = ['F statistic', 'p-value']
test = sms.het_goldfeldquandt(results.resid, results.model.exog)
list(zip(name, test))

Out[19]:

[('F statistic', 1.1002422436378148), ('p-value', 0.3820295068692508)]

In [20]:

# Linearity test
# Harvey-Collier multiplier test for Null hypothesis that the linear specification is correct
name = ['t value', 'p value']
test = sms.linear_harvey_collier(results)
list(zip(name, test))

Out[20]:

[('t value', -1.0796490077789866), ('p value', 0.28346392475559085)]

In [21]:

results.params

Out[21]:

Intercept          246.434135
Literacy            -0.488923
np.log(Pop1831)    -31.311392
dtype: float64

In [22]:

all(results.resid == dat.Lottery - results.predict(dat))  # Validation: residual = observed - predicted

Out[22]:

True

In [23]:

src = pd.concat([pd.Series([1] * len(dat), index=list(range(1, len(dat)+1))).rename('B0'), \
                 dat.Literacy, np.log(dat.Pop1831)], axis=1)  # prepares matrix X
src['Observed_y'] = dat.Lottery
src['Predicted_y'] = results.predict(dat)  # gives predicted values
src['Residual_y'] = results.resid  # residuals
src

Out[23]:

	B0	Literacy	Pop1831	Observed_y	Predicted_y	Residual_y
1	1	37	5.846525	41	45.281116	-4.281116
2	1	51	6.240276	38	26.107315	11.892685
3	1	13	5.697966	66	61.666895	4.333105
4	1	46	5.049215	80	65.845713	14.154287
5	1	69	4.860587	79	60.506663	18.493337
6	1	27	5.831090	70	50.653644	19.346356
7	1	67	5.668570	31	36.185455	-5.185455
8	1	18	5.533864	75	64.360537	10.639463
9	1	59	5.506794	28	45.162269	-17.162269
10	1	34	5.598903	50	54.501280	-4.501280
11	1	31	5.883490	81	47.057261	33.942739
12	1	38	5.884631	3	43.599064	-40.599064
13	1	52	6.203952	13	26.755757	-13.755757
14	1	31	5.555244	82	57.335091	24.664909
15	1	36	5.893107	60	44.311499	15.688501
16	1	39	6.098636	35	36.409340	-1.409340
17	1	13	5.545412	44	66.443567	-22.443567
18	1	12	5.686399	84	62.517987	21.482013
19	1	60	5.929270	33	31.445032	1.554968
20	1	16	6.395045	72	38.373612	33.626388
21	1	23	5.581163	85	60.434920	24.565080
22	1	18	6.179499	77	44.144799	32.855201
23	1	73	5.581765	18	35.969876	-17.969876
24	1	42	5.702315	54	47.351938	6.648062
25	1	51	6.050323	47	32.055006	14.944994
26	1	54	5.630566	48	43.731396	4.268604
27	1	15	6.262255	36	43.020369	-7.020369
28	1	40	5.878800	20	42.803796	-22.803796
29	1	31	6.058796	25	41.568169	-16.568169
30	1	38	5.743516	74	48.017566	25.982434
...	...	...	...	...	...	...
57	1	45	6.897644	7	8.457733	-1.457733
58	1	54	5.985773	43	32.609371	10.390629
59	1	45	6.091038	57	33.713689	23.286311
60	1	49	6.484971	27	19.423414	7.576586
61	1	19	6.351078	53	38.283506	14.716494
62	1	47	6.060057	34	33.705901	0.294099
63	1	53	5.451167	76	49.837559	26.162441
64	1	31	5.056564	11	72.949443	-61.949443
65	1	62	6.291958	5	19.110919	-14.110919
66	1	71	6.050346	10	22.275799	-12.275799
67	1	45	6.074035	2	34.246094	-32.246094
68	1	59	5.825735	65	35.175792	29.824208
69	1	32	6.261434	45	34.734356	10.265644
70	1	30	6.125493	49	39.968728	9.031272
71	1	71	6.840664	1	-2.470148	3.470148
72	1	43	6.542011	9	20.570962	-11.570962
73	1	54	5.780404	29	39.039774	-10.039774
74	1	56	6.105195	6	27.892269	-21.892269
75	1	41	5.696590	71	48.020110	22.979890
76	1	44	6.298398	30	27.709905	2.290095
77	1	20	5.810662	67	54.715754	12.284246
78	1	25	5.491043	64	62.278850	1.721150
79	1	23	5.760478	26	54.820316	-28.820316
80	1	37	5.476924	8	56.853862	-48.853862
81	1	28	5.800183	68	51.132475	16.867525
82	1	25	5.644492	40	57.474134	-17.474134
83	1	13	5.652945	55	63.076545	-8.076545
84	1	62	5.986427	14	28.677522	-14.677522
85	1	47	5.865022	51	39.812721	11.187279
86	1	49	5.275100	83	57.306164	25.693836

86 rows × 6 columns

In [24]:

# Lottery ~ Literacy + np.log(Pop1831)
plt.figure(figsize=(9, 4))
plt.subplot(121)
plt.scatter(src.Residual_y, src.Predicted_y, marker='o')
plt.grid(True)
plt.xlabel('x')
plt.ylabel('y')
plt.title('Residual vs Predicted')
plt.subplot(122)
plt.scatter(src.Observed_y, src.Predicted_y, marker='o')
plt.grid(True)
plt.xlabel('x')
plt.ylabel('y')
plt.title('Observed vs Predicted')

Out[24]:

In [25]:

# A way to display Latex in cell output
from IPython.core.display import display, Latex
display(Latex(r'$\alpha_1$'))

$\alpha_1$

$\textbf{Linear Regression Principles}$

A multiple linear regression model can be written as

$y=\beta _{0}+\beta _{1}x_{1}+\beta _{2}x_{2}+\dots \beta _{k}x_{k}+u$

where $y$ is the explained variable, $x_{1},x_{2}\dots x_{k}$ are the explanatory variables, $u$ is the error, and $\beta _{0},\beta _{1}\dots \beta _{k}$ are unknown coefficients to be estimated. Given observations ${\displaystyle \left\{y_{i},x_{1i},x_{2i},\dots x_{ki}\right\}_{i=1}^{n}}$, we have a system of $n$ linear equations that can be expressed in matrix notation.

${\begin{bmatrix}y_{1}\\y_{2}\\\vdots \\y_{n}\end{bmatrix}}={\begin{bmatrix}1&x;_{11}&x;_{12}&\dots &x;_{1k}\\1&x;_{21}&x;_{22}&\dots &x;_{2k}\\\vdots &\vdots &\vdots &\ddots &\vdots \\1&x;_{n1}&x;_{n2}&\dots &x;_{nk}\\\end{bmatrix}}{\begin{bmatrix}\beta _{0}\\\beta _{1}\\\vdots \\\beta _{k}\end{bmatrix}}+{\begin{bmatrix}u_{1}\\u_{2}\\\vdots \\u_{n}\end{bmatrix}}$

$\mathbf {y} =\mathbf {X} {\boldsymbol {\beta }}+\mathbf {u}$

where $\mathbf {y}$ and $\mathbf {u}$ are each a vector of dimension $n\times 1$, $\mathbf {X}$ is a matrix of order $n\times (k+1)$, and $\boldsymbol {\beta }$ is a vector of dimension $(k+1)\times 1$. Under the Gauss–Markov assumptions, the best linear unbiased estimator of $\boldsymbol {\beta }$ is the linear least squares estimator $\mathbf {b} =\left(\mathbf {X} ^{\mathsf {T}}\mathbf {X} \right)^{-1}\mathbf {X} ^{\mathsf {T}}\mathbf {y} $, involving the moment matrix $\mathbf {X} ^{\mathsf {T}}\mathbf {X} $ and $\mathbf {X} ^{\mathsf {T}}\mathbf {y} $ defined as

$\mathbf {X} ^{\mathsf {T}}\mathbf {X} ={\begin{bmatrix}n&\sum x_{i1}&\sum x_{i2}&\dots &\sum x_{ik}\\\sum x_{i1}&\sum x_{i1}^{2}&\sum x_{i1}x_{i2}&\dots &\sum x_{i1}x_{ik}\\\sum x_{i2}&\sum x_{i1}x_{i2}&\sum x_{i2}^{2}&\dots &\sum x_{i2}x_{ik}\\\vdots &\vdots &\vdots &\ddots &\vdots \\\sum x_{ik}&\sum x_{i1}x_{ik}&\sum x_{i2}x_{ik}&\dots &\sum x_{ik}^{2}\end{bmatrix}}$

and

$\mathbf {X} ^{\mathsf {T}}\mathbf {y} ={\begin{bmatrix}\sum y_{i}\\\sum x_{i1}y_{i}\\\vdots \\\sum x_{ik}y_{i}\end{bmatrix}}$ where obviously $\mathbf {X} ^{\mathsf {T}}\mathbf {X} $ is a square matrix of dimension $(k+1)\times (k+1)$, and $\mathbf {X} ^{\mathsf {T}}\mathbf {y} $ is a vector of dimension $(k+1)\times 1$.

$\textbf{Moment Matrix}$

A moment matrix is a special symmetric square matrix whose rows and columns are indexed by monomials. The entries of the matrix depend on the product of the indexing monomials only.

$\textbf{Gauss–Markov Theorem}$

$\textbf{STATEMENT}$ Suppose we have in matrix notation,

${\underline {y}}=X{\underline {\beta }}+{\underline {\varepsilon }},\quad ({\underline {y}},{\underline {\varepsilon }}\in \mathbb {R} ^{n},{\underline {\beta }}\in \mathbb {R} ^{K}{\text{ and }}X\in \mathbb {R} ^{n\times K})$

expanding to,

$\displaystyle y_{i}=\sum _{j=1}^{K}\beta _{j}X_{ij}+\varepsilon _{i}\quad \forall i=1,2,\ldots ,n $

where $\beta _{j}$ are non-random but $\textbf{unobservable}$ parameters, $X_{ij}$ are non-random and observable (called the "explanatory variables"), $\varepsilon _{i}$ are random, and so $y_{i}$ are random. The random variables $\varepsilon _{i}$ are called the "disturbance", "noise" or simply "error" (will be contrasted with "residual" later in the article). Note that to include a constant in the model above, one can choose to introduce the constant as a variable $\beta _{K+1}$ with a newly introduced last column of X being unity i.e., $X_{i(K+1)}=1$ for all $i$.

The Gauss–Markov assumptions concern the set of error random variables, $\varepsilon _{i}$:

They have mean zero: $\mathbb {E} [\varepsilon _{i}]=0.$
They are homoscedastic, that is all have the same finite variance: ${\text{Var}}(\varepsilon _{i})=\sigma ^{2}<\infty ,$ and
Distinct error terms are uncorrelated: ${\text{Cov}}(\varepsilon _{i},\varepsilon _{j})=0,\forall i\neq j.$

A linear estimator of $\beta _{j}$ is a linear combination

${\widehat {\beta }}_{j}=c_{1j}y_{1}+\cdots +c_{nj}y_{n}$

in which the coefficients $c_{ij}$ are not allowed to depend on the underlying coefficients $\beta _{j}$, since those are not observable, but are allowed to depend on the values $X_{ij}$, since these data are observable. (The dependence of the coefficients on each $X_{ij}$ is typically nonlinear; the estimator is linear in each $y_{i}$ and hence in each random ${\displaystyle \varepsilon ,}$ which is why this is "linear" regression.) The estimator is said to be $\textbf{unbiased}$ if and only if

$\mathbb {E} \left[{\widehat {\beta }}_{j}\right]=\beta _{j}$

regardless of the values of $X_{ij}$. Now, let $\displaystyle \sum \nolimits _{j=1}^{K}\lambda _{j}\beta _{j}$ be some linear combination of the coefficients. Then the mean squared error of the corresponding estimation is

$\mathbb {E} \left[\left(\sum _{j=1}^{K}\lambda _{j}\left({\widehat {\beta }}_{j}-\beta _{j}\right)\right)^{2}\right],$

in other words it is the expectation of the square of the weighted sum (across parameters) of the differences between the estimators and the corresponding parameters to be estimated. (Since we are considering the case in which all the parameter estimates are unbiased, this mean squared error is the same as the variance of the linear combination.) The $\textbf{best linear unbiased estimator (BLUE)} $ of the vector $\beta$ of parameters $\beta _{j}$ is one with the smallest mean squared error for every vector $\lambda$ of linear combination parameters. This is equivalent to the condition that

${{\text{Var}}({\widetilde {\beta }})-{\text{Var}}({\widehat {\beta }})}$

is a positive semi-definite matrix for every other linear unbiased estimator ${\widetilde {\beta }}$.

The $\textbf{ordinary least squares estimator (OLS)}$ is the function

${\widehat {\beta }}=(X'X)^{-1}X'y$

of $y$ and $X$ (where $X'$ denotes the transpose of $X$) that minimizes the sum of squares of residuals (misprediction amounts):

$\displaystyle \sum _{i=1}^{n}\left(y_{i}-{\widehat {y}}_{i}\right)^{2}=\sum _{i=1}^{n}\left(y_{i}-\sum _{j=1}^{K}{\widehat {\beta }}_{j}X_{ij}\right)^{2}.$

The theorem now states that the OLS estimator is a BLUE.

$\textbf{PROOF}$ The main idea of the proof is that the least-squares estimator is uncorrelated with every linear unbiased estimator of zero, i.e., with every linear combination $a_{1}y_{1}+\cdots +a_{n}y_{n}$ whose coefficients do not depend upon the unobservable $\beta$ but whose expected value is always zero.

Let ${\displaystyle {\tilde {\beta }}=Cy}$ be another linear estimator of ${\displaystyle \beta }$ with ${\displaystyle C=(X'X)^{-1}X'+D}$ where ${\displaystyle D}$ is a ${\displaystyle K\times n}$ non-zero matrix. As we're restricting to unbiased estimators, minimum mean squared error implies minimum variance. The goal is therefore to show that such an estimator has a variance no smaller than that of ${\displaystyle {\widehat {\beta }},}$ the OLS estimator. We calculate:

${\displaystyle {\begin{aligned}\mathbb {E} [{\tilde {\beta }}]&=\mathbb {E} [Cy]\\&=\mathbb {E} \left[\left((X'X)^{-1}X'+D\right)(X\beta +\varepsilon )\right]\\&=\left((X'X)^{-1}X'+D\right)X\beta +\left((X'X)^{-1}X'+D\right)\mathbb {E} [\varepsilon ]\\&=\left((X'X)^{-1}X'+D\right)X\beta &&\mathbb {E} [\varepsilon ]=0\\&=(X'X)^{-1}X'X\beta +DX\beta \\&=(I_{K}+DX)\beta .\\\end{aligned}}}$

Therefore, ${\displaystyle {\tilde {\beta }}}$ is unbiased if and only if ${\displaystyle DX=0}$. Then:

${\displaystyle {\begin{aligned}{\text{Var}}({\tilde {\beta }})&={\text{Var}}(Cy)\\&=C{\text{ Var}}(y)C'\\&=\sigma ^{2}CC'\\&=\sigma ^{2}\left((X'X)^{-1}X'+D\right)\left(X(X'X)^{-1}+D'\right)\\&=\sigma ^{2}\left((X'X)^{-1}X'X(X'X)^{-1}+(X'X)^{-1}X'D'+DX(X'X)^{-1}+DD'\right)\\&=\sigma ^{2}(X'X)^{-1}+\sigma ^{2}(X'X)^{-1}(DX)'+\sigma ^{2}DX(X'X)^{-1}+\sigma ^{2}DD'\\&=\sigma ^{2}(X'X)^{-1}+\sigma ^{2}DD'&&DX;=0\\&={\text{Var}}({\widehat {\beta }})+\sigma ^{2}DD'&&\sigma ^{2}(X'X)^{-1}={\text{Var}}({\widehat {\beta }})\end{aligned}}}$

Since $DD'$ is a positive semidefinite matrix, ${\displaystyle {\text{Var}}({\tilde {\beta }})}$ exceeds ${\displaystyle {\text{Var}}({\widehat {\beta }})}$ by a positive semidefinite matrix.

Let ${\displaystyle l^{t}{\tilde {\beta }}}$ be another linear unbiased estimator of ${\displaystyle l^{t}\beta }$:

${\displaystyle {\begin{aligned}{\text{Var}}(l^{t}{\tilde {\beta }})&=l^{t}{\text{Var}}({\tilde {\beta }})l\\&=\sigma ^{2}l^{t}(X'X)^{-1}l+l^{t}DD^{t}l\\&={\text{Var}}(l^{t}{\widehat {\beta }})+(D^{t}l)^{t}(D^{t}l)&&\sigma ^{2}l^{t}(X'X)^{-1}l={\text{Var}}(l^{t}{\widehat {\beta }})\\&={\text{Var}}(l^{t}{\widehat {\beta }})+\|D^{t}l\|\\&\geqslant {\text{Var}}(l^{t}{\widehat {\beta }})\\\end{aligned}}}$

Moreover equality holds if and only if ${\displaystyle D^{t}l=0}$. We calculate

${\displaystyle {\begin{aligned}l^{t}{\tilde {\beta }}&=l^{t}\left(((X'X)^{-1}X'+D)y\right)&&{\text{ from above}}\\&=l^{t}(X'X)^{-1}X'y+l^{t}Dy\\&=l^{t}{\widehat {\beta }}+(D^{t}l)^{t}y\\&=l^{t}{\widehat {\beta }}&&D;^{t}l=0\end{aligned}}}$

This proves that the equality holds if and only if ${\displaystyle l^{t}{\tilde {\beta }}=l^{t}{\widehat {\beta }}}$ which gives the uniqueness of the OLS estimator as a BLUE. Q.E.D.

$\textbf{OLS Assumptions}$

Correct specification. The linear functional form is correctly specified.
Strict exogeneity. The errors in the regression should have conditional mean zero:

${\displaystyle \operatorname {E} [\,\varepsilon \mid X\,]=0.}$

The immediate consequence of the exogeneity assumption is that the errors have mean zero: $E[\varepsilon] = 0$, and that the regressors are uncorrelated with the errors: $E[X^T\varepsilon] = 0$.

The exogeneity assumption is critical for the OLS theory. If it holds then the regressor variables are called $\textbf{exogenous}$. If it doesn't, then those regressors that are correlated with the error term are called $\textbf{endogenous}$, and then the OLS estimates become invalid. In such case $\textit{the method of instrumental variables}$ may be used to carry out inference.
No linear dependence. The regressors in X must all be linearly independent. Mathematically, this means that the matrix $X$ must have full column rank almost surely:

${\displaystyle \Pr \!{\big [}\,\operatorname {rank} (X)=k\,{\big ]}=1.}$

Usually, it is also assumed that the regressors have finite moments up to at least the second moment. Then the matrix $Q_{xx} = E[X^T X / n]$ is finite and positive semi-definite.

When this assumption is violated, the regressors are called linearly dependent or $\textbf{perfectly multicollinear}$. In such case, the value of the regression coefficient $\beta$ cannot be learned, although prediction of $y$ values is still possible for new values of the regressors that lie in the same linearly dependent subspace.
Spherical errors:

${\displaystyle \operatorname {Var} [\,\varepsilon \mid X\,]=\sigma ^{2}I_{n},}$

where $I_n$ is the identity matrix in dimension n, and $σ^2$ is a parameter which determines the variance of each observation. This $σ^2$ is considered a nuisance parameter in the model, although usually it is also estimated. If this assumption is violated, then the OLS estimates are still valid, but no longer efficient.

It is customary to split this assumption into two parts:
1. Homoscedasticity: $E[ \varepsilon_i^2 | X ] = σ^2$, which means that the error term has the same variance $σ^2$ in each observation. When this requirement is violated this is called $\textbf{heteroscedasticity}$, in such case a more efficient estimator would be $\textit{weighted least squares}$. If the errors have infinite variance then the OLS estimates will also have infinite variance (although by the law of large numbers they will nonetheless tend toward the true values so long as the errors have zero mean). In this case, $\textit{robust estimation}$ techniques are recommended.
2. No autocorrelation: the errors are uncorrelated between observations: $E[\varepsilon_i \varepsilon_j | X ] = 0, \forall i \neq j$. This assumption may be violated in the context of time series data, panel data, cluster samples, hierarchical data, repeated measures data, longitudinal data, and other data with dependencies. In such cases $\textit{generalized least squares}$ provides a better alternative than the OLS. Another expression for autocorrelation is $\textbf{serial correlation}$.
Normality. It is sometimes additionally assumed that the errors have normal distribution conditional on the regressors:

${\displaystyle \varepsilon \mid X\sim {\mathcal {N}}(0,\sigma ^{2}I_{n}).}$

This assumption is not needed for the validity of the OLS method, although certain additional finite-sample properties can be established in case when it does (especially in the area of hypotheses testing). Also when the errors are normal, the OLS estimator is equivalent to the $\textbf{maximum likelihood estimator (MLE)}$, and therefore it is asymptotically efficient in the class of all regular estimators. Importantly, the normality assumption applies only to the error terms; contrary to a popular misconception, the response (dependent) variable is not required to be normally distributed.
Independent and identically distributed (iid)

In some applications, especially with cross-sectional data, an additional assumption is imposed — that all observations are independent and identically distributed. This means that all observations are taken from a random sample which makes all the assumptions listed earlier simpler and easier to interpret. Also this framework allows one to state asymptotic results (as the sample size $n \to \infty$), which are understood as a theoretical possibility of fetching new independent observations from the data generating process. The list of assumptions in this case is:
1. iid observations: $(x_i, y_i)$ is independent from, and has the same distribution as, $(x_j, y_j), \forall i \neq j;$
2. no perfect multicollinearity: $Q_{xx} = E[x_ix_i^T]$ is a positive-definite matrix;
3. exogeneity: $E[\varepsilon_i | x_i] = 0;$
4. homoscedasticity: $\textrm{Var}[\varepsilon_i | x_i] = σ^2.$
Time series model

The stochastic process $\{x_i, y_i\}$ is stationary and ergodic; if $\{x_i, y_i\}$ is nonstantionary, OLS results are often spurious unless $\{x_i, y_i\}$ is co-integrating. The regressors are predetermined: $E[x_i \varepsilon_i] = 0, \forall i = 1, \ldots, n;$ The $p \times p$ matrix $Q_{xx} = E[x_ix_i^T]$ is of full rank, and hence positive-definite; $\{x_i \varepsilon_i\}$ is a martingale difference sequence, with a finite matrix of second moments $Q_{xx\varepsilon^2} = E[\varepsilon_i^2 x_ix_i^T].$

CASE 3: AREA BETWEEN THREE ARCS¶

In [42]:

import matplotlib.patches as mpat
import matplotlib.collections as mcol

fig, ax = plt.subplots(figsize=(3,3))
xs = [0, .5, .5,-.5,-.5]  # x of circle centers
ys = [0, .5,-.5, .5,-.5]  # y of circle centers
rs = [1,0.5,0.5,0.5,0.5]  # radius of circles
patches = []
for x, y, r in zip(xs, ys, rs):
    circle = mpat.Circle((x, y), r)
    patches.append(circle)

# patches.append(mpat.Rectangle((1,1),2,3))
p = mcol.PatchCollection(patches, alpha=0.4)
ax.add_collection(p)
ax.set_xlim(0,1)
ax.set_ylim(0,1)
ax.set_aspect('equal', adjustable='box')  # `ax` may be changed to `plt.gca()`

# Below deprecated
# fig1 = plt.figure()
# ax1 = fig1.add_subplot(111, aspect='equal')
# ax1.add_patch(mpat.Circle((0.5,0.5),1,color='b',fill=False))  # remove background

# circle1 = plt.Circle((0.5,0.5),0.5,color='r',fill=False)
# plt.gcf().gca().add_artist(circle1)  # gcf: get current figure, gca: get current axis

# fig1.savefig('rect1.png', dpi=90, bbox_inches='tight')

In [228]:

# 三弧问题的面积解法 by Justyre 20170415
# 首先，通过列两圆的参数方程，可解得交点的圆心角alpha
alpha = 1/2 * np.arcsin(3/4)
alpha, np.degrees(alpha)

Out[228]:

(0.4240310394907405, 24.295188945364572)

In [229]:

# the two values of alpha are in [pi, 3/2 pi], [0, 1/2 pi], resp
a1 = 3/2 * np.pi - alpha
a2 = alpha
np.sin(2*a1), np.sin(2*a2), 3/4

Out[229]:

(0.75000000000000056, 0.75, 0.75)

In [230]:

theta = 1/2 * np.arcsin(-9/16)
theta, np.degrees(theta)

Out[230]:

(-0.29870320832267511, -17.114433163906291)

In [231]:

# the two values of theta are both in [3/2 pi, 2 pi]
t1 = 3/2 * np.pi - theta
t2 = 2 * np.pi + theta
np.sin(2*t1), np.sin(2*t2), -9/16

Out[231]:

(-0.5625, -0.56250000000000067, -0.5625)

In [232]:

2 * np.cos(theta) - 1 - np.cos(alpha), 2 + 2 * np.sin(theta) - 1 - np.sin(alpha)

Out[232]:

(0.0, 5.5511151231257827e-17)

In [233]:

2 * np.cos(t1) - 1 - np.cos(a1), 2 + 2 * np.sin(t1) - 1 - np.sin(a1)

Out[233]:

(3.3306690738754696e-16, -2.2204460492503131e-16)

In [234]:

2 * np.cos(t2) - 1 - np.cos(a2), 2 + 2 * np.sin(t2) - 1 - np.sin(a2)

Out[234]:

(-2.2204460492503131e-16, -6.106226635438361e-16)

In [235]:

x1 = np.cos(t1)
y1 = 1 + np.sin(t1)
x2 = np.cos(t2)
y2 = 1 + np.sin(t2)

x1p = 1/2 * (1 + np.cos(a1))  # should be same as x1
y1p = 1/2 * (1 + np.sin(a1))
x2p = 1/2 * (1 + np.cos(a2))
y2p = 1/2 * (1 + np.sin(a2))

x1, x1p, y1, y1p, x2, x2p, y2, y2p

Out[235]:

(0.29428108611692616,
 0.29428108611692599,
 0.04428108611692616,
 0.044281086116926272,
 0.95571891388307373,
 0.95571891388307384,
 0.70571891388307351,
 0.70571891388307384)

In [236]:

# 扇形面积
angle = 2 * a2 + np.pi / 2  # 圆心角
shan = angle / (2 * np.pi) * np.pi * (1/2 ** 2)  # 扇形面积
shan

Out[236]:

0.3023573007220472

In [237]:

import scipy.integrate as si
# 左边方程
s1 = (y1 - 1/2) / (x1 - 1/2)  # slope
i1 = 1/2 - 1/2 * s1  # intercept
# 右边方程
s2 = (y2 - 1/2) / (x2 - 1/2)  # slope
i2 = 1/2 - 1/2 * s2  # intercept
print(s1, i1, s2, i2)

# 左边下面积：int_x1^0.5 (s1 x + i1) dx
l1area = si.quad(lambda x: s1 * x + i1, x1, 1/2)[0]
# 左边下圆弧下面积
c1area = si.quad(lambda x: 1 - np.sqrt(1-x**2), x1, 1/2)[0]
# 右边下面积
l2area = si.quad(lambda x: s2 * x + i2, 1/2, x2)[0]
# 右边下圆弧下面积
c2area = si.quad(lambda x: 1 - np.sqrt(1-x**2), 1/2, x2)[0]
print(l1area, c1area, l2area, c2area)

# 弧边三角形面积
curtriangle = l1area - c1area + l2area - c2area
curtriangle

2.21525043702 -0.607625218511 0.451416229645 0.274291885177
0.05598445694153691 0.01738977929915228 0.2747344569415368 0.15735309339224912

Out[237]:

0.1559760411916723

In [238]:

# 所求阴影面积
(shan - curtriangle) * 2

Out[238]:

0.29276251906074979

In [239]:

def f(x):
    return x - x/2*np.sqrt(1-x**2) - 1/2*np.arcsin(x)
print(f(1/2) - f(x1), f(x2) - f(1/2))  # 验证此为上述c1,c2积分的有限形式
def f(x):
    return s1*x**2/2 + i1*x
print(f(1/2) - f(x1))  # 验证此为上述l1积分的有限形式

0.0173897792992 0.157353093392
0.0559844569415

CASE 4: ACCOUNTING¶

Ch1 总论¶

In [134]:

# 五类账户的方向：资产+成本=负债+权益+损益（损左益右），AC=DER

Ch2 会计政策和会计估计及其变更¶

In [145]:

# 例2-1：追溯调整法
a = 450e4  # 05购入成本
b = 110e4  # 06购入成本
t = .25  # 所得税率
法定盈余公积比例 = .1
任意盈余公积比例 = .05
普通股数 = 4500e4
a05 = 510e4  # a05年末公允价值
a06 = 510e4
b06 = 130e4
税前差异05 = a05 - a  # 05期末06期初
累积影响数05 = 税前差异05 * (1 - t)  # 05期末06期初
税前差异06 = a06 + b06 - a05 - b
累积影响数06 = 税前差异06 * (1 - t)
税前差异05, 累积影响数05, 税前差异06, 累积影响数06

Out[145]:

(600000.0, 450000.0, 200000.0, 150000.0)

In [165]:

# 对05年事项调整分录

# 调整累积影响数
print('20051231')
print('借：交易性金融资产——公允价值变动\t', 税前差异05)  # 资产+
print(q+'贷：利润分配——未分配利润\t', 累积影响数05)  # 益+
print(q+q+'递延所得税负债\t\t', 税前差异05 * t)  # 负债+
# 调整利润分配
print('借：利润分配——未分配利润\t\t', round(累积影响数05 * (法定盈余公积比例 + 任意盈余公积比例),2))  # 益-
print(q+'贷：盈余公积——法定盈余公积\t', 累积影响数05 * 法定盈余公积比例)  # 益+
print(q+q+'盈余公积——任意盈余公积\t', 累积影响数05 * 任意盈余公积比例)  # 益+

# 06年事项

# 调整累积影响数
print('20061231')
print('借：交易性金融资产——公允价值变动\t', 税前差异06)  # 资产+
print(q+'贷：利润分配——未分配利润\t', 累积影响数06)  # 益+
print(q+q+'递延所得税负债\t\t', 税前差异06 * t)  # 负债+
# 调整利润分配
print('借：利润分配——未分配利润\t\t', round(累积影响数06 * (法定盈余公积比例 + 任意盈余公积比例),2))  # 益-
print(q+'贷：盈余公积——法定盈余公积\t', 累积影响数06 * 法定盈余公积比例)  # 益+
print(q+q+'盈余公积——任意盈余公积\t', 累积影响数06 * 任意盈余公积比例)  # 益+

20051231
借：交易性金融资产——公允价值变动	 600000.0
　　贷：利润分配——未分配利润	 450000.0
　　　　递延所得税负债		 150000.0
借：利润分配——未分配利润		 67500.0
　　贷：盈余公积——法定盈余公积	 45000.0
　　　　盈余公积——任意盈余公积	 22500.0
20061231
借：交易性金融资产——公允价值变动	 200000.0
　　贷：利润分配——未分配利润	 150000.0
　　　　递延所得税负债		 50000.0
借：利润分配——未分配利润		 22500.0
　　贷：盈余公积——法定盈余公积	 15000.0
　　　　盈余公积——任意盈余公积	 7500.0

In [169]:

# 财务报表重述。站在07年初

print('资产负债表的调整')
delta = {'以公允价值计量且其变动计入当期损益的金融资产07年初余额+': 600000.0 + 200000.0, \
         '递延所得税负债+': 150000.0 + 50000.0, \
         '盈余公积+': 67500.0 + 22500.0, \
         '利润分配——未分配利润（即留存收益）07年初余额+': 450000.0 - 67500.0 + 150000.0 - 22500.0
        }
print(delta)

print('利润表的调整')
delta = {'公允价值变动收益06年金额+': 200000.0, \
         '所得税费用06年金额+': 50000.0, \
         '净利润06年金额+': 150000.0, \
         '基本每股收益06年金额+': round(150000.0 / 普通股数, 4)
        }
print(delta)

print('所有者权益变动表的调整')
delta = {'盈余公积06年初金额+': 67500.0, \
         '未分配利润06年初金额+': 450000.0 - 67500.0, \
         '所有者权益合计06年初金额+': 450000.0, \
         '盈余公积06年金额+': 22500.0, \
         '未分配利润06年金额+': 150000.0 - 22500.0, \
         '所有者权益合计06年金额+': 150000.0, \
         '盈余公积07年初金额+': 67500.0 + 22500.0, \
         '未分配利润07年初金额+': 450000.0 - 67500.0 + 150000.0 - 22500.0, \
         '所有者权益合计07年初金额+': 450000.0 + 150000.0
        }
print(delta)

资产负债表的调整
{'以公允价值计量且其变动计入当期损益的金融资产07年初余额+': 800000.0, '递延所得税负债+': 200000.0, '盈余公积+': 90000.0, '利润分配——未分配利润（即留存收益）07年初余额+': 510000.0}
利润表的调整
{'公允价值变动收益06年金额+': 200000.0, '所得税费用06年金额+': 50000.0, '净利润06年金额+': 150000.0, '基本每股收益06年金额+': 0.0033}
所有者权益变动表的调整
{'盈余公积06年初金额+': 67500.0, '未分配利润06年初金额+': 382500.0, '所有者权益合计06年初金额+': 450000.0, '盈余公积06年金额+': 22500.0, '未分配利润06年金额+': 127500.0, '所有者权益合计06年金额+': 150000.0, '盈余公积07年初金额+': 90000.0, '未分配利润07年初金额+': 510000.0, '所有者权益合计07年初金额+': 600000.0}

In [172]:

# 例2-2：未来适用法
期初存货 = 250e4
购入存货实际成本 = 1800e4
期末存货先进先出法 = 450e4
当年销售额 = 2500e4
当年其他费用 = 120e4
t = .25
期末存货后进先出法 = 220e4

print('后进先出法营业成本', 期初存货 + 购入存货实际成本 - 期末存货后进先出法)
print('先进先出法营业成本', 期初存货 + 购入存货实际成本 - 期末存货先进先出法)
print('改用FIFO后，净利润增加', (期末存货先进先出法 - 期末存货后进先出法) * (1-t))

后进先出法营业成本 18300000.0
先进先出法营业成本 16000000.0
改用FIFO后，净利润增加 1725000.0

Ch3 存货¶

In [173]:

# 存货分类：原材料、在产品、半成品、产成品、商品、周转材料
# 存货成本 = 采购 + 加工 + 使存货达到目前场所和状态所发生的其他成本
# 外购存货的成本 = 采购成本 = 购买价款 + 税费 + 运输装卸保险等费用
#   已销售商品的进货费用 → 主营业务成本；未售商品的进货费用 → 期末存货成本；较小的 → 销售费用

# 存货周转的相关分录：
#   借：生产成本（直接构成产品实体的成本）、制造费用（成本类，间接费用）、管理费用、销售费用，贷：原材料、应付职工薪酬、累计折旧、银行存款。
#   借：生产成本，贷：制造费用。（表示未完工产品成本）
#   借：库存商品，贷：生产成本。（表示已完工产品成本）
#   销售时：借：主营业务成本，贷：库存商品。
# 投资者投入存货的成本：协议价优先，不公允的按公允价值。
#   分录：借：库存商品、应交税费——应交增值税（进项税额），贷：实收资本、资本公积——资本溢价。
# 盘盈存货的成本：按重置成本，经待处理财产损溢，管理报批后冲减当期管理费用
# 采购用于广告营销的商品、向客户预付款未取得商品时：预付账款，取得商品后入销售费用

In [2]:

# 发出存货的计量：FIFO，移动加权平均，月末一次加权平均，个别计价。
# 销售存货的分录：借：营业成本（主营业务成本or其他业务成本） + 存货跌价准备，贷：库存商品or原材料。
# 企业周转材料的领用分录：借：成本费用，贷：周转材料——包装物or低值易耗品。

# 待处理财产损溢（损益类）：借方为盘亏，贷方为盘盈。期末处理后，该科目无余额
# 存货盘盈的分录：
#   借：原材料or库存商品，贷：待处理财产损溢。
#   报批后：借：待处理财产损溢，贷：管理费用。
# 存货盘亏的分录：
#   借：待处理财产损溢，贷：原材料or库存商品 + （购进存货特有）应交税金──应交增值税(进项税额转出)。
#   借：管理费用（源于计量收发差错和管理不善等）or其他应收款（源于过失人赔偿、保险赔偿）or营业外支出（源于自然灾害），贷：待处理财产损溢。

Ch4 固定资产¶

In [11]:

# 例4-1
# 购入需要安装的固定资产

print('20090201')
# 支付设备价款、增值税、运输费
print('借：在建工程——某设备\t\t', 50e4 + 2500 * 0.93)  # 运输费应计入在建工程成本。deprecated: 运输费价内增值税（现行一般都是价外了）
print(q+'应交税费——应交增值税（进项税额）', 8.5e4 + 2500 * 0.07)  # 假定运输费增值税率7%
print(q+'贷：银行存款\t\t\t', 50e4 + 8.5e4 + 2500)
# 领用公司原材料、支付安装工人工资
print('借：在建工程——某设备\t\t', 3e4 + 4900)
print(q+'贷：原材料\t\t\t', 3e4)  # 购进原材料时的增值税在“售出”原材料时才抵扣，“使用”原材料时用不到
print(q+q+'应付职工薪酬\t\t', 4900.0)
# 设备安装完毕，到达预定可使用状态
print('借：固定资产——某设备\t\t', 50e4 + 2500 * 0.93 + 3e4 + 4900)
print(q+'贷：在建工程——某设备\t\t', 50e4 + 2500 * 0.93 + 3e4 + 4900)

20090201
借：在建工程——某设备		 502325.0
　　应交税费——应交增值税（进项税额） 85175.0
　　贷：银行存款			 587500.0
借：在建工程——某设备		 34900.0
　　贷：原材料			 30000.0
　　　　应付职工薪酬		 4900.0
借：固定资产——某设备		 537225.0
　　贷：在建工程——某设备		 537225.0

In [18]:

# 例4-2
# 超出正常信用条件下购买固定资产，具有融资性质。涉及未确认融资费用的分摊
总价 = 900e4  # 不考虑增值税
期数 = 5 * 2
折现率 = 0.1  # 每期折现率

年金 = 总价 / 期数  # 每期付款
折现系数PA = round(np.pv(rate=折现率, nper=期数, pmt=-1, fv=0, when='end'), 4)  # numpy.pv(rate, nper, pmt, fv=0.0, when='end')
现值 = round(年金 * 折现系数PA, 2)
年金, 折现系数PA, 现值

Out[18]:

(900000.0, 6.1445999999999996, 5530140.0)

In [13]:

# 购买设备

print('20170101')
print('借：在建工程——某设备\t', 现值)  # 总本金（资产+）
在建工程 = 现值  # 供结转用
print(q+'未确认融资费用\t', 总价 - 现值)  # 总利息（损+）
print(q+'贷：长期应付款——乙公司\t', 总价)  # 总本息和（负债+）

# 20170101-20171231是设备安装期间，未确认融资费用的分摊额可计入固定资产成本

print('20170630')
剩余本金 = 现值  # 本次归还之前的剩余本金
本次融资费用 = round(剩余本金 * 折现率, 2)  # 未确认融资费用（即利息）在本次变成已确认的部分
print('借：在建工程——某设备\t', 本次融资费用)  # 资产+
在建工程 += 本次融资费用  # 供结转用
print(q+'贷：未确认融资费用\t', 本次融资费用)  # 损-
print('借：长期应付款——乙公司\t', 年金)  # 负债-：年金同时归还了本金和利息
print(q+'贷：银行存款\t\t', 年金)  # 资产-

print('20171231')
剩余本金 -= 年金 - 本次融资费用  # 本次归还之前的剩余本金，这里的“本次融资费用”是上次发生的
本次融资费用 = round(剩余本金 * 折现率, 2)  # 未确认融资费用（即利息）在本次变成已确认的部分
print('借：在建工程——某设备\t', 本次融资费用)  # 资产+
在建工程 += 本次融资费用  # 供结转用
print(q+'贷：未确认融资费用\t', 本次融资费用)  # 损-
print('借：长期应付款——乙公司\t', 年金)  # 负债-：年金同时归还了本金和利息
print(q+'贷：银行存款\t\t', 年金)  # 资产-
# 本次特有分录：安装费，计入固定资产成本
安装费 = 398530.60
print('借：在建工程——某设备\t', 安装费)
在建工程 += 安装费  # 供结转用
print(q+'贷：银行存款\t\t', 安装费)
# 本次特有分录：已达到预定可使用状态，结转在建工程
print('借：固定资产——某设备\t', 在建工程)  # 资产+
print(q+'贷：在建工程——某设备\t', 在建工程)  # 资产-

# 20180101-20211231期间，该设备已达到预定可使用状态，未确认融资费用的分摊额应计入当期损益（而不再符合资本化条件）

ys = range(2018,2022)  # years: range(start,end,step)
ds = ['0630','1231']  # dates
for y, d in it.product(ys, ds):  # flat is better than nested
    print(str(y)+d)
    剩余本金 -= 年金 - 本次融资费用  # 本次归还之前的剩余本金，这里的“本次融资费用”是上次发生的
    print(剩余本金)
    本次融资费用 = round(剩余本金 * 折现率, 2)  # 未确认融资费用（即利息）在本次变成已确认的部分
    if str(y)+d == '20211231':  # 由于折现系数PA只保留了4位小数，末期的融资费用需要进行尾数调整
        本次融资费用 = 年金 - 剩余本金  # 年金同时归还了本金和利息，末期应将本金结清，所余部分即是剩余利息 
    print('借：财务费用\t\t', 本次融资费用)  # 损+
    print(q+'贷：未确认融资费用\t', 本次融资费用)  # 损-
    print('借：长期应付款——乙公司\t', 年金)  # 负债-
    print(q+'贷：银行存款\t\t', 年金)  # 资产-

20170101
借：在建工程——某设备	 5530140.0
　　未确认融资费用	 3469860.0
　　贷：长期应付款——乙公司	 9000000.0
20170630
借：在建工程——某设备	 553014.0
　　贷：未确认融资费用	 553014.0
借：长期应付款——乙公司	 900000.0
　　贷：银行存款		 900000.0
20171231
借：在建工程——某设备	 518315.4
　　贷：未确认融资费用	 518315.4
借：长期应付款——乙公司	 900000.0
　　贷：银行存款		 900000.0
借：在建工程——某设备	 398530.6
　　贷：银行存款		 398530.6
借：固定资产——某设备	 7000000.0
　　贷：在建工程——某设备	 7000000.0
20180630
4801469.4
借：财务费用		 480146.94
　　贷：未确认融资费用	 480146.94
借：长期应付款——乙公司	 900000.0
　　贷：银行存款		 900000.0
20181231
4381616.34
借：财务费用		 438161.63
　　贷：未确认融资费用	 438161.63
借：长期应付款——乙公司	 900000.0
　　贷：银行存款		 900000.0
20190630
3919777.97
借：财务费用		 391977.8
　　贷：未确认融资费用	 391977.8
借：长期应付款——乙公司	 900000.0
　　贷：银行存款		 900000.0
20191231
3411755.77
借：财务费用		 341175.58
　　贷：未确认融资费用	 341175.58
借：长期应付款——乙公司	 900000.0
　　贷：银行存款		 900000.0
20200630
2852931.35
借：财务费用		 285293.14
　　贷：未确认融资费用	 285293.14
借：长期应付款——乙公司	 900000.0
　　贷：银行存款		 900000.0
20201231
2238224.49
借：财务费用		 223822.45
　　贷：未确认融资费用	 223822.45
借：长期应付款——乙公司	 900000.0
　　贷：银行存款		 900000.0
20210630
1562046.94
借：财务费用		 156204.69
　　贷：未确认融资费用	 156204.69
借：长期应付款——乙公司	 900000.0
　　贷：银行存款		 900000.0
20211231
818251.63
借：财务费用		 81748.37
　　贷：未确认融资费用	 81748.37
借：长期应付款——乙公司	 900000.0
　　贷：银行存款		 900000.0

In [14]:

# 高危行业提取安全生产费的分录：借：相关产品成本，贷：专项储备——安全生产费。
# 高危行业，使用提取的安全生产费形成固定资产的分录：（专项储备为权益类）
#   借：在建工程，贷：各项支出。（达到预定可使用状态时）借：固定资产a，贷：在建工程a。借：专项储备——安全生产费a，贷：累计折旧a。

In [15]:

# 出包方式建造固定资产的成本：建筑工程支出、安装工程支出、待摊支出。
# 分录：
#  施工期间：借：在建工程——建筑工程——某工程、在建工程——安装工程——某工程，贷：银行存款、预付账款
#  工程完成时，补付工程款：借：在建工程，贷：银行存款
#  将需安装设备运抵现场安装时：借：在建工程——在安装设备——某设备，贷：工程物资——某设备
#  为建造固定资产发生的待摊支出：借：在建工程——待摊支出，贷：银行存款、应付职工薪酬
# 在建工程达到预定可使用状态时，先将待摊支出按各工程的(建筑工程+安装工程+在安装设备)进行分配；然后如下分录：
#  借：固定资产，贷：在建工程——（建筑工程or安装工程or待摊支出）

# 盘盈固定资产：视为前期差错，经以前年度损益调整核算

In [31]:

# 例4-3
# 存在弃置费用的固定资产（仅限特殊行业的特定固定资产）
建造成本 = 250e8  # 元
预计使用寿命 = 40  # 年
弃置费用 = 25e8  # 将该值折现至今，即为预计负债
折现率 = 0.1
PF = np.pv(rate=折现率, nper=预计使用寿命, pmt=0, fv=-1, when='end')  # numpy.pv(rate, nper, pmt, fv=0.0, when='end')
PF

Out[31]:

0.022094928152179921

In [32]:

# 启用时
print('20170101')
print('借：固定资产——某核反应堆\t\t\t', 建造成本 + 弃置费用 * PF)
print(q+'贷：在建工程——某核反应堆\t\t', 建造成本)
print(q+q+'预计负债——某核反应堆——弃置费用\t', 弃置费用 * PF)
剩余本金 = 弃置费用 * PF

# 此后每一年
ys = range(2017,2057)  # years: range(start,end,step)
ds = ['1231']  # dates
for y, d in it.product(ys, ds):  # flat is better than nested
    print(str(y)+d)
    本次财务费用 = 剩余本金 * 折现率
    print('借：财务费用\t\t\t\t', 本次财务费用)
    print(q+'贷：预计负债——某核反应堆——弃置费用\t', 本次财务费用)
    print('已完成预计负债：', 剩余本金 + 本次财务费用)  # 即从启用时至今贷方预计负债的累计值
    剩余本金 *= 1 + 折现率

20170101
借：固定资产——某核反应堆			 25055237320.4
　　贷：在建工程——某核反应堆		 25000000000.0
　　　　预计负债——某核反应堆——弃置费用	 55237320.3804
20171231
借：财务费用				 5523732.03804
　　贷：预计负债——某核反应堆——弃置费用	 5523732.03804
已完成预计负债： 60761052.4185
20181231
借：财务费用				 6076105.24185
　　贷：预计负债——某核反应堆——弃置费用	 6076105.24185
已完成预计负债： 66837157.6603
20191231
借：财务费用				 6683715.76603
　　贷：预计负债——某核反应堆——弃置费用	 6683715.76603
已完成预计负债： 73520873.4264
20201231
借：财务费用				 7352087.34264
　　贷：预计负债——某核反应堆——弃置费用	 7352087.34264
已完成预计负债： 80872960.769
20211231
借：财务费用				 8087296.0769
　　贷：预计负债——某核反应堆——弃置费用	 8087296.0769
已完成预计负债： 88960256.8459
20221231
借：财务费用				 8896025.68459
　　贷：预计负债——某核反应堆——弃置费用	 8896025.68459
已完成预计负债： 97856282.5305
20231231
借：财务费用				 9785628.25305
　　贷：预计负债——某核反应堆——弃置费用	 9785628.25305
已完成预计负债： 107641910.784
20241231
借：财务费用				 10764191.0784
　　贷：预计负债——某核反应堆——弃置费用	 10764191.0784
已完成预计负债： 118406101.862
20251231
借：财务费用				 11840610.1862
　　贷：预计负债——某核反应堆——弃置费用	 11840610.1862
已完成预计负债： 130246712.048
20261231
借：财务费用				 13024671.2048
　　贷：预计负债——某核反应堆——弃置费用	 13024671.2048
已完成预计负债： 143271383.253
20271231
借：财务费用				 14327138.3253
　　贷：预计负债——某核反应堆——弃置费用	 14327138.3253
已完成预计负债： 157598521.578
20281231
借：财务费用				 15759852.1578
　　贷：预计负债——某核反应堆——弃置费用	 15759852.1578
已完成预计负债： 173358373.736
20291231
借：财务费用				 17335837.3736
　　贷：预计负债——某核反应堆——弃置费用	 17335837.3736
已完成预计负债： 190694211.11
20301231
借：财务费用				 19069421.111
　　贷：预计负债——某核反应堆——弃置费用	 19069421.111
已完成预计负债： 209763632.221
20311231
借：财务费用				 20976363.2221
　　贷：预计负债——某核反应堆——弃置费用	 20976363.2221
已完成预计负债： 230739995.443
20321231
借：财务费用				 23073999.5443
　　贷：预计负债——某核反应堆——弃置费用	 23073999.5443
已完成预计负债： 253813994.987
20331231
借：财务费用				 25381399.4987
　　贷：预计负债——某核反应堆——弃置费用	 25381399.4987
已完成预计负债： 279195394.486
20341231
借：财务费用				 27919539.4486
　　贷：预计负债——某核反应堆——弃置费用	 27919539.4486
已完成预计负债： 307114933.934
20351231
借：财务费用				 30711493.3934
　　贷：预计负债——某核反应堆——弃置费用	 30711493.3934
已完成预计负债： 337826427.328
20361231
借：财务费用				 33782642.7328
　　贷：预计负债——某核反应堆——弃置费用	 33782642.7328
已完成预计负债： 371609070.06
20371231
借：财务费用				 37160907.006
　　贷：预计负债——某核反应堆——弃置费用	 37160907.006
已完成预计负债： 408769977.066
20381231
借：财务费用				 40876997.7066
　　贷：预计负债——某核反应堆——弃置费用	 40876997.7066
已完成预计负债： 449646974.773
20391231
借：财务费用				 44964697.4773
　　贷：预计负债——某核反应堆——弃置费用	 44964697.4773
已完成预计负债： 494611672.25
20401231
借：财务费用				 49461167.225
　　贷：预计负债——某核反应堆——弃置费用	 49461167.225
已完成预计负债： 544072839.475
20411231
借：财务费用				 54407283.9475
　　贷：预计负债——某核反应堆——弃置费用	 54407283.9475
已完成预计负债： 598480123.423
20421231
借：财务费用				 59848012.3423
　　贷：预计负债——某核反应堆——弃置费用	 59848012.3423
已完成预计负债： 658328135.765
20431231
借：财务费用				 65832813.5765
　　贷：预计负债——某核反应堆——弃置费用	 65832813.5765
已完成预计负债： 724160949.342
20441231
借：财务费用				 72416094.9342
　　贷：预计负债——某核反应堆——弃置费用	 72416094.9342
已完成预计负债： 796577044.276
20451231
借：财务费用				 79657704.4276
　　贷：预计负债——某核反应堆——弃置费用	 79657704.4276
已完成预计负债： 876234748.703
20461231
借：财务费用				 87623474.8703
　　贷：预计负债——某核反应堆——弃置费用	 87623474.8703
已完成预计负债： 963858223.574
20471231
借：财务费用				 96385822.3574
　　贷：预计负债——某核反应堆——弃置费用	 96385822.3574
已完成预计负债： 1060244045.93
20481231
借：财务费用				 106024404.593
　　贷：预计负债——某核反应堆——弃置费用	 106024404.593
已完成预计负债： 1166268450.52
20491231
借：财务费用				 116626845.052
　　贷：预计负债——某核反应堆——弃置费用	 116626845.052
已完成预计负债： 1282895295.58
20501231
借：财务费用				 128289529.558
　　贷：预计负债——某核反应堆——弃置费用	 128289529.558
已完成预计负债： 1411184825.13
20511231
借：财务费用				 141118482.513
　　贷：预计负债——某核反应堆——弃置费用	 141118482.513
已完成预计负债： 1552303307.65
20521231
借：财务费用				 155230330.765
　　贷：预计负债——某核反应堆——弃置费用	 155230330.765
已完成预计负债： 1707533638.41
20531231
借：财务费用				 170753363.841
　　贷：预计负债——某核反应堆——弃置费用	 170753363.841
已完成预计负债： 1878287002.25
20541231
借：财务费用				 187828700.225
　　贷：预计负债——某核反应堆——弃置费用	 187828700.225
已完成预计负债： 2066115702.48
20551231
借：财务费用				 206611570.248
　　贷：预计负债——某核反应堆——弃置费用	 206611570.248
已完成预计负债： 2272727272.73
20561231
借：财务费用				 227272727.273
　　贷：预计负债——某核反应堆——弃置费用	 227272727.273
已完成预计负债： 2500000000.0

In [36]:

# 固定资产折旧：(]。当月增加的当月不计提，当月减少的当月仍计提。比较：无形资产摊销：[)。【折开摊闭】
# 双倍余额递减法不考虑预计净残值，因而在折旧到期前2年时要改为直线法（考虑净残值了）。
# 例4-4：双倍余额递减法
原价 = 120
预计使用寿命 = 5
预计净残值率 = 0.04
应提折旧01 = 原价 * 2 / 预计使用寿命  # 第一年
应提折旧02 = (原价 - 应提折旧01) * 2 / 预计使用寿命
应提折旧03 = (原价 - 应提折旧01 - 应提折旧02) * 2 / 预计使用寿命
# 末两年改用直线法
应提折旧04 = (原价 * (1 - 预计净残值率) - 应提折旧01 - 应提折旧02 - 应提折旧03) / 2
应提折旧05 = 应提折旧04
应提折旧01, 应提折旧02, 应提折旧03, 应提折旧04, 应提折旧05

Out[36]:

(48.0, 28.8, 17.28, 10.559999999999995, 10.559999999999995)

In [42]:

# 例4-5：年数总和法
应提折旧 = []
for y in range(5):
    应提折旧 += [round(原价 * (1 - 预计净残值率) * (5 - y) / sum(range(1,6)), 2)]
应提折旧

Out[42]:

[38.4, 30.72, 23.04, 15.36, 7.68]

In [46]:

# 例4-6：折旧的分录
print('20170101')
print('借：制造费用——第一生产车间\t', 7.6 + 9)
print(q+'管理费用\t\t', 13 + 4.8)
print(q+'销售费用\t\t', 6.4 + 5.26)
print(q+'贷：累计折旧\t\t', 7.6 + 9 + 13 + 4.8 + 6.4 + 5.26)

20170101
借：制造费用——第一生产车间	 16.6
　　管理费用		 17.8
　　销售费用		 11.66
　　贷：累计折旧		 46.059999999999995

In [53]:

# 例4-7：固定资产后续支出

# 20171230建成生产线
生产线建造成本 = 113.6e4
预计净残值率 = 0.03
预计使用寿命 = 6
应提折旧 = round(生产线建造成本 * (1 - 预计净残值率) / 预计使用寿命, 2)

# 2018,2019两年正常折旧
print('20181231')
print('借：制造费用\t\t', 应提折旧)
print(q+'贷：累计折旧\t\t', 应提折旧)
print('20191231')
print('借：制造费用\t\t', 应提折旧)
print(q+'贷：累计折旧\t\t', 应提折旧)

# 20200101固定资产转入改扩建
print('20200101')
print('借：在建工程——某生产线\t', 生产线建造成本 - 应提折旧 * 2)
print(q+'累计折旧\t\t', 应提折旧 * 2)
print(q+'贷：固定资产——某生产线\t', 生产线建造成本)

# 20200101-0331发生改扩建支出
改扩建支出 = 537800
print('20200331')
print('借：在建工程——某生产线\t', 改扩建支出)
print(q+'贷：银行存款\t\t', 改扩建支出)
# 20200331达到预定可使用状态
新入账价值 = 生产线建造成本 - 应提折旧 * 2 + 改扩建支出
print('借：固定资产——某生产线\t', 新入账价值)
print(q+'贷：在建工程——某生产线\t', 新入账价值)

# 20201231改扩建后首次折旧
新净残值率 = 0.03
新使用寿命 = 10  # 但这10年已经过了2.25年
应提折旧 = round(新入账价值 * (1 - 新净残值率) / (新使用寿命 - 2.25), 2)
print('20201231')
print('借：制造费用\t\t', 应提折旧 * .75)  # 2020年只剩三个季度了
print(q+'贷：累计折旧\t\t', 应提折旧 * .75)

20181231
借：制造费用		 183653.33
　　贷：累计折旧		 183653.33
20191231
借：制造费用		 183653.33
　　贷：累计折旧		 183653.33
20200101
借：在建工程——某生产线	 768693.3400000001
　　累计折旧		 367306.66
　　贷：固定资产——某生产线	 1136000.0
20200331
借：在建工程——某生产线	 537800
　　贷：银行存款		 537800
借：固定资产——某生产线	 1306493.34
　　贷：在建工程——某生产线	 1306493.34
20201231
借：制造费用		 122641.79250000001
　　贷：累计折旧		 122641.79250000001

In [58]:

# 例4-8：更换固定资产的一部分
原飞机价 = 8000e4  # 含发动机
原发动机价 = 500e4
新发动机价 = 700e4
安装费 = 5.1e4
年折旧率 = .03
已折旧年数 = 8

# 20090101转入在建工程
print('20090101')
原飞机折旧额 = 原飞机价 * 年折旧率 * 已折旧年数
print('借：在建工程——某飞机\t', 原飞机价 - 原飞机折旧额)
print(q+'累计折旧\t\t', 原飞机折旧额)
print(q+'贷：固定资产——某飞机\t', 原飞机价)
# 安装新发动机
print('借：在建工程——某飞机\t', 新发动机价 + 安装费)
print(q+'贷：工程物资——某发动机\t', 新发动机价)
print(q+q+'银行存款\t\t', 安装费)
# 终止确认原发动机账面价值，假定报废处理且无残值
原发动机账面价值 = 原发动机价 * (1 - 年折旧率 * 已折旧年数)  # 注意对原发动机也应进行折旧！
print('借：营业外支出\t\t', 原发动机账面价值)
print(q+'贷：在建工程——某飞机\t', 原发动机账面价值)
# 安装完毕，投入使用
新飞机价 = 原飞机价 - 原飞机折旧额 + 新发动机价 + 安装费 - 原发动机账面价值
print('借：固定资产——某飞机\t', 新飞机价)
print(q+'贷：在建工程——某飞机\t', 新飞机价)

20090101
借：在建工程——某飞机	 60800000.0
　　累计折旧		 19200000.0
　　贷：固定资产——某飞机	 80000000.0
借：在建工程——某飞机	 7051000.0
　　贷：工程物资——某发动机	 7000000.0
　　　　银行存款		 51000.0
借：营业外支出		 3800000.0
　　贷：在建工程——某飞机	 3800000.0
借：固定资产——某飞机	 64051000.0
　　贷：在建工程——某飞机	 64051000.0

In [59]:

# 固定资产处置的分录
# 固定资产转入清理：借：固定资产清理、累计折旧、固定资产减值准备，贷：固定资产。
# 发生清理费用：借：固定资产清理，贷：银行存款、应交税费。
# 出售收入及残料处理：借：银行存款、原材料，贷：固定资产清理、应交税费。
# 保险赔偿处理：借：其他应收款、银行存款，贷：固定资产清理。
# 清理净损失处理：借：营业外支出——处置非流动资产损失、（如自然灾害：营业外支出——非正常损失），贷：固定资产清理。
# 清理净收益处理：借：固定资产清理，贷：营业外收入。

# 持有待售固定资产的三条件：企业已有处置决议、已签不可撤销转让协议、转让将于一年内完成
# 将固定资产划为持有待售，且公允价值-处置费用<原账面价值的分录：借：资产减值损失、(公允价值-处置费用)，贷：原固定资产净值
# 持有待售的固定资产不计提折旧，按min(固定资产净值, 公允价值-处置费用)计量
# 将持有待售划回固定资产：min(划为持有待售之前的净值-假定未划应有之折旧摊销减值, 可收回金额)

# 固定资产盘盈分录：视为前期差错。借：固定资产（按重置成本确定），贷：以前年度损益调整。
#   报批后：借：以前年度损益调整，贷：应交税费——应交所得税、盈余公积、利润分配——未分配利润
# 固定资产盘亏分录：借：待处理财产损溢——待处理固定资产损溢、累计折旧、固定资产减值准备，贷：固定资产。
#   报批后：借：其他应收款、营业外支出——盘亏损失，贷：待处理财产损溢。

Ch5 无形资产¶

In [60]:

# 例5-1：购入专利权
无形资产成本 = 300 + 1 + 5  # 含相关税费及有关专业服务费
print('20170101')
print('借：无形资产——专利权\t', 无形资产成本)
print(q+'贷：银行存款\t\t', 无形资产成本)

20170101
借：无形资产——专利权	 306
　　贷：银行存款		 306

In [2]:

# 例5-2：购入的无形资产超出正常信用条件
年金 = 200
期限 = 5  # 每年末付款
折现率 = 0.05
PA = np.pv(rate=折现率, nper=期限, pmt=-1, fv=0, when='end')
总价 = 年金 * 期限

# 购入时
print('20150108')
print('借：无形资产——商标权\t', 年金 * PA)
print(q+'未确认融资费用\t', 总价 - 年金 * PA)
print(q+'贷：长期应付款\t', 总价)
剩余本金 = 年金 * PA

# 此后每一年
ys = range(2015,2020)  # years: range(start,end,step)
ds = ['1231']  # dates
for y, d in it.product(ys, ds):  # flat is better than nested
    print(str(y)+d)
    本次财务费用 = 剩余本金 * 折现率
    print('借：长期应付款\t\t', 年金)
    print(q+'贷：银行存款\t\t', 年金)
    print('借：财务费用\t\t', 本次财务费用)
    print(q+'贷：未确认融资费用\t', 本次财务费用)
    剩余本金 -= 年金 - 本次财务费用

20150108
借：无形资产——商标权	 865.895334126
　　未确认融资费用	 134.104665874
　　贷：长期应付款	 1000
20151231
借：长期应付款		 200
　　贷：银行存款		 200
借：财务费用		 43.2947667063
　　贷：未确认融资费用	 43.2947667063
20161231
借：长期应付款		 200
　　贷：银行存款		 200
借：财务费用		 35.4595050416
　　贷：未确认融资费用	 35.4595050416
20171231
借：长期应付款		 200
　　贷：银行存款		 200
借：财务费用		 27.2324802937
　　贷：未确认融资费用	 27.2324802937
20181231
借：长期应付款		 200
　　贷：银行存款		 200
借：财务费用		 18.5941043084
　　贷：未确认融资费用	 18.5941043084
20191231
借：长期应付款		 200
　　贷：银行存款		 200
借：财务费用		 9.52380952381
　　贷：未确认融资费用	 9.52380952381

In [70]:

# 例5-3：投资者投入的无形资产
商标权价格 = 500
相关税费 = 2
商标权成本 = 商标权价格 + 相关税费
print('20170101')
print('借：无形资产——商标权\t', 商标权成本)
print(q+'贷：实收资本（或股本）\t', 商标权价格)
print(q+q+'银行存款\t\t', 相关税费)

20170101
借：无形资产——商标权	 502
　　贷：实收资本（或股本）	 500
　　　　银行存款		 2

In [77]:

# 例5-4：土地使用权
土地使用权价格 = 8000
材料支出 = 12000
工资费用 = 8000
其他相关费用 = 10000
土地使用年限 = 50
厂房使用年限 = 25  # 假定两者均无净残值，不考虑税费

print('20170101')
# 支付转让价款
print('借：无形资产——土地所有权\t', 土地使用权价格)
print(q+'贷：银行存款\t\t', 土地使用权价格)
# 在土地上自行建造厂房
print('借：在建工程\t\t', 材料支出 + 工资费用 + 其他相关费用)
print(q+'贷：工程物资\t\t', 材料支出)  # 注意：进在建工程的材料费，入工程物资；否则入原材料
print(q+q+'应付职工薪酬\t', 工资费用)
print(q+q+'银行存款\t\t', 其他相关费用)
# 厂房达到预定可使用状态
厂房价值 = 材料支出 + 工资费用 + 其他相关费用
print('借：固定资产\t\t', 厂房价值)
print(q+'贷：在建工程\t\t', 厂房价值)

# 此后每一年，摊销土地使用权，折旧厂房
print('20171231')
print('借：制造费用（土地摊销）\t', 土地使用权价格 / 土地使用年限)  # 制造费用属于成本类
print('借：制造费用（厂房折旧）\t', 厂房价值 / 厂房使用年限)
print(q+'贷：累计摊销\t\t', 土地使用权价格 / 土地使用年限)
print(q+q+'累计折旧\t\t', 厂房价值 / 厂房使用年限)

20170101
借：无形资产——土地所有权	 8000
　　贷：银行存款		 8000
借：在建工程		 30000
　　贷：工程物资		 12000
　　　　应付职工薪酬	 8000
　　　　银行存款		 10000
借：固定资产		 30000
　　贷：在建工程		 30000
20171231
借：制造费用（土地摊销）	 160.0
借：制造费用（厂房折旧）	 1200.0
　　贷：累计摊销		 160.0
　　　　累计折旧		 1200.0

In [3]:

# 企业合并取得的无形资产：
#   同控吸收合并：按账面；同控控股合并：按账面。
#   非同控合并：公允价值。

# 内部研发：研究阶段全部费用化（管理费用）；开发阶段某时点达到资本化条件，此后直至无形资产达到预定用途前发生的支出总和可资本化；其后入管理费用。
# 分录：借：研发支出——（费用化支出or资本化支出），贷：原材料、银行存款、应付职工薪酬。
# 研发项目达到预定用途时：借：无形资产，贷：研发支出——资本化支出。借：管理费用，贷：研发支出——费用化支出。

In [7]:

# 例5-5
材料费 = 5000
人工工资 = 1000
其他费用 = 4000
资本化支出 = 6000
费用化支出 = 材料费 + 人工工资 + 其他费用 - 资本化支出

# 发生研发支出
print('20170101')
print('借：研发支出——费用化支出\t', 费用化支出)
print(q+'研发支出——资本化支出\t', 资本化支出)
print(q+'贷：原材料\t\t', 材料费)  # 注意：进在建工程的材料费，入工程物资；否则入原材料
print(q+q+'应付职工薪酬\t', 人工工资)
print(q+q+'银行存款\t\t', 其他费用)

# 达到预定用途
print('20171231')
print('借：管理费用\t\t', 费用化支出)
print(q+'无形资产\t\t', 资本化支出)
print(q+'贷：研发支出——费用化支出\t', 费用化支出)
print(q+q+'研发支出——资本化支出\t', 资本化支出)

20170101
借：研发支出——费用化支出	 4000
　　研发支出——资本化支出	 6000
　　贷：原材料		 5000
　　　　应付职工薪酬	 1000
　　　　银行存款		 4000
20171231
借：管理费用		 4000
　　无形资产		 6000
　　贷：研发支出——费用化支出	 4000
　　　　研发支出——资本化支出	 6000

In [12]:

# 例5-6：无形资产摊销
技术价格 = 5000  # 用于产品生产
技术使用寿命 = 10
商标权价格 = 3000
商标权使用寿命 = 15

# 取得无形资产
print('20160101')
print('借：无形资产——非专利技术\t', 技术价格)
print(q+'无形资产——商标权\t', 商标权价格)
print(q+'贷：银行存款\t\t', 技术价格 + 商标权价格)

# 正常摊销2年
print('20161231, 20171231')
print('借：制造费用——非专利技术\t', 技术价格 / 技术使用寿命)
print(q+'管理费用——商标权\t', 商标权价格 / 商标权使用寿命)
print(q+'贷：累计摊销\t\t', 技术价格 / 技术使用寿命 + 商标权价格 / 商标权使用寿命)

# 变更非专利技术剩余摊销年限为4年
技术剩余寿命 = 4
技术年摊销 = (技术价格 - 技术价格 / 技术使用寿命 * 2) / 4
print('20181231')
print('借：制造费用——非专利技术\t', 技术年摊销)
print(q+'贷：累计摊销\t\t', 技术年摊销)

20160101
借：无形资产——非专利技术	 5000
　　无形资产——商标权	 3000
　　贷：银行存款		 8000
20161231, 20171231
借：制造费用——非专利技术	 500.0
　　管理费用——商标权	 200.0
　　贷：累计摊销		 700.0
20181231
借：制造费用——非专利技术	 1000.0
　　贷：累计摊销		 1000.0

In [14]:

# 例5-7：使用寿命不确定的无形资产，持有期内不需摊销，但应每年减值测试
商标价格 = 6000
使用寿命 = 5

# 购入商标
print('20160101')
print('借：无形资产——商标权\t', 商标价格)
print(q+'贷：银行存款\t\t', 商标价格)

# 20171231进行减值测试，发生减值
商标公允价值 = 4000
print('20171231')
print('借：资产减值损失\t', 商标价格 - 商标公允价值)
print(q+'贷：无形资产减值准备——商标权\t', 商标价格 - 商标公允价值)

20160101
借：无形资产——商标权	 6000
　　贷：银行存款		 6000
20171231
借：资产减值损失	 2000
　　贷：无形资产减值准备——商标权	 2000

In [27]:

# 出售无形资产：借：银行存款、累计摊销、无形资产减值准备、营业外收入——处置非流动资产利得，贷：应交税费、无形资产、营业外支出——处置……损失。
# 例5-8
专利价格 = 1000
已摊销 = 500
已计提减值准备 = 20
出售收入 = 600
应交税费 = 36

print('20170101（情形A）')
print('借：银行存款\t\t', 出售收入)
print(q+'累计摊销\t\t', 已摊销)
print(q+'无形资产减值准备\t', 已计提减值准备)
print(q+'贷：无形资产\t\t\t\t', 专利价格)
print(q+q+'应交税费\t\t\t\t', 应交税费)
print(q+q+'营业外收入——处置非流动资产利得\t', 出售收入 + 已摊销 + 已计提减值准备 - 专利价格 - 应交税费)

# 另一种情况
出售收入 = 400
应交税费 = 24
print('20170101（情形B）')
print('借：银行存款\t\t\t', 出售收入)
print(q+'累计摊销\t\t\t', 已摊销)
print(q+'无形资产减值准备\t\t', 已计提减值准备)
print(q+'营业外支出——处置非流动资产损失\t', 专利价格 + 应交税费 -出售收入 - 已摊销 - 已计提减值准备)
print(q+'贷：无形资产\t\t\t\t', 专利价格)
print(q+q+'应交税费\t\t\t\t', 应交税费)

20170101（情形A）
借：银行存款		 600
　　累计摊销		 500
　　无形资产减值准备	 20
　　贷：无形资产				 1000
　　　　应交税费				 36
　　　　营业外收入——处置非流动资产利得	 84
20170101（情形B）
借：银行存款			 400
　　累计摊销			 500
　　无形资产减值准备		 20
　　营业外支出——处置非流动资产损失	 104
　　贷：无形资产				 1000
　　　　应交税费				 24

In [30]:

# 例5-9：出租无形资产
专利价格 = 500
摊销期限 = 10
专利技术使用费 = 100  # 10万元每件销售商品，销售了10件

# 取得该专利技术使用费
print('20170101')
print('借：银行存款\t\t', 专利技术使用费)
print(q+'贷：其他业务收入\t', 专利技术使用费)

# 摊销出租的无形资产
print('20171231')
print('借：其他业务成本\t\t', 专利价格 / 摊销期限)
print(q+'贷：累计摊销\t\t', 专利价格 / 摊销期限)

# 报废无形资产：借：累计摊销、无形资产减值准备、营业外支出，贷：无形资产。

20170101
借：银行存款		 100
　　贷：其他业务收入	 100
20171231
借：其他业务成本		 50.0
　　贷：累计摊销		 50.0

Ch6 投资性房地产¶

In [37]:

# 房企开发的销售中的普通住宅：开发产品（存货）。房企开发并经营租赁给某超市的房产：投资性房地产。
# 投资性房地产范围：持有待增值转让的土地使用权；已出租的土地或建筑物。
# 费用化的后续支出：借：其他业务成本，贷：银行存款。
# 例6-1：资本化的后续支出
厂房原价 = 2000
已计提折旧 = 600
改扩建支出 = 150  # 假定投资性房地产按成本模式计量

# 20180315转入改扩建
print(20180315)
print('借：投资性房地产——厂房（在建）\t', 厂房原价 - 已计提折旧)
print(q+'投资性房地产累计折旧\t\t', 已计提折旧)
print(q+'贷：投资性房地产——厂房\t\t', 厂房原价)

# 20180315-1210改扩建期间，不计提折旧摊销
print(20181210)
print('借：投资性房地产——厂房（在建）\t', 改扩建支出)
print(q+'贷：银行存款\t\t\t', 改扩建支出)
# 改扩建完工
厂房现价 = 厂房原价 - 已计提折旧 + 改扩建支出
print('借：投资性房地产——厂房\t\t', 厂房现价)
print(q+'贷：投资性房地产——厂房（在建）\t', 厂房现价)

20180315
借：投资性房地产——厂房（在建）	 1400
　　投资性房地产累计折旧		 600
　　贷：投资性房地产——厂房		 2000
20181210
借：投资性房地产——厂房（在建）	 150
　　贷：银行存款			 150
借：投资性房地产——厂房		 1550
　　贷：投资性房地产——厂房（在建）	 1550

In [40]:

# 例6-2
成本 = 1000
公允价值变动 = 200
厂房原价 = 成本 + 公允价值变动  # 假定投资性房地产按公允价值模式计量
改扩建支出 = 150

# 20180315转入改扩建
print(20180315)
print('借：投资性房地产——厂房（在建）\t', 厂房原价)
print(q+'贷：投资性房地产——成本\t\t', 成本)
print(q+q+'投资性房地产——公允价值变动\t', 公允价值变动)

# 20180315-1210改扩建期间，不计提折旧摊销
print(20181210)
print('借：投资性房地产——厂房（在建）\t', 改扩建支出)
print(q+'贷：银行存款\t\t\t', 改扩建支出)
# 改扩建完工
厂房现价 = 厂房原价 + 改扩建支出
print('借：投资性房地产——厂房\t\t', 厂房现价)
print(q+'贷：投资性房地产——厂房（在建）\t', 厂房现价)

20180315
借：投资性房地产——厂房（在建）	 1200
　　贷：投资性房地产——成本		 1000
　　　　投资性房地产——公允价值变动	 200
20181210
借：投资性房地产——厂房（在建）	 150
　　贷：银行存款			 150
借：投资性房地产——厂房		 1350
　　贷：投资性房地产——厂房（在建）	 1350

In [41]:

# 采用成本模式后续计量分录：借：其他业务成本，贷：投资性房地产累计折旧（摊销）。
#   取得租金收入：借：银行存款，贷：其他业务收入。
#   经减值测试后确定减值的：借：资产减值损失，贷：投资性房地产减值准备。若价值恢复，不得转回
# 采用公允价值模式后续计量，不计提折旧摊销。
#   资产负债表日，若公允价值高于其账面余额，将差额：借：投资性房地产——公允价值变动，贷：公允价值变动损益。若低于，做相反分录

In [3]:

# 例6-3：房企，投资性房地产的公允价值模式
# 20181001写字楼开发完成并出租
print(20181001)
print('借：投资性房地产——成本\t', 9000)  # A+.注意：公允价值模式下，分录“——成本”不可省略！
print(q+'贷：开发成本\t\t', 9000)  # C-

# 20181231按公允价值调整账面价值
print(20181231)
print('借：投资性房地产——公允价值变动\t', 9200 - 9000)
print(q+'贷：公允价值变动损益\t\t', 9200 - 9000)

20181001
借：投资性房地产——成本	 9000
　　贷：开发成本		 9000
20181231
借：投资性房地产——公允价值变动	 200
　　贷：公允价值变动损益		 200

In [5]:

# 房地产转换：指用途改变，而非后续计量模式改变。
# 例6-4：投资性房地产（成本）to自用房地产：科目对转
原价 = 5000
已计提折旧 = 1235
账面价值 = 原价 - 已计提折旧

print(20180801)
print('借：固定资产\t\t', 原价)
print(q+'投资性房地产累计折旧\t', 已计提折旧)
print(q+'贷：投资性房地产\t', 原价)
print(q+q+'累计折旧\t\t', 已计提折旧)

20180801
借：固定资产		 5000
　　投资性房地产累计折旧	 1235
　　贷：投资性房地产	 5000
　　　　累计折旧		 1235

In [7]:

# 例6-5：投资性房地产（公允）to自用房地产：单向冲销
原成本 = 4500
公允价值增值 = 250
原账面价值 = 原成本 + 公允价值增值
现公允价值 = 4800

print(20181015)
print('借：固定资产\t\t\t', 现公允价值)
print(q+'贷：投资性房地产——成本\t\t', 原成本)
print(q+q+'投资性房地产——公允价值变动\t', 公允价值增值)
print(q+q+'公允价值变动损益\t\t', 现公允价值 - 原账面价值)

20181015
借：固定资产			 4800
　　贷：投资性房地产——成本		 4500
　　　　投资性房地产——公允价值变动	 250
　　　　公允价值变动损益		 50

In [8]:

# 【房企】投资性房地产（成本）to存货：单向冲销
print(20181015)
print('借：开发产品\t\t', 4800)
print(q+'投资性房地产累计折旧\t', 150)
print(q+'投资性房地产减值准备\t', 50)
print(q+'贷：投资性房地产\t', 5000)

20181015
借：开发产品		 4800
　　投资性房地产累计折旧	 150
　　投资性房地产减值准备	 50
　　贷：投资性房地产	 5000

In [12]:

# 例6-6：【房企】投资性房地产（公允）to存货
原成本 = 5000
公允价值增值 = 600
原账面价值 = 原成本 + 公允价值增值
现公允价值 = 5800

print(20181015)
print('借：开发产品\t\t\t', 现公允价值)
print(q+'贷：投资性房地产——成本\t\t', 原成本)
print(q+q+'投资性房地产——公允价值变动\t', 公允价值增值)
print(q+q+'公允价值变动损益\t\t', 现公允价值 - 原账面价值)

20181015
借：开发产品			 5800
　　贷：投资性房地产——成本		 5000
　　　　投资性房地产——公允价值变动	 600
　　　　公允价值变动损益		 200

In [13]:

# 【房企】存货to投资性房地产（成本）
print(20181015)
print('借：投资性房地产\t', 4600)
print(q+'存货跌价准备\t', 200)
print(q+'贷：开发产品\t', 4800)  # 房企的存货称为开发产品，由开发成本结转而来

20181015
借：投资性房地产	 4600
　　存货跌价准备	 200
　　贷：开发产品	 4800

In [18]:

# 自用房地产to投资性房地产（成本）：科目对转
print(20181015)
print('借：投资性房地产\t\t', 5000)
print(q+'累计折旧\t\t', 300)
print(q+'固定资产减值准备\t', 100)
print(q+'贷：固定资产\t\t\t', 4600)
print(q+q+'投资性房地产累计折旧\t', 300)
print(q+q+'投资性房地产减值准备\t', 100)

20181015
借：投资性房地产		 5000
　　累计折旧		 300
　　固定资产减值准备	 100
　　贷：固定资产			 4600
　　　　投资性房地产累计折旧	 300
　　　　投资性房地产减值准备	 100

In [24]:

# 例6-7：【房企】存货to投资性房地产（公允）：非投to投&公允&公允>账面时，记入其他综合收益（权益类）
账面余额 = 45000
四月公允价值 = 47000
十二月公允价值 = 48000

print(20180415)  # 租赁期开始日
print('借：投资性房地产——成本\t', 四月公允价值)
print(q+'贷：开发产品\t\t', 账面余额)
print(q+q+'其他综合收益\t', 四月公允价值 - 账面余额)

print(20181231)
print('借：投资性房地产——公允价值变动\t', 十二月公允价值 - 四月公允价值)
print(q+'贷：公允价值变动损益\t\t', 十二月公允价值 - 四月公允价值)

20180415
借：投资性房地产——成本	 47000
　　贷：开发产品		 45000
　　　　其他综合收益	 2000
20181231
借：投资性房地产——公允价值变动	 1000
　　贷：公允价值变动损益		 1000

In [26]:

# 例6-8：自用房地产to投资性房地产（公允）
原价 = 50000
已提折旧 = 14250
公允价值 = 35000

print(20181030)  # 租赁期开始日
print('借：投资性房地产——成本\t', 公允价值)
print(q+'公允价值变动损益\t', 原价 - 已提折旧 - 公允价值)
print(q+'累计折旧\t\t', 已提折旧)
print(q+'贷：固定资产\t\t', 原价)

20181030
借：投资性房地产——成本	 35000
　　公允价值变动损益	 750
　　累计折旧		 14250
　　贷：固定资产		 50000

In [31]:

# 处置投资性房地产（成本）：借：银行存款，贷：其他业务收入、应交税费——应交增值税（销项税额）。借：其他业务成本、投折旧减值，贷：投资性房地产。
# 例6-9：处置投资性房地产（公允）

# 【房企】存货to投资性房地产
print(20170415)
print('借：投资性房地产——成本\t', 47000)  # 公允
print(q+'贷：开发产品\t\t', 45000)  # 账面
print(q+q+'其他综合收益\t', 2000)

# 公允价值变动
print(20171231)
print('借：投资性房地产——公允价值变动\t', 1000)
print(q+'贷：公允价值变动损益\t\t', 1000)

# 出售
print(20180601)
print('借：银行存款\t\t', 55000)
print(q+'公允价值变动损益\t', 1000)
print(q+'其他综合收益\t\t', 2000)
print(q+'其他业务成本\t\t', 45000)  # 投房的账面余额
print(q+'贷：投资性房地产——成本\t\t', 47000)
print(q+q+'投资性房地产——公允价值变动\t', 1000)
print(q+q+'其他业务收入\t\t', 55000)  # 不考虑税费

20170415
借：投资性房地产——成本	 47000
　　贷：开发产品		 45000
　　　　其他综合收益	 2000
20171231
借：投资性房地产——公允价值变动	 1000
　　贷：公允价值变动损益		 1000
20180601
借：银行存款		 55000
　　公允价值变动损益	 1000
　　其他综合收益		 2000
　　其他业务成本		 45000
　　贷：投资性房地产——成本		 47000
　　　　投资性房地产——公允价值变动	 1000
　　　　其他业务收入		 55000

Ch7 金融资产¶

In [14]:

# 分四类：公允入当、持有至到期投资、贷款应收款、可供出售金融资产。
# 公允入当金融资产：交易性or指定为公允入当。禁止重分类进出。

# 例7-1
# 购入股票
print(20170513)
print('借：交易性金融资产——成本\t', 10 * 10e4)  # 注意，已宣未发的股利要从股票价格里扣除，所余才计入成本
print(q+'应收股利\t\t', .6 * 10e4)
print(q+'投资收益\t\t', 1000)  # 交易费用直入损益
print(q+'贷：银行存款\t\t', 10.6 * 10e4 + 1000)

# 收到现金股利
print(20170523)
print('借：银行存款\t\t', .6 * 10e4)
print(q+'贷：应收股利\t\t', .6 * 10e4)

# 股价上涨确认
print(20170630)
print('借：交易性金融资产——公允价值变动\t', (13 - 10) * 10e4)
print(q+'贷：公允价值变动损益\t\t', (13 - 10) * 10e4)

# 售出股票
print(20170815)
print('借：银行存款\t\t', 15 * 10e4)
print(q+'公允价值变动损益\t',(13 - 10) * 10e4)  # 此为冲零项目，因为所有损益都要转入投资收益
print(q+'贷：交易性金融资产——成本\t\t', 10 * 10e4)
print(q+q+'交易性金融资产——公允价值变动\t', 3 * 10e4)
print(q+q+'投资收益\t\t\t\t', (15+13-10-10-3) * 10e4)  # 此为倒挤项

20170513
借：交易性金融资产——成本	 1000000.0
　　应收股利		 60000.0
　　投资收益		 1000
　　贷：银行存款		 1061000.0
20170523
借：银行存款		 60000.0
　　贷：应收股利		 60000.0
20170630
借：交易性金融资产——公允价值变动	 300000.0
　　贷：公允价值变动损益		 300000.0
20170815
借：银行存款		 1500000.0
　　公允价值变动损益	 300000.0
　　贷：交易性金融资产——成本		 1000000.0
　　　　交易性金融资产——公允价值变动	 300000.0
　　　　投资收益				 500000.0

In [24]:

# 例7-3
PA1 = np.pv(rate=.08, nper=5, pmt=-1, fv=0, when='end')  # numpy.pv(rate, nper, pmt, fv=0.0, when='end')
PA2 = np.pv(rate=.12, nper=5, pmt=-1, fv=0, when='end')
PF1 = np.pv(rate=.08, nper=5, pmt=0, fv=-1, when='end')
PF2 = np.pv(rate=.12, nper=5, pmt=0, fv=-1, when='end')
PV1 = PA1 * 59 + PF1 * 1250
PV2 = PA2 * 59 + PF2 * 1250

# 内插法，单变量求解实际利率r
import scipy.optimize as so
def f(x):  # 预备求解形如f(x)=0的方程组
    x1 = x[0]
    return [(PV1-1000)/(PV1-PV2) - (.08-x1)/(.08-.12)]  # 返回误差
result = so.fsolve(f, [.08])  # 第二参数为未知向量x的初始值
print ('the result is', result)
print ('the error is', f(result))

the result is [ 0.10100579]
the error is [-1.1102230246251565e-16]

In [34]:

市价 = 1000  # 含交易费用
面值 = 1250
票面年息 = 59  # 票面年利率4.72%
实际利率 = .1  # 上Cell求得

# 购入债券
print(20100101)
print('借：持有至到期投资——成本\t\t', 面值)
print(q+'贷：银行存款\t\t\t', 市价)
print(q+q+'持有至到期投资——利息调整\t', 面值 - 市价)

# 收到利息等
摊余成本 = 面值 - (面值 - 市价)  # 即持有至到期投资的借方余额
本期投资收益 = 摊余成本 * 实际利率
print(20101231)
print('借：应收利息\t\t\t', 票面年息)
print(q+'持有至到期投资——利息调整\t', 本期投资收益 - 票面年息)
print(q+'贷：投资收益\t\t\t', 本期投资收益)
print('借：银行存款\t\t\t', 票面年息)
print(q+'贷：应收利息\t\t\t', 票面年息)

# 2011-2014 收到利息等
ys = range(2011,2015)  # years: range(start,end,step)
ds = ['1231']  # dates
for y, d in it.product(ys, ds):  # flat is better than nested
    print(str(y)+d)
    摊余成本 += 本期投资收益 - 票面年息
    本期投资收益 = 摊余成本 * 实际利率
    print('借：应收利息\t\t\t', 票面年息)
    print(q+'持有至到期投资——利息调整\t', 本期投资收益 - 票面年息)  # 末期该数应用调整尾差法倒挤得出
    print(q+'贷：投资收益\t\t\t', 本期投资收益)
    print('借：银行存款\t\t\t', 票面年息)
    print(q+'贷：应收利息\t\t\t', 票面年息)

# 2014 收回本金
print('借：银行存款\t\t\t', 面值)
print(q+'贷：持有至到期投资——成本\t', 面值)

20100101
借：持有至到期投资——成本		 1250
　　贷：银行存款			 1000
　　　　持有至到期投资——利息调整	 250
20101231
借：应收利息			 59
　　持有至到期投资——利息调整	 41.0
　　贷：投资收益			 100.0
借：银行存款			 59
　　贷：应收利息			 59
20111231
借：应收利息			 59
　　持有至到期投资——利息调整	 45.10000000000001
　　贷：投资收益			 104.10000000000001
借：银行存款			 59
　　贷：应收利息			 59
20121231
借：应收利息			 59
　　持有至到期投资——利息调整	 49.61
　　贷：投资收益			 108.61
借：银行存款			 59
　　贷：应收利息			 59
20131231
借：应收利息			 59
　　持有至到期投资——利息调整	 54.570999999999984
　　贷：投资收益			 113.57099999999998
借：银行存款			 59
　　贷：应收利息			 59
20141231
借：应收利息			 59
　　持有至到期投资——利息调整	 60.02809999999998
　　贷：投资收益			 119.02809999999998
借：银行存款			 59
　　贷：应收利息			 59
借：银行存款			 1250
　　贷：持有至到期投资——成本	 1250

In [44]:

# 若上例中，所购债券为到期一次还本付息且利息不计复利
实际利率 = np.rate(nper=5, pmt=0, pv=-市价, fv=票面年息*5+面值, when='end')
实际利率 = .0905  # 课本用近似值

# 购入债券
print(20100101)
print('借：持有至到期投资——成本\t\t', 面值)
print(q+'贷：银行存款\t\t\t', 市价)
print(q+q+'持有至到期投资——利息调整\t', 面值 - 市价)

# 2010 收到利息等
摊余成本 = 面值 - (面值 - 市价)  # 即持有至到期投资的借方余额
本期投资收益 = 摊余成本 * 实际利率
print(20101231)
print('借：持有至到期投资——应计利息\t', 票面年息)  # 注意：一次付的利息只能进持有至到期投资——应计利息这一科目！
print(q+'持有至到期投资——利息调整\t', 本期投资收益 - 票面年息)
print(q+'贷：投资收益\t\t\t', 本期投资收益)

# 2011-2014 收到利息等
ys = range(2011,2015)  # years: range(start,end,step)
ds = ['1231']  # dates
for y, d in it.product(ys, ds):  # flat is better than nested
    print(str(y)+d)
    摊余成本 += 本期投资收益  # 即持有至到期投资的借方余额
    本期投资收益 = 摊余成本 * 实际利率
    print('借：持有至到期投资——应计利息\t', 票面年息)
    print(q+'持有至到期投资——利息调整\t', 本期投资收益 - 票面年息)  # 末期该数应用调整尾差法倒挤得出
    print(q+'贷：投资收益\t\t\t', 本期投资收益)

# 2014 收回本金
print('借：银行存款\t\t\t', 面值 + 票面年息 * 5)
print(q+'贷：持有至到期投资——成本\t', 面值)
print(q+q+'持有至到期投资——应计利息\t', 票面年息 * 5)

20100101
借：持有至到期投资——成本		 1250
　　贷：银行存款			 1000
　　　　持有至到期投资——利息调整	 250
20101231
借：持有至到期投资——应计利息	 59
　　持有至到期投资——利息调整	 31.5
　　贷：投资收益			 90.5
20111231
借：持有至到期投资——应计利息	 59
　　持有至到期投资——利息调整	 39.69024999999999
　　贷：投资收益			 98.69024999999999
20121231
借：持有至到期投资——应计利息	 59
　　持有至到期投资——利息调整	 48.621717625
　　贷：投资收益			 107.621717625
20131231
借：持有至到期投资——应计利息	 59
　　持有至到期投资——利息调整	 58.361483070062505
　　贷：投资收益			 117.3614830700625
20141231
借：持有至到期投资——应计利息	 59
　　持有至到期投资——利息调整	 68.98269728790316
　　贷：投资收益			 127.98269728790316
借：银行存款			 1545
　　贷：持有至到期投资——成本	 1250
　　　　持有至到期投资——应计利息	 295

In [50]:

# 持有至到期投资的转换（即非豁免性售出后的剩余部分重分类）
# 例7-4
# 出售持有至到期债券的10%
print(20170401)
print('借：银行存款\t\t\t', 120)
print(q+'贷：持有至到期投资——成本\t', 1000 * .1)
print(q+q+'投资收益\t\t\t', 120 - 1000 * .1)
# 前述非豁免性售出后，剩余的持有至到期债券必须重分类
print('借：可供出售金融资产\t\t', 120 / .1 * .9)
print(q+'贷：持有至到期投资——成本\t', 1000 * .9)
print(q+q+'其他综合收益\t\t', 120 / .1 * .9 - 1000 * .9)
  # 因"持有至to可供"属于金融资产类别内的转换，不视为出售，故只能先进权益，而不能直接进损益

# 全部出售
print(20170423)
print('借：银行存款\t\t', 1180)
print(q+'其他综合收益\t\t', 120 / .1 * .9 - 1000 * .9)
print(q+'贷：可供出售金融资产\t', 120 / .1 * .9)
print(q+q+'投资收益\t\t', 1180 + 120 / .1 * .9 - 1000 * .9 - 120 / .1 * .9)

20170401
借：银行存款			 120
　　贷：持有至到期投资——成本	 100.0
　　　　投资收益			 20.0
借：可供出售金融资产		 1080.0
　　贷：持有至到期投资——成本	 900.0
　　　　其他综合收益		 180.0
20170423
借：银行存款		 1180
　　其他综合收益		 180.0
　　贷：可供出售金融资产	 1080.0
　　　　投资收益		 280.0

In [11]:

# 例7-5：可供出售金融资产的会计处理
面值 = 1000
市价 = 1028.244
票面年息 = 1000 * .04
实际利率 = .03

# 购入债券
print(20170101)
print('借：可供出售金融资产——成本\t\t', 面值)
print(q+'可供出售金融资产——利息调整\t', 市价 - 面值)
print(q+'贷：银行存款\t\t\t', 市价)

# 收到利息
摊余成本 = 面值 - (面值 - 市价)  # 即可供的借方余额
本期投资收益 = 摊余成本 * 实际利率
print(20171231)
print('借：应收利息\t\t\t', 票面年息)
print(q+'贷：投资收益\t\t\t', 本期投资收益)
print(q+q+'可供出售金融资产——利息调整\t', 票面年息 - 本期投资收益)
print('借：银行存款\t\t\t', 票面年息)
print(q+'贷：应收利息\t\t\t', 票面年息)
# 公允价值变动
新市价 = 1000.094
摊余成本 += 本期投资收益 - 票面年息
print('借：其他综合收益\t', 摊余成本 - 新市价)
print(q+'贷：可供出售金融资产——公允价值变动\t', 摊余成本 - 新市价)

20170101
借：可供出售金融资产——成本		 1000
　　可供出售金融资产——利息调整	 28.243999999999915
　　贷：银行存款			 1028.244
20171231
借：应收利息			 40.0
　　贷：投资收益			 30.847319999999996
　　　　可供出售金融资产——利息调整	 9.152680000000004
借：银行存款			 40.0
　　贷：应收利息			 40.0
借：其他综合收益	 18.99731999999983
　　贷：可供出售金融资产——公允价值变动	 18.99731999999983

In [7]:

# 例7-6：金融资产转移
# 出售应收账款，不附追索权：终止确认
print(20140604)
print('借：银行存款\t', 263250)
print(q+'其他应收款\t', 23400)  # 预计会发生的销售退回，依然要作为资产挂账
print(q+'营业外支出\t', 351000 - 263250 - 23400)  # 倒挤项
print(q+'贷：应收账款\t', 351000)  # 含增值税销项税额

# 收到退回商品
print(20140803)
print('借：主营业务收入\t\t\t\t', 20000)  # 抵减
print(q+'应交税费——应交增值税（销项税额）\t', 3400)  # 销项税额是负债类科目，这里在借方，属于抵减
print(q+'贷：其他应收款\t\t\t', 23400)
print('借：库存商品\t\t\t\t', 13000)  # 按成本入账
print(q+'贷：主营业务成本\t\t\t', 13000)

20140604
借：银行存款	 263250
　　其他应收款	 23400
　　营业外支出	 64350
　　贷：应收账款	 351000
20140803
借：主营业务收入				 20000
　　应交税费——应交增值税（销项税额）	 3400
　　贷：其他应收款			 23400
借：库存商品				 13000
　　贷：主营业务成本			 13000

In [11]:

# 例7-7
# 销售实现
print(20170101)
print('借：应收票据\t\t\t\t', 234000)
print(q+'贷：主营业务收入\t\t\t', 200000)
print(q+q+'应交税费——应交增值税（销项税额）\t', 34000)  # 销项税额是负债类科目

# 如果应收票据正常到期兑付
print(20170401)
print('借：银行存款\t', 234000)
print(q+'贷：应收票据\t', 234000)

# 如果提前向银行贴现该票据，且银行拥有追索权：不符合终止确认，应将贴现所得确认为一项金融负债
print(20170201)
print('借：银行存款\t\t', 231660)
print(q+'短期借款——利息调整\t', 234000 - 231660)  # 倒挤项。该项应在票据贴现期间按实际利率法确认为利息费用
print(q+'贷：短期借款——成本\t', 234000)

20170101
借：应收票据				 234000
　　贷：主营业务收入			 200000
　　　　应交税费——应交增值税（销项税额）	 34000
20170401
借：银行存款	 234000
　　贷：应收票据	 234000
20170201
借：银行存款		 231660
　　短期借款——利息调整	 2340
　　贷：短期借款——成本	 234000

In [9]:

# 例7-8：继续涉入
本金 = 10000
摊余成本 = 10000
票面利率 = .1
实际利率 = .1

收到款项 = 9115
转移本金 = 9000
保留本金 = 本金 - 转移本金
贷款公允价值 = 10100
转移本利 = 贷款公允价值 * .9
使保留的权利次级化所取得的对价 = 收到款项 - 转移本利
超额利差公允价值 = 40
信用增级相关对价 = 使保留的权利次级化所取得的对价 + 超额利差公允价值

# 金融资产转移日
print(20170101)
print('借：存放同业\t\t', 收到款项)  # 9115=9090+25, 9090=9000+90
print(q+'继续涉入资产——次级权益\t', 保留本金)
print(q+'继续涉入资产——超额账户\t', 超额利差公允价值)
print(q+'贷：贷款\t\t', 转移本金)  # 贷款是甲银行的资产类，贷方为减少
print(q+q+'继续涉入负债\t', 保留本金 + 信用增级相关对价)  # 1065=1000+65, 65=25+40
print(q+q+'其它业务收入\t', 转移本利 - 转移本金)  # 甲银行该项金融资产转移形成的利得

# 转移后，甲银行应采用实际利率法将信用增级相关对价65w分期确认；此外还应在资产负债表日确认可能发生的减值损失
已转移贷款发生信用损失 = 300
print(20171231)
print('借：资产减值损失\t\t', 已转移贷款发生信用损失)
print(q+'贷：贷款损失准备——次级权益\t', 已转移贷款发生信用损失)
print('借：继续涉入负债\t\t', 已转移贷款发生信用损失)  # 冲减负债
print(q+'贷：继续涉入资产——次级权益\t', 已转移贷款发生信用损失)  # 冲减资产

20170101
借：存放同业		 9115
　　继续涉入资产——次级权益	 1000
　　继续涉入资产——超额账户	 40
　　贷：贷款		 9000
　　　　继续涉入负债	 1065.0
　　　　其它业务收入	 90.0
20171231
借：资产减值损失		 300
　　贷：贷款损失准备——次级权益	 300
借：继续涉入负债		 300
　　贷：继续涉入资产——次级权益	 300

Ch9 资产减值¶

In [2]:

# 存货：资产负债表日，按成本与可变现净值孰低计量。
# 例9-2
print(20180630)
print('借：存货跌价准备\t\t', 75)  # 跌价准备的转回，注意不应超过最初计提总额
print(q+'贷：资产减值损失——存货减值损失\t', 75)

20180630
借：存货跌价准备	 75
　　贷：资产减值损失——存货减值损失	 75

In [8]:

# 例9-5
# XYZ银行发放贷款
print(20150101)
print('借：贷款——本金\t', 1500)
print(q+'贷：吸收存款\t', 1500)
# 按时收到利息
print(20151231)
print('借：应收利息\t', 150)
print(q+'贷：利息收入\t', 150)
print('借：存放同业\t', 150)
print(q+'贷：应收利息\t', 150)

# 20161231 出现减值
print(20161231)
print('借：应收利息\t', 150)
print(q+'贷：利息收入\t', 150)

贷款本金 = 1500
应收未收利息 = 150  # 截至目前，只有20161231这一期是应收未收的
未确认减值前摊余成本 = 贷款本金 + 应收未收利息  # 即“贷款”这一科目的借方余额
新预计未来现金流量现值 = 976.66
应确认贷款减值损失 = 未确认减值前摊余成本 - 新预计未来现金流量现值
print(20161231)
print('借：资产减值损失\t\t', 应确认贷款减值损失)
print(q+'贷：贷款损失准备\t', 应确认贷款减值损失)
print('借：贷款——已减值\t\t', 未确认减值前摊余成本)  # 这是将原贷款全部结转入“已减值”这一新的二级科目中
print(q+'贷：贷款——本金\t', 贷款本金)
print(q+q+'应收利息\t\t', 应收未收利息)

# 20171231 按上述现金流估计
实际利率 = .1
确认减值回转 = 新预计未来现金流量现值 * 实际利率
print(20171231)
print('借：贷款损失准备\t\t', 确认减值回转)
print(q+'贷：利息收入\t\t', 确认减值回转)
摊余成本 = 新预计未来现金流量现值 + 确认减值回转

# 20181231 按上述现金流估计，但实际只收到900w
实际收到现金 = 900
print(20181231)
print('借：贷款损失准备\t\t', 摊余成本 * 实际利率)
print(q+'贷：利息收入\t\t', 摊余成本 * 实际利率)  # 该分录同20171231
print('借：存放同业\t\t', 实际收到现金)
print(q+'贷：贷款——已减值\t', 实际收到现金)
摊余成本 += 摊余成本 * 实际利率 - 实际收到现金
预计现值 = 181.82  # 181.82为新现值，即说明出现了新的损失
print('借：资产减值损失\t\t', 摊余成本 - 预计现值)
print(q+'贷：贷款损失准备\t', 摊余成本 - 预计现值)

# 20191231 上述预估现金流发生变化，预计本年将收回150w*.9091（即新的预计现值），实际收到100w
实际收到现金 = 100
摊余成本 = 预计现值 * (1 + 实际利率) - 实际收到现金  # 用老的预计现值181.82计算
print(20191231)
print('借：贷款损失准备\t\t', 预计现值 * 实际利率)
print(q+'贷：利息收入\t\t', 预计现值 * 实际利率)
print('借：存放同业\t\t', 实际收到现金)
print(q+'贷：贷款——已减值\t', 实际收到现金)

预计现值 = 150 * .9091  # 此为新的预计现值
print('借：贷款损失准备\t\t', 预计现值 - 摊余成本)
print(q+'贷：资产减值损失\t', 预计现值 - 摊余成本)  # 由于实际收到现金100高于预计现值，贷款损失出现了转回

# 20201231 银行将贷款结算，实收现金200w
print(20201231)
print('借：贷款损失准备\t\t', 预计现值 * 实际利率)
print(q+'贷：利息收入\t\t', 预计现值 * 实际利率)
print('借：存放同业\t\t', 200)
print(q+'贷款损失准备\t\t', 673.34 - 97.666 - 107.4326 + 99.9386 - 18.182 - 36.363 - 预计现值 * 实际利率)
print(q+'贷：贷款——已减值\t\t', 1650 - 900 - 100)  # 清空前面分录所有的“贷款——已减值”科目
print(q+q+'资产减值损失\t\t', 49.9985)  # 倒挤项

20150101
借：贷款——本金	 1500
　　贷：吸收存款	 1500
20151231
借：应收利息	 150
　　贷：利息收入	 150
借：存放同业	 150
　　贷：应收利息	 150
20161231
借：应收利息	 150
　　贷：利息收入	 150
20161231
借：资产减值损失		 673.34
　　贷：贷款损失准备	 673.34
借：贷款——已减值		 1650
　　贷：贷款——本金	 1500
　　　　应收利息		 150
20171231
借：贷款损失准备		 97.666
　　贷：利息收入		 97.666
20181231
借：贷款损失准备		 107.43260000000001
　　贷：利息收入		 107.43260000000001
借：存放同业		 900
　　贷：贷款——已减值	 900
借：资产减值损失		 99.93860000000001
　　贷：贷款损失准备	 99.93860000000001
20191231
借：贷款损失准备		 18.182
　　贷：利息收入		 18.182
借：存放同业		 100
　　贷：贷款——已减值	 100
借：贷款损失准备		 36.363
　　贷：资产减值损失	 36.363
20201231
借：贷款损失准备		 13.636500000000002
　　贷：利息收入		 13.636500000000002
借：存放同业		 200
　　贷款损失准备		 499.9985
　　贷：贷款——已减值		 650
　　　　资产减值损失		 49.9985

In [12]:

# 例9-6
# 20150101 购入债券
print(20150101)
print('借：可供出售金融资产——成本\t', 100 * 10000)
print(q+'贷：银行存款\t\t', 100 * 10000)

# 20151231 确认利息，确认公允价值变动（本期变动为0，故不作账务处理）
票息 = .03 * 100 * 10000
print(20151231)
print('借：应收利息\t\t', 票息)
print(q+'贷：投资收益\t\t', 票息)
print('借：银行存款\t\t', 票息)
print(q+'贷：应收利息\t\t', 票息)

# 20161231 确认利息（当年仍可支付），确认减值损失（因预计该债券公允价值将“持续”下跌）
print(20161231)
print('借：应收利息\t\t', 票息)
print(q+'贷：投资收益\t\t', 票息)
print('借：银行存款\t\t', 票息)
print(q+'贷：应收利息\t\t', 票息)
减值 = (100 - 80) * 10000
print('借：资产减值损失\t\t\t\t', 减值)
print(q+'贷：可供出售金融资产——公允价值变动\t', 减值)

# 20171231 确认利息，确认减值损失的转回
应确认利息收入 = 80 * 10000 * .03
print(20171231)
print('借：应收利息\t\t\t', 票息)
print(q+'贷：投资收益\t\t\t', 应确认利息收入)  # 投资收益 = 摊余成本 * 实际利率；然后倒挤利息调整
print(q+q+'可供出售金融资产——利息调整\t', 票息 - 应确认利息收入)
print('借：银行存款\t\t\t', 票息)
print(q+'贷：应收利息\t\t\t', 票息)

减值转回前摊余成本 = 80 * 10000 - (票息 - 应确认利息收入)  # 即可供的借方余额
新公允价值 = 95 * 10000
应转回额 = 新公允价值 - 减值转回前摊余成本
print('借：可供出售金融资产——公允价值变动\t', 应转回额)
print(q+'贷：资产减值损失\t\t', 应转回额)

20150101
借：可供出售金融资产——成本	 1000000
　　贷：银行存款		 1000000
20151231
借：应收利息		 30000.0
　　贷：投资收益		 30000.0
借：银行存款		 30000.0
　　贷：应收利息		 30000.0
20161231
借：应收利息		 30000.0
　　贷：投资收益		 30000.0
借：银行存款		 30000.0
　　贷：应收利息		 30000.0
借：资产减值损失				 200000
　　贷：可供出售金融资产——公允价值变动	 200000
20171231
借：应收利息			 30000.0
　　贷：投资收益			 24000.0
　　　　可供出售金融资产——利息调整	 6000.0
借：银行存款			 30000.0
　　贷：应收利息			 30000.0
借：可供出售金融资产——公允价值变动	 156000.0
　　贷：资产减值损失		 156000.0

In [2]:

# 例9-10
print('借：资产减值损失——固定资产减值损失\t', 5035)
print(q+'贷：固定资产减值准备\t\t', 5035)

借：资产减值损失——固定资产减值损失	 5035
　　贷：固定资产减值准备		 5035

Ch10 负债和所有者权益¶

CASE 5: FINANCIAL & COST MANAGEMENT¶

Ch1 财务管理基本原理¶

In [2]:

# 股票价值=F（公司价值）=F（G（公司理财行为））。
# 资本市场有效性的检验：
# 弱式：股价不受历史价格、交易量等历史信息的影响；技术分析无用。
#   1.随机游走模型：corr(ret, ret.shift(n)) -> 0。
#   2.过滤检验：按某交易规则买卖股票之ret=ret(buy&hold;).
# 半强式：股价不受公开信息（财报、各类公告等）影响；基本分析无用。
#   1.事件研究：定义abret_t=r_t-r_mkt_t, 若半强式，则t时刻披露的事件只应与r_t有关，而与abret_t-1, abret_t+1均无关。
#   2.投资基金表现研究：半强式有效市场的技术分析、基本分析、各种估价模型均无效，投资基金无法取得超额收益。
# 强式：股价不受内部信息的影响；内幕信息无用。
#   1.主要考察内幕信息获得者（大股东、董监高等）参与交易时无法获得超额收益。

Ch2 财务报表分析和财务预测¶

In [3]:

# 财报分析目的：战略（宏观行业竞争）、会计（灵活恰当修正）、财务（比率现金流）、前景（预测估值）。
# 财报分析方法：比较分析法、因素分析法。
#   1.比较分析法：
#     按对象分：a趋势（vs历史），b横向比较（vs行业平均、对标企业），c预算差异（实际vs计划预算）。
#     按内容分：a总量（A,E,NI,时序分析），b结构百分比（如以收入为100%；发现显著问题），c财务比率（相对数比较）。
#   2.因素分析法=连环替代法：定对象、定驱动因素、定替代顺序、按序计算各因素之影响。

In [8]:

# 例2-1：因素分析法
材料费用实际 = 6720
材料费用计划 = 5400
# 材料费用 = 产品产量 * 材料单耗（单位产品材料耗用量） * 材料单价
print('计划', 120 * 9 * 5, '实际', 140 * 8 * 6, '第一替代', 140 * 9 * 5, '第二替代', 140 * 8 * 5, '第三替代', 140 * 8 * 6)

计划 5400 实际 6720 第一替代 6300 第二替代 5600 第三替代 6720

In [16]:

# 短期偿债能力比率
流动资产末 = 700  # 货币资金、公允入当金融资产、应收票据、应收账款、预付账款、应收利息、应收股利、其他应收款、存货、一年到期非流、其他流动
流动负债末 = 300  # 短期借款、公允入当金融负债、应付票据、应付账款、预收账款、应付职工薪酬、应交税费、应付利息、应付股利、其他应付款、
                 # 一年到期非流、其他流动
流动资产初 = 610
流动负债初 = 220
print('本年营运资本', 流动资产末 - 流动负债末)
print('上年营运资本', 流动资产初 - 流动负债初)

非流动资产 = 1300  # 可供出售金融资产、持有至到期投资、长期应收款、长期股权投资、固定资产、在建工程、固定资产清理、无形资产、开发支出、
                  # 商誉、长期待摊费用、递延所得税资产、其他非流动资产
非流动负债 = 740   # 长期借款、应付债券、长期应付款、专项应付款、预计负债、递延所得税负债、其他非流动负债
股东权益 = 960     # 股本、资本公积、其他综合收益、盈余公积、未分配利润

# 营运资本 = 长期资本 - 长期资产，这里：长期资本=E+非流D，长期资产=非流A. 即营运成本是长期资本用于流动资产的部分
print('长期资本', 股东权益 + 非流动负债)
print('长期资产', 非流动资产)
print('本年营运资本', 股东权益 + 非流动负债 - 非流动资产)

print('本年流动比率', 流动资产末 / 流动负债末)
print('上年流动比率', 流动资产初 / 流动负债初)
# 传统认为生产企业流动比率宜>=2. 营业周期越短，流动比率越低。

货币资金 = 44
公允入当金融资产 = 0  # 即交易性金融资产
应收票据 = 20
应收账款 = 398
预付账款 = 22
应收利息 = 0
应收股利 = 0
其他应收款 = 12
存货 = 119
一年到期非流资产 = 77
其他流动资产 = 8

速动资产 = 货币资金+公允入当金融资产+应收票据+应收账款+应收利息+应收股利+其他应收款  # 速动=货币金融应收
print('本年速动比率', 速动资产 / 流动负债末)
print('本年现金比率', 货币资金 / 流动负债末)

经营活动现金流量净额 = 323  # 来自SCF
print('现金流量比率', 经营活动现金流量净额 / 流动负债末)  # 分母应为期末值，因为实际需要偿还的是期末金额

本年营运资本 400
上年营运资本 390
长期资本 1700
长期资产 1300
本年营运资本 400
本年流动比率 2.3333333333333335
上年流动比率 2.772727272727273
本年速动比率 1.58
本年现金比率 0.14666666666666667
现金流量比率 1.0766666666666667

In [23]:

# 长期偿债能力比率
# 总债务存量比率
总资产 = 2000
总负债 = 1040
股东权益 = 960
print('资产负债率', 总负债 / 总资产)
# 传统认为资产负债率宜<50%.
print('产权比率', 总负债 / 股东权益)
print('权益乘数', 总资产 / 股东权益)

长期资本 = 股东权益 + 非流动负债
print('长期资本负债率', 非流动负债 / 长期资本)

# 总债务流量比率
净利润 = 136
利息费用 = 110  # 利息费用 = IS财务费用 + BS计入固定资产等成本的资本化利息
所得税费用 = 64
息税前利润 = 净利润 + 利息费用 + 所得税费用
print('利息保障倍数', 息税前利润 / 利息费用)
# 传统认为利息保障倍数宜>1.
print('现金流量利息保障倍数', 经营活动现金流量净额 / 利息费用)
print('现金流量与负债比率', 经营活动现金流量净额 / 总负债)  # 分母应为期末值，因为实际需要偿还的是期末金额

资产负债率 0.52
产权比率 1.0833333333333333
权益乘数 2.0833333333333335
长期资本负债率 0.43529411764705883
利息保障倍数 2.8181818181818183
现金流量利息保障倍数 2.9363636363636365
现金流量与负债比率 0.3105769230769231

In [2]:

# 杜邦：ROE = a * b * c = 营业净利率 * 总资产周转次数 * 权益乘数 = NI/rev * rev/A * A/E.
# 普通股股东权益 = 股东权益 - 优先股清算价值 - 优先股拖欠股息。

# 管理用财务报表：
#   A = 经营A + 金融A = （经流A+经长A）+（短金A+长金A），D类似。
#   净经营资产 = 经营A - 经营D = 经营营运资本 + 净经营性长期资产, where 经营营运资本 = 经流A-经流D, 净经营性长期资产 = 经长A - 经长D.
#   净金融负债 = 金融D - 金融A =: 净负债
#   净经营资产 = 净负债 + E =: 净投资资本
#   净利润 = 经营损益 + 金融损益 = 税后经营净利润 - 税后利息费用 = 税前经营利润 * (1-T) - 利息费用 * (1-T)
#   来源看：经营现金流量 =: 实体现金流量 = 营业现金净流量 - 资本支出, where 营业现金净流量 = 营业现金（毛）流量 - δ经营营运资本,
#     营业现金（毛）流量 = 税后经营净利润 + 折旧摊销, 资本支出（即现金购买固定资产金额） = δ净经营长期资产 + 折旧摊销.
#   去向看：营业现金毛流量 - 经营营运资本增加 - 资本支出 = 债务CF + 股权CF, 实体CF = 融资CF.
#   则：ROE = 净经营资产净利率 + （净经营资产净利率 - 税后利息率）*净财务杠杆 = 净经营资产净利率 + 杠杆贡献率,
#     where 杠杆贡献率 = 经营差异率 * 净财务杠杆, 经营差异率 = 净经营资产净利率 - 税后利息率, 净财务杠杆 = 净负债 / 股东权益.

In [5]:

# 财务预测
#   融资总需求 = 净经营资产之预计值 - 基期值
#   融资优先顺序：1.现存可动用的金融资产 2.预计今年增加的RE 3.增加金融负债 4.增加股本
#   增加的RE = S0*(1+g)*a1*(1-e1), where S0*(1+g)=预计营业收入, a1=计划税后营业净利率, e1=股利支付率

# 内含增长率：只靠内部积累（增加RE）实现的销售增长率
# 假设可动用金融资产=0，且经营资产销售比f1、经营负债销售比f2不变，则：
#   外部融资额M = S0*g*(f1-f2)-S0*(1+g)*a1*(1-e1), where a1=预计税后经营净利率
#   外部融资销售增长比m = M/(S0*g) = f1-f2-(1+g)/g*a1*(1-e1).
#   若m=0, 可由上式求得内含增长率g.

# 可持续增长率：不发新股、不改变营业净利率a,总资产周转次数b,期末总资产期末权益乘数c,利润留存率d时，所能达到的销售增长率g
# 根据期初股东权益E0: g = δE% = RE/E0 = abc0d, where c0:=A1/E0. Note: E1-E0=RE.
# 根据期末股东权益E1: g = RE/(E1-RE) = abcd/(1-abcd).

Ch3 价值评估基础¶

In [1]:

# 利率期限结构理论：预期（长短期债券完全替代）、分割市场（完全独立）、流动性溢价（部分替代）、期限优先（对某期限的券特别偏好）。
# 市场利率r = r* + riskprem = r*(纯粹无风险利率) + inflationprem + defaultriskprem + liqriskprem + MRP(期限riskprem).
#   MRP期限riskprem: 债券面临持续期内r上升导致价格下跌的风险，因此给予的补偿。

# 复利终值系数F/P = (1+i)^n.
# （普通）年金终值系数F/A = 1/i * ((1+i)^n -1)，偿债基金系数 A/F.
# （普通）年金现值系数P/A = 1/i * (1- (1+i)^(-n))，投资回收系数 A/P.
# 预付年金终值系数(F0/A,i,n) = (F/A,i,n+1) -1.
# 预付年金现值系数(P0/A,i,n) = (P/A,i,n-1) +1.
# 递延年金现值：(F/A,i,n)*(P/F,i,m) = (P/A,i,m+n)-(P/A,i,m).
# 永续年金现值：A/i.

In [22]:

# 样本方差：s_x = 1/(n-1) * ((x1-xbar)**2 + ... + (xn-xbar)**2).
# 两种证券的协方差：cov_xy = 1/(n-1) * ((x1-xbar)(y1-ybar) + ... + (xn-xbar)(yn-ybar)) = r_xy * s_x * s_y.

In [2]:

# 例3-12：证券组合有效集（基于三种证券）
%matplotlib inline
r1 = .1  # ret
s1 = .12  # sigma
r2 = .18
s2 = .2
r3 = .23
s3 = .3
corr = [.2, .3, .5]
# w1 = .5  # weight of total asset invested in 1

w = []
r = []
s = []
wspace = np.linspace(0, 1, num=101, endpoint=True)
for w1, w2 in it.product(wspace, wspace):  # `num=` is inclusive of beg and fin
    w3 = 1 - w1 - w2
    w += [[w1, w2]]
    r += [r1 * w1 + r2 * w2 + r3 * w3]
    s += [np.sqrt(w1**2 * s1**2 + w2**2 * s2**2 + 2*w1*w2*s1*s2*corr[0] + w3**2 * s3**2 + 2*w1*w3*s1*s3*corr[1] + 2*w2*w3*s2*s3*corr[2])]

sbest = min(s)
rbest = r[np.array(s).argmin()]
wbest = w[np.array(s).argmin()]
print('wbest:',wbest,'sbest:',sbest,'rbest:',rbest)

fig, ax1 = plt.subplots(figsize=[16,10])
ax1.plot(s, r)
# ss = np.array(s)[np.array(s)>=.12]  # only keep the >=.12 part of refline
# ax1.plot(ss, ss-.02)  # refline
ax1.axvline(x=sbest, color='r', linestyle='--')  # add a vertical line
ax1.axhline(y=rbest, color='r', linestyle='--')  # add a horizontal line
ax1.set_aspect('equal', adjustable='box')  # `ax` may be changed to `plt.gca()`
# axarr[1,0].axvspan(sd2, ed2, alpha=1, color='0.618')  # alpha(0-1) for transparency. Cf. matplotlib.patches.Polygon for kwargs
#   # Gray shades can be given as `color=` a string encoding a float in the 0-1 range

wbest: [0.80000000000000004, 0.23999999999999999] sbest: 0.110634533488 rbest: 0.114

In [129]:

# deserted, no meaning
# Cf.http://glowingpython.blogspot.jp/2013/02/visualizing-tangent.html
import scipy.interpolate as si
df = pd.DataFrame({'s':s,'r':r}).groupby('s').max().reset_index()  # get max(r) for each s
# tck, u = si.splprep([s,r], s=0)  # si.splprep is for 3D curves and above
tck = si.splrep(df.s, df.r)  # si.splrep is for 2D curves (curves in 2D plane) only. Treat r=f(s)
  # tck:a tuple (vector of knots, B-spline coefs, degree of spline). u: array of paravals.

i = 100
fs = si.splev(df.s[i], tck, der=0)     # f(s)
fprime = si.splev(df.s[i], tck, der=1)  # f'(s). `der=`: order of derivative of the spline to compute
sspace = np.linspace(0, df.s[i], num=101)
tans = fs + fprime * (sspace - df.s[i])  # tangent
# # plt.plot(s[i], fs, 'om', sspace, tans, '--r')
# # plt.plot(s,r)

fig, ax1 = plt.subplots()
ax1.plot(df.s, df.r)
ax1.plot(df.s[i], fs, 'om', sspace, tans, '--r')
# # ax1.axvline(x=sbest, color='r', linestyle='--')  # add a vertical line
# # ax1.axhline(y=rbest, color='r', linestyle='--')  # add a horizontal line
# ax1.set_aspect('equal', adjustable='box')  # `ax` may be changed to `plt.gca()`

Out[129]:

[,
 ]

Ch4 资本成本¶

In [19]:

# 基于17国开04的测算
PA = np.pv(rate=.039479, nper=.8411, pmt=-1, fv=0, when='end')  # numpy.pv(rate, nper, pmt, fv=0.0, when='end')
PF = np.pv(rate=.039479, nper=.8411, pmt=0, fv=-1, when='end')
PA, PF, PA*3.2+PF*100, np.rate(nper=.8411, pmt=3.2, pv=-99.3749, fv=100, when='end')  # pv应是净价，即不含应计利息的价格

Out[19]:

(0.81163353991697706,
 0.96795751947761766,
 99.392979275496103,
 0.039703283004816389)

In [21]:

# 例4-1, 4-3
np.rate(nper=22, pmt=1000*.07, pv=-900, fv=1000, when='end'), \
np.rate(nper=30, pmt=1000*.1, pv=-1000*(1-.01), fv=1000, when='end')  # 注意发行费仅在PV中扣除

Out[21]:

(0.079786673536307043, 0.10107027503315716)

In [19]:

# 例4-8：单变量求解rS
import scipy.optimize as so
def f(x):  # 预备求解形如f(x)=0的方程组
    x1 = x[0]
    return [2.1800/(1+x1) + 2.3544/(1+x1)**2 + 2.5192/(1+x1)**3 + 2.6704/(1+x1)**4 + 2.8039/(x1-.05)/(1+x1)**4 - 23]  # 返回误差
result = so.fsolve(f, [.1])  # 第二参数为未知向量x的初始值
print ('the result is', result)
print ('the error is', f(result))

the result is [ 0.14952723]
the error is [7.1054273576010019e-15]

CASE 6: PRETTIFYING¶

In [26]:

%lsmagic

Out[26]:

Available line magics:
%alias  %alias_magic  %autocall  %automagic  %autosave  %bookmark  %cd  %clear  %cls  %colors  %config  %connect_info  %copy  %ddir  %debug  %dhist  %dirs  %doctest_mode  %echo  %ed  %edit  %env  %gui  %hist  %history  %killbgscripts  %ldir  %less  %load  %load_ext  %loadpy  %logoff  %logon  %logstart  %logstate  %logstop  %ls  %lsmagic  %macro  %magic  %matplotlib  %mkdir  %more  %notebook  %page  %pastebin  %pdb  %pdef  %pdoc  %pfile  %pinfo  %pinfo2  %popd  %pprint  %precision  %profile  %prun  %psearch  %psource  %pushd  %pwd  %pycat  %pylab  %qtconsole  %quickref  %recall  %rehashx  %reload_ext  %ren  %rep  %rerun  %reset  %reset_selective  %rmdir  %run  %save  %sc  %set_env  %store  %sx  %system  %tb  %time  %timeit  %unalias  %unload_ext  %who  %who_ls  %whos  %xdel  %xmode

Available cell magics:
%%!  %%HTML  %%SVG  %%bash  %%capture  %%cmd  %%debug  %%file  %%html  %%javascript  %%js  %%latex  %%perl  %%prun  %%pypy  %%python  %%python2  %%python3  %%ruby  %%script  %%sh  %%svg  %%sx  %%system  %%time  %%timeit  %%writefile

Automagic is ON, % prefix IS NOT needed for line magics.

In [27]:

%env OMP_NUM_THREADS=4  # set environment variable using ipython magic

env: OMP_NUM_THREADS=4

In [30]:

%who int  # list all ints in this notebook (of the parts already executed)

ed	 sd	 win

In [37]:

%%timeit  # run 1e5 times, take avg; cell magic
1+2

Wall time: 0 ns

In [41]:

%timeit np.random.normal(size=100)  # % is line magic, %% is cell magic

6.78 µs ± 177 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)

In [5]:

# %pdb  # debug choice 1

def pick_and_take():
    picked = np.random.randint(0, 1000)
    raise NotImplementedError()

pick_and_take()

---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
 in ()
      5     raise NotImplementedError()
      6 
----> 7 pick_and_take()

 in pick_and_take()
      3 def pick_and_take():
      4     picked = np.random.randint(0, 1000)
----> 5     raise NotImplementedError()
      6 
      7 pick_and_take()

NotImplementedError:

In [6]:

%debug  # debug choice 2

NOTE: Enter 'c' at the ipdb>  prompt to continue execution.
> (2)()

ipdb> picked
*** NameError: name 'picked' is not defined
ipdb> exit

In [3]:

%config InlineBackend.figure_format = 'retina'
x = range(1000)
y = [i ** 2 for i in x]
plt.plot(x,y);  # semicolon can suppress the output of the function on a final line\
# plt.show()

In [14]:

!dir *.csv

 驱动器 E 中的卷是 710
 卷的序列号是 000B-FDA6

 E:\BDSync\K3\Pythoning\py4fi-master-codes\ipython 的目录

2017/03/03  14:09         1,240,306 cons.csv
2017/03/03  14:09         1,007,836 consret.csv
2017/03/31  14:29             9,782 Guerry.csv
2017/03/03  11:26           718,179 idxwgt.csv
2017/03/02  16:34             8,627 mer.csv
2017/03/03  15:25            24,281 out.csv
2017/03/02  21:08            10,751 sz50.csv
               7 个文件      3,019,762 字节
               0 个目录 318,848,057,344 可用字节

Test a reference to a figure: \ref{fig:testsqaure}. Test a reference to an equation: \ref{eqfs}. \begin{equation} P(A \mid B) = \frac{P(B \mid A) \, P(A)}{P(B)} \end{equation}

In [ ]:

%load_ext fortranmagic

%%fortran
subroutine compute_fortran(x, y, z)
    real, intent(in) :: x(:), y(:)
    real, intent(out) :: z(size(x, 1))

    z = sin(x + y)

end subroutine compute_fortran

In [ ]:

compute_fortran([1, 2, 3], [4, 5, 6])

In [29]:

import os
from IPython.display import display, Image
names = [f for f in os.listdir('./images/') if f.endswith('.png')]
for name in names[:5]:
    display(Image('./images/' + name, width=300))

In [ ]:

# # saving data using np.savez, so that computationally expensive results is retained
# recalc = False

# if recalc==True:
#     ### create data1, data2, ... arrays

#     outfile = open(data_dir+"data1_data2.npz","w")
#     np.savez(outfile, data1=data1, data2=data2)
#     outfile.close()
# else:
#     infile = open(data_dir+"data1_data2.npz")
#     npzfile = np.load(infile)
#     data1 = npzfile["data1"]
#     data2 = npzfile["data2"]
#     infile.close()

\begin{equation} \label{eqfs} E = F \cdot s \end{equation}

In [8]:

%alias?

Justyre Insight

Miscellaneous

Edward Justyre

CASE 1: MISCELLANEOUS¶

Ch1 MAX DRAWDOWN¶

Ch2 ITERATION¶

Ch3 SOLVING EQUATIONS¶

Ch4 DATABASE CONNECTION¶

Ch5 PANDAS DATETIME RESAMPLING¶

CASE 2: LINEAR REGRESSION - ORDINARY LEAST SQUARES¶

CASE 3: AREA BETWEEN THREE ARCS¶

CASE 4: ACCOUNTING¶

Ch1 总论¶

Ch2 会计政策和会计估计及其变更¶

Ch3 存货¶

Ch4 固定资产¶

Ch5 无形资产¶

Ch6 投资性房地产¶

Ch7 金融资产¶

Ch9 资产减值¶

Ch10 负债和所有者权益¶

CASE 5: FINANCIAL & COST MANAGEMENT¶

Ch1 财务管理基本原理¶

Ch2 财务报表分析和财务预测¶

Ch3 价值评估基础¶

Ch4 资本成本¶

CASE 6: PRETTIFYING¶

Comments