Skip to content

Latest commit

 

History

History
93 lines (65 loc) · 2.66 KB

File metadata and controls

93 lines (65 loc) · 2.66 KB

DEPENDENCIES

MODIFIER COMMANDS

converts a string index of date/time to a datetime format

df.index = pd.to_datetime(df.index)

correlation measures how closely two series move together; it is typically computed on '% Change' (returns) rather than on the raw levels

# '% Change' converts the series in the df to provide a more accurate correlation
pct_df = df.pct_change()

correlation = pct_df['Column_Name_1'].corr(pct_df['Column_Name_2'])

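perform a linear regression using pandas (note: `pd.ols` was removed in pandas 0.20; on current pandas use the statsmodels approaches below)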

pd.ols(y, x)

perform a single linear regression using statsmodels

import statsmodels.api as sm
# regression boilerplate = sm.OLS(y, x).fit()

# compute % changes in the series
pct_df = df.pct_change()

# add a constant column ('const') to the % change data for the regression intercept
# (apply it to pct_df, not df, otherwise the % changes are overwritten by the raw levels)
pct_df = sm.add_constant(pct_df)

# delete the row of NaN (created when % change is calculated)
pct_df = pct_df.dropna()

# run the regression; y and x are both taken from pct_df so they share the same rows
# and x includes the 'const' column added above
results = sm.OLS(pct_df['Column_Name_1'], pct_df[['const', 'Column_Name_2']]).fit()

# 'summary' provides an in depth analysis; review the 'coef' and 'R-squared' information
print(results.summary())

perform a linear regression using statsmodels

import statsmodels.formula.api as smf

# the first argument to ols is a formula string: 'dependent ~ independent'
# (column names refer to columns of the DataFrame passed as data=)
results = smf.ols('predict_column ~ informing_column', data=df).fit()

# if using more than one column to inform regression, just add to formula
results = smf.ols('predict_column ~ informing_column + additional_column', data=df).fit()

# params holds the fitted coefficients (intercept and one slope per informing column)
results.params

autocorrelation is the correlation of a single time series with a lagged copy of itself

# compute % changes (returns); the autocorrelation below is measured on these, not on raw levels
pct_df = df.pct_change()

# autocorrelation also known as 'serial correlation' or 'lag-one' autocorrelation
# (Series.autocorr() uses lag=1 by default)
autocorrelation = pct_df['Column_Name'].autocorr()

# negative autocorrelation = mean reverting
# positive autocorrelation = trend-following or momentum

provides autocorrelation function (ACF) numerical values instead of the plot

from statsmodels.tsa.stattools import acf

# x= series or array
acf_values = acf(x)

calculates and plots the sample autocorrelation function (ACF), which shows different lags (not just 'lag-one')

from statsmodels.graphics.tsaplots import plot_acf

# x= series or array
# lags= # of lags the autocorrelation function will be plotted
# alpha= significance level of the confidence interval (0-1), e.g. alpha=0.05 draws a 95% band; alpha=1 removes confidence interval
plot_acf(x, lags=num, alpha=0.05)

# any significant non-zero autocorrelations imply that the series can be forecast from the past
plt.show()