Forecast stock prices python

ARIMA Forecasting with Python

In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima_model import ARIMA
import statsmodels.api as sm
from statsmodels.tsa.seasonal import seasonal_decompose
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import accuracy_score
import seaborn as sns
In [2]:
######################################## Import Data From Quandl API
##########################################################
#import quandl
#import pandas as pd
#import xlsxwriter
##mydata = quandl.get_table('ZACKS/FC', ticker='AAPL')
#quandl.ApiConfig.api_key = "XXXXXXXXXXXXXX" ########## Register and generate a key ##############
#mydata = quandl.get("FRED/GDP")
#excelfile= r'dailyd2.xlsx'
#workbook = xlsxwriter.Workbook(excelfile)
#worksheet1 = workbook.add_worksheet('daily')
#bold = workbook.add_format({'bold': True})
#row = 0
#worksheet1.write(row,0,'Date')
#mydata = quandl.get('WIKI/AAPL', start_date="2017-01-01", end_date="2017-08-25")
#daterange = mydata.index ############# Save the date range ##############
#
#for i in daterange:
# row = row+1
# worksheet1.write(row,0,i)
#data = pd.read_csv(r'0825_quandl_ticks2.csv') ########### Load the list of tickers #############
#tlist = data['Ticker']
#col = 0
#for tl in tlist:
# try:
# mydata = quandl.get('WIKI/'+tl, start_date="2017-01-01", end_date="2017-08-25")
# if len(mydata)-1 == len(daterange):
# row = 0
# col = col+1
# worksheet1.write(row,col,tl,bold)
# for dr in daterange:
# row = row+1
# for i,j in mydata['Adj. Close'].iteritems():
# if dr==i:
# if dr==i:
# worksheet1.write(row,col,j)
# print(tl,j)
# except Exception as e:
# print(e)
#workbook.close()
In [3]:
######## Load Data #########
# Read the price workbook into `data`, using the 'Date' column as the index
# and asking pandas to parse that index as dates.
# NOTE(review): the path literal has no directory separators — it looks like
# they were lost somewhere upstream. Confirm the real location before running.
data = pd.read_excel(
    r'C:ModelDatadailyd2.xlsx',
    index_col='Date',
    parse_dates=True,
)
In [4]:
################ Calculate and plot volatility #####################
# One row per column of `data` (presumably one ticker per column — confirm
# against the workbook). 'normalized_std' is std / mean, which makes the
# dispersion comparable across columns with different price levels.
col_std = data.std()
col_mean = data.mean()
df_stat = pd.DataFrame({
    'std': col_std,
    'mean': col_mean,
    'normalized_std': col_std / col_mean,
})
# Row order follows `data`'s column order; a histogram does not depend on it,
# so no sorting is done here.

plt.title('Normalized Standard Deviation')
plt.hist(df_stat['normalized_std'])
fig = plt.gcf()
fig.set_size_inches(14.5, 5.5)
plt.show()
In [5]:
######### Based on the graph above filter out stocks with high variance
######### and low returns #############
# Step 1: keep columns whose normalized standard deviation is below 0.15.
df_fltr = df_stat[df_stat['normalized_std'] < .15]
df = data[df_fltr.index]

# Step 2: keep columns whose last value is at least 25% above the first one.
# `.iloc` makes the first/last lookups explicitly positional; plain `[0]` /
# `[-1]` on a label-indexed Series is ambiguous and deprecated in pandas.
tkl = [x for x in df if (df[x].iloc[-1] - df[x].iloc[0]) / df[x].iloc[0] >= .25]
df = df[tkl]

# Step 3: regress each remaining column on a 0..n-1 time index and keep the
# ones whose fitted slope exceeds 0.01; each kept column is also plotted.
# NOTE(review): the plotting statements are assumed to belong inside the
# `if` branch (only tickers that pass are plotted) — confirm.
fltr_tk = []
for tk in df:
    Y = df[tk].values  # plain ndarray; `as_matrix()` no longer exists in pandas >= 1.0
    X = range(len(df.index))
    X = sm.add_constant(X)
    model = sm.OLS(Y, X)
    results = model.fit()
    # params[0] is the intercept added by add_constant, params[1] the slope.
    if results.params[1] > .01:  ########## Filter on slope coefficient from Regression Model #########
        fltr_tk.append(tk)
        df.plot(y=[tk])
        plt.title('Daily '+tk+' Stock Prices 2017-01-01 to 2017-07-31')
        plt.legend(loc='upper left')
        # Shade the 2017-08-01 .. 2017-08-25 window.
        plt.axvspan('2017-08-01','2017-08-25', color='green', alpha=0.25)
        fig = plt.gcf()
        fig.set_size_inches(16.5, 4.5)
        plt.show()
Forecast stock prices python
Forecast stock prices python
Forecast stock prices python
Forecast stock prices python
Forecast stock prices python
In [6]:
################ PLot Trend and Seasonality #########################
# For every selected ticker, decompose the series and plot its trend and
# seasonal components side by side.
for tk in fltr_tk:
    # The decomposition is run on a copy re-indexed with a gap-free daily
    # calendar of the same length (presumably because the real index skips
    # days and carries no regular frequency — confirm). Working on a copy
    # leaves `df` untouched even if the decomposition raises.
    adj_index = pd.date_range(df[tk].index[0], periods=len(df), freq='D')
    df_d = pd.Series(df[tk].values, index=adj_index, name=tk)
    decomposition = seasonal_decompose(df_d)

    # Put the components back on the real dates for plotting.
    trend = decomposition.trend
    seasonal = decomposition.seasonal
    trend.index = df.index
    seasonal.index = df.index

    panels = [(trend, 'Trend'), (seasonal, 'Seasonality')]
    for position, (component, label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(component, label=label)
        plt.title(tk)
        plt.xticks(rotation=90)
        plt.legend(loc='best')
    fig = plt.gcf()
    fig.set_size_inches(16.5, 4.5)
    plt.show()
Forecast stock prices python
Forecast stock prices python
Forecast stock prices python
Forecast stock prices python
Forecast stock prices python
Forecast stock prices python
In [7]:
############### Run ARIMA Model ###################
# For each selected ticker: hold out the last 20 observations, fit
# ARIMA(p, d, q) for p, q in 0..9 and d in 0..1, and keep the forecast with
# the lowest MAPE on the hold-out. The winning forecast is saved in
# `store[tk]` and plotted against the actual values.
#
# NOTE(review): the order is chosen by minimising error on the same hold-out
# window that the later accuracy cell scores, so that accuracy is optimistic.
# NOTE(review): `forecast(...)[0]` relies on the legacy
# `statsmodels.tsa.arima_model` results API, where `forecast` returns a tuple
# whose first item is the point forecast — confirm against the installed
# statsmodels version.
store = {}
for tk in fltr_tk:
    train = df[tk].iloc[:-20]
    test = df[tk].iloc[len(train):]
    actual = test.values  # ndarray, so element-wise maths is purely positional

    # Sentinels: 99 means "nothing selected yet". Only a candidate with a
    # MAPE strictly below 99 can replace them.
    ap = 99
    ad = 99
    aq = 99
    amape = 99
    af = []
    for p in range(10):
        for q in range(10):
            for d in range(2):
                try:
                    model = ARIMA(train, order=(p, d, q)).fit()
                    predict = model.forecast(len(test))
                    fcst = predict[0]
                    mape = round(np.mean(np.absolute(actual - fcst) / actual) * 100, 2)
                except Exception:
                    # Many orders fail to fit or converge; score them so
                    # badly that they can never be selected.
                    mape = 9999
                if amape > mape:
                    amape = mape
                    ap = p
                    ad = d
                    aq = q
                    af = fcst
    store[tk] = af

    if len(af) == 0:
        # No order beat the initial threshold, so there is nothing to draw;
        # plotting an empty forecast against test.index would raise.
        print(str(tk) + ': no ARIMA order produced a forecast with MAPE below 99; plot skipped')
        continue

    plt.plot(train)
    plt.plot(test, label='Actual')
    plt.plot(test.index, af, label='Predicted')
    fig = plt.gcf()
    fig.set_size_inches(16.5, 4.5)
    plt.title(str(tk)+"_"+"MAPE"+"_"+str(amape)+"_"+"Order"+"_"+"("+str(ap)+str(ad)+str(aq)+")")
    plt.legend(loc='best')
    plt.show()
Forecast stock prices python
Forecast stock prices python
Forecast stock prices python
Forecast stock prices python
Forecast stock prices python
In [8]:
###### Correlation HeatMap #######
# Absolute pairwise Pearson correlation between the selected tickers'
# columns, drawn as a heatmap.
selected = df[fltr_tk]
corr = selected.corr(method='pearson', min_periods=1).abs()

ax = sns.heatmap(corr)
ax.set_title('PortFolio Diversity HeatMap')
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
plt.show()
In [9]:
##### Test model accuracy #####
# Compare, per ticker, the direction of the actual move with the direction of
# the forecast move over the 20-observation hold-out window:
#   Actual / Predicted   -> 1 if the last actual / last forecast value is
#                           above the last training value, else 0
#   *_Growth             -> relative change from the last training value
# Rows are collected first and the frame is built once, which avoids
# chained-indexing assignment (`frame[col].loc[mask] = value`); that form may
# write to a temporary copy and leave the frame unchanged.
# NOTE: a NaN in any compared value makes the comparison False, so the
# corresponding flag is 0.
eval_columns = ['Actual', 'Predicted', 'Actual_Growth', 'Predicted_Growth']
eval_rows = []
for tk in fltr_tk:
    train = df[tk].iloc[:-20]
    test = df[tk].iloc[len(train):]
    last_train = train.iloc[-1]
    last_actual = test.iloc[-1]
    # np.asarray makes the [-1] lookup positional whatever container the
    # forecast was stored in.
    last_predicted = np.asarray(store[tk])[-1]
    eval_rows.append({
        'Actual': int(last_train < last_actual),
        'Predicted': int(last_train < last_predicted),
        'Actual_Growth': (last_actual - last_train) / last_train,
        'Predicted_Growth': (last_predicted - last_train) / last_train,
    })
eval_metrcs = pd.DataFrame(eval_rows, index=fltr_tk, columns=eval_columns)
eval_metrcs
In [10]:
Out[9]:
Actual Predicted Actual_Growth Predicted_Growth
HPQ 1 0 0.00520562 -0.017592
BGCP 1 1 0.0344432 0.0274154
BLDR 0 1 -0.0562743 0.0407847
CZR 0 0 -0.113725 -0.0310849
CAMP 0 0 -0.0433884 -0.0357415
CPN 1 1 0.0288124 0.0241479
CWST 0 0 -0.0268692 -0.0257216
CENX 1 0 0.00892359 -0.21757
COHU 0 0 -0.0932919 -0.131128
DAR 1 0 0.0654545 -0.0141167
FOE 1 1 0.0202128 0.00680254
GSM 1 1 0.0460829 0.0259386
GTN 0 0 -0.0562914 -0.100722
IXYS 0 0 -0.0859599 -0.0192517
MDCA 0 0 -0.00980392 -0.00131282
MTOR 1 1 0.0772311 0.0496162
MGI 0 0 -0.0295567 -0.0605303
MYE 1 1 0.0314286 0.031333
NYT 0 0 -0.085213 -0.023136
ORBC 0 0 -0.0545145 -0.0124334
PNK 1 1 0.00455005 0.0167042
RDNT 1 1 0.251613 0.0372675
SEM 1 1 0.0617284 0.0590196
TGH 1 0 0.0125786 -0.0381664
USAP 0 0 -0.0281081 -0.0258438
NLY 1 1 0.0403361 0.0491873
FOR 0 1 0 0.000157287
FORM 1 1 0.0792453 0.0899497
RSO 1 1 0.0264188 0.0305759
GAIA 0 0 -0.0214592 -0.0252127
In [10]:
# Fraction of tickers whose predicted direction flag matches the actual one.
model_accuracy = accuracy_score(
    y_true=eval_metrcs['Actual'].astype(int),
    y_pred=eval_metrcs['Predicted'].astype(int),
)
model_accuracy
Out[10]:
0.80000000000000004

Recomendados

FYCS_DAA_PRACTICAL_MANUAL (1).docx von
FYCS_DAA_PRACTICAL_MANUAL (1).docxFYCS_DAA_PRACTICAL_MANUAL (1).docx
FYCS_DAA_PRACTICAL_MANUAL (1).docxonlmcq
168 views25 Folien
C programming von
C programmingC programming
C programmingSamsil Arefin
157 views23 Folien
Array von
ArrayArray
ArraySamsil Arefin
226 views13 Folien
Lec21-CS110 Computational Engineering von
Lec21-CS110 Computational EngineeringLec21-CS110 Computational Engineering
Lec21-CS110 Computational EngineeringSri Harsha Pamu
251 views19 Folien
Aggregate Function - Database von
Aggregate Function - DatabaseAggregate Function - Database
Aggregate Function - DatabaseShahadat153031
2.5K views13 Folien
Pnno von
PnnoPnno
Pnnoshristichaudhary4
36 views27 Folien

Más contenido relacionado

Was ist angesagt?

COMPUTER SCIENCE CLASS 12 PRACTICAL FILE von
COMPUTER SCIENCE CLASS 12 PRACTICAL FILECOMPUTER SCIENCE CLASS 12 PRACTICAL FILE
COMPUTER SCIENCE CLASS 12 PRACTICAL FILEAnushka Rai
6.6K views34 Folien
Data Visualization — Le funzionalità matematiche di Sage per la visualizzazio... von
Data Visualization — Le funzionalità matematiche di Sage per la visualizzazio...Data Visualization — Le funzionalità matematiche di Sage per la visualizzazio...
Data Visualization — Le funzionalità matematiche di Sage per la visualizzazio...Andrea Lazzarotto
922 views31 Folien
multiple linear regression von
multiple linear regressionmultiple linear regression
multiple linear regressionAkhilesh Joshi
654 views25 Folien
SupportVectorRegression von
SupportVectorRegressionSupportVectorRegression
SupportVectorRegressionDaniel K
167 views6 Folien
NCCU: Statistics in the Criminal Justice System, R basics and Simulation - Pr... von
NCCU: Statistics in the Criminal Justice System, R basics and Simulation - Pr...NCCU: Statistics in the Criminal Justice System, R basics and Simulation - Pr...
NCCU: Statistics in the Criminal Justice System, R basics and Simulation - Pr...The Statistical and Applied Mathematical Sciences Institute
183 views6 Folien
Berlin meetup von
Berlin meetupBerlin meetup
Berlin meetupWiem Zine Elabidine
651 views61 Folien

Was ist angesagt?(20)

COMPUTER SCIENCE CLASS 12 PRACTICAL FILE von Anushka Rai
COMPUTER SCIENCE CLASS 12 PRACTICAL FILECOMPUTER SCIENCE CLASS 12 PRACTICAL FILE
COMPUTER SCIENCE CLASS 12 PRACTICAL FILE
Anushka Rai6.6K views
Data Visualization — Le funzionalità matematiche di Sage per la visualizzazio... von Andrea Lazzarotto
Data Visualization — Le funzionalità matematiche di Sage per la visualizzazio...Data Visualization — Le funzionalità matematiche di Sage per la visualizzazio...
Data Visualization — Le funzionalità matematiche di Sage per la visualizzazio...
Andrea Lazzarotto922 views
SupportVectorRegression von Daniel K
SupportVectorRegressionSupportVectorRegression
SupportVectorRegression
Daniel K167 views
The num py_library_20200818 von Haim Michael
The num py_library_20200818The num py_library_20200818
The num py_library_20200818
Haim Michael121 views
Odoo - From v7 to v8: the new api von Odoo
Odoo - From v7 to v8: the new apiOdoo - From v7 to v8: the new api
Odoo - From v7 to v8: the new api
Odoo61.8K views
Tweaking the interactive grid von Roel Hartman
Tweaking the interactive gridTweaking the interactive grid
Tweaking the interactive grid
Roel Hartman1.4K views
Atomically { Delete Your Actors } von John De Goes
Atomically { Delete Your Actors }Atomically { Delete Your Actors }
Atomically { Delete Your Actors }
John De Goes2.1K views

Similar a Forecast stock prices python

Question- Upload your Python notebook where the last cell shows a plot.docx von
Question- Upload your Python notebook where the last cell shows a plot.docxQuestion- Upload your Python notebook where the last cell shows a plot.docx
Question- Upload your Python notebook where the last cell shows a plot.docxadam234567
13 views2 Folien
import numpy as np import matplotlib-pyplot as plt import random # Loa (1).pdf von
import numpy as np import matplotlib-pyplot as plt import random # Loa (1).pdfimport numpy as np import matplotlib-pyplot as plt import random # Loa (1).pdf
import numpy as np import matplotlib-pyplot as plt import random # Loa (1).pdfasarudheen07
5 views2 Folien
Final project kijtorntham n von
Final project kijtorntham nFinal project kijtorntham n
Final project kijtorntham nNatsarankornKijtornt
27 views61 Folien
Assignment 5.2.pdf von
Assignment 5.2.pdfAssignment 5.2.pdf
Assignment 5.2.pdfdash41
4 views7 Folien
QMC: Undergraduate Workshop, Tutorial on 'R' Software - Yawen Guan, Feb 26, 2... von
QMC: Undergraduate Workshop, Tutorial on 'R' Software - Yawen Guan, Feb 26, 2...QMC: Undergraduate Workshop, Tutorial on 'R' Software - Yawen Guan, Feb 26, 2...
QMC: Undergraduate Workshop, Tutorial on 'R' Software - Yawen Guan, Feb 26, 2...The Statistical and Applied Mathematical Sciences Institute
48 views14 Folien
Regression and Classification with R von
Regression and Classification with RRegression and Classification with R
Regression and Classification with RYanchang Zhao
3.8K views45 Folien

Similar a Forecast stock prices python(20)

Question- Upload your Python notebook where the last cell shows a plot.docx von adam234567
Question- Upload your Python notebook where the last cell shows a plot.docxQuestion- Upload your Python notebook where the last cell shows a plot.docx
Question- Upload your Python notebook where the last cell shows a plot.docx
adam23456713 views
import numpy as np import matplotlib-pyplot as plt import random # Loa (1).pdf von asarudheen07
import numpy as np import matplotlib-pyplot as plt import random # Loa (1).pdfimport numpy as np import matplotlib-pyplot as plt import random # Loa (1).pdf
import numpy as np import matplotlib-pyplot as plt import random # Loa (1).pdf
asarudheen075 views
Assignment 5.2.pdf von dash41
Assignment 5.2.pdfAssignment 5.2.pdf
Assignment 5.2.pdf
dash414 views
Regression and Classification with R von Yanchang Zhao
Regression and Classification with RRegression and Classification with R
Regression and Classification with R
Yanchang Zhao3.8K views
Interactive financial analytics with vix(cboe) von Aiden Wu, FRM
Interactive financial analytics with vix(cboe)Interactive financial analytics with vix(cboe)
Interactive financial analytics with vix(cboe)
Aiden Wu, FRM48 views
Naive application of Machine Learning to Software Development von Andriy Khavryuchenko
Naive application of Machine Learning to Software DevelopmentNaive application of Machine Learning to Software Development
Naive application of Machine Learning to Software Development
Forecasting Revenue With Stationary Time Series Models von Geoffery Mullings
Forecasting Revenue With Stationary Time Series ModelsForecasting Revenue With Stationary Time Series Models
Forecasting Revenue With Stationary Time Series Models
Geoffery Mullings344 views
Efficient equity portfolios using mean variance optimisation in R von Gregg Barrett
Efficient equity portfolios using mean variance optimisation in REfficient equity portfolios using mean variance optimisation in R
Efficient equity portfolios using mean variance optimisation in R
Gregg Barrett282 views
you need to complete the r code and a singlepage document c.pdf von adnankhan605720
you need to complete the r code and a singlepage document c.pdfyou need to complete the r code and a singlepage document c.pdf
you need to complete the r code and a singlepage document c.pdf
adnankhan6057202 views
intro_to_python_20150825 von Shung-Hsi Yu
intro_to_python_20150825intro_to_python_20150825
intro_to_python_20150825
Shung-Hsi Yu181 views
Company segmentation - an approach with R von Casper Crause
Company segmentation - an approach with RCompany segmentation - an approach with R
Company segmentation - an approach with R
Casper Crause28 views

Último

PRIVACY AWRE PERSONAL DATA STORAGE von
PRIVACY AWRE PERSONAL DATA STORAGEPRIVACY AWRE PERSONAL DATA STORAGE
PRIVACY AWRE PERSONAL DATA STORAGEantony420421
5 views56 Folien
OECD-Persol Holdings Workshop on Advancing Employee Well-being in Business an... von
OECD-Persol Holdings Workshop on Advancing Employee Well-being in Business an...OECD-Persol Holdings Workshop on Advancing Employee Well-being in Business an...
OECD-Persol Holdings Workshop on Advancing Employee Well-being in Business an...StatsCommunications
5 views26 Folien
Ukraine Infographic_22NOV2023_v2.pdf von
Ukraine Infographic_22NOV2023_v2.pdfUkraine Infographic_22NOV2023_v2.pdf
Ukraine Infographic_22NOV2023_v2.pdfAnastosiyaGurin
1.4K views3 Folien
SAP-TCodes.pdf von
SAP-TCodes.pdfSAP-TCodes.pdf
SAP-TCodes.pdfmustafaghulam8181
10 views285 Folien
Organic Shopping in Google Analytics 4.pdf von
Organic Shopping in Google Analytics 4.pdfOrganic Shopping in Google Analytics 4.pdf
Organic Shopping in Google Analytics 4.pdfGA4 Tutorials
16 views13 Folien
Survey on Factuality in LLM's.pptx von
Survey on Factuality in LLM's.pptxSurvey on Factuality in LLM's.pptx
Survey on Factuality in LLM's.pptxNeethaSherra1
7 views9 Folien

Último(20)

PRIVACY AWRE PERSONAL DATA STORAGE von antony420421
PRIVACY AWRE PERSONAL DATA STORAGEPRIVACY AWRE PERSONAL DATA STORAGE
PRIVACY AWRE PERSONAL DATA STORAGE
antony4204215 views
OECD-Persol Holdings Workshop on Advancing Employee Well-being in Business an... von StatsCommunications
OECD-Persol Holdings Workshop on Advancing Employee Well-being in Business an...OECD-Persol Holdings Workshop on Advancing Employee Well-being in Business an...
OECD-Persol Holdings Workshop on Advancing Employee Well-being in Business an...
Ukraine Infographic_22NOV2023_v2.pdf von AnastosiyaGurin
Ukraine Infographic_22NOV2023_v2.pdfUkraine Infographic_22NOV2023_v2.pdf
Ukraine Infographic_22NOV2023_v2.pdf
AnastosiyaGurin1.4K views
Organic Shopping in Google Analytics 4.pdf von GA4 Tutorials
Organic Shopping in Google Analytics 4.pdfOrganic Shopping in Google Analytics 4.pdf
Organic Shopping in Google Analytics 4.pdf
GA4 Tutorials16 views
Survey on Factuality in LLM's.pptx von NeethaSherra1
Survey on Factuality in LLM's.pptxSurvey on Factuality in LLM's.pptx
Survey on Factuality in LLM's.pptx
NeethaSherra17 views
Short Story Assignment by Kelly Nguyen von kellynguyen01
Short Story Assignment by Kelly NguyenShort Story Assignment by Kelly Nguyen
Short Story Assignment by Kelly Nguyen
kellynguyen0119 views
Cross-network in Google Analytics 4.pdf von GA4 Tutorials
Cross-network in Google Analytics 4.pdfCross-network in Google Analytics 4.pdf
Cross-network in Google Analytics 4.pdf
GA4 Tutorials6 views
Data Journeys Hard Talk workshop final.pptx von info828217
Data Journeys Hard Talk workshop final.pptxData Journeys Hard Talk workshop final.pptx
Data Journeys Hard Talk workshop final.pptx
info82821710 views
Chapter 3b- Process Communication (1) (1)(1) (1).pptx von ayeshabaig2004
Chapter 3b- Process Communication (1) (1)(1) (1).pptxChapter 3b- Process Communication (1) (1)(1) (1).pptx
Chapter 3b- Process Communication (1) (1)(1) (1).pptx
ayeshabaig20047 views
[DSC Europe 23][Cryptica] Martin_Summer_Digital_central_bank_money_Ideas_init... von DataScienceConferenc1
[DSC Europe 23][Cryptica] Martin_Summer_Digital_central_bank_money_Ideas_init...[DSC Europe 23][Cryptica] Martin_Summer_Digital_central_bank_money_Ideas_init...
[DSC Europe 23][Cryptica] Martin_Summer_Digital_central_bank_money_Ideas_init...
CRM stick or twist workshop von info828217
CRM stick or twist workshopCRM stick or twist workshop
CRM stick or twist workshop
info82821711 views
Data about the sector workshop von info828217
Data about the sector workshopData about the sector workshop
Data about the sector workshop
info82821715 views

Forecast stock prices python

  • 1. In [11]: import numpy as np import pandas as pd import matplotlib.pyplot as plt from statsmodels.tsa.arima_model import ARIMA import statsmodels.api as sm from statsmodels.tsa.seasonal import seasonal_decompose import warnings warnings.filterwarnings('ignore') from sklearn.metrics import accuracy_score import seaborn as sns In [2]: ######################################## Import Data From Quandl API ########################################################## #import quandl #import pandas as pd #import xlsxwriter ##mydata = quandl.get_table('ZACKS/FC', ticker='AAPL') #quandl.ApiConfig.api_key = "XXXXXXXXXXXXXX" ########## Register and generate a key ############## #mydata = quandl.get("FRED/GDP") #excelfile= r'dailyd2.xlsx' #workbook = xlsxwriter.Workbook(excelfile) #worksheet1 = workbook.add_worksheet('daily') #bold = workbook.add_format({'bold': True}) #row = 0 #worksheet1.write(row,0,'Date') #mydata = quandl.get('WIKI/AAPL', start_date="2017-01-01", end_date="2017-0 8-25") #daterange = mydata.index ############# Save the date range ############## # #for i in daterange: # row = row+1 # worksheet1.write(row,0,i) #data = pd.read_csv(r'0825_quandl_ticks2.csv') ########### Load the list of tickers ############# #tlist = data['Ticker'] #col = 0 #for tl in tlist: # try: # mydata = quandl.get('WIKI/'+tl, start_date="2017-01-01", end_date= "2017-08-25") # if len(mydata)-1 == len(daterange): # row = 0 # col = col+1 # worksheet1.write(row,col,tl,bold) # for dr in daterange: # row = row+1 # for i,j in mydata['Adj. Close'].iteritems(): # if dr==i:
  • 2. # if dr==i: # worksheet1.write(row,col,j) # print(tl,j) # except Exception as e: # print(e) #workbook.close() In [3]: ######## Load Data ######### data = pd.read_excel(r'C:ModelDatadailyd2.xlsx', index_col='Date', parse_ dates=True) In [4]: ################ Calculate and plot volatility ##################### df_stat = pd.DataFrame(columns = ['std','mean','normalized_std']) df_stat[['std','mean','normalized_std']] = pd.DataFrame([data.std(),data.m ean(),data.std()/data.mean()]).T df_stat.sort_values('normalized_std') plt.title('Normalized Standard Deviation') plt.hist(df_stat['normalized_std']) fig = plt.gcf() fig.set_size_inches(14.5, 5.5) plt.show() In [5]: ######### Based on the graph above filter out stocks with high variance and low returns############# df_fltr = df_stat[df_stat['normalized_std']<.15] df = data[df_fltr.index] tkl = [x for x in df if (df[x][-1]-df[x][0])/df[x][0]>=.25 ] df = df[tkl] fltr_tk = [] for tk in df: Y = df[tk].as_matrix() X = range(len(df.index)) X = sm.add_constant(X) model = sm.OLS(Y,X) results = model.fit() if results.params[1] > .01: ########## Filter on slope coefficient from Regression Model #########
  • 3. Regression Model ######### fltr_tk.append(tk) df.plot(y=[tk]) plt.title('Daily '+tk+' Stock Prices 2017-01-01 to 2017-07-31') plt.legend(loc='upper left') plt.axvspan('2017-08-01','2017-08-25', color='green', alpha=0.25) fig = plt.gcf() fig.set_size_inches(16.5, 4.5) plt.show()
  • 9. In [6]: ################ PLot Trend and Seasonality ######################### for tk in fltr_tk: df_d = df[tk] adj_index = pd.date_range(df_d.index[0], periods=len(df), freq='D') df_d.index = adj_index decomposition = seasonal_decompose(df_d) trend = decomposition.trend seasonal = decomposition.seasonal seasonal.index = df.index trend.index = df.index df_d.index = df.index plt.subplot(1,2,1) plt.plot(trend,label='Trend') plt.title(tk) plt.xticks(rotation=90) plt.legend(loc='best') plt.subplot(1,2,2) plt.plot(seasonal,label='Seasonality') plt.title(tk) plt.xticks(rotation=90) plt.legend(loc='best') fig = plt.gcf() fig.set_size_inches(16.5, 4.5) plt.show()
  • 16. In [7]: ############### Run ARIMA Model ################### store = {} for tk in fltr_tk: train = df[tk][0:-20] test = df[tk][len(train):] ap = 99 ad = 99 aq = 99 amape = 99 af = [] for p in range(10): for q in range(10): for d in range(2): try: model = ARIMA(train, order=(p, d, q)).fit() predict = model.forecast(len(test)) fcst=predict[0] mapelist = [] for i in range(len(fcst)): mapelist.insert(i, (np.absolute(test[i] - fcst[i])) / test[i]) mape = np.mean(mapelist) * 100 mape = round(mape,2) except: mape = 9999 pass if amape > mape: amape = mape ap = p ad = d aq = q af= fcst store[tk] = af plt.plot(train) plt.plot(test,label='Actual') plt.plot(test.index,af,label='Predicted') fig = plt.gcf() fig.set_size_inches(16.5, 4.5) plt.title(str(tk)+"_"+"MAPE"+"_"+str(amape)+"_"+"Order"+"_"+"("+str(ap)+ str(ad)+str(aq)+")") plt.legend(loc='best') plt.show()
  • 22. In [8]: ###### Correlation HeatMap ####### corr = df[fltr_tk].corr(method='pearson', min_periods=1).abs() ax = sns.heatmap(corr); fig = plt.gcf() fig.set_size_inches(18.5, 10.5) ax.set_title('PortFolio Diversity HeatMap'); plt.show()
  • 23. In [9]: ##### Test model accuracy ##### eval_metrcs = pd.DataFrame(columns = ['Actual','Predicted','Actual_Growth', 'Predicted_Growth'],index = fltr_tk) for tk in fltr_tk: train = df[tk][0:-20] test = df[tk][len(train):] if (train[-1] >= test[-1] and train[-1] >= store[tk][-1]): eval_metrcs['Actual'].loc[eval_metrcs.index == tk] = 0 eval_metrcs['Predicted'].loc[eval_metrcs.index == tk] = 0 eval_metrcs['Actual_Growth'].loc[eval_metrcs.index == tk] = (test[-1 ] - train[-1])/train[-1] eval_metrcs['Predicted_Growth'].loc[eval_metrcs.index == tk] = (stor e[tk][-1] - train[-1])/train[-1] elif (train[-1] < test[-1] and train[-1] < store[tk][-1]): eval_metrcs['Actual'].loc[eval_metrcs.index == tk] = 1 eval_metrcs['Predicted'].loc[eval_metrcs.index == tk] = 1 eval_metrcs['Actual_Growth'].loc[eval_metrcs.index == tk] = (test[-1 ] - train[-1])/train[-1] eval_metrcs['Predicted_Growth'].loc[eval_metrcs.index == tk] = (stor e[tk][-1] - train[-1])/train[-1] elif (train[-1] >= test[-1] and train[-1] < store[tk][-1]): eval_metrcs['Actual'].loc[eval_metrcs.index == tk] = 0 eval_metrcs['Predicted'].loc[eval_metrcs.index == tk] = 1 eval_metrcs['Actual_Growth'].loc[eval_metrcs.index == tk] = (test[-1 ] - train[-1])/train[-1] eval_metrcs['Predicted_Growth'].loc[eval_metrcs.index == tk] = (stor e[tk][-1] - train[-1])/train[-1] elif (train[-1] < test[-1] and train[-1] >= store[tk][-1]): eval_metrcs['Actual'].loc[eval_metrcs.index == tk] = 1 eval_metrcs['Predicted'].loc[eval_metrcs.index == tk] = 0 eval_metrcs['Actual_Growth'].loc[eval_metrcs.index == tk] = (test[-1 ] - train[-1])/train[-1] eval_metrcs['Predicted_Growth'].loc[eval_metrcs.index == tk] = (stor e[tk][-1] - train[-1])/train[-1]
  • 24. eval_metrcs In [10]: Out[9]: Actual Predicted Actual_Growth Predicted_Growth HPQ 1 0 0.00520562 -0.017592 BGCP 1 1 0.0344432 0.0274154 BLDR 0 1 -0.0562743 0.0407847 CZR 0 0 -0.113725 -0.0310849 CAMP 0 0 -0.0433884 -0.0357415 CPN 1 1 0.0288124 0.0241479 CWST 0 0 -0.0268692 -0.0257216 CENX 1 0 0.00892359 -0.21757 COHU 0 0 -0.0932919 -0.131128 DAR 1 0 0.0654545 -0.0141167 FOE 1 1 0.0202128 0.00680254 GSM 1 1 0.0460829 0.0259386 GTN 0 0 -0.0562914 -0.100722 IXYS 0 0 -0.0859599 -0.0192517 MDCA 0 0 -0.00980392 -0.00131282 MTOR 1 1 0.0772311 0.0496162 MGI 0 0 -0.0295567 -0.0605303 MYE 1 1 0.0314286 0.031333 NYT 0 0 -0.085213 -0.023136 ORBC 0 0 -0.0545145 -0.0124334 PNK 1 1 0.00455005 0.0167042 RDNT 1 1 0.251613 0.0372675 SEM 1 1 0.0617284 0.0590196 TGH 1 0 0.0125786 -0.0381664 USAP 0 0 -0.0281081 -0.0258438 NLY 1 1 0.0403361 0.0491873 FOR 0 1 0 0.000157287 FORM 1 1 0.0792453 0.0899497 RSO 1 1 0.0264188 0.0305759 GAIA 0 0 -0.0214592 -0.0252127