Python Projects
Other Python Projects
Moving Average Algorithm & Visualization
Excel Automation
Minimization Algorithm & Data Structuring
Drag Image to Reposition
Diversification and Portfolio Risk
Feel free to download the bundle below!
The Code
# The purpose of this program is to analyze the relationship between the
# number of assets in a portfolio and the average standard deviation of that
# portfolio. Users are prompted to choose between 5 different asset classes,
# but can also analyze custom data sets.

import math
import random
import statistics

import numpy as np

# Title of the column that holds the row labels (dates) in the data files.
_DATE_COLUMN = 'Dates'
# Marker the monthly data files use for a missing observation.
_MISSING_MARKER = '#N/A N/A'
# Inclusive bounds accepted for the number of trials per portfolio size.
_MIN_TRIALS = 1
_MAX_TRIALS = 1000000000

# Built-in data sets: menu choice -> (daily data file, monthly data file).
_DATA_FILES = {
    'SP500': ('SP 500 daily.txt', 'SP 500 monthly.txt'),
    'REIT': ('REIT daily.txt', 'REIT monthly.txt'),
    '200 ETFs': ('200 daily.txt', '200 monthly.txt'),
    'Nasdaq ETFs': ('Nasdaq daily.txt', 'Nasdaq monthly.txt'),
    '3 ETFs': ('Three daily.txt', 'Three monthly.txt'),
}
# Accepted in addition to the built-in choices; the user then types file names.
_CUSTOM_CHOICE = 'Custom'

_MENU_PROMPT = (
    "Please choose between the following asset classes to analyze:\n\n"
    "For a representative sample of the stock market over the last 20 years, enter\033[1m 'SP500'\033[0m (This will use the oldest 400 securities in the S&P500). \n"
    "For the 200 largest Exchange Traded Funds, diversified by asset class, enter\033[1m '200 ETFs'\033[0m.\n"
    "For all of the ETFs traded on the Nasdaq that are at least 5 years old, enter\033[1m 'Nasdaq ETFs'\033[0m. *WARNING* This is an extremely large data set. Analysis is extremely CPU intensive and could take an extended period of time.\n"
    "For a small-scale relationship analysis of only 3 ETFs, enter\033[1m '3 ETFs'\033[0m.\n"
    "For the 200 largest Real Estate Investment Trusts, enter\033[1m 'REIT'\033[0m.\n"
)

_TRIALS_EXPLANATION = (
    "Please enter the number of trials you would like to run for each number of assets. The more iterations per value of n, the more accurate the relationship will be, but the longer it will take to run. \n"
    "I suggest using smaller values for the larger data sets. Running the analysis on all Nasdaq ETFs with 10+ trials could take hundreds of hours to process on a high-performance computer.\n"
)


def Introduction():
    """Ask the user which data set to analyze and return its file names.

    Prints the intro message, prompts until one of the built-in choices or
    'Custom' is entered, and for 'Custom' asks for the two file names.

    Returns:
        A ``(daily_data, monthly_data)`` tuple of file names. The files are
        not opened here.
    """
    print("Hello. This program analyzes the relationship between the number of assets in a portfolio, and the average standard deviation of that portfolio.")

    choice = input(_MENU_PROMPT)
    while choice not in _DATA_FILES and choice != _CUSTOM_CHOICE:
        choice = input('Please make a valid selection. \n')

    if choice == _CUSTOM_CHOICE:
        daily_data = input('Enter filename for daily data: ')
        monthly_data = input('Enter filename for monthly data: ')
    else:
        daily_data, monthly_data = _DATA_FILES[choice]

    print("processing...")
    return daily_data, monthly_data


def _read_return_table(path, missing_value=None):
    """Load a tab-separated table into ``{column title: list of values}``.

    The first line supplies the column titles. Values in the 'Dates' column
    are kept as raw strings; every other value has a trailing '%' removed and
    is converted to ``float``. Blank lines are skipped.

    Args:
        path: File name of the table.
        missing_value: When not ``None``, the value stored for fields equal
            to '#N/A N/A'. When ``None`` such fields are treated as invalid.

    Raises:
        ValueError: On duplicate titles, short rows, or non-numeric values.
        OSError: If the file cannot be opened.
    """
    with open(path, 'r') as handle:
        titles = handle.readline().rstrip('\n').split('\t')
        columns = {title: [] for title in titles}
        if len(columns) != len(titles):
            raise ValueError(f"{path}: duplicate column titles in header")

        # Data starts on the second physical line of the file.
        for line_number, raw_line in enumerate(handle, start=2):
            row = raw_line.rstrip('\n')
            if not row.strip():
                continue
            fields = row.split('\t')
            if len(fields) < len(titles):
                raise ValueError(
                    f"{path}, line {line_number}: expected {len(titles)} "
                    f"fields, found {len(fields)}"
                )
            for title, field in zip(titles, fields):
                if title == _DATE_COLUMN:
                    columns[title].append(field)
                    continue
                # Strip the percent sign per field so every column is handled,
                # not only the last one on the line.
                text = field.strip().rstrip('%')
                if missing_value is not None and text == _MISSING_MARKER:
                    columns[title].append(missing_value)
                    continue
                try:
                    columns[title].append(float(text))
                except ValueError as err:
                    raise ValueError(
                        f"{path}, line {line_number}: column {title!r} has "
                        f"non-numeric value {field!r}"
                    ) from err
    return columns


def _prompt_for_trials():
    """Prompt until the user enters an integer between one and a billion."""
    while True:
        try:
            trials = int(input("Trials:"))
            while trials < _MIN_TRIALS or trials > _MAX_TRIALS:
                trials = int(input("Trials must be an integer between one and a billion. \n(Good luck running a billion trials without a supercomputer)\nTrials: "))
        except ValueError:
            print("Value must be an integer.")
        else:
            return trials


def _average_portfolio_risk(covariance, trials):
    """Average the standard deviation of random equally weighted portfolios.

    For every portfolio size from 1 to the number of assets, draws ``trials``
    independent random portfolios of that size and averages their standard
    deviations.

    Returns:
        ``(sizes, averages)``: the portfolio sizes and the matching averages.
    """
    asset_count = covariance.shape[0]
    all_assets = range(asset_count)
    sizes = []
    averages = []
    for size in range(1, asset_count + 1):
        # Equal weights: every covariance term is scaled by (1/size)**2.
        weight_squared = (1 / size) ** 2
        trial_results = []
        for _ in range(trials):
            # A fresh draw per trial; reusing one set of picks would make
            # every trial at this size identical.
            picks = random.sample(all_assets, size)
            variance = covariance[np.ix_(picks, picks)].sum() * weight_squared
            # Rounding can push a zero variance slightly below zero.
            trial_results.append(math.sqrt(max(variance, 0.0)))
        sizes.append(size)
        averages.append(sum(trial_results) / trials)
    return sizes, averages


def relationship_analysis(daily_data, monthly_data, trials=None):
    """Simulate how average portfolio risk changes with the number of assets.

    The daily data is used to derive the correlations between the assets.
    The monthly data is used to derive each asset's annualized standard
    deviation (``stdev * sqrt(12) / 100``), which converts the correlation
    matrix into a covariance matrix. Random equally weighted portfolios are
    then simulated for every possible number of assets.

    Args:
        daily_data: File name of the tab-separated daily data.
        monthly_data: File name of the tab-separated monthly data. Its asset
            columns are matched to the daily columns by position, so both
            files are expected to list the assets in the same order.
        trials: Number of random portfolios per portfolio size. When ``None``
            (the default) the user is prompted for it.

    Returns:
        ``(number_of_stocks_in_portfolio, standard_deviations)``: the x values
        ``1..N`` and the average portfolio standard deviation for each.

    Raises:
        ValueError: If a file is malformed, the two files disagree on the
            number of assets, or ``trials`` is out of range.
        statistics.StatisticsError: If an asset has fewer than two monthly
            observations.
    """
    daily = _read_return_table(daily_data)
    asset_names = [title for title in daily if title != _DATE_COLUMN]
    if not asset_names or not daily[asset_names[0]]:
        raise ValueError(f"{daily_data}: no asset data found")
    # corrcoef returns a 0-d array for a single asset; keep it a matrix.
    correlations = np.atleast_2d(
        np.corrcoef([daily[name] for name in asset_names])
    )

    # Missing monthly observations are counted as a zero return.
    monthly = _read_return_table(monthly_data, missing_value=0.0)
    monthly.pop(_DATE_COLUMN, None)
    annualized = [
        statistics.stdev(values) * math.sqrt(12) / 100
        for values in monthly.values()
    ]
    if len(annualized) != correlations.shape[0]:
        raise ValueError(
            "daily and monthly data must contain the same number of assets"
        )

    # cov[i][j] = corr[i][j] * stdev[i] * stdev[j]
    stdvs = np.array(annualized)
    covariance = correlations * np.outer(stdvs, stdvs)

    if trials is None:
        print(_TRIALS_EXPLANATION)
        trials = _prompt_for_trials()
        print("Thank you. Analysis in progress, this may take a while...")
    elif (isinstance(trials, bool) or not isinstance(trials, int)
            or not _MIN_TRIALS <= trials <= _MAX_TRIALS):
        raise ValueError("trials must be an integer between one and a billion")

    return _average_portfolio_risk(covariance, trials)


def graph_results(number_of_stocks_in_portfolio, standard_deviations):
    """Show a scatterplot of average standard deviation against asset count.

    Args:
        number_of_stocks_in_portfolio: The x values.
        standard_deviations: The y values.
    """
    # Imported here so the analysis itself can be used without matplotlib.
    import matplotlib.pyplot as plt

    plt.style.use('ggplot')
    plt.ylabel('Standard Deviation')
    plt.xlabel('Number of Assets in Portfolio')
    plt.title('Diversification and Portfolio Risk')
    plt.scatter(number_of_stocks_in_portfolio, standard_deviations)
    plt.show()


def main():
    """Run the interactive program: choose data, analyze it, plot the result.

    Re-prompts for file names until both files exist, then runs the analysis
    and the plot. Any error raised by those two steps is reported to the user
    instead of propagating.
    """
    daily_data, monthly_data = Introduction()

    while True:
        try:
            # Only checks that both files can be opened; closes them at once.
            with open(daily_data, 'r'), open(monthly_data, 'r'):
                pass
        except FileNotFoundError:
            print("Make sure you have the correct filenames for custom entries. ")
            daily_data = input('Enter filename for daily data: ')
            monthly_data = input('Enter filename for monthly data: ')
        else:
            break

    try:
        number_of_stocks_in_portfolio, standard_deviations = relationship_analysis(daily_data, monthly_data)
        graph_results(number_of_stocks_in_portfolio, standard_deviations)
        print("Analysis complete.")
    except Exception as err:  # top-level boundary: report, do not crash
        print("Make sure custom data is correctly formatted.")
        print(f"Details: {type(err).__name__}: {err}")


if __name__ == "__main__":
    main()
Python
Sample Output