Drag Image to Reposition

Diversification and Portfolio Risk

Feel free to download the bundle below!

Diversification & Portfolio Risk.zip

26507.7KB

The Code

#The purpose of this program is to analyze the relationship between the number of assets in a portfolio,
  #and the average standard deviation of that portfolio. Users are prompted to choose between 5 different
  #asset classes, but can also analyze custom data sets.
  
  
  
  
  def Introduction():
      """Ask the user which data set to analyze and return its two filenames.

      The user is prompted until one of the recognised selections is typed.
      Preset selections map to fixed filenames; 'Custom' asks the user for
      both filenames. Nothing is opened or read here.

      Returns:
          A (daily_data, monthly_data) tuple of filename strings.
      """
      # Preset selection -> (daily returns file, monthly returns file).
      preset_files = {
          'SP500': ('SP 500 daily.txt', 'SP 500 monthly.txt'),
          'REIT': ('REIT daily.txt', 'REIT monthly.txt'),
          '200 ETFs': ('200 daily.txt', '200 monthly.txt'),
          'Nasdaq ETFs': ('Nasdaq daily.txt', 'Nasdaq monthly.txt'),
          '3 ETFs': ('Three daily.txt', 'Three monthly.txt'),
      }

      # Greeting, then the menu of selections.
      print("Hello. This program analyzes the relationship between the number of assets in a portfolio, and the average standard deviation of that portfolio.")
      selection = input("Please choose between the following asset classes to analyze:\n\nFor a representative sample of the stock market over the last 20 years, enter\033[1m 'SP500'\033[0m (This will use the oldest 400 securities in the S&P500). \nFor the 200 largest Exchange Traded Funds, diversified by asset class, enter\033[1m '200 ETFs'\033[0m.\nFor all of the ETFs traded on the Nasdaq that are at least 5 years old, enter\033[1m 'Nasdaq ETFs'\033[0m. *WARNING* This is an extremely large data set. Analysis is extremely CPU intensive and could take an extended period of time.\nFor a small-scale relationship analysis of only 3 ETFs, enter\033[1m '3 ETFs'\033[0m.\nFor the 200 largest Real Estate Investment Trusts, enter\033[1m 'REIT'\033[0m.\n")

      # Keep asking until the answer is a preset name or 'Custom'.
      while selection != 'Custom' and selection not in preset_files:
          selection = input('Please make a valid selection. ')

      if selection == 'Custom':
          # Custom data: the user supplies both filenames.
          daily_data = input('Enter filename for daily data: ')
          monthly_data = input('Enter filename for monthly data: ')
      else:
          daily_data, monthly_data = preset_files[selection]

      print("processing...")
      return daily_data, monthly_data
  def relationship_analysis(daily_data, monthly_data):
      """Simulate average portfolio standard deviation for every portfolio size.

      Daily returns are used to build the correlation matrix between assets.
      Monthly returns are used to compute each asset's annualized standard
      deviation (monthly stdev * sqrt(12) / 100). The two are combined into a
      covariance matrix. The user is then asked how many trials to run per
      portfolio size; each trial draws a fresh random, equally weighted
      portfolio of n distinct assets and records its standard deviation.

      NOTE(review): assets are matched between the two files by column
      position, not by title, so both files are assumed to list the same
      assets in the same order - confirm for custom data.

      Args:
          daily_data: path of the tab-separated daily returns file.
          monthly_data: path of the tab-separated monthly returns file.

      Returns:
          (number_of_stocks_in_portfolio, standard_deviations): the x values
          1..N and, for each, the mean portfolio standard deviation over all
          trials.

      Raises:
          OSError: if either file cannot be opened.
          ValueError: if the data is malformed (short rows, no asset columns,
              a column whose standard deviation cannot be computed, or a
              different number of assets in the two files).
      """
      import math
      import random
      from statistics import StatisticsError, stdev

      import numpy as np

      # --- Correlations from the daily data -------------------------------
      daily_titles, daily = _read_returns_table(daily_data)
      matrix_input = [daily[title] for title in daily_titles if title != 'Dates']
      if not matrix_input:
          raise ValueError("daily data contains no asset columns")
      # corrcoef collapses to a scalar for a single asset; keep it 2-D.
      comatrix = np.atleast_2d(np.corrcoef(matrix_input))

      # --- Annualized standard deviations from the monthly data -----------
      # Missing monthly observations ('#N/A N/A') are counted as a 0 return.
      _, monthly = _read_returns_table(monthly_data, missing_value=0)
      monthly.pop('Dates', None)  # the date column is not an asset
      stdvs = []
      for title, returns in monthly.items():
          try:
              stdvs.append(stdev(returns) * math.sqrt(12) / 100)
          except (StatisticsError, TypeError, ValueError) as error:
              # Do not carry a raw list forward in place of a number: it
              # would silently corrupt the covariance matrix below.
              print("There was an error.")
              raise ValueError(
                  "could not compute a standard deviation for column %r" % title
              ) from error

      # --- Covariance matrix ----------------------------------------------
      if len(stdvs) != comatrix.shape[0]:
          raise ValueError(
              "daily data has %d assets but monthly data has %d"
              % (comatrix.shape[0], len(stdvs))
          )
      deviations = np.array(stdvs)
      # cov[i][j] = corr[i][j] * sd[i] * sd[j]
      cvmatrix = comatrix * np.outer(deviations, deviations)

      # --- The simulation ---------------------------------------------------
      print("Please enter the number of trials you would like to run for each number of assets. The more iterations per value of n, the more accurate the relationship will be, but the longer it will take to run. \nI suggest using smaller values for the larger data sets. Running the analysis on all Nasdaq ETFs with 10+ trials could take hundreds of hours to process on a high-performance computer.\n")
      trials = _prompt_trials()
      print("Thank you. Analysis in progress, this may take a while...")

      asset_count = len(stdvs)
      number_of_stocks_in_portfolio = []
      standard_deviations = []
      for size in range(1, asset_count + 1):
          number_of_stocks_in_portfolio.append(size)
          weight = 1 / size  # equally weighted portfolio
          trial_results = []
          for _ in range(trials):
              # Fresh random portfolio for every trial; reusing one selection
              # would make all trials for this size identical.
              picks = random.sample(range(asset_count), size)
              # Portfolio variance = sum of w_i * w_j * cov[i][j] over picks.
              variance = cvmatrix[np.ix_(picks, picks)].sum() * weight ** 2
              # Guard against a tiny negative value from rounding.
              trial_results.append(math.sqrt(max(variance, 0.0)))
          standard_deviations.append(sum(trial_results) / len(trial_results))

      return number_of_stocks_in_portfolio, standard_deviations  # x and y values


  def _read_returns_table(path, missing_value=None):
      """Read a tab-separated table with a header row into per-column lists.

      Each field is converted to float when possible and kept as a string
      otherwise (e.g. the date column). When missing_value is not None, the
      marker '#N/A N/A' is stored as missing_value instead. Blank lines are
      skipped.

      Returns:
          (titles, table): the header titles in file order, and a dict that
          maps each title to the list of values in that column.
      """
      with open(path, 'r') as handle:
          titles = [title.rstrip('\n') for title in handle.readline().split('\t')]
          table = {title: [] for title in titles}
          columns = list(table)  # unique titles, in file order
          for raw_line in handle:
              fields = raw_line.rstrip('\n').rstrip('%').split('\t')
              if fields == ['']:
                  continue  # tolerate blank lines, e.g. at the end of the file
              if len(fields) < len(columns):
                  raise ValueError(
                      "row in %r has %d fields, expected %d"
                      % (path, len(fields), len(columns))
                  )
              for title, field in zip(columns, fields):
                  if missing_value is not None and field == '#N/A N/A':
                      table[title].append(missing_value)
                      continue
                  try:
                      table[title].append(float(field))
                  except ValueError:
                      table[title].append(field)  # non-numeric, e.g. a date
      return titles, table


  def _prompt_trials():
      """Prompt until the user enters an integer number of trials in 1..1e9."""
      while True:
          try:
              trials = int(input("Trials:"))
              while trials < 1 or trials > 1000000000:
                  trials = int(input("Trials must be an integer between one and a billion. (Good luck running a billion trials without a supercomputer)\nTrials: "))
          except ValueError:
              print("Value must be an integer.")
          else:
              return trials
  def graph_results(number_of_stocks_in_portfolio, standard_deviations):
      """Show a scatter plot of average standard deviation against portfolio size.

      Args:
          number_of_stocks_in_portfolio: x values passed to the scatter plot.
          standard_deviations: y values passed to the scatter plot.
      """
      # Plotting packages are imported locally, where they are needed.
      import matplotlib
      from matplotlib import pyplot

      matplotlib.style.use('ggplot')

      # Plot the points first, then label the figure.
      pyplot.scatter(number_of_stocks_in_portfolio, standard_deviations)
      pyplot.title('Diversification and Portfolio Risk')
      pyplot.xlabel('Number of Assets in Portfolio')
      pyplot.ylabel('Standard Deviation')

      pyplot.show()  # display the finished figure
  def main():
      """Run the program: choose data, validate the files, analyze, and plot.

      Calls Introduction() for the two filenames, re-prompts until both files
      can be opened, then runs relationship_analysis() and graph_results().
      Any exception raised during analysis or plotting is reported to the
      user instead of propagating.
      """
      daily_data, monthly_data = Introduction()

      # Validate the files (relevant for Custom entries).
      while True:
          try:
              # Open both files only to prove they are readable; the context
              # manager closes them again immediately.
              with open(daily_data, 'r'), open(monthly_data, 'r'):
                  pass
          except OSError:  # missing, unreadable, or not a regular file
              print("Make sure you have the correct filenames for custom entries. ")
              daily_data = input('Enter filename for daily data: ')
              monthly_data = input('Enter filename for monthly data: ')
          else:
              break

      try:
          number_of_stocks_in_portfolio, standard_deviations = relationship_analysis(daily_data, monthly_data)
          graph_results(number_of_stocks_in_portfolio, standard_deviations)
          print("Analysis complete.")
      except Exception as error:  # top-level boundary: report, do not crash
          print("Make sure custom data is correctly formatted.")
          # Show the underlying cause so unrelated failures are not hidden.
          print("(" + type(error).__name__ + ": " + str(error) + ")")
  # Run the interactive program only when executed as a script, so that
  # importing this module has no side effects.
  if __name__ == "__main__":
      main()
                        
                                      
                      

Python

Sample Output