Python Projects
Other Python Projects
Moving Average Algorithm & Visualization
Diversification & Porfolio Risk
Minimization Algorithm & Data Structuring
Drag Image to Reposition
Excel Automation
This is a program that takes un-clean data in excel, and turns it into a normalized data table. I use the package OpenPyXL to control excel workbooks from an IDE.
Data Pre-Processing
Data Post-Processing
Feel free to download the files and look at the code!
Normalization Code
from openpyxl import load_workbook # set file path filepath="/Hot_Spot_Data.xlsx" # load file wb=load_workbook(filepath) # select demo.xlsx # sheet=wb.active sheet=wb['Raw'] # get max row count max_row = sheet.max_row # get max column count max_column = sheet.max_column if not 'Out 1' in wb.sheetnames: print('Out 1 doesnt exist') wb.create_sheet('Out 1') else: wb.remove(wb['Out 1']) wb.create_sheet('Out 1') sheet2 = wb['Out 1'] # accomadate for multiple offers, kick out all strikethroughs, dump all into rowlist #################################################### # iterate over all cells # iterate over all rows rowlist = [] for i in range(1, max_row + 1): contentslist = [] # iterate over all columns for j in range(1, max_column + 1): # get particular cell value cell_obj = sheet.cell(row=i, column=j) cell_out = sheet2.cell(row=i, column=j) # print cell value if not cell_obj.font.strike: cell_obj = str(cell_obj.value).replace(" ","").replace("\n","").replace(",","") # print(cell_obj, end=' | ') # print(len(str(cell_obj)), end = "") cell_out.value = cell_obj.replace("/","") if not j == max_column: if cell_obj == "Pg3": cell_obj = "3" contentslist.append(cell_obj) elif not contentslist[0].find("/") == -1 and j == max_column: # if there is a "/" in the first column, and the iteration is at column 5 cell_obj = cell_obj.replace("/","") contentslist.append(cell_obj) contentslist2 = [] + contentslist contentslist[0] = contentslist[0][:3] contentslist2[0] = contentslist2[0][4:] if len(contentslist) == 5: rowlist.append(contentslist) if len(contentslist2) == 5: rowlist.append(contentslist2) contentslist = [] elif j == max_column: cell_obj = cell_obj.replace("/", "") contentslist.append(cell_obj) if len(contentslist) == 5: rowlist.append(contentslist) contentslist = [] # Dump contents of rowlist into ['Out 1'] ########################################################################## for i in range(0, len(rowlist)): #interate over all columns for j in range(0, len(rowlist[0])): if len(rowlist[i]) == 5: cell_out = sheet2.cell(row=i+1, column=j+1) cell_out.value = rowlist[i][j] ######################################################################################################################## #Process Out1 -> Dump to Out 2 ######################################################################################################################## #1. Process if not 'Out 2' in wb.sheetnames: print('Out 2 doesnt exist') wb.create_sheet('Out 2') else: wb.remove(wb['Out 2']) wb.create_sheet('Out 2') sheet3 = wb['Out 2'] # for row in sheet3['A1:Z999999']: # for cell in row: # cell.value = None # get max row count max_row = sheet2.max_row # get max column count max_column = sheet2.max_column # iterate over all cells # iterate over all rows rowlist = [] for i in range(1, max_row + 1): # contentslist = [] # iterate over all columns for j in range(1, max_column + 1): # get particular cell value cell_out1 = sheet2.cell(row=i, column=j) # cell_out2 = sheet3.cell(row=i, column=j) cell_out1 = cell_out1.value contentslist.append(cell_out1) if j == max_column and len(contentslist[max_column-1]) > 5: if len(contentslist[max_column-1]) % 5 == 0: if len(contentslist[max_column-1]) >= 10: contentslist2 = [] + contentslist contentslist1 = [] + contentslist contentslist1[4] = contentslist[4][:5] contentslist2[4] = contentslist[4][5:10] rowlist.append(contentslist1) rowlist.append(contentslist2) if len(contentslist[max_column - 1]) >= 15: contentslist3 = [] + contentslist contentslist3[4] = contentslist[4][10:15] rowlist.append(contentslist3) if len(contentslist[max_column - 1]) >= 20: contentslist4 = [] + contentslist contentslist4[4] = contentslist[4][15:20] rowlist.append(contentslist4) if len(contentslist[max_column - 1]) >= 25: contentslist5 = [] + contentslist contentslist5[4] = contentslist[4][20:25] rowlist.append(contentslist5) if len(contentslist[max_column - 1]) >= 30: contentslist6 = [] + contentslist contentslist6[4] = contentslist[4][25:30] rowlist.append(contentslist6) if len(contentslist[max_column - 1]) >= 35: contentslist7 = [] + contentslist contentslist7[4] = contentslist[4][30:35] rowlist.append(contentslist7) if len(contentslist[max_column - 1]) >= 40: contentslist8 = [] + contentslist contentslist8[4] = contentslist[4][35:40] rowlist.append(contentslist8) if len(contentslist[max_column - 1]) >= 45: contentslist9 = [] + contentslist contentslist9[4] = contentslist[4][40:45] rowlist.append(contentslist9) if len(contentslist[max_column - 1]) >= 50: contentslist10 = [] + contentslist contentslist10[4] = contentslist[4][45:50] rowlist.append(contentslist10) if len(contentslist[max_column - 1]) >= 55: contentslist11 = [] + contentslist contentslist11[4] = contentslist[4][50:55] rowlist.append(contentslist11) if len(contentslist[max_column - 1]) >= 60: contentslist12 = [] + contentslist contentslist12[4] = contentslist[4][55:60] rowlist.append(contentslist12) if len(contentslist[max_column - 1]) >= 65: contentslist13 = [] + contentslist contentslist13[4] = contentslist[4][60:65] rowlist.append(contentslist13) if len(contentslist[max_column - 1]) >= 70: contentslist14 = [] + contentslist contentslist14[4] = contentslist[4][65:70] rowlist.append(contentslist14) if len(contentslist[max_column - 1]) >= 75: contentslist15 = [] + contentslist contentslist15[4] = contentslist[4][70:75] rowlist.append(contentslist15) if len(contentslist[max_column - 1]) >= 80: contentslist16 = [] + contentslist contentslist16[4] = contentslist[4][75:80] rowlist.append(contentslist16) if len(contentslist[max_column - 1]) >= 85: contentslist17 = [] + contentslist contentslist17[4] = contentslist[4][80:85] rowlist.append(contentslist17) if len(contentslist[max_column - 1]) >= 90: contentslist18 = [] + contentslist contentslist18[4] = contentslist[4][85:90] rowlist.append(contentslist18) if len(contentslist[max_column - 1]) >= 95: contentslist19 = [] + contentslist contentslist19[4] = contentslist[4][90:95] rowlist.append(contentslist19) if len(contentslist[max_column - 1]) >= 100: contentslist20 = [] + contentslist contentslist20[4] = contentslist[4][95:100] rowlist.append(contentslist20) elif contentslist[max_column-1] == "OfferProductId": rowlist.append(contentslist) contentslist = [] contentslist1 = [] contentslist2 = [] contentslist3 = [] contentslist4 = [] contentslist5 = [] contentslist6 = [] contentslist7 = [] contentslist8 = [] contentslist9 = [] contentslist10 = [] contentslist11 = [] contentslist12 = [] contentslist13 = [] contentslist14 = [] contentslist15 = [] contentslist16 = [] contentslist17 = [] contentslist18 = [] contentslist19 = [] contentslist20 = [] elif j == max_column: rowlist.append(contentslist) contentslist = [] ######################################################################################################################## #2. Dump for i in range(0, len(rowlist)): #interate over all columns for j in range(0, len(rowlist[0])): if len(rowlist[i]) == 5: cell_out2 = sheet3.cell(row=i+1, column=j+1) cell_out2.value = rowlist[i][j] wb.save(filepath)
Python