import re
import datetime
import glob
import os
from bs4 import BeautifulSoup


def tag_content_all(content, tag_to_search):
    """Return a list of every tag in *content* matching *tag_to_search*.

    *content* is parsed with the ``html.parser`` backend. *tag_to_search* is
    compiled as a regular expression before being handed to ``find_all``, so
    it is treated as a pattern rather than a literal tag name.
    """
    html_soup = BeautifulSoup(content, 'html.parser')
    return list(html_soup.find_all(re.compile(tag_to_search)))


def tag_content_precise(content, tag_to_search, class_name):
    """Return all *tag_to_search* tags in *content* carrying class *class_name*.

    The result is whatever ``find_all`` returns (possibly empty); callers
    index it with ``[0]`` because the class names used here are expected to
    be unique within each file.
    """
    html_soup = BeautifulSoup(content, 'html.parser')
    return html_soup.find_all(tag_to_search, class_=class_name)


def _title_prefix(center_tags):
    """Return the two characters right after the first ">" of the title lookup.

    The ``mon_title`` lookup result is stringified as a whole, and the two
    characters following the first ">" are used to tell the days apart.
    Raises ValueError (from ``str.index``) if no ">" is present.
    """
    title = str(tag_content_precise(str(center_tags), "div", "mon_title"))
    start = title.index(">") + 1
    return title[start:start + 2]


##Load local *.htm file in a list
os.chdir("Klasse")
aktuellerplan_list = []
for file in sorted(glob.glob("*00*.htm")):
    with open(file, "r") as f:
        aktuellerplan_list.append(f.read())

##Get style-sheet:
style_old = str(tag_content_all(aktuellerplan_list[0], "style")[0])
#replaces bg-colors for the table
style_old = style_old.replace("fad3a6", "ffffff")
style_old = style_old.replace("fdecd9", "e3e3e3")
# NOTE(review): both arguments are empty strings, so this call is a no-op as
# written; a markup literal may have been lost here - confirm.
style_old = style_old.replace("","")
# NOTE(review): the last appended literal is only a newline; if the call above
# was meant to strip a closing tag, it may need to be re-added here - confirm.
style_new = style_old + "".join((
    "#overview{width:100%;}\n",
    "html{font-size: 115%;}\n",
    ".day{border: 2px;border-style:solid;border-collapse:collapse;width:100%;}\n",
    "td.list{width:14%;}\n",
    "td.day_td{vertical-align: top;width: 50%;}\n",
    ".inner{\n",
    " position:relative;\n",
    " top:0px;\n",
    "}\n",
    ".outer{\n",
    " overflow:hidden;\n",
    "}\n",
    "\n",
))

##Gets everything inside center, ie. title, info and actual content
plan_list_inter = [tag_content_all(page, "center") for page in aktuellerplan_list]

##Get number of LE / day:
# Pages whose title prefix equals the first page's prefix are counted as
# belonging to the first day.
first_title = _title_prefix(plan_list_inter[0])
nb_day_one = sum(
    1 for center_tags in plan_list_inter
    if _title_prefix(center_tags) == first_title
)

##Gets title of each day (weekday+type of week)
days_title_list = [
    str(tag_content_precise(str(plan_list_inter[0]), "div", "mon_title")[0]),
    str(tag_content_precise(str(plan_list_inter[-1]), "div", "mon_title")[0]),
]
# Keep everything up to and including the 7 characters starting at "Woche".
# NOTE(review): the appended literal is empty as written; the slice drops the
# tail of the tag, so a closing tag may have been lost here - confirm.
days_title_list = [
    title[:title.index("Woche") + 7] + "" for title in days_title_list
]

##Makes two sep. lists for each day. Each LE is one part of the final table
#The sub-lists contain every appearence of "center" ->take 1rst one, ie. [0]
days_table_list = [
    [
        tag_content_precise(str(center_tags[0]), "table", "mon_list")
        for center_tags in plan_list_inter[:nb_day_one]
    ],
    [
        tag_content_precise(str(center_tags[0]), "table", "mon_list")
        for center_tags in plan_list_inter[nb_day_one:]
    ],
]

##Gets the info for each day
# The info table may be missing for a day, so an empty lookup falls back to ""
# instead of hiding every possible error behind a bare except.
days_info_list = []
for center_tags in (plan_list_inter[0], plan_list_inter[nb_day_one]):
    info_tables = tag_content_precise(str(center_tags[0]), "table", "info")
    days_info_list.append(str(info_tables[0]) if info_tables else "")

##Removes obsolete html on each LE (eg.
the th) for i in range(2): old_TE = str(days_table_list[i][0][0]) new_TE = old_TE[old_TE.index("mon_list")+11:old_TE.index("/table")-1] days_table_list[i][0]=new_TE for j in range(1,len(days_table_list[i])): old_TE = str(days_table_list[i][j][0]) new_TE = old_TE[old_TE.index("mon_list")+11:old_TE.index("/table")-1] #just removing
\n | \n\n | \n
\n | \n\n | \n
Letztes update: " + str(datetime.datetime.now().strftime("%d/%m/%y - %H:%M")) + "
\n" end_html += "\n" ##Writes to file (located in dir. "Klasse") with open("index.html","w") as f: f.write(end_html)