"""Aggregate personnel-cost drawdown of CTU (ČVUT) department employees.

Input workbooks are exported from mis.cvut.cz -> Manažerský IS ->
Mzdové sestavy -> Rekapitulace čerpání mezd -> icon "Export zdrojových dat
do MS Excelu" and have to be renamed to ``<filename_prefix><year>.xls``.

For every distinct AKCE value one ``<excel_file_name>_<AKCE>.xlsx`` workbook
is written containing the sheets SUMY, LIDE and one sheet per employee.
"""

import math
import os
import re
import time
import warnings

import pandas as pd

# First and last year probed for input workbooks, e.g.
# verso_mis_p_rek_cerp_m_table_2007.xls ... verso_mis_p_rek_cerp_m_table_2023.xls
year_start = 2007
year_end = 2030

# File-name prefix; "verso_mis_p_rek_cerp_m_table" is the default export name.
filename_prefix = "verso_mis_p_rek_cerp_m_table_"

# TA numbers identifying what is drawn from the department (the rest counts
# as "projects"); currently unused.
# group_department = [101, 122, 888]

# Base name of the generated xlsx workbooks.
excel_file_name = 'output'

# Silence pandas warnings about non-optimal DataFrame usage.
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

# Single row 'Y' with one boolean column per probed year (True = file found).
years = pd.DataFrame()

# Filled in by main(); read by zpracuj_AKCE().
bl = None
empl = None
zakz = None

_DROPPED_COLUMNS = ["'A'", "DIVIZE", "IDPRAC", "KOD", "KPL", "OSC_PV"]
_GROUP_COLUMNS = ['ROK', 'MESIC', 'OSC', 'PRIJMENI', 'JMENO', 'DRUHPOM',
                  'IDZAKAZKY', 'TA', 'AKCE']
_INVALID_SHEET_CHARS = re.compile(r"[\[\]:*?/\\]")
_MAX_SHEET_NAME = 31


def read_data_from_xls():
    """Load every yearly workbook that exists and return the grouped records.

    Probes ``<filename_prefix><year>.xls`` for each year in
    ``year_start..year_end``, records which years were found in the global
    ``years`` frame, tags the rows with ROK (the year) and OSC (floor of
    OSC_PV), drops the unused columns and sums the remaining columns per
    unique combination of the grouping columns.

    Raises:
        FileNotFoundError: if no input workbook exists at all.
    """
    global years
    frames = []
    found = {}
    for year in range(year_start, year_end + 1):
        filename = filename_prefix + str(year) + ".xls"
        found[year] = os.path.exists(filename)
        if not found[year]:
            continue
        print(f"The file '{filename}' exists")
        df = pd.read_excel(filename, sheet_name="Sheet1")
        df['ROK'] = year
        df['OSC'] = df['OSC_PV'].map(math.floor)
        frames.append(df)
    years = pd.DataFrame(found, index=['Y'])
    if not frames:
        raise FileNotFoundError(
            f"no input workbook '{filename_prefix}<year>.xls' found for "
            f"years {year_start}-{year_end}"
        )
    records = pd.concat(frames).drop(columns=_DROPPED_COLUMNS)
    return records.groupby(_GROUP_COLUMNS).sum().reset_index()


def read_empl(bl):
    """Return one row per unique OSC (personal number), sorted by PRIJMENI.

    Columns: 0 and OSC (the personal number), PRIJMENI, JMENO and DRUHPOM
    taken from the employee's first record, and IDX, the 0-based position in
    the sorted order. The sort is stable, so equal surnames keep the order of
    first appearance in ``bl``.
    """
    first_rows = bl.drop_duplicates(subset='OSC', keep='first')
    empl = pd.DataFrame({
        0: first_rows['OSC'].to_numpy(),
        'OSC': first_rows['OSC'].to_numpy(),
        'PRIJMENI': first_rows['PRIJMENI'].to_numpy(),
        'JMENO': first_rows['JMENO'].to_numpy(),
        'DRUHPOM': first_rows['DRUHPOM'].astype(int).to_numpy(),
    })
    empl = empl.sort_values(by='PRIJMENI', kind='stable')
    empl['IDX'] = range(len(empl))
    return empl


def read_zakz_from_bl(bl):
    """Return one row per unique IDZAKAZKY, sorted by TA (as text).

    Columns: 0 (the IDZAKAZKY value), TA and AKCE as strings taken from the
    first record of that IDZAKAZKY, and IDX, the 0-based position in the
    sorted order.
    """
    first_rows = bl.drop_duplicates(subset='IDZAKAZKY', keep='first')
    zakz = pd.DataFrame({
        0: first_rows['IDZAKAZKY'].to_numpy(),
        'TA': first_rows['TA'].map(str).to_numpy(),
        'AKCE': first_rows['AKCE'].map(str).to_numpy(),
    })
    zakz = zakz.sort_values(by='TA', kind='stable')
    zakz['IDX'] = range(len(zakz))
    return zakz


def _present_years():
    """Return the years flagged True in the global ``years`` frame."""
    if 'Y' not in years.index:
        return []
    return [year for year in years.columns if bool(years.at['Y', year])]


def _castka_totals(frame, keys):
    """Sum CASTKA per ``keys`` + ROK and per ``keys`` + ROK + MESIC.

    Returns two dicts keyed by tuples ``(*keys, year)`` and
    ``(*keys, year, month)``.
    """
    def summed(columns):
        series = frame.groupby(columns)['CASTKA'].sum()
        # A single grouping column yields scalar labels; normalise to tuples.
        return {key if isinstance(key, tuple) else (key,): value
                for key, value in series.items()}

    return summed(keys + ['ROK']), summed(keys + ['ROK', 'MESIC'])


def _period_row(labels, yearly, monthly, key, present_years, month_separators):
    """Build one output row: labels, yearly totals, then monthly totals.

    ``EMPTY1`` separates the yearly from the monthly columns; with
    ``month_separators`` an ``EMPTY_<year - year_start>`` column follows each
    year's twelve month columns.
    """
    row = dict(labels)
    for year in present_years:
        row[str(year)] = yearly.get(key + (year,), 0)
    row['EMPTY1'] = ""
    for year in present_years:
        for month in range(1, 13):
            row[str(year) + "_" + str(month)] = monthly.get(key + (year, month), 0)
        if month_separators:
            row['EMPTY_' + str(year - year_start)] = ""
    return row


def _unique_sheet_name(base, used):
    """Return an Excel-safe sheet name for ``base`` that is not in ``used``.

    Forbidden characters are replaced by ``_``, the name is cut to 31
    characters and a numeric suffix is appended on (case-insensitive)
    collisions. ``used`` holds lower-cased names and is updated in place.
    """
    name = _INVALID_SHEET_CHARS.sub("_", base)[:_MAX_SHEET_NAME].strip("'") or "sheet"
    candidate = name
    counter = 2
    while candidate.lower() in used:
        suffix = "_" + str(counter)
        candidate = name[:_MAX_SHEET_NAME - len(suffix)] + suffix
        counter += 1
    used.add(candidate.lower())
    return candidate


def build_sheets(AKCE, bl, empl, zakz, present_years):
    """Yield ``(sheet_name, DataFrame)`` pairs of the workbook for one AKCE.

    Args:
        AKCE: string form of the AKCE value to report.
        bl: grouped records as returned by read_data_from_xls().
        empl: employee table as returned by read_empl().
        zakz: order table as returned by read_zakz_from_bl().
        present_years: years to emit columns for.

    Sheets, in order: SUMY (totals per IDZAKAZKY whose AKCE matches, followed
    by one blank row), LIDE (overall totals of records with this AKCE, a blank
    row, then one row per employee) and one sheet per employee with that
    employee's totals per matching IDZAKAZKY.
    """
    akce_zakz = zakz[zakz['AKCE'] == AKCE]
    orders = list(zip(akce_zakz[0], akce_zakz['TA'], akce_zakz['AKCE']))
    people = list(zip(empl['OSC'], empl['PRIJMENI'], empl['JMENO']))
    order_rows = bl[bl['IDZAKAZKY'].isin(akce_zakz[0])]
    # Compare the string form so a numeric AKCE column still matches.
    akce_rows = bl[bl['AKCE'].map(str) == AKCE]

    yearly, monthly = _castka_totals(order_rows, ['IDZAKAZKY'])
    rows = [
        _period_row({'TA': ta, 'AKCE': akce}, yearly, monthly,
                    (zak_id,), present_years, False)
        for zak_id, ta, akce in orders
    ]
    rows.append({'TA': "", 'AKCE': ""})
    yield "SUMY", pd.DataFrame(rows)

    total_yearly, total_monthly = _castka_totals(akce_rows, [])
    empl_yearly, empl_monthly = _castka_totals(akce_rows, ['OSC'])
    rows = [
        _period_row({'PRIJMENI': "", 'JMENO': ""}, total_yearly,
                    total_monthly, (), present_years, True),
        {'PRIJMENI': "", 'JMENO': ""},
    ]
    rows.extend(
        _period_row({'PRIJMENI': prijmeni, 'JMENO': jmeno}, empl_yearly,
                    empl_monthly, (osc,), present_years, True)
        for osc, prijmeni, jmeno in people
    )
    yield "LIDE", pd.DataFrame(rows)

    pair_yearly, pair_monthly = _castka_totals(order_rows, ['OSC', 'IDZAKAZKY'])
    used_names = {"sumy", "lide"}
    for osc, prijmeni, jmeno in people:
        rows = [
            _period_row({'TA': ta, 'AKCE': akce}, pair_yearly, pair_monthly,
                        (osc, zak_id), present_years, True)
            for zak_id, ta, akce in orders
        ]
        sheet_name = _unique_sheet_name(str(prijmeni) + "_" + str(jmeno), used_names)
        yield sheet_name, pd.DataFrame(rows)


def zpracuj_AKCE(AKCE):
    """Write ``<excel_file_name>_<AKCE>.xlsx`` for one AKCE value.

    Reads the module-level ``bl``, ``empl``, ``zakz`` and ``years``.

    Raises:
        RuntimeError: if the module-level tables have not been loaded yet.
    """
    if bl is None or empl is None or zakz is None:
        raise RuntimeError("input data not loaded; run main() first")
    excel_file_path = excel_file_name + '_' + AKCE + '.xlsx'
    with pd.ExcelWriter(excel_file_path, engine='xlsxwriter') as writer:
        for sheet_name, table in build_sheets(AKCE, bl, empl, zakz, _present_years()):
            print("creating xls sheet " + sheet_name)
            table.to_excel(writer, index=False, sheet_name=sheet_name)


def main():
    """Load all input workbooks and export one workbook per AKCE value."""
    global bl, empl, zakz
    start_time = time.time()
    try:
        bl = read_data_from_xls()
    except FileNotFoundError as err:
        raise SystemExit(str(err))
    empl = read_empl(bl)
    zakz = read_zakz_from_bl(bl)

    vsechny_akce = pd.DataFrame(bl['AKCE'].unique())
    print(vsechny_akce)
    # dict.fromkeys de-duplicates values sharing one string form, keeping order.
    for akce in dict.fromkeys(str(value) for value in bl['AKCE'].unique()):
        zpracuj_AKCE(akce)

    elapsed_time = time.time() - start_time
    print(f"Elapsed time: {elapsed_time} seconds")


if __name__ == "__main__":
    main()
"""Aggregate personnel-cost drawdown of CTU (ČVUT) department employees.

Input workbooks are exported from mis.cvut.cz -> Manažerský IS ->
Mzdové sestavy -> Rekapitulace čerpání mezd -> icon "Export zdrojových dat
do MS Excelu" and have to be renamed to ``<filename_prefix><year>.xls``.

For every distinct AKCE value one ``<excel_file_name>_<AKCE>.xlsx`` workbook
is written containing the sheets SUMY, LIDE and one sheet per employee.
"""

import math
import os
import re
import time
import warnings

import pandas as pd

# First and last year probed for input workbooks, e.g.
# verso_mis_p_rek_cerp_m_table_2007.xls ... verso_mis_p_rek_cerp_m_table_2023.xls
year_start = 2007
year_end = 2030

# File-name prefix; "verso_mis_p_rek_cerp_m_table" is the default export name.
filename_prefix = "verso_mis_p_rek_cerp_m_table_"

# TA numbers identifying what is drawn from the department (the rest counts
# as "projects"); currently unused.
# group_department = [101, 122, 888]

# Base name of the generated xlsx workbooks.
excel_file_name = 'output'

# Silence pandas warnings about non-optimal DataFrame usage.
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

# Single row 'Y' with one boolean column per probed year (True = file found).
years = pd.DataFrame()

# Filled in by main(); read by zpracuj_AKCE().
bl = None
empl = None
zakz = None

_DROPPED_COLUMNS = ["'A'", "DIVIZE", "IDPRAC", "KOD", "KPL", "OSC_PV"]
_GROUP_COLUMNS = ['ROK', 'MESIC', 'OSC', 'PRIJMENI', 'JMENO', 'DRUHPOM',
                  'IDZAKAZKY', 'TA', 'AKCE']
_INVALID_SHEET_CHARS = re.compile(r"[\[\]:*?/\\]")
_MAX_SHEET_NAME = 31


def read_data_from_xls():
    """Load every yearly workbook that exists and return the grouped records.

    Probes ``<filename_prefix><year>.xls`` for each year in
    ``year_start..year_end``, records which years were found in the global
    ``years`` frame, tags the rows with ROK (the year) and OSC (floor of
    OSC_PV), drops the unused columns and sums the remaining columns per
    unique combination of the grouping columns.

    Raises:
        FileNotFoundError: if no input workbook exists at all.
    """
    global years
    frames = []
    found = {}
    for year in range(year_start, year_end + 1):
        filename = filename_prefix + str(year) + ".xls"
        found[year] = os.path.exists(filename)
        if not found[year]:
            continue
        print(f"The file '{filename}' exists")
        df = pd.read_excel(filename, sheet_name="Sheet1")
        df['ROK'] = year
        df['OSC'] = df['OSC_PV'].map(math.floor)
        frames.append(df)
    years = pd.DataFrame(found, index=['Y'])
    if not frames:
        raise FileNotFoundError(
            f"no input workbook '{filename_prefix}<year>.xls' found for "
            f"years {year_start}-{year_end}"
        )
    records = pd.concat(frames).drop(columns=_DROPPED_COLUMNS)
    return records.groupby(_GROUP_COLUMNS).sum().reset_index()


def read_empl(bl):
    """Return one row per unique OSC (personal number), sorted by PRIJMENI.

    Columns: 0 and OSC (the personal number), PRIJMENI, JMENO and DRUHPOM
    taken from the employee's first record, and IDX, the 0-based position in
    the sorted order. The sort is stable, so equal surnames keep the order of
    first appearance in ``bl``.
    """
    first_rows = bl.drop_duplicates(subset='OSC', keep='first')
    empl = pd.DataFrame({
        0: first_rows['OSC'].to_numpy(),
        'OSC': first_rows['OSC'].to_numpy(),
        'PRIJMENI': first_rows['PRIJMENI'].to_numpy(),
        'JMENO': first_rows['JMENO'].to_numpy(),
        'DRUHPOM': first_rows['DRUHPOM'].astype(int).to_numpy(),
    })
    empl = empl.sort_values(by='PRIJMENI', kind='stable')
    empl['IDX'] = range(len(empl))
    return empl


def read_zakz_from_bl(bl):
    """Return one row per unique IDZAKAZKY, sorted by TA (as text).

    Columns: 0 (the IDZAKAZKY value), TA and AKCE as strings taken from the
    first record of that IDZAKAZKY, and IDX, the 0-based position in the
    sorted order.
    """
    first_rows = bl.drop_duplicates(subset='IDZAKAZKY', keep='first')
    zakz = pd.DataFrame({
        0: first_rows['IDZAKAZKY'].to_numpy(),
        'TA': first_rows['TA'].map(str).to_numpy(),
        'AKCE': first_rows['AKCE'].map(str).to_numpy(),
    })
    zakz = zakz.sort_values(by='TA', kind='stable')
    zakz['IDX'] = range(len(zakz))
    return zakz


def _present_years():
    """Return the years flagged True in the global ``years`` frame."""
    if 'Y' not in years.index:
        return []
    return [year for year in years.columns if bool(years.at['Y', year])]


def _castka_totals(frame, keys):
    """Sum CASTKA per ``keys`` + ROK and per ``keys`` + ROK + MESIC.

    Returns two dicts keyed by tuples ``(*keys, year)`` and
    ``(*keys, year, month)``.
    """
    def summed(columns):
        series = frame.groupby(columns)['CASTKA'].sum()
        # A single grouping column yields scalar labels; normalise to tuples.
        return {key if isinstance(key, tuple) else (key,): value
                for key, value in series.items()}

    return summed(keys + ['ROK']), summed(keys + ['ROK', 'MESIC'])


def _period_row(labels, yearly, monthly, key, present_years, month_separators):
    """Build one output row: labels, yearly totals, then monthly totals.

    ``EMPTY1`` separates the yearly from the monthly columns; with
    ``month_separators`` an ``EMPTY_<year - year_start>`` column follows each
    year's twelve month columns.
    """
    row = dict(labels)
    for year in present_years:
        row[str(year)] = yearly.get(key + (year,), 0)
    row['EMPTY1'] = ""
    for year in present_years:
        for month in range(1, 13):
            row[str(year) + "_" + str(month)] = monthly.get(key + (year, month), 0)
        if month_separators:
            row['EMPTY_' + str(year - year_start)] = ""
    return row


def _unique_sheet_name(base, used):
    """Return an Excel-safe sheet name for ``base`` that is not in ``used``.

    Forbidden characters are replaced by ``_``, the name is cut to 31
    characters and a numeric suffix is appended on (case-insensitive)
    collisions. ``used`` holds lower-cased names and is updated in place.
    """
    name = _INVALID_SHEET_CHARS.sub("_", base)[:_MAX_SHEET_NAME].strip("'") or "sheet"
    candidate = name
    counter = 2
    while candidate.lower() in used:
        suffix = "_" + str(counter)
        candidate = name[:_MAX_SHEET_NAME - len(suffix)] + suffix
        counter += 1
    used.add(candidate.lower())
    return candidate


def build_sheets(AKCE, bl, empl, zakz, present_years):
    """Yield ``(sheet_name, DataFrame)`` pairs of the workbook for one AKCE.

    Args:
        AKCE: string form of the AKCE value to report.
        bl: grouped records as returned by read_data_from_xls().
        empl: employee table as returned by read_empl().
        zakz: order table as returned by read_zakz_from_bl().
        present_years: years to emit columns for.

    Sheets, in order: SUMY (totals per IDZAKAZKY whose AKCE matches, followed
    by one blank row), LIDE (overall totals of records with this AKCE, a blank
    row, then one row per employee) and one sheet per employee with that
    employee's totals per matching IDZAKAZKY.
    """
    akce_zakz = zakz[zakz['AKCE'] == AKCE]
    orders = list(zip(akce_zakz[0], akce_zakz['TA'], akce_zakz['AKCE']))
    people = list(zip(empl['OSC'], empl['PRIJMENI'], empl['JMENO']))
    order_rows = bl[bl['IDZAKAZKY'].isin(akce_zakz[0])]
    # Compare the string form so a numeric AKCE column still matches.
    akce_rows = bl[bl['AKCE'].map(str) == AKCE]

    yearly, monthly = _castka_totals(order_rows, ['IDZAKAZKY'])
    rows = [
        _period_row({'TA': ta, 'AKCE': akce}, yearly, monthly,
                    (zak_id,), present_years, False)
        for zak_id, ta, akce in orders
    ]
    rows.append({'TA': "", 'AKCE': ""})
    yield "SUMY", pd.DataFrame(rows)

    total_yearly, total_monthly = _castka_totals(akce_rows, [])
    empl_yearly, empl_monthly = _castka_totals(akce_rows, ['OSC'])
    rows = [
        _period_row({'PRIJMENI': "", 'JMENO': ""}, total_yearly,
                    total_monthly, (), present_years, True),
        {'PRIJMENI': "", 'JMENO': ""},
    ]
    rows.extend(
        _period_row({'PRIJMENI': prijmeni, 'JMENO': jmeno}, empl_yearly,
                    empl_monthly, (osc,), present_years, True)
        for osc, prijmeni, jmeno in people
    )
    yield "LIDE", pd.DataFrame(rows)

    pair_yearly, pair_monthly = _castka_totals(order_rows, ['OSC', 'IDZAKAZKY'])
    used_names = {"sumy", "lide"}
    for osc, prijmeni, jmeno in people:
        rows = [
            _period_row({'TA': ta, 'AKCE': akce}, pair_yearly, pair_monthly,
                        (osc, zak_id), present_years, True)
            for zak_id, ta, akce in orders
        ]
        sheet_name = _unique_sheet_name(str(prijmeni) + "_" + str(jmeno), used_names)
        yield sheet_name, pd.DataFrame(rows)


def zpracuj_AKCE(AKCE):
    """Write ``<excel_file_name>_<AKCE>.xlsx`` for one AKCE value.

    Reads the module-level ``bl``, ``empl``, ``zakz`` and ``years``.

    Raises:
        RuntimeError: if the module-level tables have not been loaded yet.
    """
    if bl is None or empl is None or zakz is None:
        raise RuntimeError("input data not loaded; run main() first")
    excel_file_path = excel_file_name + '_' + AKCE + '.xlsx'
    with pd.ExcelWriter(excel_file_path, engine='xlsxwriter') as writer:
        for sheet_name, table in build_sheets(AKCE, bl, empl, zakz, _present_years()):
            print("creating xls sheet " + sheet_name)
            table.to_excel(writer, index=False, sheet_name=sheet_name)


def main():
    """Load all input workbooks and export one workbook per AKCE value."""
    global bl, empl, zakz
    start_time = time.time()
    try:
        bl = read_data_from_xls()
    except FileNotFoundError as err:
        raise SystemExit(str(err))
    empl = read_empl(bl)
    zakz = read_zakz_from_bl(bl)

    vsechny_akce = pd.DataFrame(bl['AKCE'].unique())
    print(vsechny_akce)
    # dict.fromkeys de-duplicates values sharing one string form, keeping order.
    for akce in dict.fromkeys(str(value) for value in bl['AKCE'].unique()):
        zpracuj_AKCE(akce)

    elapsed_time = time.time() - start_time
    print(f"Elapsed time: {elapsed_time} seconds")


if __name__ == "__main__":
    main()