Commit 05f40e9e authored by Radek Fučík's avatar Radek Fučík
Browse files

new file: skriptik.py

parent aa114d97
Loading
Loading
Loading
Loading

skriptik.py

0 → 100644
+333 −0
Original line number Diff line number Diff line
# Script for aggregating the drawdown of personnel costs of employees of CTU (ČVUT) departments.
# Input: export from mis.cvut.cz -> Manažerský IS -> Mzdové sestavy -> Rekapitulace čerpání mezd -> icon "Export zdrojových dat do MS Excelu"

import pandas as pd
import math
import os
import warnings


# First and last year (both inclusive) for which an input file is looked up,
# e.g. verso_mis_p_rek_cerp_m_table_2007.xls ... verso_mis_p_rek_cerp_m_table_2023.xls
# (the files have to be renamed after exporting them from mis.cvut.cz).
year_start = 2007
year_end = 2030

# File name prefix - "verso_mis_p_rek_cerp_m_table" is the default name used by mis.cvut.cz.
filename_prefix = "verso_mis_p_rek_cerp_m_table_"

# TA numbers identifying what is drawn from the department budget
# (everything else is treated as coming from "projects").
group_department = [101, 122, 888]

# Name of the output xlsx file.
excel_file_path = 'output.xlsx'


# Silence pandas warnings about non-optimal (fragmenting) DataFrame usage.
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

# Code:
# One-row table (row label 'Y') with one column per year; read_data_from_xls()
# stores True/False there depending on whether that year's input file exists.
years = pd.DataFrame()
def read_data_from_xls():
    """Load all yearly exports that exist on disk into one table.

    For every year from ``year_start`` to ``year_end`` (inclusive) the file
    ``<filename_prefix><year>.xls`` is looked up relative to the current
    working directory.  Each file found is read from its sheet ``Sheet1`` and
    extended with two columns:

    * ``ROK`` - the year taken from the file name,
    * ``OSC`` - ``OSC_PV`` rounded down to an integer.

    Side effect: the module-level ``years`` table gets ``years.at['Y', year]``
    set to True or False depending on whether the file for that year exists.

    Returns:
        A DataFrame with the rows of all files found, in year order.  The
        per-file row index is kept as is, so index labels may repeat.

    Raises:
        FileNotFoundError: if no input file exists for any year in the range
            (previously an empty list was returned, which made the rest of
            the script fail with an unrelated error).
    """
    global years, filename_prefix
    frames = []
    for year in range(year_start, year_end + 1):
        filename = filename_prefix + str(year) + ".xls"
        found = os.path.exists(filename)
        years.at['Y', year] = found
        if not found:
            continue
        print(f"The file '{filename}' exists")
        df = pd.read_excel(filename, sheet_name="Sheet1")
        # Column-wise assignments instead of a Python-level pass over every row.
        df['ROK'] = year
        df['OSC'] = df['OSC_PV'].map(math.floor)
        frames.append(df)
    if not frames:
        raise FileNotFoundError(
            f"no input file '{filename_prefix}<year>.xls' found "
            f"for years {year_start}-{year_end}"
        )
    # Concatenate once at the end rather than re-copying the table per file.
    return pd.concat(frames)

# Find the list of unique employees by OSC (employee personal numbers).
def read_empl(bl):
    """Return one row per distinct ``OSC`` value found in ``bl``.

    Args:
        bl: DataFrame with at least the columns ``OSC``, ``PRIJMENI``,
            ``JMENO`` and ``DRUHPOM``.

    Returns:
        A DataFrame with the columns ``0`` (the OSC value, kept for backward
        compatibility), ``OSC``, ``PRIJMENI``, ``JMENO`` and ``DRUHPOM``.
        The values are taken from the first row of ``bl`` carrying that OSC.
        Rows are numbered 0..n-1 in order of first appearance and then sorted
        by ``PRIJMENI``.
    """
    # One pass over the table instead of re-filtering it for every employee.
    first_rows = bl.drop_duplicates(subset='OSC').reset_index(drop=True)
    empl = pd.DataFrame({
        0: first_rows['OSC'],
        'OSC': first_rows['OSC'],
        'PRIJMENI': first_rows['PRIJMENI'],
        'JMENO': first_rows['JMENO'],
        'DRUHPOM': first_rows['DRUHPOM'],
    })
    return empl.sort_values(by='PRIJMENI')

# Find the list of unique orders (zakázky) by IDZAKAZKY.
def read_zakz_from_bl(bl):
    """Return one row per distinct ``IDZAKAZKY`` value found in ``bl``.

    Args:
        bl: DataFrame with at least the columns ``IDZAKAZKY``, ``TA`` and
            ``AKCE``.

    Returns:
        A DataFrame with the columns ``0`` (the IDZAKAZKY value), ``TA`` and
        ``AKCE``.  ``TA`` and ``AKCE`` come from the first row of ``bl``
        carrying that IDZAKAZKY and are converted with ``str``.  Rows are
        numbered 0..n-1 in order of first appearance and then sorted by
        ``TA`` - as text, because ``TA`` is a string at that point.
    """
    # One pass over the table instead of re-filtering it for every order.
    first_rows = bl.drop_duplicates(subset='IDZAKAZKY').reset_index(drop=True)
    zakz = pd.DataFrame({
        0: first_rows['IDZAKAZKY'],
        'TA': first_rows['TA'].map(str),
        'AKCE': first_rows['AKCE'].map(str),
    })
    return zakz.sort_values(by='TA')

# Characters Excel does not accept in worksheet names, and the maximum name length.
_SHEET_NAME_FORBIDDEN = str.maketrans({ch: "_" for ch in "[]:*?/\\"})
_SHEET_NAME_MAX_LEN = 31


def _sheet_name(surname, first_name):
    """Build the per-person worksheet name ``<surname>_<first_name>``.

    Forbidden characters are replaced by ``_`` and the result is cut to 31
    characters so that XlsxWriter does not reject the name.  Names that are
    already valid are returned unchanged.
    """
    raw = str(surname) + "_" + str(first_name)
    return raw.translate(_SHEET_NAME_FORBIDDEN)[:_SHEET_NAME_MAX_LEN]


def _append_row(out, labels, selection=None):
    """Append one row to ``out`` (modified in place).

    Args:
        out: DataFrame being built for one worksheet.
        labels: dict ``column name -> value`` for the leading label cells.
        selection: rows of the source table to aggregate.  When given, the
            row is filled with the sum of ``CASTKA`` for every loaded year
            (columns ``<year>``), an empty ``EMPTY1`` separator, and the sum
            of ``CASTKA`` for every month of every loaded year (columns
            ``<year>_<month>``), each year followed by an empty
            ``EMPTY_<n>`` separator.  When None, only the label cells are
            written (used for spacer rows).
    """
    row = len(out.index)
    for column, value in labels.items():
        out.at[row, column] = value
    if selection is None:
        return
    # Filter each year once and reuse it for the yearly and monthly sums.
    by_year = {year: selection[selection['ROK'] == year] for year in loaded_years}
    # yearly
    for year, rows_in_year in by_year.items():
        out.at[row, str(year)] = rows_in_year['CASTKA'].sum()
    out.at[row, 'EMPTY1'] = ""
    # monthly
    for year, rows_in_year in by_year.items():
        for month in range(1, 13):
            in_month = rows_in_year[rows_in_year['MESIC'] == month]
            out.at[row, str(year) + "_" + str(month)] = in_month['CASTKA'].sum()
        out.at[row, 'EMPTY_' + str(year - year_start)] = ""


bl = read_data_from_xls()
empl = read_empl(bl)
zakz = read_zakz_from_bl(bl)

# Years for which an input file was found (see read_data_from_xls).
loaded_years = [year for year in range(year_start, year_end + 1) if bool(years.at['Y', year])]

# Split of the data into "department" (TA listed in group_department) and the rest.
is_department = bl['TA'].isin(group_department)
department = bl[is_department]
projects = bl[~is_department]

spacer_orders = {'TA': "", 'AKCE': ""}
spacer_people = {'PRIJMENI': "", 'JMENO': ""}

# All the heavy lifting is exported straight to xlsx.
with pd.ExcelWriter(excel_file_path, engine='xlsxwriter') as writer:
    # Sheet 1: totals per order, followed by department / projects / grand totals.
    print("creating xls sheet SUMY")
    out = pd.DataFrame()
    for _, zak in zakz.iterrows():
        # These rows now also get the per-year EMPTY_<n> separators, like every
        # other row; before, the separators ended up as trailing columns here.
        _append_row(out, {'TA': zak['TA'], 'AKCE': zak['AKCE']}, bl[bl['IDZAKAZKY'] == zak[0]])
    _append_row(out, spacer_orders)
    _append_row(out, {'TA': "suma za katedru", 'AKCE': ""}, department)
    _append_row(out, {'TA': "z projektů a ostatních zdrojů", 'AKCE': ""}, projects)
    _append_row(out, spacer_orders)
    _append_row(out, {'TA': "celkem", 'AKCE': ""}, bl)
    out.to_excel(writer, index=False, sheet_name="SUMY")

    # Sheet 2: drawdown per person, split into "department" orders
    # (group_department) and "the rest".
    print("creating xls sheet LIDE")
    out = pd.DataFrame()
    _append_row(out, {'PRIJMENI': "za katedru", 'JMENO': ""}, department)
    _append_row(out, spacer_people)
    for _, em in empl.iterrows():
        _append_row(out, {'PRIJMENI': em['PRIJMENI'], 'JMENO': em['JMENO']},
                    department[department['OSC'] == em['OSC']])
    _append_row(out, spacer_people)
    _append_row(out, {'PRIJMENI': "z projektů a ostatních zdrojů", 'JMENO': ""}, projects)
    _append_row(out, spacer_people)
    for _, em in empl.iterrows():
        _append_row(out, {'PRIJMENI': em['PRIJMENI'], 'JMENO': em['JMENO']},
                    projects[projects['OSC'] == em['OSC']])
    out.to_excel(writer, index=False, sheet_name="LIDE")

    # A disabled "LIDE_1" variant of the sheet above (the same split restricted
    # to DRUHPOM == 1, kept only as a dead string literal) used to follow here;
    # it was never executed and has been dropped - see version control history.

    # Finally one sheet per employee with that person's sums for every order.
    # NOTE(review): rows are matched by surname + first name here, whereas the
    # LIDE sheet matches by OSC; people sharing both names land in one sheet.
    # Confirm this is intended before switching to OSC.
    for _, em in empl.iterrows():
        sheet = _sheet_name(em['PRIJMENI'], em['JMENO'])
        print("creating xls sheet " + sheet)
        person = bl[(bl['PRIJMENI'] == em['PRIJMENI']) & (bl['JMENO'] == em['JMENO'])]
        out = pd.DataFrame()
        for _, zak in zakz.iterrows():
            _append_row(out, {'TA': zak['TA'], 'AKCE': zak['AKCE']},
                        person[person['IDZAKAZKY'] == zak[0]])
        out.to_excel(writer, index=False, sheet_name=sheet)