from pathlib import Path
import time
import parser
import argparse
import pandas as pd
import numpy as np
import warnings

warnings.filterwarnings("ignore")

parser = argparse.ArgumentParser(description="Process some integers.")

parser.add_argument("path", help="define the directory to folder/file")
parser.add_argument("--verbose", help="display processing information")

start = time.time()


def main(path_xlsm, verbose):
    if (".xlsm" in str(path_xlsm).lower()) and path_xlsm.is_file():
        xlsm_files = [Path(path_xlsm)]
    else:
        xlsm_files = list(Path(path_xlsm).glob("*.xlsm"))

    df = pd.DataFrame()

    for fn in xlsm_files:
        all_dfs = pd.read_excel(fn, sheet_name=None, header=None, engine="openpyxl")
        list_data = all_dfs.keys()
        all_dfs.pop("Lookups", None)
        all_dfs.pop("Instructions For Use", None)

        df2 = pd.DataFrame()

        for ws in list_data:  # Looping for excel sheet
            df1 = all_dfs[ws]
            new_row = {
                "PlodDate": df1.iloc[2, 8],
                "PlodShift": df1.iloc[3, 8],
                "ContractNo": df1.iloc[2, 3],
                "RigNo": df1.iloc[2, 6],
                "Department": df1.iloc[3, 6],
                "DayType": df1.iloc[0, 13],
                "Location": df1.iloc[4, 3],
                "Comments": df1.iloc[119, 0],
                "MachineHoursFrom": df1.iloc[2, 13],
                "MachineHoursTo": df1.iloc[3, 13],
                "DataSource": Path(fn.name),
            }
            df2 = df2.append(new_row, ignore_index=True)
            df2 = df2[
                [
                    "PlodDate",
                    "PlodShift",
                    "ContractNo",
                    "RigNo",
                    "Department",
                    "DayType",
                    "Location",
                    "Comments",
                    "MachineHoursFrom",
                    "MachineHoursTo",
                    "DataSource",
                ]
            ]
            df2.drop_duplicates(
                subset=[
                    "PlodDate",
                    "PlodShift",
                    "ContractNo",
                    "RigNo",
                    "Department",
                    "DayType",
                    "Location",
                    "Comments",
                    "MachineHoursFrom",
                    "MachineHoursTo",
                ],
                keep="last",
                inplace=True,
            )
            df2 = df2.replace(",", ";", regex=True)
            df2 = df2.replace("\n", " ", regex=True)
            df2 = df2.replace("\r", " ", regex=True)
            df2 = df2.replace("\t", " ", regex=True)
        df = df.append(df2)
    df.to_csv("RDPlod.csv", index=False)


if __name__ == "__main__":
    start = time.time()
    args = parser.parse_args()
    path = Path(args.path)
    verbose = args.verbose
    main(path, verbose)  # Calling Main Function
    print("Processed time:", time.time() - start)  # Total Time