![]() |
python Read each xlsx file and write it into csv with pipe delimiter - Printable Version +- Python Forum (https://python-forum.io) +-- Forum: Python Coding (https://python-forum.io/forum-7.html) +--- Forum: General Coding Help (https://python-forum.io/forum-8.html) +--- Thread: python Read each xlsx file and write it into csv with pipe delimiter (/thread-40633.html) |
python Read each xlsx file and write it into csv with pipe delimiter - mg24 - Aug-29-2023 Hi Team, I am trying to Read xlsx file and I want to write it into CSV File with pipe delimited.all decimal values wanted to round up 6 decimal places. Challenges I am facing for Negative Numbers xlsx values -2816151988.789466 in CSV File values are getting written as , negative symbol getting missed. 2816151988.789466 how to fix it. Thanks mg import os import pandas as pd # Specify the source directory containing the xlsx files xlsx_directory = "path/to/source/directory" # Specify the destination directory for saving the csv files csv_destination = "path/to/destination/directory" # Create the destination directory if it doesn't exist if not os.path.exists(csv_destination): os.makedirs(csv_destination) # Get a list of all xlsx files in the source directory xlsx_files = [f for f in os.listdir(xlsx_directory) if f.endswith('.xlsx')] # Loop through the xlsx files for xlsx_file in xlsx_files: # Construct the full path of the xlsx file xlsx_path = os.path.join(xlsx_directory, xlsx_file) # Load the xlsx file into a pandas DataFrame df = pd.read_excel(xlsx_path) # Construct the path for the CSV file in the destination directory csv_file = os.path.splitext(xlsx_file)[0] + '.csv' csv_path = os.path.join(csv_destination, csv_file) # Save the DataFrame as a CSV file with pipe delimiter df.to_csv(csv_path, sep='|', index=False, float_format='%.6f') print(f"Converted {xlsx_file} to {csv_file}") RE: python Read each xlsx file and write it into csv with pipe delimiter - Pedroski55 - Aug-30-2023 Just a suggestion, I don't know pandas very well: Using pandas save the excel as csv. df.to_csv(savepathXL+savenameCSV, index=False)Then you have a text file. Open the csv as a text file and change things using re, or .replace() Pandas docs here. Found this on stackoverflow [attachment=2513] RE: python Read each xlsx file and write it into csv with pipe delimiter - carecavoador - Aug-30-2023 This is the way... And to use the pipe | as the separator just pass '|' as the sep argument to df.to_csv() :df.to_csv(savepathXL+savenameCSV, index=False, sep='|') RE: python Read each xlsx file and write it into csv with pipe delimiter - deanhystad - Aug-30-2023 Is there a negative number to print? Verify that df has a negative number. If so, the problem is writing the csv. Try writing without the format. If there is not a negative number in df, verify there is a negative number in the spreadsheet. Your code works for me. RE: python Read each xlsx file and write it into csv with pipe delimiter - mg24 - Nov-09-2023 Hi Team, This is code I am using for cosolidation of xlsx files into Main file, Do ou see any error or suggest please from pathlib import Path import pandas as pd foldername = "your_folder_name" output_folder = "output_folder" # Define the folder where you want to save the output CSV files extra_header = "IFRS" columns_to_fill = ['year1', 'year2', 'year3', 'year4', 'year5', 'Total'] files_combine = Path(f"{foldername}").rglob("*.xlsx") header_written = False for input_xls in files_combine: # Attempt to read from the 'Data' sheet; if it doesn't exist, read the first active sheet try: df = pd.read_excel(input_xls, sheet_name='Data') # Check if the header contains "reports" if 'reports' in df.columns.str.lower(): df_dict = {input_xls.name: df} else: raise ValueError("Header does not contain 'reports'") except Exception: df_dict = pd.read_excel(input_xls, sheet_name=None, engine='openpyxl') # Read all sheets into a dictionary # Select the first non-empty sheet as the active sheet for sheet_name, sheet_data in df_dict.items(): if not sheet_data.empty: # Check if the header contains "reports" if 'reports' in sheet_data.columns.str.lower(): df = sheet_data df_dict = {input_xls.name: df} break else: raise ValueError("Header does not contain 'reports'") # Check if "IFRS" column already exists, and if not, add it if 'IFRS' not in df.columns: df['IFRS'] = extra_header # Replace blank cells in the specified columns with 0 df[columns_to_fill] = df[columns_to_fill].fillna(0) # Convert 'reporting date' column to yyyy-mm-dd format df['reporting date'] = pd.to_datetime(df['reporting date']).dt.strftime('%Y-%m-%d') # Extract the existing headers from the first row existing_headers = list(df.columns) # Add the extra header "IFRS" to the list of existing headers existing_headers.append(extra_header) # Update the DataFrame columns with the new list of headers df.columns = existing_headers # Insert an additional column for 'FileName' in the DataFrame df.insert(0, 'FileName', input_xls.name) # Define the output CSV file name based on the input file name output_csv_file = f"{output_folder}/{input_xls.stem}_combined.csv" # Check if the header has been written, and if not, write it if not header_written: header_written = True df.to_csv(output_csv_file, index=False, sep='\t', encoding='utf-8', float_format='%.6f', header=True, mode='a') else: # Append the workbook data to the CSV file without writing the header again df.to_csv(output_csv_file, index=False, sep='\t', encoding='utf-8', float_format='%.6f', header=False, mode='a')Thanks mg |