![]() |
|
python Read each xlsx file and write it into csv with pipe delimiter - Printable Version +- Python Forum (https://python-forum.io) +-- Forum: Python Coding (https://python-forum.io/forum-7.html) +--- Forum: General Coding Help (https://python-forum.io/forum-8.html) +--- Thread: python Read each xlsx file and write it into csv with pipe delimiter (/thread-40633.html) |
python Read each xlsx file and write it into csv with pipe delimiter - mg24 - Aug-29-2023 Hi Team, I am trying to Read xlsx file and I want to write it into CSV File with pipe delimited.all decimal values wanted to round up 6 decimal places. Challenges I am facing for Negative Numbers xlsx values -2816151988.789466 in CSV File values are getting written as , negative symbol getting missed. 2816151988.789466 how to fix it. Thanks mg import os
import pandas as pd
# Specify the source directory containing the xlsx files
xlsx_directory = "path/to/source/directory"
# Specify the destination directory for saving the csv files
csv_destination = "path/to/destination/directory"
# Create the destination directory if it doesn't exist
if not os.path.exists(csv_destination):
os.makedirs(csv_destination)
# Get a list of all xlsx files in the source directory
xlsx_files = [f for f in os.listdir(xlsx_directory) if f.endswith('.xlsx')]
# Loop through the xlsx files
for xlsx_file in xlsx_files:
# Construct the full path of the xlsx file
xlsx_path = os.path.join(xlsx_directory, xlsx_file)
# Load the xlsx file into a pandas DataFrame
df = pd.read_excel(xlsx_path)
# Construct the path for the CSV file in the destination directory
csv_file = os.path.splitext(xlsx_file)[0] + '.csv'
csv_path = os.path.join(csv_destination, csv_file)
# Save the DataFrame as a CSV file with pipe delimiter
df.to_csv(csv_path, sep='|', index=False, float_format='%.6f')
print(f"Converted {xlsx_file} to {csv_file}")
RE: python Read each xlsx file and write it into csv with pipe delimiter - Pedroski55 - Aug-30-2023 Just a suggestion, I don't know pandas very well: Using pandas save the excel as csv. df.to_csv(savepathXL+savenameCSV, index=False)Then you have a text file. Open the csv as a text file and change things using re, or .replace() Pandas docs here. Found this on stackoverflow [attachment=2513] RE: python Read each xlsx file and write it into csv with pipe delimiter - carecavoador - Aug-30-2023 This is the way... And to use the pipe | as the separator just pass '|' as the sep argument to df.to_csv():df.to_csv(savepathXL+savenameCSV, index=False, sep='|') RE: python Read each xlsx file and write it into csv with pipe delimiter - deanhystad - Aug-30-2023 Is there a negative number to print? Verify that df has a negative number. If so, the problem is writing the csv. Try writing without the format. If there is not a negative number in df, verify there is a negative number in the spreadsheet. Your code works for me. RE: python Read each xlsx file and write it into csv with pipe delimiter - mg24 - Nov-09-2023 Hi Team, This is code I am using for cosolidation of xlsx files into Main file, Do ou see any error or suggest please from pathlib import Path
import pandas as pd
foldername = "your_folder_name"
output_folder = "output_folder" # Define the folder where you want to save the output CSV files
extra_header = "IFRS"
columns_to_fill = ['year1', 'year2', 'year3', 'year4', 'year5', 'Total']
files_combine = Path(f"{foldername}").rglob("*.xlsx")
header_written = False
for input_xls in files_combine:
# Attempt to read from the 'Data' sheet; if it doesn't exist, read the first active sheet
try:
df = pd.read_excel(input_xls, sheet_name='Data')
# Check if the header contains "reports"
if 'reports' in df.columns.str.lower():
df_dict = {input_xls.name: df}
else:
raise ValueError("Header does not contain 'reports'")
except Exception:
df_dict = pd.read_excel(input_xls, sheet_name=None, engine='openpyxl') # Read all sheets into a dictionary
# Select the first non-empty sheet as the active sheet
for sheet_name, sheet_data in df_dict.items():
if not sheet_data.empty:
# Check if the header contains "reports"
if 'reports' in sheet_data.columns.str.lower():
df = sheet_data
df_dict = {input_xls.name: df}
break
else:
raise ValueError("Header does not contain 'reports'")
# Check if "IFRS" column already exists, and if not, add it
if 'IFRS' not in df.columns:
df['IFRS'] = extra_header
# Replace blank cells in the specified columns with 0
df[columns_to_fill] = df[columns_to_fill].fillna(0)
# Convert 'reporting date' column to yyyy-mm-dd format
df['reporting date'] = pd.to_datetime(df['reporting date']).dt.strftime('%Y-%m-%d')
# Extract the existing headers from the first row
existing_headers = list(df.columns)
# Add the extra header "IFRS" to the list of existing headers
existing_headers.append(extra_header)
# Update the DataFrame columns with the new list of headers
df.columns = existing_headers
# Insert an additional column for 'FileName' in the DataFrame
df.insert(0, 'FileName', input_xls.name)
# Define the output CSV file name based on the input file name
output_csv_file = f"{output_folder}/{input_xls.stem}_combined.csv"
# Check if the header has been written, and if not, write it
if not header_written:
header_written = True
df.to_csv(output_csv_file, index=False, sep='\t', encoding='utf-8', float_format='%.6f', header=True, mode='a')
else:
# Append the workbook data to the CSV file without writing the header again
df.to_csv(output_csv_file, index=False, sep='\t', encoding='utf-8', float_format='%.6f', header=False, mode='a')Thanksmg |