Posts: 11,887
Threads: 474
Joined: Sep 2016
DeaD_EyE Wrote:Pandas feels for me like numpy + black magic.
Pandas is powerful, but what I see is, that people without knowledge about the Core Language Python itself trying to work with this framework.
FYI: I use pandas because I can often save writing a huge amount of code
Here's a snippet which loads an entire directory of .csv files into a database:
def load_all(self):
datafiles = self.fpath.ourairport_datafiles
filelist = [fn for fn in datafiles.iterdir() if fn.is_file() and fn.suffix == '.csv']
for fn in filelist:
tablename = fn.stem.replace('-', '_')
df = pd.read_csv(fn)
print(f"engine: {self.Model.engine}")
df.to_sql(tablename, con=self.Model.engine, if_exists='replace')
Posts: 877
Threads: 134
Joined: Jul 2017
Aug-25-2022, 01:49 AM
(This post was last modified: Aug-25-2022, 01:49 AM by Pedroski55.)
Why bother with csv?
Just do this directly from Excel to Excel, takes 1 second!
试一试刘先生!
def myApp():
import openpyxl
import os
# these 4 can help you format cells in openpyxl
# check the documentation online
from openpyxl.styles import PatternFill
from openpyxl.styles import Font
from openpyxl.utils import get_column_letter
from openpyxl.styles import Alignment
path2files = '/home/pedro/myPython/openpyxl/'
savepath = '/home/pedro/myPython/openpyxl/failures.xlsx'
# open the file
XLfiles = os.listdir(path2files)
for file in XLfiles:
if file.endswith('.xlsx'):
print('The files are:', file)
my_file = input('Copy and paste the SOURCE file you want here ... ')
sourceFile = openpyxl.load_workbook(path2files + my_file)
sourceFilesheets = sourceFile.sheetnames
# make a new XL
targetFile = openpyxl.Workbook()
# a new wb only has 1 sheet called Sheet
sheet = targetFile.active
sheet.title = 'failures'
targetFile.save(savepath)
targetFile = openpyxl.load_workbook(savepath)
targetFilesheets = targetFile.sheetnames
# get the rows with failed
count = 2
for sheet in sourceFilesheets:
print('sheet is', sheet)
maxRow = sourceFile[sheet].max_row
maxCol = sourceFile[sheet].max_column
for rowNum in range(5, maxRow + 1):
if sourceFile[sheet].cell(row=rowNum, column=6).value == 'Failed':
print('Failed row is', rowNum)
for colNum in range(1, maxCol + 1):
# write the headers to the target file
header = sourceFile[sheet].cell(row=2, column=colNum).value
targetFile['failures'].cell(row=1, column=colNum).value=header
cell_value = sourceFile[sheet].cell(row=rowNum, column=colNum).value
targetFile['failures'].cell(row=count, column=colNum).value=cell_value
count +=1
print('count is', count)
# paint the columns different colours
# put the row and column numbers you want
for sheet in targetFilesheets:
maxRow = targetFile[sheet].max_row
for rowNum in range(2, maxRow + 1):
targetFile[sheet].cell(row=rowNum, column=5).fill = PatternFill(start_color='D0FBA1', end_color='D0FBA1', fill_type = 'solid')
targetFile[sheet].cell(row=rowNum, column=6).fill = PatternFill(start_color='44FFFF', end_color='44FFFF', fill_type = 'solid')
targetFile[sheet].cell(row=rowNum, column=7).fill = PatternFill(start_color='ECA4FB', end_color='ECA4FB', fill_type = 'solid')
targetFile[sheet].cell(row=rowNum, column=8).fill = PatternFill(start_color='FBC7A4', end_color='FBC7A4', fill_type = 'solid')
targetFile[sheet].cell(row=rowNum, column=9).fill = PatternFill(start_color='D0F111', end_color='D0F111', fill_type = 'solid')
targetFile.save(savepath)
Posts: 11,887
Threads: 474
Joined: Sep 2016
The files are in CSV format, never were excel to start with.
Posts: 877
Threads: 134
Joined: Jul 2017
Open the csv file in XL, save as XL! 2 clicks!
XL is the required output format.
Posts: 41
Threads: 15
Joined: Aug 2022
(Aug-25-2022, 09:04 AM)Pedroski55 Wrote: Open the csv file in XL, save as XL! 2 clicks!
XL is the required output format.
You reply is very useful. If it's not too much effort, lease help me add limit check when assign limit in sheet. I have tried didn't got it.
def myApp():
import openpyxl
import os
# these 4 can help you format cells in openpyxl
# check the documentation online
from openpyxl.styles import PatternFill
from openpyxl.styles import Font
from openpyxl.utils import get_column_letter
from openpyxl.styles import Alignment
# path2files = '/home/pedro/myPython/openpyxl/'
# savepath = '/home/pedro/myPython/openpyxl/failures.xlsx'
index_number = [13, 14, 15, 16,23, 27, 33, 42, 43, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 59, 61, 63,
65, 67, 75, 81, 86, 87, 92, 93, 98, 99, 103,
105, 109, 111, 115, 117, 121, 123, 127, 129, 133, 135, 139, 141, 145, 147, 153, 174, 178, 205, 206,
207, 208, 209, 211, 213, 215, 216, 217,
219, 240, 241, 242, 243, 244, 248, 249, 264, 265, 271, 272, 276, 278, 282, 284, 293, 300, 302, 305,
307, 310, 311, 312, 313, 314, 315, 316,
317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335,
336, 337, 338, 339, 340, 341, 342, 343,
344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363,
364, 365, 366, 367, 368, 369, 370, 371,
372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384,385]
path2files = 'C:/Users/Sam/pythonProject/'
savepath = 'failures.xlsx'
# open the file
XLfiles = os.listdir(path2files)
for file in XLfiles:
if file.endswith('.xlsx'):
print('The files are:', file)
my_file = input('Copy and paste the SOURCE file you want here ... ')
sourceFile = openpyxl.load_workbook(path2files + my_file)
sourceFilesheets = sourceFile.sheetnames
# make a new XL
targetFile = openpyxl.Workbook()
# a new wb only has 1 sheet called Sheet
sheet = targetFile.active
sheet.title = 'failures'
targetFile.save(savepath)
targetFile = openpyxl.load_workbook(savepath)
targetFilesheets = targetFile.sheetnames
# get the rows with failed
count = 5
for sheet in sourceFilesheets:
print('sheet is:', sheet)
maxRow = sourceFile[sheet].max_row
maxCol = sourceFile[sheet].max_column
for rowNum in range(5, maxRow + 1):
if sourceFile[sheet].cell(row=rowNum, column=6).value == 'Failed':
print('Failed row is:', rowNum)
for colNum in range(1, maxCol + 1):
# write the headers to the target file
header1 = sourceFile[sheet].cell(row=1, column=colNum).value
header2 = sourceFile[sheet].cell(row=2, column=colNum).value
header3 = sourceFile[sheet].cell(row=3, column=colNum).value
header4 = sourceFile[sheet].cell(row=4, column=colNum).value
targetFile['failures'].cell(row=1, column=colNum).value = header1
targetFile['failures'].cell(row=2, column=colNum).value = header2
targetFile['failures'].cell(row=3, column=colNum).value = header3
targetFile['failures'].cell(row=4, column=colNum).value = header4
cell_value = sourceFile[sheet].cell(row=rowNum, column=colNum).value
targetFile['failures'].cell(row=count, column=colNum).value = cell_value
count += 1
print('count is:', count)
# paint the columns different colours
# put the row and column numbers you want
for sheet in targetFilesheets:
maxRow = targetFile[sheet].max_row
for rowNum in range(5, maxRow + 1):
targetFile[sheet].cell(row=rowNum, column=5).fill = PatternFill(start_color='D0FBA1', end_color='D0FBA1',
fill_type='solid')
targetFile[sheet].cell(row=rowNum, column=6).fill = PatternFill(start_color='44FFFF', end_color='44FFFF',
fill_type='solid')
targetFile[sheet].cell(row=rowNum, column=7).fill = PatternFill(start_color='ECA4FB', end_color='ECA4FB',
fill_type='solid')
targetFile[sheet].cell(row=rowNum, column=8).fill = PatternFill(start_color='FBC7A4', end_color='FBC7A4',
fill_type='solid')
targetFile[sheet].cell(row=rowNum, column=9).fill = PatternFill(start_color='D0F111', end_color='D0F111',
fill_type='solid')
targetFile.save(savepath)
myApp()
Posts: 877
Threads: 134
Joined: Jul 2017
Aug-25-2022, 01:54 PM
(This post was last modified: Aug-25-2022, 01:54 PM by Pedroski55.)
Quote:If it's not too much effort, lease help me add limit check when assign limit in sheet. I have tried didn't got it.
Sorry, I don't understand what you want. Please explain.
I understand you want all rows that contain "Failed".
Tell me what columns you want to save to the targetFile.
And what is the list index_number? Are those the column numbers you want save to targetFile?
Posts: 41
Threads: 15
Joined: Aug 2022
(Aug-25-2022, 01:54 PM)Pedroski55 Wrote: Quote:If it's not too much effort, lease help me add limit check when assign limit in sheet. I have tried didn't got it.
Sorry, I don't understand what you want. Please explain.
I understand you want all rows that contain "Failed".
Tell me what columns you want to save to the targetFile.
And what is the list index_number? Are those the column numbers you want save to targetFile?
I appreciated your prompt reply.index_number is mean that column has a limit and needs to check if out of range and then paint red color as attached, thanks!
Posts: 877
Threads: 134
Joined: Jul 2017
Aug-26-2022, 08:13 AM
(This post was last modified: Aug-26-2022, 08:13 AM by Pedroski55.)
Aha, now I understand!
There was problem with column 310. Rows 3 and 4 both contain values, but I got a TypeError
Quote:column is 310
Traceback (most recent call last):
File "/usr/lib/python3.8/idlelib/run.py", line 559, in runcode
exec(code, self.locals)
File "<pyshell#48>", line 10, in <module>
File "<pyshell#42>", line 6, in checkCol
TypeError: '<' not supported between instances of 'NoneType' and 'float'
Quote:>>> value1 = targetFile['failures'].cell(row=3, column=310).value
>>> value2 = targetFile['failures'].cell(row=4, column=310).value
>>> value1
-39.19524256
>>> value2
-37.39370508
>>>
Can't explain that! So I put in a try: ..... except: TypeError
def myApp():
import openpyxl
import os
# these 4 can help you format cells in openpyxl
# check the documentation online
from openpyxl.styles import PatternFill
from openpyxl.styles import Font
from openpyxl.utils import get_column_letter
from openpyxl.styles import Alignment
path2file = '/home/pedro/myPython/openpyxl/log_data.xlsx'
savepath = '/home/pedro/myPython/openpyxl/failures.xlsx'
sourceFile = openpyxl.load_workbook(path2file)
sourceFilesheets = sourceFile.sheetnames
# make a new XL
targetFile = openpyxl.Workbook()
# a new wb only has 1 sheet called Sheet
sheet = targetFile.active
sheet.title = 'failures'
# copy rows 1 to 4 into the targetFile
# this will give you the limits, the headers and whatever row 1 is
for sheet in sourceFilesheets:
print('sheet is', sheet)
# copy the first 4 rows from sourceFile to targetFile
maxCol = sourceFile[sheet].max_column
for rowNum in range(1, 5):
for colNum in range(1, maxCol + 1):
# write the headers to the target file
data = sourceFile[sheet].cell(row=rowNum, column=colNum).value
targetFile['failures'].cell(row=rowNum, column=colNum).value=data
# save that, open it in Excel, check it and reopen in Python
targetFile.save(savepath)
targetFile = openpyxl.load_workbook(savepath)
targetFilesheets = targetFile.sheetnames
# get the rows with failed
# write to targetFile row 5 as first row
count = 5
for sheet in sourceFilesheets:
print('sheet is', sheet)
maxRow = sourceFile[sheet].max_row
maxCol = sourceFile[sheet].max_column
for rowNum in range(5, maxRow + 1):
if sourceFile[sheet].cell(row=rowNum, column=6).value == 'Failed':
print('Failed row is', rowNum)
for colNum in range(1, maxCol + 1):
cell_value = sourceFile[sheet].cell(row=rowNum, column=colNum).value
targetFile['failures'].cell(row=count, column=colNum).value=cell_value
count +=1
print('count is', count)
# got all the data for Failed rows and copied to targetFile
# now just check the limits and paint cells outside of the limits
# I got a strange TypeError for column 310 don't know why
# a function to find cells outside the limits and paint them
def checkCol(lim1, lim2, col):
maxRow = targetFile['failures'].max_row
try:
for rowNum in range(5, maxRow + 1):
print('column is', col)
column_value = targetFile['failures'].cell(row=rowNum, column=col).value
if column_value < lim1 or column_value > lim2:
print('found a value outside the limits', lim1, column_value, lim2)
targetFile[sheet].cell(row=rowNum, column=col).fill = PatternFill(start_color='D0FBA1', end_color='D0FBA1', fill_type = 'solid')
except: TypeError
# look through row 3 for cells with values not None
# pass the column number to checkCol(lim1, lim2, col)
for sheet in targetFilesheets:
maxCol = targetFile[sheet].max_column
# find a column with a limit
for colNum in range(3, maxCol + 1):
value1 = targetFile['failures'].cell(row=3, column=colNum).value
if not value1 == None:
# get the limits
value2 = targetFile['failures'].cell(row=4, column=colNum).value
#print('value1 is', value1, 'value2 is', value2)
checkCol(value1, value2, colNum)
# save the result so far
targetFile.save(savepath)
print('Cells with value outside of range limits marked with colour')
print('File saved to', savepath)
# format the XL file
# resize row height and cell width
def formatCells():
for sheet in targetFilesheets:
targetFile.active = targetFile[sheet]
for rowNum in range(1, targetFile.active.max_row + 1):
targetFile.active.row_dimensions[rowNum].height = 36 # about 1/2"
for sheet in targetFilesheets:
targetFile.active = targetFile[sheet]
for colNum in range(1, targetFile.active.max_column + 1):
targetFile.active.column_dimensions[get_column_letter(colNum)].width = 22 # about 1"
# set the fonts
ft1 = Font(name='Arial', size=20)
alignment = Alignment(horizontal='center',
vertical='bottom',
text_rotation=0,
wrap_text=False,
shrink_to_fit=True,
indent=0)
ft2 = Font(name='Arial', size=20, bold=True)
alignment2 = Alignment(horizontal='center',
vertical='bottom',
text_rotation=0,
wrap_text=True,
shrink_to_fit=True,
indent=0)
ft3 = Font(name='Arial', size=24, bold=True)
alignment3 = Alignment(horizontal='center',
vertical='center',
text_rotation=0,
wrap_text=False,
shrink_to_fit=True,
indent=0)
# set the fonts for all cells
for sheet in targetFilesheets:
targetFileActiveSheet = targetFile[sheet]
maxRow = targetFileActiveSheet.max_row
maxCol = targetFileActiveSheet.max_column
for colNum in range(2, maxCol + 1, 1):
for rowNum in range(1, maxRow + 1):
targetFileActiveSheet.cell(row=rowNum, column=colNum).font = ft1
targetFileActiveSheet.cell(row=rowNum, column=colNum).alignment = alignment
# set the font for row 1 and row 3
for sheet in targetFilesheets:
targetFileActiveSheet = targetFile[sheet]
maxRow = targetFileActiveSheet.max_row
maxCol = targetFileActiveSheet.max_column
for colNum in range(1, maxCol + 1, 1):
targetFileActiveSheet.cell(row=1, column=colNum).font = ft2
targetFileActiveSheet.cell(row=1, column=colNum).alignment = alignment2
targetFileActiveSheet.cell(row=3, column=colNum).font = ft2
targetFileActiveSheet.cell(row=3, column=colNum).alignment = alignment2
# set the font bigger for column 7, the end of term score
for sheet in targetFilesheets:
targetFileActiveSheet = targetFile[sheet]
maxRow = targetFileActiveSheet.max_row
for rowNum in range(1, maxRow + 1, 1):
targetFileActiveSheet.cell(row=rowNum, column=6).font = ft3
targetFileActiveSheet.cell(row=rowNum, column=6).alignment = alignment3
print('formatting finished ... !')
formatCells()
targetFile.save(savepath)
print('All done!')
Posts: 41
Threads: 15
Joined: Aug 2022
def myApp():
import openpyxl
import datetime
import os
# these 4 can help you format cells in openpyxl
# check the documentation online
from openpyxl.styles import PatternFill
from openpyxl.styles import Font
from openpyxl.utils import get_column_letter
from openpyxl.styles import Alignment
path2file = r'C:\Users\Sam\pythonProject\Folder_operations_CSV\writers.xlsx'
savepath = 'failures.xlsx'
curr_time = datetime.datetime.now()
print(curr_time)
sourceFile = openpyxl.load_workbook(path2file)
sourceFilesheets = sourceFile.sheetnames
# make a new XL
targetFile = openpyxl.Workbook()
# a new wb only has 1 sheet called Sheet
sheet = targetFile.active
sheet.title = 'failures'
# copy rows 1 to 4 into the targetFile
# this will give you the limits, the headers and whatever row 1 is
for sheet in sourceFilesheets:
print('sheet is', sheet)
# copy the first 4 rows from sourceFile to targetFile
maxCol = sourceFile[sheet].max_column
for rowNum in range(1, 5):
for colNum in range(1, maxCol + 1):
# write the headers to the target file
data = sourceFile[sheet].cell(row=rowNum, column=colNum).value
targetFile['failures'].cell(row=rowNum, column=colNum).value = data
# save that, open it in Excel, check it and reopen in Python
targetFile.save(savepath)
targetFile = openpyxl.load_workbook(savepath)
targetFilesheets = targetFile.sheetnames
# get the rows with failed
# write to targetFile row 5 as first row
count = 5
for sheet in sourceFilesheets:
print('sheet is', sheet)
maxRow = sourceFile[sheet].max_row
maxCol = sourceFile[sheet].max_column
for rowNum in range(5, maxRow + 1):
if sourceFile[sheet].cell(row=rowNum, column=6).value == 'Failed':
print('Failed row is', rowNum)
for colNum in range(1, maxCol + 1):
cell_value = sourceFile[sheet].cell(row=rowNum, column=colNum).value
targetFile['failures'].cell(row=count, column=colNum).value = cell_value
count += 1
print('count is', count)
# got all the data for Failed rows and copied to targetFile
# now just check the limits and paint cells outside of the limits
# I got a strange TypeError for column 310 don't know why
# a function to find cells outside the limits and paint them
def checkCol(lim1, lim2, col):
maxRow = targetFile['failures'].max_row
try:
for rowNum in range(5, maxRow + 1):
# print('column is', col)
column_value = targetFile['failures'].cell(row=rowNum, column=col).value
print('column is:', {rowNum},{col},{column_value},{lim1},{lim2})
if not column_value==None:
if column_value < lim1 or column_value > lim2:
print('found a value outside the limits', lim1, column_value, lim2)
targetFile[sheet].cell(row=rowNum, column=col).fill = PatternFill(start_color='D0FBA1',
end_color='D0FBA1',
fill_type='solid')
else:
rowNum+=1
except:
TypeError
# look through row 3 for cells with values not None
# pass the column number to checkCol(lim1, lim2, col)
for sheet in targetFilesheets:
maxCol = targetFile[sheet].max_column
# find a column with a limit
for colNum in range(13, maxCol + 1):
value1 = targetFile['failures'].cell(row=3, column=colNum).value
if not value1 == None:
# get the limits
value2 = targetFile['failures'].cell(row=4, column=colNum).value
# print('value1 is', value1, 'value2 is', value2)
checkCol(value1, value2, colNum)
# save the result so far
targetFile.save(savepath)
print('Cells with value outside of range limits marked with colour')
print('File saved to', savepath)
# format the XL file
# resize row height and cell width
def formatCells():
for sheet in targetFilesheets:
targetFile.active = targetFile[sheet]
for rowNum in range(1, targetFile.active.max_row + 1):
targetFile.active.row_dimensions[rowNum].height = 36 # about 1/2"
for sheet in targetFilesheets:
targetFile.active = targetFile[sheet]
for colNum in range(1, targetFile.active.max_column + 1):
targetFile.active.column_dimensions[get_column_letter(colNum)].width = 22 # about 1"
# set the fonts
ft1 = Font(name='Arial', size=20)
alignment = Alignment(horizontal='center',
vertical='bottom',
text_rotation=0,
wrap_text=False,
shrink_to_fit=True,
indent=0)
ft2 = Font(name='Arial', size=20, bold=True)
alignment2 = Alignment(horizontal='center',
vertical='bottom',
text_rotation=0,
wrap_text=True,
shrink_to_fit=True,
indent=0)
ft3 = Font(name='Arial', size=24, bold=True)
alignment3 = Alignment(horizontal='center',
vertical='center',
text_rotation=0,
wrap_text=False,
shrink_to_fit=True,
indent=0)
# set the fonts for all cells
for sheet in targetFilesheets:
targetFileActiveSheet = targetFile[sheet]
maxRow = targetFileActiveSheet.max_row
maxCol = targetFileActiveSheet.max_column
for colNum in range(2, maxCol + 1, 1):
for rowNum in range(1, maxRow + 1):
targetFileActiveSheet.cell(row=rowNum, column=colNum).font = ft1
targetFileActiveSheet.cell(row=rowNum, column=colNum).alignment = alignment
# set the font for row 1 and row 3
for sheet in targetFilesheets:
targetFileActiveSheet = targetFile[sheet]
maxRow = targetFileActiveSheet.max_row
maxCol = targetFileActiveSheet.max_column
for colNum in range(1, maxCol + 1, 1):
targetFileActiveSheet.cell(row=1, column=colNum).font = ft2
targetFileActiveSheet.cell(row=1, column=colNum).alignment = alignment2
targetFileActiveSheet.cell(row=3, column=colNum).font = ft2
targetFileActiveSheet.cell(row=3, column=colNum).alignment = alignment2
# set the font bigger for column 7, the end of term score
for sheet in targetFilesheets:
targetFileActiveSheet = targetFile[sheet]
maxRow = targetFileActiveSheet.max_row
for rowNum in range(1, maxRow + 1, 1):
targetFileActiveSheet.cell(row=rowNum, column=6).font = ft3
targetFileActiveSheet.cell(row=rowNum, column=6).alignment = alignment3
print('formatting finished ... !')
formatCells()
targetFile.save(savepath)
print('All done!')
curr_time2 = datetime.datetime.now()
print(curr_time2-curr_time)
if __name__ == '__main__':
print("Starting Check....")
myApp() I added check if none of rows .It's ok right now.But 5 testers csv files of all test log need converted to xlsx format by manually that could do it,so complicated for that.thanks!
Posts: 41
Threads: 15
Joined: Aug 2022
import csv
import pandas as pd
import re
list=[]
with open('Log.csv','r')as fp:
csvreader=csv.reader(fp)
columns = next(fp).rstrip(",\n").split(",")
columns1 = next(fp).rstrip(",\n").split(",")
up_limited = next(fp).rstrip(",\n").split(",")
low_limited = next(fp).rstrip(",\n").split(",")
for row,column in enumerate(csvreader):
if column[5]=="Failed":
list.append(column)
fp.close()
df=pd.DataFrame(list)
try:
for i in range(12,int(len(df.axes[1]))):
for j in range(0,int(len(df.axes[0]))):
if not df[i][j] == None:
lim1 = float(up_limited[i])
lim2 = float(low_limited[i])
val = float(df[i][j])
if val>lim1 or val<lim2:
print(val)
else:
print(f'out of range!{i},row is:{j}.val is:{val}.up is:{lim1}.low is:{lim2}.')
else:
print('Value is None.')
except:
print('Value is None11.')
TypeError
df.to_csv('dd.csv',index=False,header=False) Try use pandas to do that,still got wrong.
|