I am trying to read log files from different folders and store them in a single Excel sheet.
import os
import pandas as pd
import bs4

path = "D:\\logfolder"
filelist = []

for root, dirs, files in os.walk(path):
    for file in files:
        if file.endswith("History.txt"):
            filelist.append(os.path.join(root, file))

for name in filelist:
    print(name + "\n")  # D:\logfolder\logfolder_1\History.txt
                        # D:\logfolder\logfolder_2\History.txt
                        # D:\logfolder\logfolder_3\History.txt
                        # D:\logfolder\logfolder_4\History.txt
for name in filelist:
    with open(name, "r") as f:
        soupObj = bs4.BeautifulSoup(f, "lxml")
        df = pd.DataFrame([(x["uri"], *x["t"].split("T"), x["u"], x["desc"])
                           for x in soupObj.find_all("log")],
                          columns=["Database", "Date", "Time", "User", "Description"])

df.to_excel("logfile.xlsx", index=False)
The expected output is a single Excel sheet (logfile.xlsx) containing the rows from every History.txt in every log folder, but I only get the data from the last file, the one in logfolder_4.
What am I doing wrong?
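
To make the intent clearer, this is roughly what I imagined the combining step should look like, collecting one frame per file and stacking them at the end (just a sketch, assuming pd.concat is the right way to combine the per-file frames; I am not sure whether this is the correct approach or where my code above goes wrong):

import os
import pandas as pd
import bs4

path = "D:\\logfolder"
filelist = []
for root, dirs, files in os.walk(path):
    for file in files:
        if file.endswith("History.txt"):
            filelist.append(os.path.join(root, file))

frames = []  # one DataFrame per History.txt file
for name in filelist:
    with open(name, "r") as f:
        soupObj = bs4.BeautifulSoup(f, "lxml")
        frames.append(pd.DataFrame(
            [(x["uri"], *x["t"].split("T"), x["u"], x["desc"])
             for x in soupObj.find_all("log")],
            columns=["Database", "Date", "Time", "User", "Description"]))

# stack every per-file frame into one sheet
pd.concat(frames, ignore_index=True).to_excel("logfile.xlsx", index=False)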