用 Python 讀取 xml 檔內特定資料
From Python - Wiki |
上一篇〈用 GDAL 批次運算影像 Stats 資料〉教學中,我們用 CMD 和 GDAL 工具,將數十萬筆影像樣本轉換成以 xml 格式儲存的 Stats 資料。這些資料必須先經過整理,才能進行下一步的機器學習 (Machine Learning) 和物件導向影像萃取 (OBIA, Object-Based Image Analysis)。
xml 結構
DataOut_1.xml:
<PAMDataset> <PAMRasterBand band="1"> <Metadata> <MDI key="STATISTICS_MEAN">132</MDI> <MDI key="STATISTICS_STDDEV">54.32</MDI> </Metadata> </PAMRasterBand> <PAMRasterBand band="2"> <Metadata> <MDI key="STATISTICS_MEAN">681</MDI> <MDI key="STATISTICS_STDDEV">22.33</MDI> </Metadata> </PAMRasterBand> </PAMDataset>
擷取出 xml 檔案中特定的內容,並存成下方格式的 CSV 或 ODS 檔。
Name, R_Mean, R_STDDEV
DataOut_1, 132, 54.32
程式碼
from lxml import etree
import pandas as pd
import numpy as np
import os
import csv
import glob
# Directory tree that holds the GDAL-generated statistics XML files.
SAMPLE_DIR = r"X:\QGIS-Project\pic\sample"
# CSV file produced by this script (relative to the working directory).
OUT_CSV = 'outCsv.csv'
# Column names written as the first CSV row.
HEADER = ['Path','Name','R_Mean','R_STDDEV','G_Mean','G_STDDEV','B_Mean','B_STDDEV']
# Band numbers queried, in column order (R, G, B), and the MDI keys read per band.
BANDS = ('1', '2', '3')
STAT_KEYS = ('STATISTICS_MEAN', 'STATISTICS_STDDEV')


def read_band_stats(xml_path):
    """Return the mean/stddev text of bands 1-3 from one statistics XML file.

    The result is a flat list of six strings ordered as
    R_Mean, R_STDDEV, G_Mean, G_STDDEV, B_Mean, B_STDDEV. A statistic that
    is missing from the file yields an empty string; if a query matches
    several text nodes they are concatenated, as the original script did.

    Raises whatever ``etree.parse`` raises for an unreadable or malformed file.
    """
    tree = etree.parse(xml_path)
    values = []
    for band in BANDS:
        for key in STAT_KEYS:
            query = "//PAMRasterBand[@band='{}']/Metadata/MDI[@key='{}']/text()".format(band, key)
            values.append("".join(tree.xpath(query)))
    return values


def iter_stat_rows(sample_dir):
    """Yield one CSV row (directory, file name, six statistics) per .xml file.

    Walks ``sample_dir`` recursively. Rows are produced lazily so that a very
    large sample set never has to be held in memory at once.
    """
    for root, dirs, files in os.walk(sample_dir):
        for file in files:
            # Only handle .xml files; drop this test to process every file.
            if not file.endswith('.xml'):
                continue
            # ``root`` is the containing directory, ``file`` the bare file name.
            yield [root, file] + read_band_stats(os.path.join(root, file))


def write_stats_csv(rows, out_path):
    """Write the header row followed by ``rows`` to ``out_path``.

    The file is opened once, in write mode, and closed on exit. csv.writer
    quotes fields, so a path containing a comma stays in a single column.
    Any previous content of ``out_path`` is replaced, which keeps a re-run
    from leaving an old header row in the middle of the data.
    """
    with open(out_path, 'w', newline='') as f:
        w = csv.writer(f)
        w.writerow(HEADER)
        w.writerows(rows)


# Script entry: extract every sample's statistics and write them to the CSV.
# With no matching files the result is a header-only CSV instead of an error.
write_stats_csv(iter_stat_rows(SAMPLE_DIR), OUT_CSV)
留言
張貼留言
由於廣告留言太多,因此改採審核發佈,請耐心等候。
無法留言?請點我