使用 pandas/Python 读取 xls 文件：不支持的格式或文件已损坏：预期 BOF 记录；实际找到 b'\xef\xbb\xbf<?xml' - 开发者社区

文章/答案/技术大牛

发布

# Method 1: read the .xls file with pd.read_excel, leaving the engine choice
# to pandas (no `engine=` argument is passed).
# Assumes `import pandas as pd` has already been executed.
file = "C:\\Users\\admin\\Downloads\\product-screener.xls"
df = pd.read_excel(file)
print(df)

# Method 2: after `pip install xlrd`, read the same .xls file while
# explicitly selecting the xlrd engine.
file = "C:\\Users\\admin\\Downloads\\product-screener.xls"
df = pd.read_excel(file, engine='xlrd')
print(df)

# Method 3: rename the file to .xlsx (note the different extension in the
# path below) and read it with the openpyxl engine.
file = "C:\\Users\\admin\\Downloads\\product-screener.xlsx"
df = pd.read_excel(file, engine='openpyxl')
print(df)

# Method 4: treat the .xls file as an XML document and load it with
# pd.read_xml using its default arguments.
file = "C:\\Users\\admin\\Downloads\\product-screener.xls"
df = pd.read_xml(file)
print(df)

   Style       Name  Table
0    NaN       None    NaN
1    NaN  All funds    NaN
# Method 5: load the .xls file as delimited text with pd.read_table
# using its default arguments.
file = "C:\\Users\\admin\\Downloads\\product-screener.xls"
df = pd.read_table(file)
print(df)

0       <Workbook xmlns="urn:schemas-microsoft-com:off...
1                                                <Styles>
2                                 <Style ss:ID="Default">
3                          <Alignment Horizontal="Left"/>
4                                                </Style>
...                                                   ...
226532                                            </Cell>
226533                                             </Row>
226534                                           </Table>
226535                                       </Worksheet>
226536                                        </Workbook>
# Method 6: try to extract tables from the .xls file with pd.read_html.
# NOTE(review): pandas documents read_html as returning a list of DataFrames,
# so `df` here would be a list rather than a single DataFrame - confirm.
file = "C:\\Users\\admin\\Downloads\\product-screener.xls"
df = pd.read_html(file)
print(df)

import xml.etree.ElementTree as ET

import pandas as pd

# The ".xls" file is not a binary Excel workbook: it starts with "<?xml" and
# has a <Workbook> root, so it is parsed here as plain XML.
tree = ET.parse('product-screener.xls')
root = tree.getroot()

# root[1][0] is the first child of the workbook's second child (presumably the
# <Table> inside the first <Worksheet>, coming after <Styles> - verify against
# the actual file). Its first two children are skipped, as in the original
# answer; everything from index 2 onwards is treated as a data row.
rows = root[1][0][2:]

# Each cell's value is the text of its first child element. A cell without any
# child (an empty or style-only cell) yields None instead of raising
# IndexError on c[0].
data = [[c[0].text if len(c) else None for c in r] for r in rows]

# Declared value type of every cell in the first data row, read from the
# namespaced ss:Type attribute; empty when there are no data rows at all.
types = [
    c[0].get('{urn:schemas-microsoft-com:office:spreadsheet}Type') if len(c) else None
    for c in (rows[0] if rows else [])
]

df = pd.DataFrame(data)

Stack Overflow用户