Text Practice Mode
0
Rating visible after 3 or more votes
saving score / loading statistics ...
00:00
class Xlsx2csv:
    """Convert worksheets of an xlsx workbook to CSV.

    The xlsx archive is opened with ``zipfile``; the shared strings, styles,
    workbook and workbook relationships parts are parsed at construction.

    Recognised options (keyword arguments, all optional):
        dateformat            - handed to Sheet.set_dateformat (default None)
        delimiter             - CSV column delimiter (default ",")
        sheetdelimiter        - prefix of the header line written before each
                                sheet when all sheets go to one file handle
                                (default "--------")
        skip_empty_lines      - handed to Sheet.set_skip_empty_lines
                                (default False)
        escape_strings        - call shared_strings.escape_strings() after
                                parsing (default False)
        hyperlinks            - handed to Sheet.set_include_hyperlinks
                                (default False)
        cmd                   - on errors write to stderr and exit(1) instead
                                of raising (default False)
        include_sheet_pattern - regex or list of regexes a sheet name must
                                match (default ["^.*$"])
        exclude_sheet_pattern - regex or list of regexes that reject a sheet
                                name (default [])
        merge_cells           - handed to Sheet.set_merge_cells (default False)
    """

    @staticmethod
    def _default_options():
        """Return a fresh dict holding the default value of every option."""
        # Built on every call so the list defaults are never shared between
        # instances.
        return {
            "dateformat": None,
            "delimiter": ",",
            "sheetdelimiter": "--------",
            "skip_empty_lines": False,
            "escape_strings": False,
            "hyperlinks": False,
            "cmd": False,
            "include_sheet_pattern": ["^.*$"],
            "exclude_sheet_pattern": [],
            "merge_cells": False,
        }

    def __init__(self, xlsxfile, **options):
        """Open *xlsxfile* and parse the workbook-level parts.

        xlsxfile - whatever zipfile.ZipFile accepts as its first argument
        options  - see the class docstring

        Raises InvalidXlsxFileException when the archive cannot be opened
        (or writes to stderr and exits with status 1 when ``cmd`` is set).
        """
        for key, value in self._default_options().items():
            options.setdefault(key, value)
        self.options = options
        try:
            self.ziphandle = zipfile.ZipFile(xlsxfile)
        except (zipfile.BadZipfile, IOError):
            if self.options['cmd']:
                sys.stderr.write("Invalid xlsx file: " + str(xlsxfile) + os.linesep)
                sys.exit(1)
            raise InvalidXlsxFileException("Invalid xlsx file: " + str(xlsxfile))

        self.py3 = sys.version_info[0] == 3
        self.shared_strings = self._parse(SharedStrings, "xl/sharedStrings.xml")
        self.styles = self._parse(Styles, "xl/styles.xml")
        self.workbook = self._parse(Workbook, "xl/workbook.xml")
        self.workbook.relationships = self._parse(Relationships, "xl/_rels/workbook.xml.rels")
        if self.options['escape_strings']:
            self.shared_strings.escape_strings()

    def getSheetIdByName(self, name):
        """Return the id of the first sheet called *name*, or None."""
        for s in self.workbook.sheets:
            if s['name'] == name:
                return s['id']
        return None

    def convert(self, outfile, sheetid=1):
        """Convert one sheet, or every selected sheet, to CSV.

        outfile - path to file or filehandle
        sheetid - a value > 0 converts just that sheet into *outfile*.
                  Otherwise all sheets passing the include/exclude patterns
                  are converted: a string *outfile* is used as a directory
                  receiving one "<sheetname>.csv" per sheet, while a file
                  handle receives all sheets, each preceded by a
                  sheetdelimiter header line (if sheetdelimiter is non-empty).

        Raises OutFileAlreadyExistsException when all sheets are requested
        and *outfile* names an existing regular file (or writes to stderr and
        exits with status 1 when ``cmd`` is set).
        """
        if sheetid > 0:
            self._convert(sheetid, outfile)
            return

        to_directory = isinstance(outfile, str)
        if to_directory:
            if not os.path.exists(outfile):
                os.makedirs(outfile)
            elif os.path.isfile(outfile):
                if self.options['cmd']:
                    sys.stderr.write("File " + str(outfile) + " already exists!" + os.linesep)
                    sys.exit(1)
                raise OutFileAlreadyExistsException("File " + str(outfile) + " already exists!")

        for s in self.workbook.sheets:
            sheetname = s['name']
            if not self._sheet_selected(sheetname):
                continue
            if not self.py3:
                sheetname = sheetname.encode('utf-8')
            of = outfile
            if to_directory:
                of = os.path.join(outfile, sheetname + '.csv')
            elif self.options['sheetdelimiter']:
                of.write(self.options['sheetdelimiter'] + " " + str(s['id']) + " - " + sheetname + os.linesep)
            self._convert(s['id'], of)

    @staticmethod
    def _as_pattern_list(patterns):
        """Wrap a single pattern string in a list; return lists unchanged."""
        # The option may arrive as a bare string instead of a list
        # (the original code marks this as an "optparser lib fix").
        if isinstance(patterns, str):
            return [patterns]
        return patterns

    @staticmethod
    def _matches_any(patterns, sheetname):
        """Return True when a non-empty pattern re.match()es *sheetname*."""
        for pattern in patterns:
            if pattern and re.match(pattern, sheetname):
                return True
        return False

    def _sheet_selected(self, sheetname):
        """Apply the include and exclude sheet patterns to *sheetname*."""
        include = self._as_pattern_list(self.options['include_sheet_pattern'])
        # An empty include list disables include filtering altogether.
        if len(include) > 0 and not self._matches_any(include, sheetname):
            return False
        exclude = self._as_pattern_list(self.options['exclude_sheet_pattern'])
        return not self._matches_any(exclude, sheetname)

    def _convert(self, sheetid, outfile):
        """Write the sheet numbered *sheetid* as CSV to *outfile*.

        outfile - path (opened with mode 'w+' and closed here) or an already
                  open file handle (left open)

        Raises SheetNotFoundException when the archive has no matching
        worksheet part (or writes to stderr and exits with status 1 when
        ``cmd`` is set).
        """
        closefile = False
        if isinstance(outfile, str):
            outfile = open(outfile, 'w+')
            closefile = True
        try:
            writer = csv.writer(outfile, quoting=csv.QUOTE_MINIMAL, delimiter=self.options['delimiter'], lineterminator=os.linesep)
            sheetfile = self._filehandle("xl/worksheets/sheet%i.xml" % sheetid)
            if not sheetfile and sheetid == 1:
                # Fall back to the un-numbered part name for the first sheet.
                sheetfile = self._filehandle("xl/worksheets/sheet.xml")
            if not sheetfile:
                if self.options['cmd']:
                    sys.stderr.write("Sheet %s not found!%s" % (sheetid, os.linesep))
                    sys.exit(1)
                raise SheetNotFoundException("Sheet %s not found" % sheetid)
            try:
                sheet = Sheet(self.workbook, self.shared_strings, self.styles, sheetfile)
                sheet.relationships = self._parse(Relationships, "xl/worksheets/_rels/sheet%i.xml.rels" % sheetid)
                sheet.set_dateformat(self.options['dateformat'])
                sheet.set_skip_empty_lines(self.options['skip_empty_lines'])
                sheet.set_include_hyperlinks(self.options['hyperlinks'])
                sheet.set_merge_cells(self.options['merge_cells'])
                sheet.to_csv(writer)
            finally:
                sheetfile.close()
        finally:
            if closefile:
                outfile.close()

    def _filehandle(self, filename):
        """Open the archive member named *filename* (case-insensitive).

        Returns a readable file object for the first matching member, or
        None when the archive has no such member.
        """
        wanted = filename.lower()
        for name in self.ziphandle.namelist():
            if name.lower() != wanted:
                continue
            # python2.4 fix: ZipFile has no open() there, so read the whole
            # member into memory instead.
            if not hasattr(self.ziphandle, "open"):
                return StringIO(self.ziphandle.read(name))
            return self.ziphandle.open(name, "r")
        return None

    def _parse(self, klass, filename):
        """Instantiate *klass* and feed it the archive member *filename*.

        When the member does not exist the freshly constructed, unparsed
        instance is returned.
        """
        instance = klass()
        filehandle = self._filehandle(filename)
        if filehandle:
            try:
                instance.parse(filehandle)
            finally:
                # Close the member even when parsing fails.
                filehandle.close()
        return instance
