eng
competition

Text Practice Mode

xlsx2csv python text practice

created Jan 16th 2015, 18:31 by


4


Rating

404 words
2 completed
00:00
class Xlsx2csv:
    """Convert the worksheets of an xlsx (zip) archive to CSV.

    The constructor opens the archive and parses the workbook-level XML
    parts; ``convert`` then writes one sheet, or every selected sheet,
    through ``csv.writer``.
    """

    def __init__(self, xlsxfile, **options):
        """Open *xlsxfile* and parse shared strings, styles and workbook.

        Options (with the defaults filled in here when not supplied):

        delimiter             ","        field delimiter given to csv.writer
        sheetdelimiter        "--------" prefix of the header line written
                                         before each sheet when all sheets go
                                         to one file handle
        dateformat            None       forwarded to Sheet.set_dateformat
        skip_empty_lines      False      forwarded to Sheet.set_skip_empty_lines
        escape_strings        False      call shared_strings.escape_strings()
        hyperlinks            False      forwarded to Sheet.set_include_hyperlinks
        cmd                   False      on error, write the message to stderr
                                         and exit(1) instead of raising
        include_sheet_pattern ["^.*$"]   regexes (re.match) a sheet name must
                                         match to be converted
        exclude_sheet_pattern []         regexes (re.match) that drop a sheet
        merge_cells           False      forwarded to Sheet.set_merge_cells

        Raises InvalidXlsxFileException when the file cannot be opened as a
        zip archive (unless ``cmd`` is set, in which case the process exits).
        """
        # The key must be spelled "delimiter": that is what _convert reads.
        options.setdefault("delimiter", ",")
        options.setdefault("sheetdelimiter", "--------")
        # _convert reads "dateformat" and "hyperlinks" unconditionally, so
        # they need defaults too.
        options.setdefault("dateformat", None)
        options.setdefault("skip_empty_lines", False)
        options.setdefault("escape_strings", False)
        options.setdefault("hyperlinks", False)
        options.setdefault("cmd", False)
        options.setdefault("include_sheet_pattern", ["^.*$"])
        options.setdefault("exclude_sheet_pattern", [])
        options.setdefault("merge_cells", False)

        self.options = options
        try:
            self.ziphandle = zipfile.ZipFile(xlsxfile)
        except (zipfile.BadZipfile, IOError):
            if self.options['cmd']:
                sys.stderr.write("Invalid xlsx file: " + str(xlsxfile) + os.linesep)
                sys.exit(1)
            raise InvalidXlsxFileException("Invalid xlsx file: " + str(xlsxfile))

        self.py3 = sys.version_info[0] == 3

        self.shared_strings = self._parse(SharedStrings, "xl/sharedStrings.xml")
        self.styles = self._parse(Styles, "xl/styles.xml")
        self.workbook = self._parse(Workbook, "xl/workbook.xml")
        self.workbook.relationships = self._parse(Relationships, "xl/_rels/workbook.xml.rels")
        if self.options['escape_strings']:
            self.shared_strings.escape_strings()

    def getSheetIdByName(self, name):
        """Return the ``id`` of the first workbook sheet called *name*, or None."""
        for s in self.workbook.sheets:
            if s['name'] == name:
                return s['id']
        return None

    def convert(self, outfile, sheetid=1):
        """Write one sheet, or every selected sheet, as CSV.

        outfile - path to file or filehandle
        sheetid - a positive id converts just that sheet into *outfile*;
                  any other value converts every sheet that passes the
                  include/exclude patterns.

        When converting all sheets and *outfile* is a string it is used as a
        directory (created if missing) holding one ``<sheetname>.csv`` per
        sheet; otherwise every sheet is written to the file handle, each
        preceded by a ``sheetdelimiter`` header line when that option is
        non-empty.

        Raises OutFileAlreadyExistsException when the directory path is an
        existing regular file (unless ``cmd`` is set: then exit(1)).
        """
        if sheetid > 0:
            self._convert(sheetid, outfile)
            return

        to_directory = isinstance(outfile, str)
        if to_directory:
            if not os.path.exists(outfile):
                os.makedirs(outfile)
            elif os.path.isfile(outfile):
                if self.options['cmd']:
                    sys.stderr.write("File " + str(outfile) + " already exists!" + os.linesep)
                    sys.exit(1)
                raise OutFileAlreadyExistsException("File " + str(outfile) + " already exists!")

        sheetdelimiter = self.options['sheetdelimiter']
        for s in self.workbook.sheets:
            sheetname = s['name']
            if not self._sheet_selected(sheetname):
                continue

            if not self.py3:
                sheetname = sheetname.encode('utf-8')
            of = outfile
            if to_directory:
                of = os.path.join(outfile, sheetname + '.csv')
            elif sheetdelimiter:
                of.write(sheetdelimiter + " " + str(s['id']) + " - " + sheetname + os.linesep)
            self._convert(s['id'], of)

    def _sheet_selected(self, sheetname):
        """Return True when *sheetname* passes the include and exclude patterns.

        A non-empty include list requires at least one match; an empty one
        lets every sheet through. Any exclude match rejects the sheet.
        """
        include_patterns = self._pattern_list(self.options['include_sheet_pattern'])
        if len(include_patterns) > 0 and not self._matches_any(include_patterns, sheetname):
            return False
        exclude_patterns = self._pattern_list(self.options['exclude_sheet_pattern'])
        return not self._matches_any(exclude_patterns, sheetname)

    @staticmethod
    def _pattern_list(value):
        """Wrap a single pattern string in a list (optparser lib fix)."""
        if isinstance(value, str):
            return [value]
        return value

    @staticmethod
    def _matches_any(patterns, sheetname):
        """Return True if any non-empty pattern re.match-es *sheetname*."""
        for pattern in patterns:
            # Empty/None patterns never match.
            if pattern and re.match(pattern, sheetname):
                return True
        return False

    def _convert(self, sheetid, outfile):
        """Write the sheet numbered *sheetid* to *outfile* as CSV.

        *outfile* may be a path (opened with mode 'w+' and closed here) or an
        already open file handle (left open). The sheet XML is looked up as
        ``xl/worksheets/sheet<sheetid>.xml``, falling back to
        ``xl/worksheets/sheet.xml`` for sheet 1.

        Raises SheetNotFoundException when the sheet part is missing (unless
        ``cmd`` is set: then exit(1)).
        """
        closefile = False
        if isinstance(outfile, str):
            outfile = open(outfile, 'w+')
            closefile = True
        try:
            writer = csv.writer(outfile, quoting=csv.QUOTE_MINIMAL, delimiter=self.options['delimiter'], lineterminator=os.linesep)
            sheetfile = self._filehandle("xl/worksheets/sheet%i.xml" % sheetid)
            if not sheetfile and sheetid == 1:
                sheetfile = self._filehandle("xl/worksheets/sheet.xml")
            if not sheetfile:
                if self.options['cmd']:
                    sys.stderr.write("Sheet %s not found!%s" % (sheetid, os.linesep))
                    sys.exit(1)
                raise SheetNotFoundException("Sheet %s not found" % sheetid)
            try:
                sheet = Sheet(self.workbook, self.shared_strings, self.styles, sheetfile)
                sheet.relationships = self._parse(Relationships, "xl/worksheets/_rels/sheet%i.xml.rels" % sheetid)
                sheet.set_dateformat(self.options['dateformat'])
                sheet.set_skip_empty_lines(self.options['skip_empty_lines'])
                sheet.set_include_hyperlinks(self.options['hyperlinks'])
                sheet.set_merge_cells(self.options['merge_cells'])
                sheet.to_csv(writer)
            finally:
                sheetfile.close()
        finally:
            # Only close handles this method opened itself.
            if closefile:
                outfile.close()

    def _filehandle(self, filename):
        """Open the archive member named *filename* (case-insensitive).

        Returns a readable handle for the first matching member, or None
        when the archive has no such member.
        """
        wanted = filename.lower()
        for name in self.ziphandle.namelist():
            if name.lower() != wanted:
                continue
            # python2.4 fix
            if not hasattr(self.ziphandle, "open"):
                return StringIO(self.ziphandle.read(name))
            return self.ziphandle.open(name, "r")
        return None

    def _parse(self, klass, filename):
        """Instantiate *klass* and feed it the archive member *filename*.

        When the member does not exist the fresh, unparsed instance is
        returned. The member handle is closed even if ``parse`` raises.
        """
        instance = klass()
        filehandle = self._filehandle(filename)
        if filehandle:
            try:
                instance.parse(filehandle)
            finally:
                filehandle.close()
        return instance

saving score / loading statistics ...