Skip to content

Commit

Permalink
Merge pull request #5 from cdfmlr/#4_file_decode_error
Browse files Browse the repository at this point in the history
resolve #4 open file decode error
  • Loading branch information
cdfmlr authored Jan 16, 2021
2 parents 65e963e + 9226cf6 commit dd4bff7
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 4 deletions.
5 changes: 5 additions & 0 deletions .idea/codeStyles/codeStyleConfig.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

48 changes: 46 additions & 2 deletions pyflowchart/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,65 @@
"""

import sys
import chardet
import argparse

from pyflowchart.flowchart import Flowchart


def detect_decode(file_content: bytes) -> str:
    """Decode file_content, preferring UTF-8 unless another encoding is near-certain.

    The encoding of file_content is detected with chardet. PyFlowchart
    converts Python 3 code into flowcharts, and Python 3 source is UTF-8
    by default, so the detected encoding is only used when the detection
    confidence is at least 0.9. In every other case (low confidence, no
    encoding detected, or a detected encoding that cannot decode the
    content) UTF-8 is used instead.

    Args:
        file_content: bytes: binary file content to decode

    Returns:
        str: decoded content

    Raises:
        UnicodeDecodeError: if the content cannot be decoded as UTF-8
            when UTF-8 is used as the default or as the fallback.
    """
    detect_result = chardet.detect(file_content)

    encoding = detect_result.get("encoding")
    # Treat a missing confidence as "no confidence" instead of letting the
    # comparison below fail on None.
    confidence = detect_result.get("confidence") or 0.0

    # No usable detection: decode with the Python 3 default encoding.
    if not encoding or confidence < 0.9:
        return file_content.decode("utf-8")

    try:
        return file_content.decode(encoding)
    except (LookupError, UnicodeDecodeError):
        # LookupError: the detected name is not a codec Python knows.
        # UnicodeDecodeError: the detection was confident but wrong.
        # Either way, fall back to the default encoding.
        return file_content.decode("utf-8")


def main(code_file, field, inner):
    """Print the flowchart generated from the code in an opened file.

    Args:
        code_file: file object opened in binary mode; its content is read
            once and decoded by detect_decode.
        field: field to draw the flowchart for (e.g. "Class.method"),
            passed to Flowchart.from_code.
        inner: whether to parse the body of field, passed to
            Flowchart.from_code.
    """
    # Read the stream exactly once: a second read() on the same file
    # object would return b"" and produce an empty flowchart.
    file_content: bytes = code_file.read()
    # Detect the encoding and decode the binary content into text.
    code = detect_decode(file_content)

    flowchart = Flowchart.from_code(code, field=field, inner=inner)
    print(flowchart.flowchart())


if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Python code to flowchart.')

parser.add_argument('code_file', type=argparse.FileType('r'))
# code_file: open as binary, detect encoding and decode in main later
parser.add_argument('code_file', type=argparse.FileType('rb'))

parser.add_argument('-f', '--field', default="", type=str, help="field to draw flowchart. (e.g. Class.method)")
parser.add_argument('-i', '--inner', action="store_true", help="parse the body of field")

Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name='pyflowchart',
version='0.1.0',
version='0.1.1',
url='https://github.com/cdfmlr/pyflowchart',
license='MIT',
author='CDFMLR',
Expand All @@ -23,5 +23,5 @@
"Topic :: Utilities",
],
python_requires='>=3.6',
install_requires=['astunparse'],
install_requires=['astunparse', 'chardet'],
)

0 comments on commit dd4bff7

Please sign in to comment.