Skip to content

Commit c8a9b59

Browse files
committed
Set encoding="utf-8" to avoid Windows default encoding issue
1 parent d9e4756 commit c8a9b59

File tree

5 files changed

+21
-15
lines changed

5 files changed

+21
-15
lines changed

src/pycirclize/parser/bed.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@ def parse(bed_file: str | Path) -> list[BedRecord]:
5252
BED records
5353
"""
5454
bed_records = []
55-
with open(bed_file) as f:
55+
with open(bed_file, encoding="utf-8") as f:
5656
reader = csv.reader(f, delimiter="\t")
5757
for row in reader:
5858
if row[0].startswith("#") or len(row) < 3:

src/pycirclize/parser/genbank.py

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -383,7 +383,7 @@ def write_genome_fasta(self, outfile: str | Path) -> None:
383383
outfile : str | Path
384384
Output genome fasta file
385385
"""
386-
with open(outfile, "w") as f:
386+
with open(outfile, "w", encoding="utf-8") as f:
387387
for seqid, seq in self.get_seqid2seq().items():
388388
f.write(f">{seqid}\n{seq}\n")
389389

@@ -406,19 +406,23 @@ def _parse_gbk_source(
406406
list[SeqRecord]
407407
Genbank SeqRecords
408408
"""
409-
# Parse compressed file
409+
# Parse file
410410
if isinstance(gbk_source, (str, Path)):
411411
if Path(gbk_source).suffix == ".gz":
412-
with gzip.open(gbk_source, mode="rt") as f:
412+
with gzip.open(gbk_source, mode="rt", encoding="utf-8") as f:
413413
return list(SeqIO.parse(f, "genbank"))
414414
elif Path(gbk_source).suffix == ".bz2":
415-
with bz2.open(gbk_source, mode="rt") as f:
415+
with bz2.open(gbk_source, mode="rt", encoding="utf-8") as f:
416416
return list(SeqIO.parse(f, "genbank"))
417417
elif Path(gbk_source).suffix == ".zip":
418418
with zipfile.ZipFile(gbk_source) as zip:
419419
with zip.open(zip.namelist()[0]) as f:
420-
return list(SeqIO.parse(TextIOWrapper(f), "genbank"))
421-
# Parse no compressed file or TextIOWrapper
420+
io = TextIOWrapper(f, encoding="utf-8")
421+
return list(SeqIO.parse(io, "genbank"))
422+
else:
423+
with open(gbk_source, encoding="utf-8") as f:
424+
return list(SeqIO.parse(f, "genbank"))
425+
# Parse TextIOWrapper
422426
return list(SeqIO.parse(gbk_source, "genbank"))
423427

424428
def _is_straddle_feature(self, feature: SeqFeature) -> bool:

src/pycirclize/parser/gff.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -280,18 +280,18 @@ def _parse_gff(
280280
"""
281281
gff_file = Path(gff_file)
282282
if gff_file.suffix == ".gz":
283-
with gzip.open(gff_file, mode="rt") as f:
283+
with gzip.open(gff_file, mode="rt", encoding="utf-8") as f:
284284
gff_records, start, end = self._parse_gff_textio(f, target_seqid)
285285
elif gff_file.suffix == ".bz2":
286-
with bz2.open(gff_file, mode="rt") as f:
286+
with bz2.open(gff_file, mode="rt", encoding="utf-8") as f:
287287
gff_records, start, end = self._parse_gff_textio(f, target_seqid)
288288
elif gff_file.suffix == ".zip":
289289
with zipfile.ZipFile(gff_file) as zip:
290290
with zip.open(zip.namelist()[0]) as f:
291-
io = TextIOWrapper(f)
291+
io = TextIOWrapper(f, encoding="utf-8")
292292
gff_records, start, end = self._parse_gff_textio(io, target_seqid)
293293
else:
294-
with open(gff_file) as f:
294+
with open(gff_file, encoding="utf-8") as f:
295295
gff_records, start, end = self._parse_gff_textio(f, target_seqid)
296296

297297
return gff_records, start, end

src/pycirclize/tree.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -184,10 +184,12 @@ def load_tree(data: str | Path | Tree, format: str) -> Tree:
184184
"""
185185
if isinstance(data, str) and urlparse(data).scheme in ("http", "https"):
186186
# Load tree file from URL
187-
return Phylo.read(io.StringIO(urlopen(data).read().decode()), format=format)
187+
treeio = io.StringIO(urlopen(data).read().decode(encoding="utf-8"))
188+
return Phylo.read(treeio, format=format)
188189
elif isinstance(data, (str, Path)) and os.path.isfile(data):
189190
# Load tree file
190-
return Phylo.read(data, format=format)
191+
with open(data, encoding="utf-8") as f:
192+
return Phylo.read(f, format=format)
191193
elif isinstance(data, str):
192194
# Load tree string
193195
return Phylo.read(io.StringIO(data), format=format)

src/pycirclize/utils/dataset.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -225,7 +225,7 @@ def fetch_genbank_by_accid(
225225
)
226226
if gbk_outfile is not None:
227227
gbk_text = gbk_fetch_data.read()
228-
with open(gbk_outfile, "w") as f:
228+
with open(gbk_outfile, "w", encoding="utf-8") as f:
229229
f.write(gbk_text)
230230
gbk_fetch_data = StringIO(gbk_text)
231231

@@ -258,7 +258,7 @@ def load(chr_link_file: str | Path) -> list[ChrLink]:
258258
Chromosome link list
259259
"""
260260
chr_link_list = []
261-
with open(chr_link_file) as f:
261+
with open(chr_link_file, encoding="utf-8") as f:
262262
reader = csv.reader(f, delimiter="\t")
263263
for row in reader:
264264
qchr, qstart, qend = row[0], int(row[1]), int(row[2])

0 commit comments

Comments
 (0)