Skip to content

Commit cfd49c0

Browse files
committed
Fix issue #149.
This is a regression introduced in #140. When a string in the metadata section contains invalid UTF-8 characters, the behavior in Python 2 is to leave the string exactly as it appears in YARA. In Python 3, however, the invalid characters are removed, because Python 3 strings are not handled as bytes like in Python 2; they must have a valid encoding. PR #140 was an attempt to homogenize the behavior in both versions of Python, but it introduced this other issue.
1 parent 286897d commit cfd49c0

File tree

2 files changed

+45
-9
lines changed

2 files changed

+45
-9
lines changed

tests.py

Lines changed: 44 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
#!/usr/local/bin/python
2+
# -*- coding: utf-8 -*-
13
#
24
# Copyright (c) 2007-2014. The YARA Authors. All Rights Reserved.
35
#
@@ -692,24 +694,58 @@ def testEntrypoint(self):
692694
'rule test { condition: entrypoint >= 0 }',
693695
])
694696

695-
def testMeta(self):
696-
697-
r = yara.compile(source=r'rule test { meta: a = "foo\x80bar" condition: true }')
698-
self.assertTrue((list(r)[0].meta['a']) == 'foobar')
699-
700-
# This test ensures that anything after the NULL character is stripped.
697+
# This test ensures that anything after the NULL character is stripped.
701698
def testMetaNull(self):
702699

703700
r = yara.compile(source=r'rule test { meta: a = "foo\x00bar\x80" condition: true }')
704701
self.assertTrue((list(r)[0].meta['a']) == 'foo')
705702

703+
def testMeta(self):
704+
705+
r = yara.compile(source=r"""
706+
rule test {
707+
meta:
708+
a = "foo\x80bar"
709+
b = "ñ"
710+
c = "\xc3\xb1"
711+
condition:
712+
true }
713+
""")
714+
715+
meta = list(r)[0].meta
716+
717+
if sys.version_info > (3, 0):
718+
self.assertTrue(meta['a'] == 'foobar')
719+
else:
720+
self.assertTrue(meta['a'] == 'foo\x80bar')
721+
722+
self.assertTrue(meta['b'] == 'ñ')
723+
self.assertTrue(meta['c'] == 'ñ')
724+
706725
# This test is similar to testMeta but it tests the meta data generated
707726
# when a Match object is created.
708727
def testScanMeta(self):
709728

710-
r = yara.compile(source=r'rule test { meta: a = "foo\x80bar" condition: true }')
729+
r = yara.compile(source=r"""
730+
rule test {
731+
meta:
732+
a = "foo\x80bar"
733+
b = "ñ"
734+
c = "\xc3\xb1"
735+
condition:
736+
true }
737+
""")
738+
711739
m = r.match(data='dummy')
712-
self.assertTrue((list(m)[0].meta['a']) == 'foobar')
740+
meta = list(m)[0].meta
741+
742+
if sys.version_info > (3, 0):
743+
self.assertTrue(meta['a'] == 'foobar')
744+
else:
745+
self.assertTrue(meta['a'] == 'foo\x80bar')
746+
747+
self.assertTrue(meta['b'] == 'ñ')
748+
self.assertTrue(meta['c'] == 'ñ')
713749

714750
def testFilesize(self):
715751

yara-python.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ typedef long Py_hash_t;
5050
#define PY_STRING_TO_C(x) PyUnicode_AsUTF8(x)
5151
#define PY_STRING_CHECK(x) PyUnicode_Check(x)
5252
#else
53-
#define PY_STRING(x) PyString_Decode(x, strlen(x), "utf-8", "ignore")
53+
#define PY_STRING(x) PyString_FromString(x)
5454
#define PY_STRING_TO_C(x) PyString_AsString(x)
5555
#define PY_STRING_CHECK(x) (PyString_Check(x) || PyUnicode_Check(x))
5656
#endif

0 commit comments

Comments
 (0)