From 064788ac7a1e5685611f94c082221bfd0e6baf4a Mon Sep 17 00:00:00 2001 From: Subho Halder Date: Mon, 4 Feb 2019 11:37:11 +0530 Subject: [PATCH] Revert "- support for python 2.7.x and PyPy." --- AUTHORS.rst | 18 -- HISTORY.md | 67 ------- README.rst | 74 +------ examples/get_app_name.py | 36 ++-- pyaxmlparser/__init__.py | 17 -- pyaxmlparser/__main__.py | 163 --------------- pyaxmlparser/arscparser.py | 304 ++++++++++++++-------------- pyaxmlparser/arscutil.py | 148 +++++++------- pyaxmlparser/axmlparser.py | 384 ++++++++++++++++-------------------- pyaxmlparser/axmlprinter.py | 133 +++++-------- pyaxmlparser/bytecode.py | 13 +- pyaxmlparser/cli.py | 18 ++ pyaxmlparser/constants.py | 36 ++-- pyaxmlparser/core.py | 273 ++++++++----------------- pyaxmlparser/public.py | 12 +- pyaxmlparser/stringblock.py | 127 ++++++------ pyaxmlparser/utils.py | 87 ++++---- requirements.txt | 3 + setup.py | 56 +++--- tests/test_parser.py | 33 ++-- 20 files changed, 736 insertions(+), 1266 deletions(-) delete mode 100644 AUTHORS.rst delete mode 100644 HISTORY.md delete mode 100644 pyaxmlparser/__main__.py create mode 100644 pyaxmlparser/cli.py diff --git a/AUTHORS.rst b/AUTHORS.rst deleted file mode 100644 index 6b7425a..0000000 --- a/AUTHORS.rst +++ /dev/null @@ -1,18 +0,0 @@ -Axmlparser is written and maintained by Appknox (XYSec Labs) and various contributors: - -Contributors -``````````````````````` - -- Subho Halder `@subho007 `_. -- Chillar Anand `@ChillarAnand `_. -- Sharat M R `@cosmosgenius `_. -- Jeyfel Brandauer `@jflbr `_. -- Brad Dixon `@rbdixon `_. -- Viren Nadkarni `@viren-nadkarni `_. -- Dhilipsiva Nitimis `@dhilipsiva `_. - - -Patches and Suggestions -``````````````````````` - -- Yury Shumilov `@xiva-wgt `_. diff --git a/HISTORY.md b/HISTORY.md deleted file mode 100644 index 88fb636..0000000 --- a/HISTORY.md +++ /dev/null @@ -1,67 +0,0 @@ -Release History -=============== - -0.3.14(2018-12-05) ------------------- - -- support for python 2.7.x and PyPy. -- remove depend library (lxml optional). -- pep8 formal code. -- remove unused code. -- Extended command line. - -0.3.13(2018-11-04) ------------------- - -0.3.11(2018-09-27) ------------------- - -0.3.10 (2018-08-01) ------------------- - -0.3.9 (2018-06-03) ------------------- - -0.3.8 (2018-06-03) ------------------- - -0.3.7 (2017-11-22) ------------------- - -0.3.6 (2017-10-17) ------------------- - -0.3.5 (2017-07-20) ------------------- - -0.3.4 (2017-07-14) ------------------- - -0.3.3 (2017-07-13) ------------------- - -0.3.1 (2017-04-13) ------------------- - -0.3.0 (2017-04-12) ------------------- - -0.2.0 (2017-04-03) ------------------- - -0.1.5 (2017-03-20) ------------------- - -0.1.4 (2017-03-20) ------------------- - -0.1.3 (2017-03-20) ------------------- - -0.1.2 (2017-03-20) ------------------- - -0.1.1 (2017-03-20) ------------------- - -- Birth! diff --git a/README.rst b/README.rst index bb09182..1f38c5b 100644 --- a/README.rst +++ b/README.rst @@ -1,67 +1,26 @@ -Axmlparser -========== - -.. image:: https://img.shields.io/pypi/v/pyaxmlparser.svg - :alt: package version on https://pypi.org - :target: https://pypi.org/project/pyaxmlparser/ - -.. image:: https://img.shields.io/pypi/l/pyaxmlparser.svg - :alt: package license - :target: https://pypi.org/project/pyaxmlparser/ - -.. image:: https://img.shields.io/pypi/pyversions/pyaxmlparser.svg - :alt: requires python versions for package - :target: https://pypi.org/project/pyaxmlparser/ - -.. image:: https://img.shields.io/github/contributors/appknox/pyaxmlparser.svg - :alt: package contributes - :target: https://github.com/appknox/pyaxmlparser/graphs/contributors +axmlparser +=========== A simple parser to parse Android XML file. Usage -===== +====== CLI : -===== +==== -.. code-block:: console - - $ apkinfo ~/Downloads/com.hardcodedjoy.roboremo.15.apk info - APK: /home/chillaranand/Downloads/com.hardcodedjoy.roboremo.15.apk - App name: RoboRemo - Package: com.hardcodedjoy.roboremo - Version name: 2.0.0 - Version code: 15 - - $ pyaxmlparser ~/Downloads/com.hardcodedjoy.roboremo.15.apk info - APK: /home/chillaranand/Downloads/com.hardcodedjoy.roboremo.15.apk - App name: RoboRemo - Package: com.hardcodedjoy.roboremo - Version name: 2.0.0 - Version code: 15 +.. code-block:: shell - $ python -m pyaxmlparser ~/Downloads/com.hardcodedjoy.roboremo.15.apk info - APK: /home/chillaranand/Downloads/com.hardcodedjoy.roboremo.15.apk + $ apkinfo ~/Downloads/com.hardcodedjoy.roboremo.15.apk + APK: /home/chillaranand/Downloads/com.hardcodedjoy.roboremo.15.apk App name: RoboRemo Package: com.hardcodedjoy.roboremo Version name: 2.0.0 Version code: 15 - $ apkinfo ~/Downloads/com.hardcodedjoy.roboremo.15.apk xml - - ... - - - $ apkinfo ~/Downloads/com.hardcodedjoy.roboremo.15.apk xml > "~/manifest.xml" - (save into ~/manifest.xml output xml) - - $ apkinfo ~/Downloads/com.hardcodedjoy.roboremo.15.apk xml -o "~/manifest.xml" - (save into ~/manifest.xml output xml) - Python package : @@ -71,6 +30,7 @@ Python package : from pyaxmlparser import APK + apk = APK('/foo/bar.apk') print(apk.package) print(apk.version_name) @@ -78,21 +38,3 @@ Python package : print(apk.icon_info) print(apk.icon_data) print(apk.application) - -.. code-block:: python - - from pyaxmlparser import AXMLPrinter - - xml = AXMLPrinter('/foo/bar.apk').get_xml_obj() - print(xml.get('package')) - -.. code-block:: python - - from pyaxmlparser import AXMLPrinter - - apk_path = '/foo/bar.apk' - with open(apk_path, 'rb') as apk_file: - android_xml = apk_file.read() - xml, error = AXMLPrinter(android_xml).get_xml_obj() - if xml is not None: - print(xml.get('package')) diff --git a/examples/get_app_name.py b/examples/get_app_name.py index 47967ca..129d990 100644 --- a/examples/get_app_name.py +++ b/examples/get_app_name.py @@ -2,31 +2,21 @@ Usage: python get_app_name.py /path/to/extracted/apk/dir """ -import os import sys -from pyaxmlparser import ARSCParser, AXMLPrinter -from pyaxmlparser.utils import NS_ANDROID -app_root = sys.argv[1] -axml_file = os.path.join(app_root, 'AndroidManifest.xml') -rsc_file = os.path.join(app_root, 'resources.arsc') +from pyaxmlparser.arscparser import ARSCParser +from pyaxmlparser.axmlprinter import AXMLPrinter + -with open(axml_file, 'rb') as manifest_file, open(rsc_file, 'rb') as resources_file: - manifest_data = manifest_file.read() - resources_data = resources_file.read() - manifest_xml = AXMLPrinter(manifest_data) - axml, error = manifest_xml.get_xml_obj() - if axml is None: - print('Error parse xml {}: \n{}'.format(axml_file, error)) - exit(1) - rsc = ARSCParser(resources_data) +app_root = sys.argv[1] - app_name_label = axml.findall('.//application')[0].get(NS_ANDROID + 'label') - if app_name_label and app_name_label.startswith('@'): - app_name_hex = '0x' + app_name_label[1:] +xml = AXMLPrinter(open("{}/AndroidManifest.xml".format(app_root), 'rb').read()).get_xml_obj() +rsc = ARSCParser(open("{}/resources.arsc".format(app_root), "rb").read()) - app_name = rsc.get_string( - rsc.get_packages_names()[0], - rsc.get_id(rsc.get_packages_names()[0], int(app_name_hex, 0))[1] - ) - print('App name is \'{}\''.format(app_name[1])) +app_name_hex = xml.getElementsByTagName("application")[0].getAttribute("android:label") +app_name = '0x' + app_name_hex[1:] +app_name = rsc.get_string( + rsc.get_packages_names()[0], + rsc.get_id(rsc.get_packages_names()[0], int(app_name, 0))[1] +) +print('App name is "{}"'.format(app_name[1])) diff --git a/pyaxmlparser/__init__.py b/pyaxmlparser/__init__.py index 685fa3d..7e22b06 100644 --- a/pyaxmlparser/__init__.py +++ b/pyaxmlparser/__init__.py @@ -1,20 +1,3 @@ # flake8: noqa from pyaxmlparser.core import APK -from pyaxmlparser.axmlprinter import AXMLPrinter -from pyaxmlparser.axmlparser import AXMLParser -from pyaxmlparser.arscparser import ARSCParser - -__all__ = ( - '__title__', '__package_name__', '__description__', '__url__', '__version__', '__author__', - '__author_email__', '__license__', 'APK', 'AXMLPrinter', 'AXMLParser', 'ARSCParser' -) - -__title__ = 'Pyaxmlparser' -__package_name__ = 'pyaxmlparser' -__description__ = 'Parser for Android XML file and get Application Name without using Androguard.' -__url__ = 'https://github.com/appknox/pyaxmlparser' -__version__ = '0.3.14' -__author__ = 'Subho Halder' -__author_email__ = 'sunny@appknox.com' -__license__ = 'MIT License' diff --git a/pyaxmlparser/__main__.py b/pyaxmlparser/__main__.py deleted file mode 100644 index 214fee3..0000000 --- a/pyaxmlparser/__main__.py +++ /dev/null @@ -1,163 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -from __future__ import unicode_literals, print_function - -import os -import sys -import argparse -from codecs import open - -try: - from .__init__ import ( - __version__, __title__, __package_name__, APK, AXMLPrinter) - from . import utils -except (ValueError, ImportError): - from __init__ import ( - __version__, __title__, __package_name__, APK, AXMLPrinter) - import utils - - -def get_files(args): - check_files = [] - if args.path: - for item_path in args.path: - if isinstance(item_path, utils.string_types) and \ - item_path and os.path.exists(item_path): - if os.path.isdir(item_path): - for root, dirs, files in os.walk(item_path): - for item_file in files: - if item_file not in check_files: - check_files.append(os.path.join(root, item_file)) - else: - if item_path not in check_files: - check_files.append(item_path) - return check_files - - -def get_info(args): - check_files = get_files(args) - if check_files: - for item_file in check_files: - error = '' - try: - apk = APK(item_file, debug=args.debug) - except Exception as error_message: - apk = None - error = str(error_message) - message = 'APK {} :\n'.format(item_file) - if apk: - message += ' App name: {}\n'.format(apk.application) - message += ' Package: {}\n'.format(apk.package) - message += ' Version name: {}\n'.format(apk.version_name) - message += ' Version code: {}\n'.format(apk.version_code) - else: - message += ' Can\'t get info with error: {}\n'.format(error) - print(message) - else: - print('Error. path for check APK - {0} not found'.format(args.path), file=sys.stderr) - exit(1) - - -def get_xml(args): - output = None - if isinstance(args.output, utils.string_types) and \ - args.output and os.path.exists(os.path.dirname(args.output)): - output = args.output - check_files = get_files(args) - if check_files: - for item_file in check_files: - try: - xml_string = AXMLPrinter(item_file, debug=args.debug).get_xml() - except Exception as error_message: - xml_string = None - print(str(error_message), file=sys.stderr) - exit(1) - if xml_string: - if output: - with open(output, 'wb', encoding='utf-8') as xml_file: - xml_file.write(xml_string) - else: - print(xml_string) - else: - print('Error. path for output xml - {0} not found'.format(args.path), file=sys.stderr) - exit(1) - - -def get_parser(): - parser = argparse.ArgumentParser( - prog='python -m {}'.format(__package_name__), - description='Help for work with {} version {}'.format(__title__, __version__), - usage='%(prog)s [-h] [options]', - add_help=True, formatter_class=argparse.RawTextHelpFormatter) - - parser.add_argument( - '--version', action='version', - version='{} version {}'.format(__title__, __version__)) - - parser.add_argument( - '-d', '--debug', action='store_true', default=False, - help='Enable debug message.') - - parser.add_argument( - 'path', nargs='+', type=str, - default=None, help='path for file or dir.') - - subparsers = parser.add_subparsers(help='List of commands') - - info_parser = \ - subparsers.add_parser( - 'info', help='output info from file') - info_parser.set_defaults(func=get_info) - - xml_parser = \ - subparsers.add_parser( - 'xml', help='output xml from file') - xml_parser.add_argument('-o', '--output', action='store', - type=str, default=None, - help='save xml to file.') - xml_parser.set_defaults(func=get_xml) - - help_message = parser.format_help() + '\n ' - # retrieve subparsers from parser - subparsers_actions = [ - action for action in parser._actions - if isinstance(action, argparse._SubParsersAction)] - # there will probably only be one subparser_action, - # but better save than sorry - for subparsers_action in subparsers_actions: - # get all subparsers and print help - for choice, subparser in subparsers_action.choices.items(): - help_message += 'Subparser \'{0}\'\n {1}\n'.format( - choice, find_options(subparser.format_help(), choice)) - parser.epilog = help_message - return parser.parse_args() - - -def find_options(help_text=None, choice=None): - """ - Return a substring with the optional arguments - :param help_text: Help text, as it's called - :param choice: Name subparser name, as it's called - :return: - """ - if not isinstance(help_text, utils.string_types): - help_text = '' - if not isinstance(choice, utils.string_types): - choice = 'unknown' - new_list = [] - for line in help_text.split('\n'): - if line == 'optional arguments:': - new_list.append('optional arguments for {0}:'.format(choice)) - else: - new_list.append(line) - return '\n'.join(new_list) - - -def main(): - args = get_parser() - args.func(args) - - -if __name__ == '__main__': - main() diff --git a/pyaxmlparser/arscparser.py b/pyaxmlparser/arscparser.py index 26013ce..2a391bb 100644 --- a/pyaxmlparser/arscparser.py +++ b/pyaxmlparser/arscparser.py @@ -14,7 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import unicode_literals import logging from pyaxmlparser import bytecode @@ -24,18 +23,17 @@ ARSCResTypeSpec, ARSCResType, ARSCResTableEntry, ARSCResTableConfig from pyaxmlparser.stringblock import StringBlock import pyaxmlparser.constants as const -from pyaxmlparser.utils import complex_to_float +from pyaxmlparser.utils import complexToFloat from xml.sax.saxutils import escape +log = logging.getLogger("pyaxmlparser.arscparser") + class ARSCParser(object): """ Parser for resource.arsc files """ - def __init__(self, raw_buff, debug=False): - self.log = logging.getLogger('pyaxmlparser.arscparser') - self.log.setLevel(logging.DEBUG if debug else logging.CRITICAL) - + def __init__(self, raw_buff): self.analyzed = False self._resolved_strings = None self.buff = bytecode.BuffHandle(raw_buff) @@ -49,7 +47,7 @@ def __init__(self, raw_buff, debug=False): self.resource_configs = collections.defaultdict(lambda: collections.defaultdict(set)) self.resource_keys = collections.defaultdict( lambda: collections.defaultdict(collections.defaultdict)) - self.string_pool_main = None + self.stringpool_main = None # skip to the start of the first chunk self.buff.set_idx(self.header.start + self.header.header_size) @@ -63,11 +61,11 @@ def __init__(self, raw_buff, debug=False): # this inner chunk crosses the boundary of the table chunk break - if res_header.type == const.RES_STRING_POOL_TYPE and not self.string_pool_main: - self.string_pool_main = StringBlock(self.buff, res_header) + if res_header.type == const.RES_STRING_POOL_TYPE and not self.stringpool_main: + self.stringpool_main = StringBlock(self.buff, res_header) elif res_header.type == const.RES_TABLE_PACKAGE_TYPE: - assert len(self.packages) < self.packageCount, 'Got more packages than expected' + assert len(self.packages) < self.packageCount, "Got more packages than expected" current_package = ARSCResTablePackage(self.buff, res_header) package_name = current_package.get_name() @@ -79,23 +77,23 @@ def __init__(self, raw_buff, debug=False): self.buff.set_idx(current_package.header.start + current_package.typeStrings) type_sp_header = ARSCHeader(self.buff) assert type_sp_header.type == const.RES_STRING_POOL_TYPE, \ - 'Expected String Pool header, got %x' % type_sp_header.type - table_strings = StringBlock(self.buff, type_sp_header) + "Expected String Pool header, got %x" % type_sp_header.type + mTableStrings = StringBlock(self.buff, type_sp_header) # Next, we should have the resource key symbol table self.buff.set_idx(current_package.header.start + current_package.keyStrings) key_sp_header = ARSCHeader(self.buff) assert key_sp_header.type == const.RES_STRING_POOL_TYPE, \ - 'Expected String Pool header, got %x' % key_sp_header.type - key_strings = StringBlock(self.buff, key_sp_header) + "Expected String Pool header, got %x" % key_sp_header.type + mKeyStrings = StringBlock(self.buff, key_sp_header) # Add them to the dict of read packages self.packages[package_name].append(current_package) - self.packages[package_name].append(table_strings) - self.packages[package_name].append(key_strings) + self.packages[package_name].append(mTableStrings) + self.packages[package_name].append(mKeyStrings) - pc = PackageContext(current_package, self.string_pool_main, - table_strings, key_strings) + pc = PackageContext(current_package, self.stringpool_main, + mTableStrings, mKeyStrings) # skip to the first header in this table package chunk # FIXME is this correct? We have already read the first two sections! @@ -106,7 +104,7 @@ def __init__(self, raw_buff, debug=False): # Read all other headers while self.buff.get_idx() <= package_data_end - ARSCHeader.SIZE: pkg_chunk_header = ARSCHeader(self.buff) - self.log.debug('Found a header: {}'.format(pkg_chunk_header)) + log.debug("Found a header: {}".format(pkg_chunk_header)) if pkg_chunk_header.start + pkg_chunk_header.size > package_data_end: # we are way off the package chunk; bail out break @@ -121,12 +119,12 @@ def __init__(self, raw_buff, debug=False): self.packages[package_name].append(a_res_type) self.resource_configs[package_name][a_res_type].add(a_res_type.config) - self.log.debug('Config: {}'.format(a_res_type.config)) + log.debug("Config: {}".format(a_res_type.config)) entries = [] for i in range(0, a_res_type.entryCount): - current_package.resource_id = current_package.resource_id & 0xffff0000 | i - entries.append((unpack('> 24) & 0xFF), ((entry_data >> 16) & 0xFF), ((entry_data >> 8) & 0xFF), @@ -278,20 +271,19 @@ def get_resource_color(ate): def get_resource_dimen(self, ate): try: return [ - ate.get_value(), '%s%s' % ( - complex_to_float(ate.key.get_data()), + ate.get_value(), "%s%s" % ( + complexToFloat(ate.key.get_data()), const.DIMENSION_UNITS[ate.key.get_data() & const.COMPLEX_UNIT_MASK]) ] except IndexError: - self.log.warning('Out of range dimension unit index for %s: %s' % ( - complex_to_float(ate.key.get_data()), + log.warning("Out of range dimension unit index for %s: %s" % ( + complexToFloat(ate.key.get_data()), ate.key.get_data() & const.COMPLEX_UNIT_MASK)) return [ate.get_value(), ate.key.get_data()] # FIXME - @staticmethod - def get_resource_style(ate): - return ['', ''] + def get_resource_style(self, ate): + return ["", ""] def get_packages_names(self): """ @@ -328,10 +320,11 @@ def get_public_resources(self, package_name, locale='\x00\x00'): self._analyse() - buff = '\n\n' + buff = '\n' + buff += '\n' try: - for i in self.values[package_name][locale]['public']: + for i in self.values[package_name][locale]["public"]: buff += '\n' % ( i[0], i[1], i[2]) except KeyError: @@ -351,10 +344,11 @@ def get_string_resources(self, package_name, locale='\x00\x00'): """ self._analyse() - buff = '\n\n' + buff = '\n' + buff += '\n' try: - for i in self.values[package_name][locale]['string']: + for i in self.values[package_name][locale]["string"]: if any(map(i[1].__contains__, '<&>')): value = '' % i[1] else: @@ -375,23 +369,28 @@ def get_strings_resources(self): """ self._analyse() - buff = '\n\n' + buff = '\n' + + buff += "\n" for package_name in self.get_packages_names(): - buff += '\n' % package_name + buff += "\n" % package_name for locale in self.get_locales(package_name): - buff += '\n\n' % repr(locale) + buff += "\n" % repr(locale) + + buff += '\n' try: - for i in self.values[package_name][locale]['string']: + for i in self.values[package_name][locale]["string"]: buff += '%s\n' % (i[0], escape(i[1])) except KeyError: pass - buff += '\n\n' + buff += '\n' + buff += '\n' - buff += '\n' + buff += "\n" - buff += '\n' + buff += "\n" return buff.encode('utf-8') @@ -405,10 +404,11 @@ def get_id_resources(self, package_name, locale='\x00\x00'): """ self._analyse() - buff = '\n\n' + buff = '\n' + buff += '\n' try: - for i in self.values[package_name][locale]['id']: + for i in self.values[package_name][locale]["id"]: if len(i) == 1: buff += '\n' % (i[0]) else: @@ -431,10 +431,11 @@ def get_bool_resources(self, package_name, locale='\x00\x00'): """ self._analyse() - buff = '\n\n' + buff = '\n' + buff += '\n' try: - for i in self.values[package_name][locale]['bool']: + for i in self.values[package_name][locale]["bool"]: buff += '%s\n' % (i[0], i[1]) except KeyError: pass @@ -453,10 +454,11 @@ def get_integer_resources(self, package_name, locale='\x00\x00'): """ self._analyse() - buff = '\n\n' + buff = '\n' + buff += '\n' try: - for i in self.values[package_name][locale]['integer']: + for i in self.values[package_name][locale]["integer"]: buff += '%s\n' % (i[0], i[1]) except KeyError: pass @@ -475,10 +477,11 @@ def get_color_resources(self, package_name, locale='\x00\x00'): """ self._analyse() - buff = '\n\n' + buff = '\n' + buff += '\n' try: - for i in self.values[package_name][locale]['color']: + for i in self.values[package_name][locale]["color"]: buff += '%s\n' % (i[0], i[1]) except KeyError: pass @@ -497,10 +500,11 @@ def get_dimen_resources(self, package_name, locale='\x00\x00'): """ self._analyse() - buff = '\n\n' + buff = '\n' + buff += '\n' try: - for i in self.values[package_name][locale]['dimen']: + for i in self.values[package_name][locale]["dimen"]: buff += '%s\n' % (i[0], i[1]) except KeyError: pass @@ -513,7 +517,7 @@ def get_id(self, package_name, rid, locale='\x00\x00'): self._analyse() try: - for i in self.values[package_name][locale]['public']: + for i in self.values[package_name][locale]["public"]: if i[2] == rid: return i except KeyError: @@ -580,7 +584,7 @@ def get_resolved_strings(self): r[package_name][v_locale] = {} try: - for i in self.values[package_name][locale]['public']: + for i in self.values[package_name][locale]["public"]: if i[0] == 'string': r[package_name][v_locale][i[2]] = None k[i[1]] = i[2] @@ -588,7 +592,7 @@ def get_resolved_strings(self): pass try: - for i in self.values[package_name][locale]['string']: + for i in self.values[package_name][locale]["string"]: if i[0] in k: r[package_name][v_locale][k[i[0]]] = i[1] except KeyError: @@ -623,12 +627,12 @@ def get_res_configs(self, rid, config=None, fallback=True): self._analyse() if not rid: - raise ValueError('\'rid\' should be set') + raise ValueError("'rid' should be set") if not isinstance(rid, int): - raise ValueError('\'rid\' must be an int') + raise ValueError("'rid' must be an int") if rid not in self.resource_values: - self.log.warning('The requested rid could not be found in the resources.') + log.warning("The requested rid could not be found in the resources.") return [] res_options = self.resource_values[rid] @@ -636,7 +640,7 @@ def get_res_configs(self, rid, config=None, fallback=True): if config in res_options: return [(config, res_options[config])] elif fallback and config == ARSCResTableConfig.default_config(): - self.log.warning('No default resource config could be found for the given rid, using fallback!') + log.warning("No default resource config could be found for the given rid, using fallback!") return [list(self.resource_values[rid].items())[0]] else: return [] @@ -647,7 +651,7 @@ def get_string(self, package_name, name, locale='\x00\x00'): self._analyse() try: - for i in self.values[package_name][locale]['string']: + for i in self.values[package_name][locale]["string"]: if i[0] == name: return i except KeyError: @@ -677,18 +681,18 @@ def get_type_configs(self, package_name, type_name=None): class PackageContext(object): - def __init__(self, current_package, string_pool_main, - table_strings, key_strings): - self.string_pool_main = string_pool_main - self.table_strings = table_strings - self.key_strings = key_strings + def __init__(self, current_package, stringpool_main, mTableStrings, + mKeyStrings): + self.stringpool_main = stringpool_main + self.mTableStrings = mTableStrings + self.mKeyStrings = mKeyStrings self.current_package = current_package - def get_resource_id(self): - return self.current_package.resource_id + def get_mResId(self): + return self.current_package.mResId - def set_resource_id(self, resource_id): - self.current_package.resource_id = resource_id + def set_mResId(self, mResId): + self.current_package.mResId = mResId def get_package_name(self): return self.current_package.get_name() diff --git a/pyaxmlparser/arscutil.py b/pyaxmlparser/arscutil.py index 708d7d2..f96f246 100644 --- a/pyaxmlparser/arscutil.py +++ b/pyaxmlparser/arscutil.py @@ -15,11 +15,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import unicode_literals +import logging from struct import unpack import pyaxmlparser.constants as const from pyaxmlparser.utils import format_value +log = logging.getLogger("pyaxmlparser.arscutil") + class ARSCResTablePackage(object): def __init__(self, buff, header): @@ -31,11 +33,11 @@ def __init__(self, buff, header): self.lastPublicType = unpack('' + "" ).format(self.start, self.type, self.header_size, self.size) @@ -77,27 +79,27 @@ def __init__(self, buff, parent=None): self.res1 = unpack(' 0: + log.debug("Skipping padding bytes!") self.padding = buff.read(self.exceedingSize) # TODO there is screenConfig2 @@ -190,7 +193,7 @@ def __init__(self, buff=None, **kwargs): ((kwargs.pop('mnc', 0) & 0xffff) << 16) self.locale = 0 - for char_ix, char in kwargs.pop('locale', '')[0:4]: + for char_ix, char in kwargs.pop('locale', "")[0:4]: self.locale += (ord(char) << (char_ix * 8)) self.screenType = \ @@ -224,7 +227,7 @@ def __init__(self, buff=None, **kwargs): self.exceedingSize = 0 def _unpack_language_or_region(self, char_in, char_base): - char_out = '' + char_out = "" if char_in[0] & 0x80: first = char_in[1] & 0x1f second = ((char_in[1] & 0xe0) >> 5) + ((char_in[0] & 0x03) << 3) @@ -250,8 +253,8 @@ def get_language_and_region(self): ], ord('0') ) - return (_language + '-r' + _region) if _region else _language - return '' + return (_language + "-r" + _region) if _region else _language + return "" def get_config_name_friendly(self): res = [] @@ -259,9 +262,9 @@ def get_config_name_friendly(self): mcc = self.imsi & 0xFFFF mnc = (self.imsi & 0xFFFF0000) >> 16 if mcc != 0: - res.append('mcc%d' % mcc) + res.append("mcc%d" % mcc) if mnc != 0: - res.append('mnc%d' % mnc) + res.append("mnc%d" % mnc) if self.locale != 0: res.append(self.get_language_and_region()) @@ -269,50 +272,50 @@ def get_config_name_friendly(self): screenLayout = self.screenConfig & 0xff if (screenLayout & const.MASK_LAYOUTDIR) != 0: if screenLayout & const.MASK_LAYOUTDIR == const.LAYOUTDIR_LTR: - res.append('ldltr') + res.append("ldltr") elif screenLayout & const.MASK_LAYOUTDIR == const.LAYOUTDIR_RTL: - res.append('ldrtl') + res.append("ldrtl") else: res.append( - 'layoutDir_%d' % (screenLayout & const.MASK_LAYOUTDIR)) + "layoutDir_%d" % (screenLayout & const.MASK_LAYOUTDIR)) smallestScreenWidthDp = (self.screenConfig & 0xFFFF0000) >> 16 if smallestScreenWidthDp != 0: - res.append('sw%ddp' % smallestScreenWidthDp) + res.append("sw%ddp" % smallestScreenWidthDp) screenWidthDp = self.screenSizeDp & 0xFFFF screenHeightDp = (self.screenSizeDp & 0xFFFF0000) >> 16 if screenWidthDp != 0: - res.append('w%ddp' % screenWidthDp) + res.append("w%ddp" % screenWidthDp) if screenHeightDp != 0: - res.append('h%ddp' % screenHeightDp) + res.append("h%ddp" % screenHeightDp) if (screenLayout & const.MASK_SCREENSIZE) != const.SCREENSIZE_ANY: if screenLayout & const.MASK_SCREENSIZE == const.SCREENSIZE_SMALL: - res.append('small') + res.append("small") elif screenLayout & \ const.MASK_SCREENSIZE == const.SCREENSIZE_NORMAL: - res.append('normal') + res.append("normal") elif screenLayout & \ const.MASK_SCREENSIZE == const.SCREENSIZE_LARGE: - res.append('large') + res.append("large") elif screenLayout & \ const.MASK_SCREENSIZE == const.SCREENSIZE_XLARGE: - res.append('xlarge') + res.append("xlarge") else: res.append( - 'screenLayoutSize_%d' % ( + "screenLayoutSize_%d" % ( screenLayout & const.MASK_SCREENSIZE ) ) if (screenLayout & const.MASK_SCREENLONG) != 0: if screenLayout & const.MASK_SCREENLONG == const.SCREENLONG_NO: - res.append('notlong') + res.append("notlong") elif screenLayout & const.MASK_SCREENLONG == const.SCREENLONG_YES: - res.append('long') + res.append("long") else: res.append( - 'screenLayoutLong_%d' % ( + "screenLayoutLong_%d" % ( screenLayout & const.MASK_SCREENLONG ) ) @@ -320,52 +323,52 @@ def get_config_name_friendly(self): density = (self.screenType & 0xffff0000) >> 16 if density != const.DENSITY_DEFAULT: if density == const.DENSITY_LOW: - res.append('ldpi') + res.append("ldpi") elif density == const.DENSITY_MEDIUM: - res.append('mdpi') + res.append("mdpi") elif density == const.DENSITY_TV: - res.append('tvdpi') + res.append("tvdpi") elif density == const.DENSITY_HIGH: - res.append('hdpi') + res.append("hdpi") elif density == const.DENSITY_XHIGH: - res.append('xhdpi') + res.append("xhdpi") elif density == const.DENSITY_XXHIGH: - res.append('xxhdpi') + res.append("xxhdpi") elif density == const.DENSITY_XXXHIGH: - res.append('xxxhdpi') + res.append("xxxhdpi") elif density == const.DENSITY_NONE: - res.append('nodpi') + res.append("nodpi") elif density == const.DENSITY_ANY: - res.append('anydpi') + res.append("anydpi") else: - res.append('%ddpi' % (density)) + res.append("%ddpi" % (density)) touchscreen = (self.screenType & 0xff00) >> 8 if touchscreen != const.TOUCHSCREEN_ANY: if touchscreen == const.TOUCHSCREEN_NOTOUCH: - res.append('notouch') + res.append("notouch") elif touchscreen == const.TOUCHSCREEN_FINGER: - res.append('finger') + res.append("finger") elif touchscreen == const.TOUCHSCREEN_STYLUS: - res.append('stylus') + res.append("stylus") else: - res.append('touchscreen_%d' % touchscreen) + res.append("touchscreen_%d" % touchscreen) screenSize = self.screenSize if screenSize != 0: screenWidth = self.screenSize & 0xffff screenHeight = (self.screenSize & 0xffff0000) >> 16 - res.append('%dx%d' % (screenWidth, screenHeight)) + res.append("%dx%d" % (screenWidth, screenHeight)) version = self.version if version != 0: sdkVersion = self.version & 0xffff minorVersion = (self.version & 0xffff0000) >> 16 - res.append('v%d' % sdkVersion) + res.append("v%d" % sdkVersion) if minorVersion != 0: - res.append('.%d' % minorVersion) + res.append(".%d" % minorVersion) - return '-'.join(res) + return "-".join(res) def get_language(self): x = self.locale & 0x0000ffff @@ -398,7 +401,7 @@ def __eq__(self, other): return self._get_tuple() == other._get_tuple() def __repr__(self): - return ''.format(repr(self._get_tuple())) + return "".format(repr(self._get_tuple())) class ARSCResTableEntry(object): @@ -406,9 +409,9 @@ class ARSCResTableEntry(object): FLAG_PUBLIC = 2 FLAG_WEAK = 4 - def __init__(self, buff, resource_id, parent=None): + def __init__(self, buff, mResId, parent=None): self.start = buff.get_idx() - self.resource_id = resource_id + self.mResId = mResId self.parent = parent self.size = unpack('' + "" ).format( self.start, - self.resource_id, + self.mResId, self.size, self.flags, self.index, @@ -465,7 +468,7 @@ def __init__(self, buff, parent=None): ARSCResStringPoolRef(buff, self.parent))) def __repr__(self): - return ''.format( + return "".format( self.start, self.id_parent, self.count) @@ -474,14 +477,14 @@ def __init__(self, buff, parent=None): self.start = buff.get_idx() self.parent = parent - self.size, = unpack('' + "" ).format( self.start, self.size, - const.TYPE_TABLE.get(self.data_type, '0x%x' % self.data_type), + const.TYPE_TABLE.get(self.data_type, "0x%x" % self.data_type), self.data) + return self.data_type def get_arsc_info(arscobj): @@ -520,18 +524,18 @@ def get_arsc_info(arscobj): :param arscobj: :class:`~ARSCParser` :return: a string """ - buff = '' + buff = "" for package in arscobj.get_packages_names(): - buff += package + ':\n' + buff += package + ":\n" for locale in arscobj.get_locales(package): - buff += '\t' + repr(locale) + ':\n' + buff += "\t" + repr(locale) + ":\n" for ttype in arscobj.get_types(package, locale): - buff += '\t\t' + ttype + ':\n' + buff += "\t\t" + ttype + ":\n" try: - tmp_buff = getattr(arscobj, 'get_' + ttype + '_resources')( - package, locale).decode('utf-8', 'replace').split('\n') + tmp_buff = getattr(arscobj, "get_" + ttype + "_resources")( + package, locale).decode("utf-8", 'replace').split("\n") for i in tmp_buff: - buff += '\t\t\t' + i + '\n' + buff += "\t\t\t" + i + "\n" except AttributeError: pass return buff diff --git a/pyaxmlparser/axmlparser.py b/pyaxmlparser/axmlparser.py index 24a1437..d3f4b93 100644 --- a/pyaxmlparser/axmlparser.py +++ b/pyaxmlparser/axmlparser.py @@ -15,91 +15,52 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import unicode_literals -import os import logging from struct import unpack from collections import defaultdict -from zipfile import ZipFile, is_zipfile import pyaxmlparser.constants as const from pyaxmlparser import bytecode from pyaxmlparser.stringblock import StringBlock from pyaxmlparser import public from . import arscutil -from pyaxmlparser.utils import string_types, text_type + +log = logging.getLogger("pyaxmlparser.axmlparser") class AXMLParser(object): - def __init__(self, raw_buff=None, debug=False): - self.log = logging.getLogger('pyaxmlparser.axmlparser') - self.log.setLevel(logging.DEBUG if debug else logging.CRITICAL) - self.event = -1 - self.line_number = -1 - self.name = -1 - self.namespace_uri = -1 - self.attributes = [] - self.id_attribute = -1 - self.class_attribute = -1 - self.style_attribute = -1 - self.namespace = -1 - data = b'' - - if hasattr(raw_buff, 'read'): - data = raw_buff.read() - else: - path_to_file = None - try: - if os.path.exists(raw_buff): - path_to_file = raw_buff - except BaseException: - pass - if path_to_file: - if is_zipfile(path_to_file): - with ZipFile(path_to_file, 'r') as apk: - if 'AndroidManifest.xml' in apk.namelist(): - data = apk.read('AndroidManifest.xml') - else: - with open(path_to_file, 'rb') as xml_file: - data = xml_file.read() - else: - data = raw_buff + def __init__(self, raw_buff): + self.reset() - if not isinstance(data, string_types): - raise ValueError('AXMLParser need file path to apk or xml, str or bytes data.') - if isinstance(data, text_type): - data = bytearray(data, encoding='utf-8') - else: - data = bytearray(data) + self.valid_axml = True + self.axml_tampered = False + self.packerwarning = False + self.buff = bytecode.BuffHandle(raw_buff) - self.valid_android_xml = True - self.android_xml_tampered = False - self.packer_warning = False - self.buff = bytecode.BuffHandle(data) - android_xml_file, = unpack('> 16 == 0x0008: - self.android_xml_tampered = True - self.log.warning( - 'ANDROID XML file has an unusual header, most malware like ' - 'doing such stuff to anti androguard! But we try to parse ' - 'it anyways. Header: 0x{:08x}'.format(android_xml_file) + if axml_file >> 16 == 0x0008: + self.axml_tampered = True + log.warning( + "AXML file has an unusual header, most malwares like " + "doing such stuff to anti androguard! But we try to parse " + "it anyways. Header: 0x{:08x}".format(axml_file) ) else: - self.valid_android_xml = False - self.log.warning('Not a valid ANDROID XML file. Header 0x{:08x}'.format(android_xml_file)) + self.valid_axml = False + log.warning("Not a valid AXML file. Header 0x{:08x}".format(axml_file)) return # Next is the filesize self.filesize, = unpack(' const.CHUNK_XML_LAST: - self.log.warning('invalid chunk type 0x{:08x}'.format(chunk_type)) + if chunkType < const.CHUNK_XML_FIRST or \ + chunkType > const.CHUNK_XML_LAST: + log.warning("invalid chunk type 0x{:08x}".format(chunkType)) # Fake START_DOCUMENT event. - if chunk_type == const.CHUNK_XML_START_TAG and event == -1: - self.event = const.START_DOCUMENT + if chunkType == const.CHUNK_XML_START_TAG and event == -1: + self.m_event = const.START_DOCUMENT break # After the chunk_type, there are always 3 fields for the remaining @@ -198,7 +156,7 @@ def do_next(self): # are correct in size self.buff.read(4) # Line Number - self.line_number = unpack(' uri is a 1:1 mapping - self.prefix_uri[prefix] = uri + self.m_prefixuri[prefix] = uri # but uri --> prefix is a 1:n mapping! - self.uri_prefix[uri].append(prefix) - self.prefix_uri_list.append((prefix, uri)) - self.namespace = uri + self.m_uriprefix[uri].append(prefix) + self.m_prefixuriL.append((prefix, uri)) + self.ns = uri # Workaround for closing tags if (uri, prefix) in self.visited_ns: self.visited_ns.remove((uri, prefix)) else: - self.namespace = -1 + self.ns = -1 # END_PREFIX contains again prefix and uri field prefix, = unpack('> 16) - 1 - attribute_count = attribute_count & 0xFFFF - self.class_attribute = unpack('> 16) - 1 + attributeCount = unpack('> 16) - 1 + attributeCount = attributeCount & 0xFFFF + self.m_classAttribute = unpack('> 16) - 1 - self.class_attribute = (self.class_attribute & 0xFFFF) - 1 + self.m_classAttribute = (self.m_classAttribute & 0xFFFF) - 1 # Now, we parse the attributes. # Each attribute has 5 fields of 4 byte - for i in range(0, attribute_count * const.ATTRIBUTE_LENGHT): + for i in range(0, attributeCount * const.ATTRIBUTE_LENGHT): # Each field is linearly parsed into the array - self.attributes.append(unpack('> 24 + self.m_attributes[i] = self.m_attributes[i] >> 24 - self.event = const.START_TAG + self.m_event = const.START_TAG break - if chunk_type == const.CHUNK_XML_END_TAG: - self.namespace_uri = unpack(' prefix is 1:n mapping, # We will just return the first one we match. - if uri not in self.uri_prefix: + if uri not in self.m_uriprefix: return -1 else: - if len(self.uri_prefix[uri]) == 0: + if len(self.m_uriprefix[uri]) == 0: return -1 - return self.uri_prefix[uri][0] + return self.m_uriprefix[uri][0] - def get_prefix(self): + def getPrefix(self): # The default is, that the namespaceUri is 0xFFFFFFFF # Then we know, there is none - if self.namespace_uri == 0xFFFFFFFF: - return '' + if self.m_namespaceUri == 0xFFFFFFFF: + return u'' # FIXME this could be problematic. Need to find the correct namespace prefix - if self.namespace_uri in self.uri_prefix: - candidate = self.uri_prefix[self.namespace_uri][0] + if self.m_namespaceUri in self.m_uriprefix: + candidate = self.m_uriprefix[self.m_namespaceUri][0] try: - return self.string_block.get_string(candidate) + return self.sb.getString(candidate) except KeyError: - return '' + return u'' else: - return '' + return u'' - def get_name(self): - if self.name == -1 or ( - self.event != const.START_TAG and - self.event != const.END_TAG): - return '' + def getName(self): + if self.m_name == -1 or ( + self.m_event != const.START_TAG and + self.m_event != const.END_TAG): + return u'' - return self.string_block.get_string(self.name) + return self.sb.getString(self.m_name) - def get_text(self): - if self.name == -1 or self.event != const.TEXT: - return '' + def getText(self): + if self.m_name == -1 or self.m_event != const.TEXT: + return u'' - return self.string_block.get_string(self.name) + return self.sb.getString(self.m_name) - def get_namespace_prefix(self, pos): - prefix = self.prefix_uri_list[pos][0] - return self.string_block.get_string(prefix) + def getNamespacePrefix(self, pos): + prefix = self.m_prefixuriL[pos][0] + return self.sb.getString(prefix) - def get_namespace_uri(self, pos): - uri = self.prefix_uri_list[pos][1] - return self.string_block.get_string(uri) + def getNamespaceUri(self, pos): + uri = self.m_prefixuriL[pos][1] + return self.sb.getString(uri) - def get_xml_namespace(self): - buff = '' - for prefix, uri in self.prefix_uri.items(): + def getXMLNS(self): + buff = "" + for prefix, uri in self.m_prefixuri.items(): if (uri, prefix) not in self.visited_ns: - prefix_str = self.string_block.get_string(prefix) - prefix_uri = self.string_block.get_string(self.prefix_uri[prefix]) + prefix_str = self.sb.getString(prefix) + prefix_uri = self.sb.getString(self.m_prefixuri[prefix]) # FIXME Packers like Liapp use empty uri to fool XML Parser # FIXME they also mess around with the Manifest, thus it can not be parsed easily if prefix_uri == '': - self.log.warning('Empty Namespace URI for Namespace {}.'.format(prefix_str)) - self.packer_warning = True + log.warning("Empty Namespace URI for Namespace {}.".format(prefix_str)) + self.packerwarning = True # if prefix is (null), which is indicated by an empty str, then do not print : if prefix_str != '': - prefix_str = ':' + prefix_str + prefix_str = ":" + prefix_str buff += 'xmlns{}="{}"\n'.format(prefix_str, prefix_uri) self.visited_ns.append((uri, prefix)) return buff - def get_namespace_count(self, pos): + def getNamespaceCount(self, pos): pass - def get_attribute_offset(self, index): + def getAttributeOffset(self, index): # FIXME - if self.event != const.START_TAG: - self.log.warning('Current event is not START_TAG.') + if self.m_event != const.START_TAG: + log.warning("Current event is not START_TAG.") - offset = (index * 5) - 5 + offset = index * 5 # FIXME - if offset >= len(self.attributes): - self.log.warning('Invalid attribute index') + if offset >= len(self.m_attributes): + log.warning("Invalid attribute index") return offset - def get_attribute_count(self): - return len(self.attributes) // const.ATTRIBUTE_LENGHT if self.event == const.START_TAG else -1 + def getAttributeCount(self): + if self.m_event != const.START_TAG: + return -1 + + return len(self.m_attributes) // const.ATTRIBUTE_LENGHT + + def getAttributePrefix(self, index): + offset = self.getAttributeOffset(index) + uri = self.m_attributes[offset + const.ATTRIBUTE_IX_NAMESPACE_URI] + + prefix = self.getPrefixByUri(uri) + + if prefix == -1: + return "" - def get_attribute_prefix(self, index): - offset = self.get_attribute_offset(index) - uri = self.attributes[offset + const.ATTRIBUTE_IX_NAMESPACE_URI] - prefix = self.get_prefix_by_uri(uri) - return self.string_block.get_string(prefix) if prefix != -1 else '' + return self.sb.getString(prefix) - def get_attribute_name(self, index): - offset = self.get_attribute_offset(index) - name = self.attributes[offset + const.ATTRIBUTE_IX_NAME] + def getAttributeName(self, index): + offset = self.getAttributeOffset(index) + name = self.m_attributes[offset + const.ATTRIBUTE_IX_NAME] if name == -1: - return '' + return "" - res = self.string_block.get_string(name) + res = self.sb.getString(name) # If the result is a (null) string, we need to look it up. if not res: - attr = self.resource_ids[name] + attr = self.m_resourceIDs[name] if attr in public.SYSTEM_RESOURCES['attributes']['inverse']: res = 'android:' + public.SYSTEM_RESOURCES['attributes']['inverse'][ attr @@ -429,15 +395,15 @@ def get_attribute_name(self, index): return res - def get_attribute_value_type(self, index): - offset = self.get_attribute_offset(index) - return self.attributes[offset + const.ATTRIBUTE_IX_VALUE_TYPE] + def getAttributeValueType(self, index): + offset = self.getAttributeOffset(index) + return self.m_attributes[offset + const.ATTRIBUTE_IX_VALUE_TYPE] - def get_attribute_value_data(self, index): - offset = self.get_attribute_offset(index) - return self.attributes[offset + const.ATTRIBUTE_IX_VALUE_DATA] + def getAttributeValueData(self, index): + offset = self.getAttributeOffset(index) + return self.m_attributes[offset + const.ATTRIBUTE_IX_VALUE_DATA] - def get_attribute_value(self, index): + def getAttributeValue(self, index): """ This function is only used to look up strings All other work is made by format_value @@ -445,9 +411,9 @@ def get_attribute_value(self, index): :param index: :return: """ - offset = self.get_attribute_offset(index) - value_type = self.attributes[offset + const.ATTRIBUTE_IX_VALUE_TYPE] - if value_type == const.TYPE_STRING: - value_string = self.attributes[offset + const.ATTRIBUTE_IX_VALUE_STRING] - return self.string_block.get_string(value_string) - return '' + offset = self.getAttributeOffset(index) + valueType = self.m_attributes[offset + const.ATTRIBUTE_IX_VALUE_TYPE] + if valueType == const.TYPE_STRING: + valueString = self.m_attributes[offset + const.ATTRIBUTE_IX_VALUE_STRING] + return self.sb.getString(valueString) + return "" diff --git a/pyaxmlparser/axmlprinter.py b/pyaxmlparser/axmlprinter.py index 13d45dd..add31c8 100644 --- a/pyaxmlparser/axmlprinter.py +++ b/pyaxmlparser/axmlprinter.py @@ -15,83 +15,70 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import unicode_literals -import logging from pyaxmlparser.axmlparser import AXMLParser from pyaxmlparser.utils import format_value import pyaxmlparser.constants as const from xml.sax.saxutils import escape -from xml.dom.minidom import parseString - - -try: - from lxml import etree - from lxml.etree import ParseError - lxml_installed = True -except ImportError: - lxml_installed = False -if not lxml_installed: - import xml.etree.ElementTree as etree - from xml.etree.ElementTree import ParseError +from lxml import etree class AXMLPrinter(object): """ - Converter for Android XML Files into a XML string + Converter for AXML Files into a XML string """ - def __init__(self, raw_buff, debug=False): - self.log = logging.getLogger('pyaxmlparser.axmlprinter') - self.log.setLevel(logging.DEBUG if debug else logging.CRITICAL) - self.android_xml = AXMLParser(raw_buff) + def __init__(self, raw_buff): + self.axml = AXMLParser(raw_buff) self.xmlns = False - self.buff = '' + self.buff = u'' - while True and self.android_xml.is_valid(): - _type = next(self.android_xml) + while True and self.axml.is_valid(): + _type = next(self.axml) if _type == const.START_DOCUMENT: - self.buff += '\n' + self.buff += u'\n' elif _type == const.START_TAG: - self.buff += '<' + self.get_prefix(self.android_xml.get_prefix()) + \ - self.android_xml.get_name() + '\n' - self.buff += self.android_xml.get_xml_namespace() + self.buff += u'<' + self.getPrefix(self.axml.getPrefix()) + \ + self.axml.getName() + u'\n' + self.buff += self.axml.getXMLNS() - for i in range(0, self.android_xml.get_attribute_count()): - prefix = self.get_prefix(self.android_xml.get_attribute_prefix(i)) - name = self.android_xml.get_attribute_name(i) - value = self._escape(self.get_attribute_value(i)) + for i in range(0, self.axml.getAttributeCount()): + prefix = self.getPrefix(self.axml.getAttributePrefix(i)) + name = self.axml.getAttributeName(i) + value = self._escape(self.getAttributeValue(i)) # If the name is a system name AND the prefix is set, # we have a problem. # FIXME we are not sure how this happens, but a quick fix # is to remove the prefix if it already in the name if name.startswith(prefix): - prefix = '' + prefix = u'' - self.buff += '{}{}="{}"\n'.format(prefix, name, value) + self.buff += u'{}{}="{}"\n'.format(prefix, name, value) - self.buff += '>\n' + self.buff += u'>\n' elif _type == const.END_TAG: - self.buff += '\n' % ( - self.get_prefix(self.android_xml.get_prefix()), self.android_xml.get_name()) + self.buff += u"\n" % ( + self.getPrefix(self.axml.getPrefix()), self.axml.getName()) elif _type == const.TEXT: - self.buff += '%s\n' % self._escape(self.android_xml.get_text()) + self.buff += u"%s\n" % self._escape(self.axml.getText()) elif _type == const.END_DOCUMENT: break # pleed patch # FIXME should this be applied for strings directly? - @staticmethod - def _escape(s): + def _escape(self, s): + # FIXME Strings might contain null bytes. Should they be removed? + # We guess so, as normaly the string would terminate there...?! + s = s.replace("\x00", "") # Other HTML Conversions - s = s.replace('&', '&') - s = s.replace('"', '"') - s = s.replace('\'', ''') - s = s.replace('<', '<') - s = s.replace('>', '>') + s = s.replace("&", "&") + s = s.replace('"', """) + s = s.replace("'", "'") + s = s.replace("<", "<") + s = s.replace(">", ">") return escape(s) def is_packed(self): @@ -101,7 +88,7 @@ def is_packed(self): Parser :return: boolean """ - return self.android_xml.packer_warning + return self.axml.packerwarning def get_buff(self): return self.buff.encode('utf-8') @@ -111,65 +98,33 @@ def get_xml(self): Get the XML as an UTF-8 string :return: str """ - xml, error = self.get_xml_obj() - - if lxml_installed: - pretty_xml = etree.tostring(xml, encoding='utf-8', pretty_print=True) - else: - xml_string = etree.tostring(xml, encoding='utf-8') - raw_xml = parseString(xml_string) - pretty_xml = raw_xml.toprettyxml(encoding='utf-8') - - return pretty_xml + return etree.tostring( + self.get_xml_obj(), encoding="utf-8", pretty_print=True) def get_xml_obj(self): """ Get the XML as an ElementTree object - :return: :class:`etree.Element` + :return: :class:`~lxml.etree.Element` """ + parser = etree.XMLParser(recover=True, resolve_entities=False) + tree = etree.fromstring(self.get_buff(), parser=parser) + return tree - error = '' - xml_string = self.get_buff() - tree = None - if lxml_installed: - parser = etree.XMLParser(recover=True, resolve_entities=False) - try: - tree = etree.fromstring(xml_string, parser=parser) - except ParseError as error_message: - error = 'Error message: {}\n code {}\n position {}\n\n {}'.format( - str(error_message), - error_message.code if hasattr(error_message, 'code') else 'no code', - error_message.position if hasattr(error_message, 'position') else 'no position', - xml_string) - else: - # if error xml - need patch ) - try: - tree = etree.fromstring(xml_string) - except ParseError as error_message: - error = 'Error message: {}\n code {}\n position {}\n\n {}'.format( - str(error_message), - error_message.code if hasattr(error_message, 'code') else 'no code', - error_message.position if hasattr(error_message, 'position') else 'no position', - xml_string) - - return tree, error - - @staticmethod - def get_prefix(prefix): + def getPrefix(self, prefix): if prefix is None or len(prefix) == 0: - return '' + return u'' - return prefix + ':' + return prefix + u':' - def get_attribute_value(self, index): + def getAttributeValue(self, index): """ Wrapper function for format_value to resolve the actual value of an attribute in a tag :param index: :return: """ - _type = self.android_xml.get_attribute_value_type(index) - _data = self.android_xml.get_attribute_value_data(index) + _type = self.axml.getAttributeValueType(index) + _data = self.axml.getAttributeValueData(index) return format_value( - _type, _data, lambda _: self.android_xml.get_attribute_value(index)) + _type, _data, lambda _: self.axml.getAttributeValue(index)) diff --git a/pyaxmlparser/bytecode.py b/pyaxmlparser/bytecode.py index d7eb8ce..a2f52e6 100644 --- a/pyaxmlparser/bytecode.py +++ b/pyaxmlparser/bytecode.py @@ -15,7 +15,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import unicode_literals from struct import unpack, pack @@ -29,7 +28,7 @@ def _get(self): return pack(self.__size, self.__value) def __str__(self): - return '0x%x' % self.__value + return "0x%x" % self.__value def __int__(self): return self.__value @@ -77,11 +76,11 @@ def object_to_str(obj): if isinstance(obj, str): return obj elif isinstance(obj, bool): - return '' + return "" elif isinstance(obj, int): - return pack(' 0: - if value[0] == '.': + if value[0] == ".": value = self.package + value else: - v_dot = value.find('.') + v_dot = value.find(".") if v_dot == 0: - value = self.package + '.' + value + value = self.package + "." + value elif v_dot == -1: - value = self.package + '.' + value + value = self.package + "." + value return value def get_main_activity(self): @@ -81,26 +80,26 @@ def get_main_activity(self): x = set() y = set() - activities_and_aliases = self.xml.findall('.//activity') + \ - self.xml.findall('.//activity-alias') + activities_and_aliases = self.xml.findall(".//activity") + \ + self.xml.findall(".//activity-alias") for item in activities_and_aliases: # Some applications have more than one MAIN activity. # For example: paid and free content - activity_enabled = item.get(NS_ANDROID + 'enabled') - if activity_enabled is not None and \ - activity_enabled != '' and activity_enabled == 'false': + activityEnabled = item.get(NS_ANDROID + "enabled") + if activityEnabled is not None and \ + activityEnabled != "" and activityEnabled == "false": continue - for action_item in item.findall('.//action'): - value = action_item.get(NS_ANDROID + 'name') - if value == 'android.intent.action.MAIN': - x.add(item.get(NS_ANDROID + 'name')) + for sitem in item.findall(".//action"): + val = sitem.get(NS_ANDROID + "name") + if val == "android.intent.action.MAIN": + x.add(item.get(NS_ANDROID + "name")) - for category_item in item.findall('.//category'): - value = category_item.get(NS_ANDROID + 'name') - if value == 'android.intent.category.LAUNCHER': - y.add(item.get(NS_ANDROID + 'name')) + for sitem in item.findall(".//category"): + val = sitem.get(NS_ANDROID + "name") + if val == "android.intent.category.LAUNCHER": + y.add(item.get(NS_ANDROID + "name")) z = x.intersection(y) if len(z) > 0: @@ -124,30 +123,29 @@ def application(self): app_name = self.get_element( 'activity', 'label', name=main_activity_name) - if app_name is None or self.android_resource is None: + if app_name is None: # No App name set - # TODO return package name instead? + # TODO return packagename instead? return self.package - if app_name.startswith('@'): - app_name_resource_id = string_to_int(app_name, base=16) - app_name = self.package - if app_name_resource_id: - try: - app_name = \ - self.android_resource.get_resolved_res_configs( - app_name_resource_id, - ARSCResTableConfig.default_config())[0][1] - except Exception as e: - self.log.warning( - 'Exception selecting app name: {}'.format(str(e))) + if app_name.startswith("@"): + res_id = int(app_name[1:], 16) + res_parser = self.arsc + + try: + app_name = res_parser.get_resolved_res_configs( + res_id, + ARSCResTableConfig.default_config())[0][1] + except Exception as e: + log.warning("Exception selecting app name: %s" % e) + app_name = self.package return app_name @property def version_name(self): - version_name = self.xml.get(NS_ANDROID + 'versionName') + version_name = self.xml.get(NS_ANDROID + "versionName") if not version_name: - return '' - if not version_name.startswith('@'): + return "" + if not version_name.startswith("@"): return version_name rsc = self.get_resource(version_name, self.package) if rsc: @@ -155,41 +153,23 @@ def version_name(self): return version_name def get_resource(self, key, value): - rsc = None - if self.android_resource is not None: - try: - key = '0x' + key[1:] - hex_value = self.android_resource.get_id(value, int(key, 0))[1] - rsc = self.android_resource.get_string(value, hex_value)[1] - except Exception as e: - self.log.warning(str(e)) + try: + key = '0x' + key[1:] + hex_value = self.arsc.get_id(value, int(key, 0))[1] + rsc = self.arsc.get_string(value, hex_value)[1] + except Exception as e: + log.warning(str(e)) + rsc = None return rsc @property def version_code(self): - version_code = self.xml.get(NS_ANDROID + 'versionCode') - return version_code if version_code else 1 + version_code = self.xml.get(NS_ANDROID + "versionCode") + return version_code @property def package(self): - return self.xml.get('package') - - @property - def platform_build_version_code(self): - platform_build_version_code_value = \ - self.xml.get('platformBuildVersionCode') - if not platform_build_version_code_value: - platform_build_version_code_value = \ - self.get_min_sdk_version - return platform_build_version_code_value - - @property - def platform_build_version_name(self): - platform_build_version_name_value = \ - self.xml.get('platformBuildVersionName') - if not platform_build_version_name_value: - platform_build_version_name_value = '' - return platform_build_version_name_value + return self.xml.get("package") @property def icon_info(self): @@ -197,9 +177,9 @@ def icon_info(self): app = self.xml.findall('.//application')[0] app_icon = app.get(NS_ANDROID + 'icon')[1:] - if app_icon and self.android_resource is not None: + if app_icon: icon_id = int('0x' + app_icon, 0) - icon_data = self.android_resource.get_id(self.package, icon_id) + icon_data = self.arsc.get_id(self.package, icon_id) if icon_data: icon_type, icon_name = icon_data[0], icon_data[1] return icon_type, icon_name @@ -215,25 +195,25 @@ def icon_data(self): if not app_icon: app_icon = self.get_element('application', 'icon') - if not app_icon and self.android_resource is not None: - res_id = self.android_resource.get_res_id_by_key( + if not app_icon: + res_id = self.arsc.get_res_id_by_key( self.package, 'mipmap', 'ic_launcher') if res_id: - app_icon = '@%x' % res_id + app_icon = "@%x" % res_id - if not app_icon and self.android_resource is not None: - res_id = self.android_resource.get_res_id_by_key( + if not app_icon: + res_id = self.arsc.get_res_id_by_key( self.package, 'drawable', 'ic_launcher') if res_id: - app_icon = '@%x' % res_id + app_icon = "@%x" % res_id if not app_icon: # If the icon can not be found, return now return None - if app_icon.startswith('@') and self.android_resource is not None: + if app_icon.startswith("@"): res_id = int(app_icon[1:], 16) - res_parser = self.android_resource + res_parser = self.arsc candidates = res_parser.get_resolved_res_configs(res_id) app_icon = None @@ -246,30 +226,30 @@ def icon_data(self): app_icon = file_name current_dpi = dpi except Exception as e: - self.log.warning('Exception selecting app icon: %s' % e) + log.warning("Exception selecting app icon: %s" % e) return self.zip_file.read(app_icon) @property def get_min_sdk_version(self): - return self.get_element('uses-sdk', 'minSdkVersion') + return self.get_element("uses-sdk", "minSdkVersion") @property def get_max_sdk_version(self): - return self.get_element('uses-sdk', 'maxSdkVersion') + return self.get_element("uses-sdk", "maxSdkVersion") @property def get_target_sdk_version(self): - return self.get_element('uses-sdk', 'targetSdkVersion') + return self.get_element("uses-sdk", "targetSdkVersion") @property def get_effective_target_sdk_version(self): """ - Return the effective targetSdkVersion, always returns int > 0. - If the targetSdkVersion is not set, it defaults to 1. This is - set based on defaults as defined in: - https://developer.android.com/guide/topics/manifest/uses-sdk-element.html - :rtype: int + Return the effective targetSdkVersion, always returns int > 0. + If the targetSdkVersion is not set, it defaults to 1. This is + set based on defaults as defined in: + https://developer.android.com/guide/topics/manifest/uses-sdk-element.html + :rtype: int """ target_sdk_version = self.get_target_sdk_version if not target_sdk_version: @@ -278,102 +258,3 @@ def get_effective_target_sdk_version(self): return int(target_sdk_version) except (ValueError, TypeError): return 1 - - @property - def uses_permissions(self): - """ - find all uses-permission and uses-permission-sdk-* example uses-permission-sdk-23 - :return: list - """ - permissions = [] - if hasattr(self.xml, 'xpath'): - elements = self.xml.findall("//*[starts-with(name(), 'uses-permission')]") - for item in elements: - value = item.get(NS_ANDROID + 'name') - if value is not None and value not in permissions: - permissions.append(value) - else: - manifest = self.xml.findall('.//manifest') - if manifest: - for item in manifest.getiterator(): - if item.tag.startswith('uses-permission'): - value = item.get(NS_ANDROID + 'name') - if value is not None and value not in permissions: - permissions.append(value) - return permissions - - @property - def permissions(self): - """ - find permission - :return: list - """ - permissions = [] - elements = self.xml.findall('.//permission') - for item in elements: - value = item.get(NS_ANDROID + 'name') - if value is not None and value not in permissions: - permissions.append(value) - return permissions - - @property - def all_permissions(self): - """ - find all permission - :return: list - """ - permissions = self.permissions - permissions.extend(self.uses_permissions) - return list(set(permissions)) - - @property - def uses_feature(self): - return self.get_uses_feature() - - @property - def uses_feature_required(self): - return self.get_uses_feature(True) - - @property - def uses_feature_non_required(self): - return self.get_uses_feature(False) - - def get_uses_feature(self, required=None): - """ - :param required: None - return all uses feature. - :param required: True - return all uses feature is required. - :param required: False - return all uses feature is not required. - :return: - """ - uses_feature = [] - elements = self.xml.findall('.//uses-feature') - for item in elements: - value = item.get(NS_ANDROID + 'name') - if value is not None and value not in uses_feature: - if required is None: - uses_feature.append(value) - elif required and item.get(NS_ANDROID + 'required') == 'true': - uses_feature.append(value) - elif not required and item.get(NS_ANDROID + 'required') == 'false': - uses_feature.append(value) - else: - continue - return uses_feature - - @property - def gles_version(self): - return self.get_gles_version() - - def get_gles_version(self): - gl_es_version = None - if hasattr(self.xml, 'xpath'): - result = self.xml.xpath("//uses-feature/@*[contains(name(), 'glEsVersion')]") - if len(result): - gl_es_version = result[0] - else: - elements = self.xml.findall('.//uses-feature') - for item in elements: - value = item.get(NS_ANDROID + 'glEsVersion') - if value: - gl_es_version = value - return gl_es_version diff --git a/pyaxmlparser/public.py b/pyaxmlparser/public.py index 88c7bb3..3a7f3f4 100644 --- a/pyaxmlparser/public.py +++ b/pyaxmlparser/public.py @@ -1924,12 +1924,12 @@ } SYSTEM_RESOURCES = { - 'attributes': { - 'forward': {k: v for k, v in resources['attr'].items()}, - 'inverse': {v: k for k, v in resources['attr'].items()} + "attributes": { + "forward": {k: v for k, v in resources['attr'].items()}, + "inverse": {v: k for k, v in resources['attr'].items()} }, - 'styles': { - 'forward': {k: v for k, v in resources['style'].items()}, - 'inverse': {v: k for k, v in resources['style'].items()} + "styles": { + "forward": {k: v for k, v in resources['style'].items()}, + "inverse": {v: k for k, v in resources['style'].items()} } } diff --git a/pyaxmlparser/stringblock.py b/pyaxmlparser/stringblock.py index 932c860..4de2dd8 100644 --- a/pyaxmlparser/stringblock.py +++ b/pyaxmlparser/stringblock.py @@ -15,12 +15,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import unicode_literals import logging from struct import unpack -import unicodedata -from pyaxmlparser.utils import is_python_3 + +log = logging.getLogger("pyaxmlparser.stringblock") + # Flags in the STRING Section SORTED_FLAG = 1 << 0 @@ -31,148 +31,145 @@ class StringBlock(object): """ StringBlock is a CHUNK inside an AXML File It contains all strings, which are used by referecing to ID's - TODO might migrate this block into the ARSCParser, as it it not a 'special' block but a normal tag. + TODO might migrate this block into the ARSCParser, as it it not a "special" block but a normal tag. """ - def __init__(self, buff, header, debug=False): - self.log = logging.getLogger('pyaxmlparser.stringblock') - self.log.setLevel(logging.DEBUG if debug else logging.CRITICAL) + def __init__(self, buff, header): self._cache = {} self.header = header # We already read the header (which was chunk_type and chunk_size # Now, we read the string_count: - self.string_count = unpack(' 0: - self.log.warning('Styles Offset given, but styleCount is zero.') + # Check if they supplied a stylesOffset even if the count is 0: + if self.styleOffsetCount == 0 and self.stylesOffset > 0: + log.warning("Styles Offset given, but styleCount is zero.") - self.string_offsets = [] - self.style_offsets = [] - self.char_buffer = b'' if is_python_3 else bytearray(b'') - self.styles = [] + self.m_stringOffsets = [] + self.m_styleOffsets = [] + self.m_charbuff = "" + self.m_styles = [] # Next, there is a list of string following # This is only a list of offsets (4 byte each) - for i in range(0, self.string_count): - self.string_offsets.append(unpack('= len( - self.string_offsets): - return '' + if idx < 0 or not self.m_stringOffsets or idx >= len( + self.m_stringOffsets): + return "" + + offset = self.m_stringOffsets[idx] - offset = self.string_offsets[idx] - if self.is_utf8: - value = self.decode_utf8(offset) + if self.m_isUTF8: + self._cache[idx] = self.decode8(offset) else: - value = self.decode_utf16(offset) + self._cache[idx] = self.decode16(offset) - # Remove all control symbol https://www.compart.com/en/unicode/category/Cc - # fix function _escape from axmlprinter - self._cache[idx] = ''.join(c if unicodedata.category(c) != 'Cc' else '' for c in value) return self._cache[idx] - def get_style(self, idx): + def getStyle(self, idx): # FIXME - return self.styles[idx] + return self.m_styles[idx] - def decode_utf8(self, offset): - str_len, skip = self.decode_length(offset, 1) + def decode8(self, offset): + str_len, skip = self.decodeLength(offset, 1) offset += skip - encoded_bytes, skip = self.decode_length(offset, 1) + encoded_bytes, skip = self.decodeLength(offset, 1) offset += skip - data = self.char_buffer[offset: offset + encoded_bytes] + data = self.m_charbuff[offset: offset + encoded_bytes] return self.decode_bytes(data, 'utf-8', str_len) - def decode_utf16(self, offset): - str_len, skip = self.decode_length(offset, 2) + def decode16(self, offset): + str_len, skip = self.decodeLength(offset, 2) offset += skip encoded_bytes = str_len * 2 - data = self.char_buffer[offset: offset + encoded_bytes] + data = self.m_charbuff[offset: offset + encoded_bytes] return self.decode_bytes(data, 'utf-16', str_len) def decode_bytes(self, data, encoding, str_len): string = data.decode(encoding, 'replace') if len(string) != str_len: - self.log.warning('invalid decoded string length') + log.warning("invalid decoded string length") return string - def decode_length(self, offset, sizeof_char): - length = self.char_buffer[offset] + def decodeLength(self, offset, sizeof_char): + length = self.m_charbuff[offset] sizeof_2chars = sizeof_char << 1 - fmt = '<2B' if sizeof_char == 1 else '<2H' + fmt_chr = 'B' if sizeof_char == 1 else 'H' + fmt = "<2" + fmt_chr length1, length2 = unpack( - fmt, self.char_buffer[offset:(offset + sizeof_2chars)]) + fmt, self.m_charbuff[offset:(offset + sizeof_2chars)]) - high_bit = 0x80 << (8 * (sizeof_char - 1)) + highbit = 0x80 << (8 * (sizeof_char - 1)) - if (length & high_bit) != 0: + if (length & highbit) != 0: return ( - (length1 & ~high_bit) << (8 * sizeof_char) + (length1 & ~highbit) << (8 * sizeof_char) ) | length2, sizeof_2chars else: return length1, sizeof_char def show(self): - print('StringBlock(%x, %x, %x, %x, %x, %x' % ( + print("StringBlock(%x, %x, %x, %x, %x, %x" % ( self.start, self.header, self.header_size, - self.chunk_size, - self.strings_offset, + self.chunkSize, + self.stringsOffset, self.flags)) - for i in range(0, len(self.string_offsets)): - print(i, repr(self.get_string(i))) + for i in range(0, len(self.m_stringOffsets)): + print(i, repr(self.getString(i))) diff --git a/pyaxmlparser/utils.py b/pyaxmlparser/utils.py index dca71cd..b5ca948 100644 --- a/pyaxmlparser/utils.py +++ b/pyaxmlparser/utils.py @@ -1,22 +1,22 @@ -from __future__ import unicode_literals import io -import sys -from zipfile import ZipFile, is_zipfile +import os.path +from xml.dom.pulldom import SAX2DOM +from zipfile import ZipFile import pyaxmlparser.constants as const from struct import unpack, pack +import lxml.sax + NS_ANDROID_URI = 'http://schemas.android.com/apk/res/android' NS_ANDROID = '{http://schemas.android.com/apk/res/android}' RADIX_MULTS = [0.00390625, 3.051758E-005, 1.192093E-007, 4.656613E-010] -is_python_3 = sys.version_info > (3, 0, 0) -if is_python_3: - string_types = (bytes, str) - text_type = str -else: - string_types = (bytes, str, unicode) - text_type = unicode + +def parse_lxml_dom(tree): + handler = SAX2DOM() + lxml.sax.saxify(tree, handler) + return handler.document def _range(a, b, step=None): @@ -26,85 +26,72 @@ def _range(a, b, step=None): def get_zip_file(resource): - try: - is_zip = is_zipfile(resource) - except Exception: - is_zip = False - if is_zip: - return ZipFile(resource) - if isinstance(resource, string_types): - if not isinstance(resource, bytes): - resource = resource.encode() + if isinstance(resource, bytes): return ZipFile(io.BytesIO(resource)) + if os.path.isfile(resource): + return ZipFile(resource) raise TypeError('Resource should be file or bytes stream') def is_str(item, string=False): - return str(item) if string else item + if string: + return str(item) + return item -def complex_to_float(value): - return float(value & 0xFFFFFF00) * RADIX_MULTS[(value >> 4) & 3] +def complexToFloat(xcomplex): + return float(xcomplex & 0xFFFFFF00) * RADIX_MULTS[(xcomplex >> 4) & 3] -def long_to_int(input_l): +def long2int(input_l): if input_l > 0x7fffffff: input_l = (0x7fffffff & input_l) - 0x80000000 return input_l -def get_package(i): - return 'android:' if i >> 24 == 1 else '' +def getPackage(i): + if i >> 24 == 1: + return "android:" + return "" -def format_value(_type, _data, lookup_string=lambda ix: ''): +def format_value(_type, _data, lookup_string=lambda ix: ""): if _type == const.TYPE_STRING: return lookup_string(_data) elif _type == const.TYPE_ATTRIBUTE: - return '?%s%08X' % (get_package(_data), _data) + return "?%s%08X" % (getPackage(_data), _data) elif _type == const.TYPE_REFERENCE: - return '@%s%08X' % (get_package(_data), _data) + return "@%s%08X" % (getPackage(_data), _data) elif _type == const.TYPE_FLOAT: - return '%f' % unpack('=f', pack('=L', _data))[0] + return "%f" % unpack("=f", pack("=L", _data))[0] elif _type == const.TYPE_INT_HEX: - return '0x%08X' % _data + return "0x%08X" % _data elif _type == const.TYPE_INT_BOOLEAN: if _data == 0: - return 'false' - return 'true' + return "false" + return "true" elif _type == const.TYPE_DIMENSION: - return '%f%s' % ( - complex_to_float(_data), + return "%f%s" % ( + complexToFloat(_data), const.DIMENSION_UNITS[_data & const.COMPLEX_UNIT_MASK] ) elif _type == const.TYPE_FRACTION: - return '%f%s' % ( - complex_to_float(_data) * 100, + return "%f%s" % ( + complexToFloat(_data) * 100, const.FRACTION_UNITS[_data & const.COMPLEX_UNIT_MASK] ) elif const.TYPE_FIRST_COLOR_INT <= _type <= const.TYPE_LAST_COLOR_INT: - return '#%08X' % _data + return "#%08X" % _data elif const.TYPE_FIRST_INT <= _type <= const.TYPE_LAST_INT: - return '%d' % long_to_int(_data) - - return '<0x%X, type 0x%02X>' % (_data, _type) - + return "%d" % long2int(_data) -def string_to_int(target_string=None, base=10): - result = None - if isinstance(target_string, string_types): - number_symbols = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '0'] - try: - result = int('0' + ''.join([x for x in target_string if x in number_symbols]), base) - except ValueError: - pass - return result + return "<0x%X, type 0x%02X>" % (_data, _type) diff --git a/requirements.txt b/requirements.txt index fd57bc5..8da49d1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,6 @@ +lxml==3.7.3 +click==6.7 + # testing pytest==3.0.6 flake8==3.3.0 diff --git a/setup.py b/setup.py index 885f92a..5b8640b 100644 --- a/setup.py +++ b/setup.py @@ -1,60 +1,48 @@ -#!/usr/bin/python -# coding=utf-8 - -from __future__ import absolute_import, unicode_literals - from setuptools import find_packages, setup -from codecs import open -from os import path -import pyaxmlparser - -here = path.abspath(path.dirname(__file__)) +__VERSION__ = '0.3.13' -with open(path.join(here, 'README.rst'), 'r', encoding='utf-8') as f: - long_description = f.read() +with open("README.rst", "r") as fh: + long_description = fh.read() setup( - name=pyaxmlparser.__package_name__, - version=pyaxmlparser.__version__, - url=pyaxmlparser.__url__, - author=pyaxmlparser.__author__, - author_email=pyaxmlparser.__author_email__, - license=pyaxmlparser.__license__, + name='pyaxmlparser', + version=__VERSION__, + url='https://github.com/appknox/pyaxmlparser', + + author='Subho Halder', + author_email='sunny@appknox.com', + license='MIT', + packages=find_packages(exclude=['tests', 'examples']), - python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*', include_package_data=True, zip_safe=False, platforms='any', + install_requires=['lxml', 'click==6.7'], entry_points=''' [console_scripts] - apkinfo = pyaxmlparser.__main__:main - pyaxmlparser = pyaxmlparser.__main__:main + apkinfo = pyaxmlparser.cli:main ''', py_modules=['pyaxmlparser'], - description=pyaxmlparser.__description__, + description="Python3 Parser for Android XML file and get Application Name without using Androguard", long_description=long_description, - long_description_content_type='text/markdown', + keywords='appknox axmlparser arscparser android', classifiers=[ 'Development Status :: 4 - Beta', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: MIT License', - 'Operating System :: MacOS :: MacOS X', + 'Operating System :: POSIX', - 'Operating System :: POSIX :: BSD', - 'Operating System :: POSIX :: Linux', - 'Operating System :: Microsoft :: Windows', + 'Operating System :: MacOS', + 'Operating System :: Unix', + 'Programming Language :: Python', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: Implementation :: CPython', - 'Programming Language :: Python :: Implementation :: PyPy' + 'Topic :: Software Development :: Build Tools', 'Topic :: Software Development :: Libraries :: Python Modules', ] diff --git a/tests/test_parser.py b/tests/test_parser.py index 3a839fe..4b471e8 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,26 +1,27 @@ -from os import path +import os.path +import sys from pyaxmlparser.arscparser import ARSCParser from pyaxmlparser.axmlprinter import AXMLPrinter from pyaxmlparser.utils import NS_ANDROID +PATH_INSTALL = "./" +sys.path.append(PATH_INSTALL) +test_apk = 'tests/test_apk/' + + def test_app_name_extraction(): - here = path.abspath(path.dirname(__file__)) - axml_file = path.join(here, 'tests/test_apk/AndroidManifest.xml') - rsc_file = path.join(here, 'tests/test_apk/resources.arsc') - with open(axml_file, 'rb') as manifest_file, open(rsc_file, 'rb') as resources_file: - manifest_data = manifest_file.read() - resources_data = resources_file.read() - axml, error = AXMLPrinter(manifest_data).get_xml_obj() - assert error == '', 'Error parse xml {}: \n{}'.format(axml_file, error) - rsc = ARSCParser(resources_data) + axml_file = os.path.join(test_apk, 'AndroidManifest.xml') + axml = AXMLPrinter(open(axml_file, 'rb').read()).get_xml_obj() + app_name_hex = axml.findall(".//application")[0].get(NS_ANDROID + "label") + appnamehex = '0x' + app_name_hex[1:] - app_name_label = axml.findall('.//application')[0].get(NS_ANDROID + 'label') - app_name_hex = '0x' + app_name_label[1:] + rsc_file = os.path.join(test_apk, 'resources.arsc') + rsc = ARSCParser(open(rsc_file, 'rb').read()) - app_name = rsc.get_string( - rsc.get_packages_names()[0], - rsc.get_id(rsc.get_packages_names()[0], int(app_name_hex, 0))[1] - ) + app_name = rsc.get_string( + rsc.get_packages_names()[0], + rsc.get_id(rsc.get_packages_names()[0], int(appnamehex, 0))[1] + ) assert app_name == ['app_name', 'Evie']