跳转到主要内容

一组高性能解析工具

项目描述

parserutils

Build Status Coverage Status

这是一个工具函数库,旨在使开发者的生活更加轻松。

此库中的函数旨在既高效又符合Python风格,并且与Python 3.6至3.9兼容。它们都有文档,并由单元测试全面覆盖,以详细描述和证明其行为。

总的来说,我的观点是,实用函数应该足够快,并且能够处理边缘情况,这样调用者就不必采取各种预防措施,也不必对结果进行类型检查。因此,在此库中,如果 None 会导致某个函数出错,则直接将其原样返回;如果某个值无需处理,则不经处理直接返回;否则,值要么被成功处理,要么抛出标准异常。

但这只是一个起点。我欢迎反馈和对额外功能的需求。

安装

使用 pip install parserutils 进行安装。

用法

以下是您可以使用 dict 对象和其他集合执行的操作。

from parserutils import collections

collections.accumulate_items([('key', 'val1'), ('key', 'val2'), ('key', 'val3')])   # {'key': ['val1', 'val2', 'val3']}
collections.accumulate_items(
    [('key1', 'val1'), ('key2', 'val2'), ('key3', 'val3')], reduce_each=True  # {'key1': 'val1', 'key2': 'val2', 'key3': 'val3'}
)

collections.setdefaults({}, 'a.b')                         # {'a': {'b': None}}
collections.setdefaults({}, ['a.b', 'a.c'])                # {'a': {'b': None, 'c': None}}
collections.setdefaults({}, {'a.b': 'bbb', 'a.c': 'ccc'})  # {'a': {'b': 'bbb', 'c': 'ccc'}}

collections.filter_empty(x for x in (None, [], ['a'], '', {'b'}, 'c'))      # [['a'], {'b'}, 'c']
collections.flatten_items(x for x in ('abc', ['a', 'b', 'c'], ('d', 'e')))  # ['abc', 'a', 'b', 'c', 'd', 'e']

collections.remove_duplicates('abcdefabc')                                   # 'abcdef'
collections.remove_duplicates('abcdefabc', in_reverse=True)                  # 'defabc'
collections.remove_duplicates(['a', 'b', 'c', 'a'])                          # ['a', 'b', 'c']
collections.remove_duplicates(('a', 'b', 'c', 'a'), in_reverse=True)         # ('b', 'c', 'a')
collections.remove_duplicates(x for x in 'abca')                             # ['a', 'b', 'c']
collections.remove_duplicates((x for x in 'abca'), in_reverse=True)          # ['b', 'c', 'a']
collections.remove_duplicates((set(x) for x in 'abca'), is_unhashable=True)  # [{'a'}, {'b'}, {'c'}]

collections.rindex('aba', 'a')               # 2
collections.rindex(['a', 'b', 'a'], 'a')     # 2
collections.rindex(('a', 'b', 'a'), 'a')     # 2
collections.rindex('xyz', 'a')               # ValueError
collections.rindex([x for x in 'xyz'], 'a')  # ValueError

collections.rfind('aba', 'a')                # 2
collections.rfind(['a', 'b', 'a'], 'a')      # 2
collections.rfind(('a', 'b', 'a'), 'a')      # 2
collections.rfind('xyz', 'a')                # -1
collections.rfind([x for x in 'xyz'], 'a')   # -1

collections.reduce_value(['abc'])          # 'abc'
collections.reduce_value(('abc',))         # 'abc'
collections.reduce_value({'abc'})          # 'abc'
collections.reduce_value('abc')            # 'abc'
collections.reduce_value({'a': 'aaa'})     # {'a': 'aaa'}
collections.reduce_value([{'a': 'aaa'}])   # {'a': 'aaa'}
collections.reduce_value(['a', 'b', 'c'])  # ['a', 'b', 'c']

collections.wrap_value(['abc'])           # ['abc']
collections.wrap_value(('abc',))          # ('abc',)
collections.wrap_value('abc')             # ['abc']
collections.wrap_value(x for x in 'abc')  # ['a', 'b', 'c']
collections.wrap_value({'a': 'aaa'})      # [{'a': 'aaa'}]
collections.wrap_value(['a', 'b', 'c'])   # ['a', 'b', 'c']

以下是关于日期和数字的一些信息。

from parserutils import dates
from parserutils import numbers

# Leverages dateutil in general, but also handles milliseconds and provides defaults

dates.parse_dates(None, default='today')  # Today (default behavior)
dates.parse_dates(None, default=None)     # Returns None
dates.parse_dates('nope', default=None)   # Returns None
dates.parse_dates(0)                      # 1970
dates.parse_dates('<date_format>')        # Behaves as described in dateutil library

# Reliably handles all the usual cases

numbers.is_number(0)                    # Integer: True
numbers.is_number(1.1)                  # Float: True
numbers.is_number('2.2')                # String: True
numbers.is_number(False)                # Boolean: False by default
numbers.is_number(False, if_bool=True)  # Boolean: True if you need it to
numbers.is_number(float('inf'))         # Infinite: False
numbers.is_number(float('nan'))         # NaN: False

以下是关于字符串和URL解析辅助工具的一些信息。

from parserutils import strings
from parserutils import urls

# These string conversions are written to be fast and reliable

strings.camel_to_constant('toConstant')        # TO_CONSTANT
strings.camel_to_constant('XMLConstant')       # XML_CONSTANT
strings.camel_to_constant('withNumbers1And2')  # WITH_NUMBERS1_AND2

strings.camel_to_snake('toSnake')              # to_snake
strings.camel_to_snake('withXMLAbbreviation')  # with_xml_abbreviation
strings.camel_to_snake('withNumbers3And4')     # with_numbers3_and4

strings.snake_to_camel('from_snake')              # fromSnake
strings.snake_to_camel('_leading_and_trailing_')  # leadingAndTrailing
strings.snake_to_camel('extra___underscores')     # extraUnderscores

strings.find_all('ab??ca??bc??', '??')                         # [2, 6, 10]
strings.find_all('ab??ca??bc??', '??', reverse=True)           # [10, 6, 2]
strings.find_all('ab??ca??bc??', '??', limit=2, reverse=True)  # [10, 6]
strings.find_all('ab??ca??bc??', '??', start=4)                # [6, 10]
strings.find_all('ab??ca??bc??', '??', end=8)                  # [2, 6]
strings.find_all('ab??ca??bc??', '??', start=4, end=8)         # [6]

strings.splitany('ab:ca:bc', ':')           # Same as 'ab:ca:bc'.split(':')
strings.splitany('ab:ca:bc', ':', 1)        # Same as 'ab:ca:bc'.split(':', 1)
strings.splitany('ab|ca:bc', '|:')          # ['ab', 'ca', 'bc']
strings.splitany('ab|ca:bc', ':|', 1)       # ['ab', 'ca:bc']
strings.splitany('0<=3<5', ['<', '<='])     # ['0', '3', '5']
strings.splitany('0<=3<5', ['<', '<='], 1)  # ['0', '3<5']

strings.to_ascii_equivalent('smart quotes, etc.')  # Replaces with ascii quotes, etc.

# URL manipulation leverages urllib, but spares you the extra code

urls.get_base_url('http://www.params.com?a=aaa')                  # 'http://www.params.com'
urls.get_base_url('http://www.path.com/test')                     # 'http://www.path.com'
urls.get_base_url('http://www.path.com/test', include_path=True)  # 'http://www.path.com/test'
urls.get_base_url('http://www.params.com/test?a=aaa', True)       # 'http://www.params.com/test'

urls.update_url_params('http://www.params.com?a=aaa', a='aaa')  # 'http://www.params.com?a=aaa'
urls.update_url_params('http://www.params.com?a=aaa', a='xxx')  # 'http://www.params.com?a=xxx'
urls.update_url_params('http://www.params.com', b='bbb')        # 'http://www.params.com?b=bbb'
urls.update_url_params('http://www.params.com', c=['c', 'cc'])  # 'http://www.params.com?c=c&c=cc'

# Helpers to parse urls to and from parts: parses path as list and params as dict
urls.url_to_parts('http://www.params.com/test/path?a=aaa')      # SplitResult(..., path=['test', 'path'], query={'a': 'aaa'})
urls.parts_to_url(
    {'netloc': 'www.params.com', 'query': {'a': 'aaa'}}         # 'http://www.params.com?a=aaa'
)
urls.parts_to_url(
    urls.url_to_parts('http://www.params.com/test/path?a=aaa')  # 'http://www.params.com/test/path?a=aaa'
)

最后,也支持XML解析,使用cElementTree和defusedxml库以提高性能和安全性

from parserutils import elements

# First convert an XML string to an Element object
xml_string = '<root><parent><child>one</child><child>two</child><uglyChild>yuck</uglyChild></parent></root>'
xml_element = elements.get_element(xml_string)


# Update the XML string and print it back out
elements.set_element_text(xml_element, 'parent/child', 'child text')
elements.set_element_attributes(xml_element, childHas='child attribute')
elements.remove_element(xml_element, 'parent/uglyChild')
elements.element_to_string(xml_element)


# Conversion from string to Element, to dict, and then back to string
converted = elements.element_to_dict(xml_string, recurse=True)
reverted = elements.dict_to_element(converted)
reverted = elements.get_element(converted)
xml_string == elements.element_to_string(converted)


# Conversion to flattened dict object
root, obj = elements.element_to_object(converted)
obj == {'root': {'parent': {'child': ['one', 'two'], 'uglyChild': 'yuck'}}}


# Read in an XML file and write it elsewhere
with open('/path/to/file.xml', 'rb') as xml:
    xml_from_file = elements.get_element(xml)
    elements.write_element(xml_from_file, '/path/to/updated/file.xml')


# Write a local file from a remote location (via URL)
xml_from_web = elements.get_remote_element('http://en.wikipedia.org/wiki/XML')
elements.write_element(xml_from_web, '/path/to/new/file.xml')


# Read content at a local file path to a string
xml_from_path = elements.get_remote_element('/path/to/file.xml')
elements.element_to_string(xml_from_path)

项目详情


下载文件

下载您平台上的文件。如果您不确定选择哪个,请了解更多关于 安装软件包 的信息。

源代码分发

parserutils-2.0.1.tar.gz (43.0 kB 查看哈希值)

上传时间 源代码

构建分发

parserutils-2.0.1-py3-none-any.whl (43.7 kB 查看哈希值)

上传时间 Python 3

支持

AWS:云计算和安全赞助商;Datadog:监控;Fastly:CDN;Google:下载分析;Microsoft:PSF赞助商;Pingdom:监控;Sentry:错误日志;StatusPage:状态页