一组高性能解析工具

这些详情尚未通过PyPI验证

项目链接

项目描述

parserutils

这是一个工具函数库，旨在使开发者的生活更加轻松。

此库中的函数旨在既高效又符合Python风格，并且与Python 3.6至3.9兼容。它们都有文档，并由单元测试全面覆盖，以详细描述和证明其行为。

总的来说，我认为工具函数应当足够快，并能自行处理边缘情况，这样调用者就不必采取各种预防措施，也不必对结果做类型检查。因此，在此库中，如果 None 会导致函数出错，就直接将其原样返回；如果某个值无需任何处理，就不经处理直接返回；否则，值要么被成功处理，要么抛出标准异常。

但这只是一个起点。我欢迎反馈和对额外功能的需求。

安装

使用 pip install parserutils 进行安装。

用法

以下是您可以使用 dict 对象和其他集合执行的操作。

from parserutils import collections

collections.accumulate_items([('key', 'val1'), ('key', 'val2'), ('key', 'val3')])   # {'key': ['val1', 'val2', 'val3']}
collections.accumulate_items(
    [('key1', 'val1'), ('key2', 'val2'), ('key3', 'val3')], reduce_each=True  # {'key1': 'val1', 'key2': 'val2', 'key3': 'val3'}
)

collections.setdefaults({}, 'a.b')                         # {'a': {'b': None}}
collections.setdefaults({}, ['a.b', 'a.c'])                # {'a': {'b': None, 'c': None}}
collections.setdefaults({}, {'a.b': 'bbb', 'a.c': 'ccc'})  # {'a': {'b': 'bbb', 'c': 'ccc'}}

collections.filter_empty(x for x in (None, [], ['a'], '', {'b'}, 'c'))      # [['a'], {'b'}, 'c']
collections.flatten_items(x for x in ('abc', ['a', 'b', 'c'], ('d', 'e')))  # ['abc', 'a', 'b', 'c', 'd', 'e']

collections.remove_duplicates('abcdefabc')                                   # 'abcdef'
collections.remove_duplicates('abcdefabc', in_reverse=True)                  # 'defabc'
collections.remove_duplicates(['a', 'b', 'c', 'a'])                          # ['a', 'b', 'c']
collections.remove_duplicates(('a', 'b', 'c', 'a'), in_reverse=True)         # ('b', 'c', 'a')
collections.remove_duplicates(x for x in 'abca')                             # ['a', 'b', 'c']
collections.remove_duplicates((x for x in 'abca'), in_reverse=True)          # ['b', 'c', 'a']
collections.remove_duplicates((set(x) for x in 'abca'), is_unhashable=True)  # [{'a'}, {'b'}, {'c'}]

collections.rindex('aba', 'a')               # 2
collections.rindex(['a', 'b', 'a'], 'a')     # 2
collections.rindex(('a', 'b', 'a'), 'a')     # 2
collections.rindex('xyz', 'a')               # ValueError
collections.rindex([x for x in 'xyz'], 'a')  # ValueError

collections.rfind('aba', 'a')                # 2
collections.rfind(['a', 'b', 'a'], 'a')      # 2
collections.rfind(('a', 'b', 'a'), 'a')      # 2
collections.rfind('xyz', 'a')                # -1
collections.rfind([x for x in 'xyz'], 'a')   # -1

collections.reduce_value(['abc'])          # 'abc'
collections.reduce_value(('abc',))         # 'abc'
collections.reduce_value({'abc'})          # 'abc'
collections.reduce_value('abc')            # 'abc'
collections.reduce_value({'a': 'aaa'})     # {'a': 'aaa'}
collections.reduce_value([{'a': 'aaa'}])   # {'a': 'aaa'}
collections.reduce_value(['a', 'b', 'c'])  # ['a', 'b', 'c']

collections.wrap_value(['abc'])           # ['abc']
collections.wrap_value(('abc',))          # ('abc',)
collections.wrap_value('abc')             # ['abc']
collections.wrap_value(x for x in 'abc')  # ['a', 'b', 'c']
collections.wrap_value({'a': 'aaa'})      # [{'a': 'aaa'}]
collections.wrap_value(['a', 'b', 'c'])   # ['a', 'b', 'c']

以下是关于日期和数字的一些信息。

from parserutils import dates
from parserutils import numbers

# Leverages dateutil in general, but also handles milliseconds and provides defaults

dates.parse_dates(None, default='today')  # Today (default behavior)
dates.parse_dates(None, default=None)     # Returns None
dates.parse_dates('nope', default=None)   # Returns None
dates.parse_dates(0)                      # 1970
dates.parse_dates('<date_format>')        # Behaves as described in dateutil library

# Reliably handles all the usual cases

numbers.is_number(0)                    # Integer: True
numbers.is_number(1.1)                  # Float: True
numbers.is_number('2.2')                # String: True
numbers.is_number(False)                # Boolean: False by default
numbers.is_number(False, if_bool=True)  # Boolean: True if you need it to
numbers.is_number(float('inf'))         # Infinite: False
numbers.is_number(float('nan'))         # NaN: False

以下是关于字符串和URL解析辅助工具的一些信息。

from parserutils import strings
from parserutils import urls

# These string conversions are written to be fast and reliable

strings.camel_to_constant('toConstant')        # TO_CONSTANT
strings.camel_to_constant('XMLConstant')       # XML_CONSTANT
strings.camel_to_constant('withNumbers1And2')  # WITH_NUMBERS1_AND2

strings.camel_to_snake('toSnake')              # to_snake
strings.camel_to_snake('withXMLAbbreviation')  # with_xml_abbreviation
strings.camel_to_snake('withNumbers3And4')     # with_numbers3_and4

strings.snake_to_camel('from_snake')              # fromSnake
strings.snake_to_camel('_leading_and_trailing_')  # leadingAndTrailing
strings.snake_to_camel('extra___underscores')     # extraUnderscores

strings.find_all('ab??ca??bc??', '??')                         # [2, 6, 10]
strings.find_all('ab??ca??bc??', '??', reverse=True)           # [10, 6, 2]
strings.find_all('ab??ca??bc??', '??', limit=2, reverse=True)  # [10, 6]
strings.find_all('ab??ca??bc??', '??', start=4)                # [6, 10]
strings.find_all('ab??ca??bc??', '??', end=8)                  # [2, 6]
strings.find_all('ab??ca??bc??', '??', start=4, end=8)         # [6]

strings.splitany('ab:ca:bc', ':')           # Same as 'ab:ca:bc'.split(':')
strings.splitany('ab:ca:bc', ':', 1)        # Same as 'ab:ca:bc'.split(':', 1)
strings.splitany('ab|ca:bc', '|:')          # ['ab', 'ca', 'bc']
strings.splitany('ab|ca:bc', ':|', 1)       # ['ab', 'ca:bc']
strings.splitany('0<=3<5', ['<', '<='])     # ['0', '3', '5']
strings.splitany('0<=3<5', ['<', '<='], 1)  # ['0', '3<5']

strings.to_ascii_equivalent('smart quotes, etc.')  # Replaces with ascii quotes, etc.

# URL manipulation leverages urllib, but spares you the extra code

urls.get_base_url('http://www.params.com?a=aaa')                  # 'http://www.params.com'
urls.get_base_url('http://www.path.com/test')                     # 'http://www.path.com'
urls.get_base_url('http://www.path.com/test', include_path=True)  # 'http://www.path.com/test'
urls.get_base_url('http://www.params.com/test?a=aaa', True)       # 'http://www.params.com/test'

urls.update_url_params('http://www.params.com?a=aaa', a='aaa')  # 'http://www.params.com?a=aaa'
urls.update_url_params('http://www.params.com?a=aaa', a='xxx')  # 'http://www.params.com?a=xxx'
urls.update_url_params('http://www.params.com', b='bbb')        # 'http://www.params.com?b=bbb'
urls.update_url_params('http://www.params.com', c=['c', 'cc'])  # 'http://www.params.com?c=c&c=cc'

# Helpers to parse urls to and from parts: parses path as list and params as dict
urls.url_to_parts('http://www.params.com/test/path?a=aaa')      # SplitResult(..., path=['test', 'path'], query={'a': 'aaa'})
urls.parts_to_url(
    {'netloc': 'www.params.com', 'query': {'a': 'aaa'}}         # 'http://www.params.com?a=aaa'
)
urls.parts_to_url(
    urls.url_to_parts('http://www.params.com/test/path?a=aaa')  # 'http://www.params.com/test/path?a=aaa'
)

最后，也支持XML解析，使用cElementTree和defusedxml库以提高性能和安全性

from parserutils import elements

# First convert an XML string to an Element object
xml_string = '<root><parent><child>one</child><child>two</child><uglyChild>yuck</uglyChild></parent></root>'
xml_element = elements.get_element(xml_string)


# Update the XML string and print it back out
elements.set_element_text(xml_element, 'parent/child', 'child text')
elements.set_element_attributes(xml_element, childHas='child attribute')
elements.remove_element(xml_element, 'parent/uglyChild')
elements.element_to_string(xml_element)


# Conversion from string to Element, to dict, and then back to string
converted = elements.element_to_dict(xml_string, recurse=True)
reverted = elements.dict_to_element(converted)
reverted = elements.get_element(converted)
xml_string == elements.element_to_string(converted)


# Conversion to flattened dict object
root, obj = elements.element_to_object(converted)
obj == {'root': {'parent': {'child': ['one', 'two'], 'uglyChild': 'yuck'}}}


# Read in an XML file and write it elsewhere
with open('/path/to/file.xml', 'rb') as xml:
    xml_from_file = elements.get_element(xml)
    elements.write_element(xml_from_file, '/path/to/updated/file.xml')


# Write a local file from a remote location (via URL)
xml_from_web = elements.get_remote_element('http://en.wikipedia.org/wiki/XML')
elements.write_element(xml_from_web, '/path/to/new/file.xml')


# Read content at a local file path to a string
xml_from_path = elements.get_remote_element('/path/to/file.xml')
elements.element_to_string(xml_from_path)

parserutils-2.0.1.tar.gz 的哈希值

parserutils-2.0.1.tar.gz 的哈希值
算法	哈希摘要
SHA256	`f927e69779d81db508db98e6e8ec331f90ff31ff2868161d1cce30fffe92bec3`
MD5	`322f05d3e4bfe5d6d9fcf38e06dcb913`
BLAKE2b-256	`131c4c0208d7d5b959bf14f831b63b2d1a582e74df9a02f092792d8987c131cf`

parserutils-2.0.1-py3-none-any.whl 的哈希值

parserutils-2.0.1-py3-none-any.whl 的哈希值
算法	哈希摘要
SHA256	`19fd3086fd360b3b53322400a236baebcd8109f0483f7c6396f4855e5d3515b1`
MD5	`67529c6caacf92ae7ed14186ab54a4f9`
BLAKE2b-256	`77f15affd125ccf476926c985441fce27d8a6c3cf18eac071729b8a0dd73c5a0`

parserutils 2.0.1

导航

验证详情

维护者

未验证详情

项目链接

元数据

分类器

项目描述

parserutils

安装

用法

项目详情

验证详情

维护者

未验证详情

项目链接

元数据

分类器

发行历史 发布通知 | RSS 源

下载文件

源代码分发

构建分发

parserutils 2.0.1

导航

验证详情

维护者

未验证详情

项目链接

元数据

分类器

项目描述

parserutils

安装

用法

项目详情

验证详情

维护者

未验证详情

项目链接

元数据

分类器

发行历史 发布通知 | RSS 源

下载文件

源代码分发

构建分发

发行历史 发布通知 | RSS 源