You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

255 lines
4.9 KiB

---
test case: empty text nothing happens
# Empty input: a zero-length buffer with no encoding given. The expected
# result is likewise an empty buffer of length 0 and a SUCCEED return.
# Both buffers are written as explicit one-element sequences holding '',
# the same form the other empty-result cases in this file use.
in:
encoding: ''
buffer_length: 0
buffer:
- ''
out:
expected_result_buffer_length: 0
expected_result_buffer:
- ''
return: SUCCEED
---
test case: bom is not added to plain data utf8 assumed
# Six 0x11 bytes with no BOM and no encoding given. The expected buffer is
# an alias of the input buffer (&buffer / *buffer), i.e. the data must come
# back unchanged: nothing is prepended and nothing is removed.
in:
encoding: ''
buffer_length: 6
buffer: &buffer
- '\x11\x11\x11\x11\x11\x11'
out:
expected_result_buffer_length: 6
expected_result_buffer: *buffer
return: SUCCEED
---
test case: utf8 bom is removed
# Input is the 3-byte UTF-8 BOM (EF BB BF) followed by six 0x11 bytes, with
# no encoding given. Only the six payload bytes are expected back, so the
# length drops from 9 to 6.
in:
encoding: ''
buffer_length: 9
buffer:
- '\xEF\xBB\xBF\x11\x11\x11\x11\x11\x11'
out:
expected_result_buffer_length: 6
expected_result_buffer:
- '\x11\x11\x11\x11\x11\x11'
return: SUCCEED
---
test case: utf8 bom is removed2
# Same input as "utf8 bom is removed", but the expected buffer is an alias
# of the unmodified input (BOM still present, length 9) and the expected
# return is FAIL.
# NOTE(review): presumably FAIL means the converted output does not match
# this expectation because the BOM was stripped - confirm against the test
# harness.
in:
encoding: ''
buffer_length: 9
buffer: &buffer
- '\xEF\xBB\xBF\x11\x11\x11\x11\x11\x11'
out:
expected_result_buffer_length: 9
expected_result_buffer: *buffer
return: FAIL
---
test case: utf8 bom is removed with encoding specified
# Same data as "utf8 bom is removed", but with the encoding given explicitly
# as 'UTF-8'. The BOM is still expected to be dropped (9 bytes in, 6 out).
in:
encoding: 'UTF-8'
buffer_length: 9
buffer:
- '\xEF\xBB\xBF\x11\x11\x11\x11\x11\x11'
out:
expected_result_buffer_length: 6
expected_result_buffer:
- '\x11\x11\x11\x11\x11\x11'
return: SUCCEED
---
test case: utf8 bom is removed empty text
# The input consists of the UTF-8 BOM (EF BB BF) and nothing else; the
# expected result is an empty buffer of length 0.
in:
encoding: ''
buffer_length: 3
buffer:
- '\xEF\xBB\xBF'
out:
expected_result_buffer_length: 0
expected_result_buffer:
- ''
return: SUCCEED
---
test case: utf16 LE bom is removed
#
# UTF-16LE conversion to UTF-8, reproduced with iconv:
# echo -n -e '\xff\xfe\x11\x00\x11\x00' > /tmp/bytes
# iconv /tmp/bytes -f UTF-16LE -t UTF-8 -o /tmp/utf8
# xxd /tmp/utf8
# efbb bf11 11
#
# The iconv dump above still starts with the UTF-8 BOM (ef bb bf); the
# expected result below has the BOM stripped, leaving the two 0x11 bytes.
# No encoding is given, so the input's leading FF FE is the only hint.
#
in:
encoding: ''
buffer_length: 6
buffer:
- '\xFF\xFE\x11\x00\x11\x00'
out:
expected_result_buffer_length: 2
expected_result_buffer:
- '\x11\x11'
return: SUCCEED
---
test case: utf16 LE noBOM with encoding
#
# The input has no BOM, but the data is still expected to be decoded
# correctly because the encoding ('UTF-16LE') is given explicitly:
# two 2-byte code units in, two 0x11 bytes out.
#
in:
encoding: 'UTF-16LE'
buffer_length: 4
buffer:
- '\x11\x00\x11\x00'
out:
expected_result_buffer_length: 2
expected_result_buffer:
- '\x11\x11'
return: SUCCEED
---
test case: utf16 LE bom is removed empty text
# The input consists of the UTF-16LE BOM (FF FE) and nothing else; the
# expected result is an empty buffer of length 0.
in:
encoding: ''
buffer_length: 2
buffer:
- '\xFF\xFE'
out:
expected_result_buffer_length: 0
expected_result_buffer:
- ''
return: SUCCEED
---
test case: utf16 BE bom is removed
#
# UTF-16BE conversion to UTF-8, reproduced with iconv:
# echo -n -e '\xfe\xff\x00\x11\x00\x11' > /tmp/bytes
# iconv /tmp/bytes -f UTF-16BE -t UTF-8 -o /tmp/utf8
# xxd /tmp/utf8
# efbb bf11 11
#
# The iconv dump above still starts with the UTF-8 BOM (ef bb bf); the
# expected result below has the BOM stripped, leaving the two 0x11 bytes.
# No encoding is given, so the input's leading FE FF is the only hint.
#
in:
encoding: ''
buffer_length: 6
buffer:
- '\xFE\xFF\x00\x11\x00\x11'
out:
expected_result_buffer_length: 2
expected_result_buffer:
- '\x11\x11'
return: SUCCEED
---
test case: utf16 BE noBOM with enconding
# Big-endian counterpart of "utf16 LE noBOM with encoding": the input has no
# BOM, the encoding is given explicitly as 'UTF-16BE', and two 0x11 bytes
# are expected back.
# NOTE(review): "enconding" in the test case name is a typo for "encoding";
# it is left as is here because the name is data, not a comment.
in:
encoding: 'UTF-16BE'
buffer_length: 4
buffer:
- '\x00\x11\x00\x11'
out:
expected_result_buffer_length: 2
expected_result_buffer:
- '\x11\x11'
return: SUCCEED
---
test case: utf16 BE bom is removed empty text
# The input consists of the UTF-16BE BOM (FE FF) and nothing else; the
# expected result is an empty buffer of length 0.
in:
encoding: ''
buffer_length: 2
buffer:
- '\xFE\xFF'
out:
expected_result_buffer_length: 0
expected_result_buffer:
- ''
return: SUCCEED
---
test case: utf32 LE bom is removed incorrect guess conversion to utf16
#
# UTF-32 conversion to UTF-8.
# UTF-32 input without an explicit encoding is guessed as UTF-16:
# the byte 11 in UTF-8 is represented in UTF-32 (LE) as \x11\x00\x00\x00
# \xFF\xFE\x00\x00 is the UTF-32 (LE) BOM
#
# So with the correct encoding,
# echo -n -e '\xFF\xFE\x00\x00\x11\x00\x00\x00' > /tmp/bytes
# iconv /tmp/bytes -f UTF-32 -t UTF-8 -o /tmp/utf8
# xxd /tmp/utf8
# 11
#
# However, if the encoding is not specified, the data can be confused with
# UTF-16LE, whose BOM is also \xFF\xFE, so the result is different:
# 00 11 00
#
# which then reads as an empty string (presumably because the converted
# data starts with a 0x00 byte - confirm against the implementation).
# Hence the expected result below is empty, with a SUCCEED return.
#
in:
encoding: ''
buffer_length: 8
buffer:
- '\xFF\xFE\x00\x00\x11\x00\x00\x00'
out:
expected_result_buffer_length: 0
expected_result_buffer:
- ''
return: SUCCEED
---
test case: invalid encoding supplied 1
# 'BADGER' is not a valid encoding name. The expected outcome is FAIL with
# the error message below (errno 22, "Invalid argument") and an empty
# result buffer.
in:
encoding: 'BADGER'
buffer_length: 2
buffer:
- '\xFE\xFF'
out:
error: "Failed to convert from encoding BADGER to utf8. Error: [22] Invalid argument."
expected_result_buffer_length: 0
expected_result_buffer:
- ''
return: FAIL
---
test case: invalid encoding supplied 2
# 'UTF' on its own (no width suffix) is given as the encoding. The expected
# outcome is the same as for any other unknown name: FAIL with the error
# message below and an empty result buffer.
in:
encoding: 'UTF'
buffer_length: 2
buffer:
- '\xFE\xFF'
out:
error: "Failed to convert from encoding UTF to utf8. Error: [22] Invalid argument."
expected_result_buffer_length: 0
expected_result_buffer:
- ''
return: FAIL
---
test case: invalid encoding supplied, but iconv cannot detect this
# The encoding is a single emoji, which is not a real encoding name. Unlike
# the 'BADGER' and 'UTF' cases, the expected outcome here is SUCCEED with an
# empty error string and an empty result buffer; per the test case name,
# iconv does not report this name as invalid.
in:
encoding: '🌭'
buffer_length: 2
buffer:
- '\xFE\xFF'
out:
error: ""
expected_result_buffer_length: 0
expected_result_buffer:
- ''
return: SUCCEED
---
test case: invalid encoding supplied, but iconv cannot detect this 2
# The encoding is a Cyrillic word (Russian for "brick"), which is not a real
# encoding name. As in the emoji case, the expected outcome is SUCCEED with
# an empty error string and an empty result buffer.
in:
encoding: 'кирпич'
buffer_length: 2
buffer:
- '\xFE\xFF'
out:
error: ""
expected_result_buffer_length: 0
expected_result_buffer:
- ''
return: SUCCEED
...