Description
Hi Anthony / Christopher -
I've narrowed down at least one of the failures I'm having re: LOB to the NCLOB datatype, and it seems to be independent of whether or not I'm using setinputsizes and whether or not I use an outputtype handler. What's most disturbing is that the failure is non-deterministic, failing only sometimes for large strings of text that have many random occurrences of characters in random orders.
This is using NLS_LANG=AMERICAN_AMERICA.AL32UTF8 so we would normally assume the full range of Unicode codepoints should round trip accurately.
The character in question is this: 🐍 , the Python snake (on some GUI elements it looks more like a duck, though if I look closely, it's still a snake).
Oracle server 18c, client version info:
>>> import sys
>>> import platform
>>>
>>> print("platform.platform:", platform.platform())
platform.platform: Linux-5.14.9-200.fc34.x86_64-x86_64-with-glibc2.33
>>> print("sys.maxsize > 2**32:", sys.maxsize > 2**32)
sys.maxsize > 2**32: True
>>> print("platform.python_version:", platform.python_version())
platform.python_version: 3.10.0
>>>
>>> import cx_Oracle
>>> print("cx_Oracle.version:", cx_Oracle.version)
cx_Oracle.version: 8.3.0
>>> print("cx_Oracle.clientversion:", cx_Oracle.clientversion())
cx_Oracle.clientversion: (19, 3, 0, 0, 0)
The test case below inserts and returns a row that contains the character embedded in a randomized field of characters. the larger the field of characters, the more chance that it fails. See sample run at the end.
import cx_Oracle
import random
conn = cx_Oracle.connect(user="scott", password="tiger", dsn="oracle18c")
def output_type_handler(cursor, name, default_type, size, precision, scale):
if default_type in (
cx_Oracle.CLOB,
cx_Oracle.NCLOB,
):
return cursor.var(cx_Oracle.LONG_STRING, size, cursor.arraysize)
# fetching LOB without the output type handler also doesn't help
conn.outputtypehandler = output_type_handler
def setup():
cursor = conn.cursor()
try:
cursor.execute("drop table long_text")
except:
pass
cursor.execute(
"""
CREATE TABLE long_text (
x INTEGER,
y NCLOB,
z INTEGER
)
"""
)
cursor.close()
def run_test(datasize):
cursor = conn.cursor()
# the third character is the failure character
word_seed = u"ab🐍’«cdefg"
data = u" ".join(
"".join(random.choice(word_seed) for j in range(150))
for i in range(datasize)
)
# print(f"test data has {len(data.split('🐍'))} of the problematic character 🐍")
# this actually doesn't help
# cursor.setinputsizes(
# **{"x": cx_Oracle.NUMBER, "y": cx_Oracle.NCLOB, "z": cx_Oracle.NUMBER}
# )
cursor.execute(
"INSERT INTO long_text (x, y, z) VALUES (:x, :y, :z)",
{"x": 5, "y": data, "z": 10},
)
cursor.execute(
"SELECT long_text.x, long_text.y, long_text.z FROM long_text"
)
row = cursor.fetchone()
try:
assert row[1] == data
finally:
cursor.close()
conn.rollback()
def harness(datasize, num_runs):
for i in range(num_runs):
try:
run_test(datasize)
except AssertionError as err:
print(f"datasize: {datasize} test run {i} failed")
else:
print(f"datasize: {datasize} test run {i} succeeded")
setup()
for datasize in (10, 25, 50, 100, 250):
print(f"\ndatasize: {datasize}")
harness(datasize, 10)
sample run
$ python test3.py
datasize: 10
datasize: 10 test run 0 succeeded
datasize: 10 test run 1 succeeded
datasize: 10 test run 2 succeeded
datasize: 10 test run 3 succeeded
datasize: 10 test run 4 succeeded
datasize: 10 test run 5 succeeded
datasize: 10 test run 6 succeeded
datasize: 10 test run 7 succeeded
datasize: 10 test run 8 succeeded
datasize: 10 test run 9 succeeded
datasize: 25
datasize: 25 test run 0 succeeded
datasize: 25 test run 1 succeeded
datasize: 25 test run 2 succeeded
datasize: 25 test run 3 succeeded
datasize: 25 test run 4 succeeded
datasize: 25 test run 5 succeeded
datasize: 25 test run 6 succeeded
datasize: 25 test run 7 succeeded
datasize: 25 test run 8 succeeded
datasize: 25 test run 9 succeeded
datasize: 50
datasize: 50 test run 0 succeeded
datasize: 50 test run 1 succeeded
datasize: 50 test run 2 succeeded
datasize: 50 test run 3 succeeded
datasize: 50 test run 4 succeeded
datasize: 50 test run 5 succeeded
datasize: 50 test run 6 succeeded
datasize: 50 test run 7 succeeded
datasize: 50 test run 8 succeeded
datasize: 50 test run 9 succeeded
datasize: 100
datasize: 100 test run 0 failed
datasize: 100 test run 1 succeeded
datasize: 100 test run 2 succeeded
datasize: 100 test run 3 failed
datasize: 100 test run 4 succeeded
datasize: 100 test run 5 failed
datasize: 100 test run 6 succeeded
datasize: 100 test run 7 failed
datasize: 100 test run 8 failed
datasize: 100 test run 9 succeeded
datasize: 250
datasize: 250 test run 0 succeeded
datasize: 250 test run 1 failed
datasize: 250 test run 2 failed
datasize: 250 test run 3 failed
datasize: 250 test run 4 failed
datasize: 250 test run 5 failed
datasize: 250 test run 6 failed
datasize: 250 test run 7 failed
datasize: 250 test run 8 failed
datasize: 250 test run 9 failed