Skip to content

Use Charset throughout #26

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 29, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/main/java/com/microsoft/sqlserver/jdbc/DDC.java
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ static final Object convertBytesToObject(byte[] bytesValue, JDBCType jdbcType, T
*/
static final Object convertStringToObject(
String stringVal,
String charset,
Charset charset,
JDBCType jdbcType,
StreamType streamType) throws UnsupportedEncodingException, IllegalArgumentException
{
Expand Down
28 changes: 10 additions & 18 deletions src/main/java/com/microsoft/sqlserver/jdbc/IOBuffer.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.io.*;
import java.nio.*;
import java.nio.channels.*;
import java.nio.charset.Charset;
import java.net.*;
import java.math.*;
import java.util.concurrent.*;
Expand Down Expand Up @@ -3946,7 +3947,7 @@ void writeNonUnicodeReader(
Reader reader,
long advertisedLength,
boolean isDestBinary,
String charSet) throws SQLServerException
Charset charSet) throws SQLServerException
{
assert DataTypes.UNKNOWN_STREAM_LENGTH == advertisedLength || advertisedLength >= 0;

Expand Down Expand Up @@ -3997,25 +3998,16 @@ void writeNonUnicodeReader(

for (int charsCopied = 0; charsCopied < charsToWrite; ++charsCopied)
{
try
if(null == charSet)
{
if(null == charSet)
{
streamByteBuffer[charsCopied] = (byte)(streamCharBuffer[charsCopied] & 0xFF);
}
else
{
// encoding as per collation
streamByteBuffer[charsCopied] = new String(streamCharBuffer[charsCopied] +
"")
.getBytes(charSet)[0];
}
streamByteBuffer[charsCopied] = (byte)(streamCharBuffer[charsCopied] & 0xFF);
}
catch (UnsupportedEncodingException e)
else
{
throw new SQLServerException(
SQLServerException.getErrString("R_encodingErrorWritingTDS"),
e);
// encoding as per collation
streamByteBuffer[charsCopied] = new String(streamCharBuffer[charsCopied] +
"")
.getBytes(charSet)[0];
}
}
writeBytes(streamByteBuffer, 0, charsToWrite);
Expand Down Expand Up @@ -7257,7 +7249,7 @@ final Object readGUID(int valueLength, JDBCType jdbcType, StreamType streamType)

try
{
return DDC.convertStringToObject(sb.toString(), Encoding.UNICODE.charsetName(), jdbcType, streamType);
return DDC.convertStringToObject(sb.toString(), Encoding.UNICODE.charset(), jdbcType, streamType);
}
catch (UnsupportedEncodingException e)
{
Expand Down
18 changes: 3 additions & 15 deletions src/main/java/com/microsoft/sqlserver/jdbc/ReaderInputStream.java
Original file line number Diff line number Diff line change
Expand Up @@ -66,26 +66,14 @@ class ReaderInputStream extends InputStream
private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0);
private ByteBuffer encodedChars = EMPTY_BUFFER;

ReaderInputStream(Reader reader, String charsetName, long readerLength) throws UnsupportedEncodingException
ReaderInputStream(Reader reader, Charset charset, long readerLength)
{
assert reader != null;
assert charsetName != null;
assert charset != null;
assert DataTypes.UNKNOWN_STREAM_LENGTH == readerLength || readerLength >= 0;

this.reader = reader;
try
{
this.charset = Charset.forName(charsetName);
}
catch (IllegalCharsetNameException e)
{
throw new UnsupportedEncodingException(e.getMessage());
}
catch (UnsupportedCharsetException e)
{
throw new UnsupportedEncodingException(e.getMessage());
}

this.charset = charset;
this.readerLength = readerLength;
}

Expand Down
30 changes: 22 additions & 8 deletions src/main/java/com/microsoft/sqlserver/jdbc/SQLCollation.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
package com.microsoft.sqlserver.jdbc;

import java.io.*;
import java.nio.charset.Charset;
import java.util.*;
import java.text.MessageFormat;

Expand Down Expand Up @@ -49,7 +50,7 @@ final class SQLCollation implements java.io.Serializable
private final Encoding encoding;

// Utility methods for getting details of this collation's encoding
final String getCharset() { return encoding.charsetName(); }
final Charset getCharset() throws SQLServerException { return encoding.charset(); }
final boolean supportsAsciiConversion() { return encoding.supportsAsciiConversion(); }
final boolean hasAsciiCompatibleSBCS() { return encoding.hasAsciiCompatibleSBCS(); }

Expand Down Expand Up @@ -541,7 +542,7 @@ private Encoding encodingFromSortId() throws UnsupportedEncodingException
/**
* Enumeration of encodings that are supported by SQL Server (and hopefully the JVM).
*
* See, for example, http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
* See, for example, https://docs.oracle.com/javase/8/docs/technotes/guides/intl/encoding.doc.html
* for a complete list of supported encodings with their canonical names.
*/
enum Encoding
Expand All @@ -568,6 +569,7 @@ enum Encoding
private final boolean supportsAsciiConversion;
private final boolean hasAsciiCompatibleSBCS;
private boolean jvmSupportConfirmed = false;
private Charset charset;

private Encoding(
String charsetName,
Expand All @@ -585,11 +587,7 @@ final Encoding checkSupported() throws UnsupportedEncodingException
{
// Checks for support by converting a java.lang.String
// This works for all of the code pages above in SE 5 and later.
try
{
" ".getBytes(charsetName);
}
catch (UnsupportedEncodingException e)
if (!Charset.isSupported(charsetName))
{
MessageFormat form = new MessageFormat(SQLServerException.getErrString("R_codePageNotSupported"));
Object[] msgArgs = {charsetName};
Expand All @@ -602,7 +600,23 @@ final Encoding checkSupported() throws UnsupportedEncodingException
return this;
}

final String charsetName() { return charsetName; }
final Charset charset() throws SQLServerException
{
try
{
checkSupported();
if (charset == null)
{
charset = Charset.forName(charsetName);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@marschall thank you very much for your great idea and fix. I just have one more question and hope you could consider. Is it possible to delete the local variable 'private Charset charset'? I am not sure why we need to check if the variable is null here. instead of assigning it to charset and returning charset., could we just do "return Charset.forName(charsetName);" ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes I considered this. It would be possible but:
'private Charset charset' ist not a local variable but an instance/member variable. Encoding#charset() is called quite frequently and by many users both directly and indirectly. If you look at the implementation of Charset#forName it is quite involved and the worst case is quite bad, eg. some of the charsets are in the Extended Encoding Set. I would be more comfortable with looking them up just once.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@marschall thank you for your feedback.
I totally agree to what you said, we definitely need to look up once.

However, what I meant was something like this.:

        try
    	{
    		checkSupported();
    	} 
    	catch (UnsupportedEncodingException e) 
              ...
    	return Charset.forName(charsetName);

the look up is already called within checkSupported (by Charset.isSupported())
I am not sure why we need the member variable, or why we need to check if the variable is null.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need the member variable in order to cache the look up. Otherwise every time somebody calls Encoding#charset() directly or indirectly a Charset lookup is performed. Examples include:

  • TypeInfo.Builder.Strategy#apply(TypeInfo, TDSReader)
  • Util#readUnicodeString(byte[], int, int, SQLServerConnection)
  • SQLServerSQLXML#getString()
  • SQLServerSQLXML#getValue()
  • TDSReader#readGUID(int, JDBCType, StreamType)
  • SQLServerBulkCopy#writeColumnToTdsWriter(TDSWriter, int, int, int, boolean, int, int, boolean, Object)

Without the member variable every time one of this methods is called directly or indirectly a lookup would be performed anew. The null check avoids the look up the second and following times the method is called.
It is for the same reason that jvmSupportConfirmed is cached in a member variable instead of computed every time the method is called.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@marschall thanks for the very detailed explanation, now I understand why you have introduced a new member variable here. I wanted the code to be consistent with the current implementation, but your cache implementation is better than the current. I am going to accept the PR once the testing is done. thanks again.

}
} catch (UnsupportedEncodingException e)
{
MessageFormat form = new MessageFormat(SQLServerException.getErrString("R_codePageNotSupported"));
Object[] msgArgs = {charsetName};
throw new SQLServerException(form.format(msgArgs), e);
}
return charset;
}

/**
* Returns true if the collation supports conversion to ascii.
Expand Down
36 changes: 13 additions & 23 deletions src/main/java/com/microsoft/sqlserver/jdbc/SQLServerBulkCopy.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
Expand Down Expand Up @@ -2552,31 +2551,22 @@ else if (null != sourceBulkRecord)
}
else
{
try
tdsWriter.writeShort((short) (colValueStr.length()));
// converting string into destination collation using Charset

SQLCollation destCollation = destColumnMetadata
.get(destColOrdinal).collation;
if (null != destCollation)
{
tdsWriter.writeShort((short) (colValueStr.length()));
// converting string into destination collation using Charset
tdsWriter.writeBytes(colValueStr.getBytes(
destColumnMetadata.get(destColOrdinal).collation
.getCharset()));

SQLCollation destCollation = destColumnMetadata
.get(destColOrdinal).collation;
if (null != destCollation)
{
tdsWriter.writeBytes(colValueStr.getBytes(
destColumnMetadata.get(destColOrdinal).collation
.getCharset()));

}
else
{
tdsWriter.writeBytes(colValueStr.getBytes());
}
}
catch (UnsupportedEncodingException e)
{
throw new SQLServerException(
SQLServerException.getErrString("R_encodingErrorWritingTDS"),
e);
}
else
{
tdsWriter.writeBytes(colValueStr.getBytes());
}
}
}
}
Expand Down
14 changes: 4 additions & 10 deletions src/main/java/com/microsoft/sqlserver/jdbc/SQLServerClob.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
import java.util.*;
import java.util.logging.*;

import static java.nio.charset.StandardCharsets.US_ASCII;

/**
* SQLServerClob represents a character LOB object and implements java.sql.Clob.
*/
Expand Down Expand Up @@ -186,16 +188,8 @@ public InputStream getAsciiStream() throws SQLException
if (null != sqlCollation && !sqlCollation.supportsAsciiConversion())
DataTypes.throwConversionError(getDisplayClassName(), "AsciiStream");

InputStream getterStream;
try
{
// Need to use a BufferedInputStream since the stream returned by this method is assumed to support mark/reset
getterStream = new BufferedInputStream(new ReaderInputStream(new StringReader(value), "US-ASCII", value.length()));
}
catch (UnsupportedEncodingException unsupportedEncodingException)
{
throw new SQLServerException(unsupportedEncodingException.getMessage(), null, 0, unsupportedEncodingException);
}
// Need to use a BufferedInputStream since the stream returned by this method is assumed to support mark/reset
InputStream getterStream = new BufferedInputStream(new ReaderInputStream(new StringReader(value), US_ASCII, value.length()));

activeStreams.add(getterStream);
return getterStream;
Expand Down
32 changes: 3 additions & 29 deletions src/main/java/com/microsoft/sqlserver/jdbc/SQLServerSQLXML.java
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ InputStream getValue() throws SQLServerException
ByteArrayOutputStreamToInputStream strm = new ByteArrayOutputStreamToInputStream();
// Need to beat a stream out of docValue
TransformerFactory factory;
Writer wr=null;
try
{
factory = TransformerFactory.newInstance();
Expand All @@ -126,14 +125,7 @@ InputStream getValue() throws SQLServerException
assert null ==outputStreamValue;
assert null == docValue;
assert null !=strValue;
try
{
o = new ByteArrayInputStream(strValue.getBytes(Encoding.UNICODE.charsetName()));
}
catch (UnsupportedEncodingException ex)
{
throw new SQLServerException(null, ex.getMessage(), null, 0, true);
}
o = new ByteArrayInputStream(strValue.getBytes(Encoding.UNICODE.charset()));
}
assert null != o;
isFreed = true; // we have consumed the data
Expand Down Expand Up @@ -252,16 +244,7 @@ public java.io.Writer setCharacterStream() throws SQLException
checkWriteXML();
isUsed = true;
outputStreamValue = new ByteArrayOutputStreamToInputStream();
java.io.Writer wrt=null;
try
{
wrt = new OutputStreamWriter(outputStreamValue, Encoding.UNICODE.charsetName());
}
catch (UnsupportedEncodingException ex)
{
throw new SQLServerException(null, ex.getMessage(), null, 0, true);
}
return wrt;
return new OutputStreamWriter(outputStreamValue, Encoding.UNICODE.charset());
}
public Reader getCharacterStream() throws SQLException
{
Expand Down Expand Up @@ -309,16 +292,7 @@ public String getString() throws SQLException
}

byte byteContents[] = contents.getBytes();
String ret = null;
try
{
ret = new String(byteContents,0, byteContents.length, Encoding.UNICODE.charsetName() );
}
catch (UnsupportedEncodingException ex)
{
throw new SQLServerException(null, ex.getMessage(), null, 0, true);
}
return ret;
return new String(byteContents,0, byteContents.length, Encoding.UNICODE.charset() );
}
public void setString(String value) throws SQLException
{
Expand Down
10 changes: 1 addition & 9 deletions src/main/java/com/microsoft/sqlserver/jdbc/Util.java
Original file line number Diff line number Diff line change
Expand Up @@ -619,15 +619,7 @@ static String readUnicodeString(byte [] b, int offset, int byteLength, SQLServer
{
try
{
return new String(b, offset, byteLength, Encoding.UNICODE.charsetName());
}
catch (UnsupportedEncodingException ex)
{
String txtMsg = SQLServerException.checkAndAppendClientConnId(SQLServerException.getErrString("R_stringReadError"), conn);
MessageFormat form = new MessageFormat(txtMsg);
Object[] msgArgs = {new Integer(offset)};
// Re-throw SQLServerException if conversion fails.
throw new SQLServerException(null, form.format(msgArgs), null, 0, true);
return new String(b, offset, byteLength, Encoding.UNICODE.charset());
}
catch (IndexOutOfBoundsException ex)
{
Expand Down
Loading