Skip to content

Commit 95455c8

Browse files
Introduce String type for java when string-refinement is activated
We define what java.lang.String objects are supposed to contains, and we affect this kind of objects for string literals.
1 parent b49d07f commit 95455c8

7 files changed

+148
-15
lines changed

src/java_bytecode/java_bytecode_convert_class.cpp

Lines changed: 73 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,13 @@ class java_bytecode_convert_classt:public messaget
2727
symbol_tablet &_symbol_table,
2828
message_handlert &_message_handler,
2929
bool _disable_runtime_checks,
30-
size_t _max_array_length):
30+
size_t _max_array_length,
31+
bool _string_refinement_enabled):
3132
messaget(_message_handler),
3233
symbol_table(_symbol_table),
3334
disable_runtime_checks(_disable_runtime_checks),
34-
max_array_length(_max_array_length)
35+
max_array_length(_max_array_length),
36+
string_refinement_enabled(_string_refinement_enabled)
3537
{
3638
}
3739

@@ -41,6 +43,9 @@ class java_bytecode_convert_classt:public messaget
4143

4244
if(parse_tree.loading_successful)
4345
convert(parse_tree.parsed_class);
46+
else if(string_refinement_enabled &&
47+
parse_tree.parsed_class.name=="java.lang.String")
48+
add_string_type();
4449
else
4550
generate_class_stub(parse_tree.parsed_class.name);
4651
}
@@ -52,13 +57,15 @@ class java_bytecode_convert_classt:public messaget
5257
symbol_tablet &symbol_table;
5358
const bool disable_runtime_checks;
5459
const size_t max_array_length;
60+
bool string_refinement_enabled;
5561

5662
// conversion
5763
void convert(const classt &c);
5864
void convert(symbolt &class_symbol, const fieldt &f);
5965

6066
void generate_class_stub(const irep_idt &class_name);
6167
void add_array_types();
68+
void add_string_type();
6269
};
6370

6471
/*******************************************************************\
@@ -322,13 +329,15 @@ bool java_bytecode_convert_class(
322329
symbol_tablet &symbol_table,
323330
message_handlert &message_handler,
324331
bool disable_runtime_checks,
325-
size_t max_array_length)
332+
size_t max_array_length,
333+
bool string_refinement_enabled)
326334
{
327335
java_bytecode_convert_classt java_bytecode_convert_class(
328336
symbol_table,
329337
message_handler,
330338
disable_runtime_checks,
331-
max_array_length);
339+
max_array_length,
340+
string_refinement_enabled);
332341

333342
try
334343
{
@@ -352,3 +361,63 @@ bool java_bytecode_convert_class(
352361

353362
return true;
354363
}
364+
365+
/*******************************************************************\
366+
367+
Function: java_bytecode_convert_classt::add_string_type
368+
369+
Purpose: Implements the java.lang.String type in the case that
370+
we provide an internal implementation.
371+
372+
\*******************************************************************/
373+
374+
void java_bytecode_convert_classt::add_string_type()
375+
{
376+
class_typet string_type;
377+
string_type.set_tag("java.lang.String");
378+
string_type.components().resize(3);
379+
string_type.components()[0].set_name("@java.lang.Object");
380+
string_type.components()[0].type()=symbol_typet("java::java.lang.Object");
381+
string_type.components()[1].set_name("length");
382+
string_type.components()[1].set_pretty_name("length");
383+
string_type.components()[1].type()=java_int_type();
384+
string_type.components()[2].set_name("data");
385+
string_type.components()[2].set_pretty_name("data");
386+
// Use a pointer-to-unbounded-array instead of a pointer-to-char.
387+
// Saves some casting in the string refinement algorithm but may
388+
// be unnecessary.
389+
string_type.components()[2].type()=pointer_typet(
390+
array_typet(java_char_type(), infinity_exprt(java_int_type())));
391+
string_type.add_base(symbol_typet("java::java.lang.Object"));
392+
393+
symbolt string_symbol;
394+
string_symbol.name="java::java.lang.String";
395+
string_symbol.base_name="java.lang.String";
396+
string_symbol.type=string_type;
397+
string_symbol.is_type=true;
398+
399+
symbol_table.add(string_symbol);
400+
401+
// Also add a stub of `String.equals` so that remove-virtual-functions
402+
// generates a check for Object.equals vs. String.equals.
403+
// No need to fill it in, as pass_preprocess will replace the calls again.
404+
symbolt string_equals_symbol;
405+
string_equals_symbol.name=
406+
"java::java.lang.String.equals:(Ljava/lang/Object;)Z";
407+
string_equals_symbol.base_name="java.lang.String.equals";
408+
string_equals_symbol.pretty_name="java.lang.String.equals";
409+
string_equals_symbol.mode=ID_java;
410+
411+
code_typet string_equals_type;
412+
string_equals_type.return_type()=java_boolean_type();
413+
code_typet::parametert thisparam;
414+
thisparam.set_this();
415+
thisparam.type()=pointer_typet(symbol_typet(string_symbol.name));
416+
code_typet::parametert otherparam;
417+
otherparam.type()=pointer_typet(symbol_typet("java::java.lang.Object"));
418+
string_equals_type.parameters().push_back(thisparam);
419+
string_equals_type.parameters().push_back(otherparam);
420+
string_equals_symbol.type=std::move(string_equals_type);
421+
422+
symbol_table.add(string_equals_symbol);
423+
}

src/java_bytecode/java_bytecode_convert_class.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ bool java_bytecode_convert_class(
1919
symbol_tablet &symbol_table,
2020
message_handlert &message_handler,
2121
bool disable_runtime_checks,
22-
size_t max_array_length);
22+
size_t max_array_length,
23+
bool string_refinement_enabled);
2324

2425
#endif // CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_CONVERT_CLASS_H

src/java_bytecode/java_bytecode_language.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ void java_bytecode_languaget::get_language_options(const cmdlinet &cmd)
4040
{
4141
disable_runtime_checks=cmd.isset("disable-runtime-check");
4242
assume_inputs_non_null=cmd.isset("java-assume-inputs-non-null");
43+
string_refinement_enabled=cmd.isset("string-refine");
4344
if(cmd.isset("java-max-input-array-length"))
4445
max_nondet_array_length=
4546
std::stoi(cmd.get_value("java-max-input-array-length"));
@@ -200,13 +201,14 @@ bool java_bytecode_languaget::typecheck(
200201
symbol_table,
201202
get_message_handler(),
202203
disable_runtime_checks,
203-
max_user_array_length))
204+
max_user_array_length,
205+
string_refinement_enabled))
204206
return true;
205207
}
206208

207209
// now typecheck all
208210
if(java_bytecode_typecheck(
209-
symbol_table, get_message_handler()))
211+
symbol_table, get_message_handler(), string_refinement_enabled))
210212
return true;
211213

212214
return false;

src/java_bytecode/java_bytecode_language.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ class java_bytecode_languaget:public languaget
8282
// - array size for newarray
8383
size_t max_nondet_array_length; // maximal length for non-det array creation
8484
size_t max_user_array_length; // max size for user code created arrays
85+
bool string_refinement_enabled;
8586
};
8687

8788
languaget *new_java_bytecode_language();

src/java_bytecode/java_bytecode_typecheck.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,10 +126,11 @@ Function: java_bytecode_typecheck
126126

127127
bool java_bytecode_typecheck(
128128
symbol_tablet &symbol_table,
129-
message_handlert &message_handler)
129+
message_handlert &message_handler,
130+
bool string_refinement_enabled)
130131
{
131132
java_bytecode_typecheckt java_bytecode_typecheck(
132-
symbol_table, message_handler);
133+
symbol_table, message_handler, string_refinement_enabled);
133134
return java_bytecode_typecheck.typecheck_main();
134135
}
135136

src/java_bytecode/java_bytecode_typecheck.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ Author: Daniel Kroening, kroening@kroening.com
2121

2222
bool java_bytecode_typecheck(
2323
symbol_tablet &symbol_table,
24-
message_handlert &message_handler);
24+
message_handlert &message_handler,
25+
bool string_refinement_enabled);
2526

2627
bool java_bytecode_typecheck(
2728
exprt &expr,
@@ -33,10 +34,12 @@ class java_bytecode_typecheckt:public typecheckt
3334
public:
3435
java_bytecode_typecheckt(
3536
symbol_tablet &_symbol_table,
36-
message_handlert &_message_handler):
37+
message_handlert &_message_handler,
38+
bool _string_refinement_enabled):
3739
typecheckt(_message_handler),
3840
symbol_table(_symbol_table),
39-
ns(symbol_table)
41+
ns(symbol_table),
42+
string_refinement_enabled(_string_refinement_enabled)
4043
{
4144
}
4245

@@ -48,6 +51,7 @@ class java_bytecode_typecheckt:public typecheckt
4851
protected:
4952
symbol_tablet &symbol_table;
5053
const namespacet ns;
54+
bool string_refinement_enabled;
5155

5256
void typecheck_type_symbol(symbolt &);
5357
void typecheck_non_type_symbol(symbolt &);

src/java_bytecode/java_bytecode_typecheck_expr.cpp

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,12 @@ Author: Daniel Kroening, kroening@kroening.com
88

99
#include <util/std_expr.h>
1010
#include <util/prefix.h>
11+
#include <util/arith_tools.h>
12+
#include <util/unicode.h>
1113

1214
#include "java_bytecode_typecheck.h"
1315
#include "java_pointer_casts.h"
16+
#include "java_types.h"
1417

1518
/*******************************************************************\
1619
@@ -98,6 +101,15 @@ static void escape_non_alnum(std::string &toescape)
98101
ch='_';
99102
}
100103

104+
static array_exprt utf16_to_array(const std::wstring& in)
105+
{
106+
const auto jchar=java_char_type();
107+
array_exprt ret(array_typet(jchar, infinity_exprt(java_int_type())));
108+
for(const auto c : in)
109+
ret.copy_to_operands(from_integer(c, jchar));
110+
return ret;
111+
}
112+
101113
/*******************************************************************\
102114
103115
Function: java_bytecode_typecheckt::typecheck_expr_java_string_literal
@@ -118,7 +130,7 @@ void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr)
118130
auto findit=string_literal_to_symbol_name.find(value);
119131
if(findit!=string_literal_to_symbol_name.end())
120132
{
121-
expr=symbol_exprt(findit->second, pointer_typet(string_type));
133+
expr=address_of_exprt(symbol_exprt(findit->second, string_type));
122134
return;
123135
}
124136

@@ -138,21 +150,64 @@ void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr)
138150

139151
symbolt new_symbol;
140152
new_symbol.name=identifier_id;
141-
new_symbol.type=pointer_typet(string_type);
153+
new_symbol.type=string_type;
142154
new_symbol.base_name="Literal";
143155
new_symbol.pretty_name=value;
144156
new_symbol.mode=ID_java;
145157
new_symbol.is_type=false;
146158
new_symbol.is_lvalue=true;
147159
new_symbol.is_static_lifetime=true; // These are basically const global data.
148160

161+
if(string_refinement_enabled)
162+
{
163+
// Initialise the string with a constant utf-16 array:
164+
symbolt array_symbol;
165+
array_symbol.name=identifier_str.str()+"_constarray";
166+
array_symbol.type=array_typet(
167+
java_char_type(), infinity_exprt(java_int_type()));
168+
array_symbol.base_name="Literal_constarray";
169+
array_symbol.pretty_name=value;
170+
array_symbol.mode=ID_java;
171+
array_symbol.is_type=false;
172+
array_symbol.is_lvalue=true;
173+
// These are basically const global data:
174+
array_symbol.is_static_lifetime=true;
175+
array_symbol.is_state_var=true;
176+
auto literal_array=utf16_to_array(
177+
utf8_to_utf16_little_endian(id2string(value)));
178+
array_symbol.value=literal_array;
179+
180+
if(symbol_table.add(array_symbol))
181+
throw "failed to add constarray symbol to symtab";
182+
183+
symbol_typet jlo_symbol("java::java.lang.Object");
184+
const auto& jlo_struct=to_struct_type(ns.follow(jlo_symbol));
185+
const auto& jls_struct=to_struct_type(ns.follow(string_type));
186+
187+
struct_exprt literal_init(new_symbol.type);
188+
struct_exprt jlo_init(jlo_symbol);
189+
jlo_init.copy_to_operands(
190+
constant_exprt("java::java.lang.String",
191+
jlo_struct.components()[0].type()));
192+
jlo_init.copy_to_operands(
193+
from_integer(0, jlo_struct.components()[1].type()));
194+
literal_init.move_to_operands(jlo_init);
195+
literal_init.copy_to_operands(
196+
from_integer(literal_array.operands().size(),
197+
jls_struct.components()[1].type()));
198+
literal_init.copy_to_operands(
199+
address_of_exprt(array_symbol.symbol_expr()));
200+
201+
new_symbol.value=literal_init;
202+
}
203+
149204
if(symbol_table.add(new_symbol))
150205
{
151206
error() << "failed to add string literal symbol to symbol table" << eom;
152207
throw 0;
153208
}
154209

155-
expr=new_symbol.symbol_expr();
210+
expr=address_of_exprt(new_symbol.symbol_expr());
156211
}
157212

158213
/*******************************************************************\

0 commit comments

Comments
 (0)