|
1 | 1 | package Query.Engine;
|
2 | 2 |
|
| 3 | +import Utility.DBSchema; |
| 4 | +import Utility.DBUtil; |
| 5 | +import Utility.FileParser; |
| 6 | + |
3 | 7 | import java.sql.Connection;
|
4 |
| -import java.sql.PreparedStatement; |
5 |
| -import java.sql.ResultSet; |
6 |
| -import java.sql.SQLException; |
7 | 8 | import java.util.*;
|
8 | 9 |
|
9 | 10 | /**
|
10 | 11 | * Created by liuche on 5/29/17.
|
| 12 | + * |
| 13 | + * Requirements: |
| 14 | + * Schema of the imported CSV file should be |
| 15 | + * "{node1: n, node1Label: labels(n), relationship: r, rel_type: type(r), node2:m, node2Label: labels(m)}" |
| 16 | + * |
11 | 17 | */
|
12 | 18 | public class QueryIndexer {
|
13 | 19 | Connection conn;
|
14 | 20 |
|
15 |
| - Map<String, Integer> labelRelation = new HashMap<>(); |
16 |
| - Map<String, Integer> labelNodes = new HashMap<>(); |
17 |
| - Map<String, Integer> propertyCountOfNodes = new HashMap<>(); |
18 |
| - Map<String, Integer> nodeLabelIncoming = new HashMap<>(); |
19 |
| - Map<String, Integer> nodeLabelOutgoing = new HashMap<>(); |
20 |
| - Map<String, Map<String, Integer>> nodeRelationInEdgeCount = new HashMap<>(); |
21 |
| - Map<String, Map<String, Integer>> nodeRelationOutEdgeCount = new HashMap<>(); |
22 |
| - |
23 |
| - Integer numberOfNodes = 0, numberOfRelations = 0; |
24 |
| - |
25 |
| - private Integer getIntegerFromSQL(String statement){ |
26 |
| - try { |
27 |
| - PreparedStatement preparedStatement = conn.prepareStatement(statement); |
28 |
| - ResultSet result = preparedStatement.executeQuery(); |
29 |
| - result.next(); |
30 |
| - return result.getInt(1); |
31 |
| - } catch (SQLException e) { |
32 |
| - e.printStackTrace(); |
33 |
| - } |
34 |
| - return -1; |
35 |
| - } |
36 |
| - |
37 |
| - private List<String> getListFromSQL(String statement){ |
38 |
| - List<String> resList = new ArrayList<>(); |
39 |
| - try { |
40 |
| - PreparedStatement preparedStatement = conn.prepareStatement(statement); |
41 |
| - ResultSet result = preparedStatement.executeQuery(); |
42 |
| - while(result.next()){ |
43 |
| - String str = result.getString(1); |
44 |
| - resList.add(str); |
45 |
| - } |
46 |
| - } catch (SQLException e) { |
47 |
| - e.printStackTrace(); |
48 |
| - } |
49 |
| - return resList; |
50 |
| - } |
51 |
| - |
52 |
| - private Map<String, Integer> getMapFromSQL(String statement){ |
53 |
| - Map<String, Integer> resMap = new HashMap<>(); |
54 |
| - try { |
55 |
| - PreparedStatement preparedStatement = conn.prepareStatement(statement); |
56 |
| - ResultSet result = preparedStatement.executeQuery(); |
57 |
| - while(result.next()){ |
58 |
| - String str = result.getString(1); |
59 |
| - Integer integer = result.getInt(2); |
60 |
| - resMap.put(str, integer); |
61 |
| - } |
62 |
| - } catch (SQLException e) { |
63 |
| - e.printStackTrace(); |
64 |
| - } |
65 |
| - return resMap; |
66 |
| - } |
| 21 | + private Map<String, Integer> labelRelation = new HashMap<>(); |
| 22 | + private Map<String, Integer> labelNodes = new HashMap<>(); |
| 23 | + private Map<String, Integer> propertyCountOfNodes = new HashMap<>(); |
| 24 | + private Map<String, Integer> nodeLabelIncoming = new HashMap<>(); |
| 25 | + private Map<String, Integer> nodeLabelOutgoing = new HashMap<>(); |
| 26 | + private Map<String, Map<String, Integer>> nodeRelationInEdgeCount = new HashMap<>(); |
| 27 | + private Map<String, Map<String, Integer>> nodeRelationOutEdgeCount = new HashMap<>(); |
67 | 28 |
|
| 29 | + private Integer numberOfNodes = 0, numberOfRelations = 0; |
| 30 | + private DBUtil dbUtil; |
68 | 31 |
|
69 |
| - public QueryIndexer(Connection conn) { |
| 32 | + public QueryIndexer(Connection conn, DBSchema dbSchema) { |
70 | 33 | this.conn = conn;
|
| 34 | + this.dbUtil = new DBUtil(conn); |
71 | 35 |
|
72 | 36 | // Get number of nodes and relations
|
73 |
| - String statement = "SELECT COUNT(*) FROM person;"; |
74 |
| - this.numberOfNodes = getIntegerFromSQL(statement); |
75 |
| - statement = "SELECT COUNT(*) FROM movie;"; |
76 |
| - this.numberOfNodes += getIntegerFromSQL(statement); |
| 37 | + String statement = "SELECT COUNT(*) FROM ObjectType WHERE type != \"0\";"; |
| 38 | + this.numberOfNodes = dbUtil.getIntegerFromSQL(statement); |
77 | 39 |
|
78 | 40 | statement = "SELECT COUNT(*) FROM Edge;";
|
79 |
| - this.numberOfRelations = getIntegerFromSQL(statement); |
| 41 | + this.numberOfRelations = dbUtil.getIntegerFromSQL(statement); |
80 | 42 |
|
81 | 43 | // Get number of nodes with same label
|
82 | 44 | statement = "select label, COUNT(*) from NodeLabel GROUP BY (label);";
|
83 |
| - labelNodes = getMapFromSQL(statement); |
| 45 | + labelNodes = dbUtil.getMapFromSQL(statement); |
84 | 46 |
|
85 | 47 | // Get number of relations with same label
|
86 | 48 | statement = "SELECT rel_type, COUNT(*) from Edge GROUP BY rel_type;";
|
87 |
| - labelRelation = getMapFromSQL(statement); |
| 49 | + labelRelation = dbUtil.getMapFromSQL(statement); |
88 | 50 |
|
89 |
| - ArrayList<String> node1Fields = new ArrayList<>(Arrays.asList( |
90 |
| - "birthday", "birthplace", "deg", "name", "lastModified", |
91 |
| - "id", "biography", "version", "profileImageUrl" |
92 |
| - )); |
| 51 | + // Get number of distinct values of each property in nodes. |
| 52 | + statement = "SELECT DISTINCT(name) FROM typeProperty WHERE id > 0"; |
| 53 | + List<String> nodeFields = dbUtil.getListFromSQL(statement); |
93 | 54 |
|
94 |
| - for(String field : node1Fields){ |
95 |
| - statement = "SELECT COUNT(distinct " + field + ") FROM Person;"; |
96 |
| - Integer counts = getIntegerFromSQL(statement); |
| 55 | + for(String field : nodeFields){ |
| 56 | + statement = "SELECT COUNT(distinct value) FROM P_" + field + ";"; |
| 57 | + Integer counts = dbUtil.getIntegerFromSQL(statement); |
97 | 58 | Integer prevCount = propertyCountOfNodes.getOrDefault(field, 0);
|
98 | 59 | propertyCountOfNodes.put(field, prevCount + counts);
|
99 | 60 | }
|
100 | 61 |
|
| 62 | + // Get the number of edges that come out of nodes with the same label. |
| 63 | + statement = "SELECT label, COUNT(DISTINCT eid) from (Edge e LEFT JOIN NodeLabel n ON e.node1 = n.gid) GROUP BY (label);"; |
| 64 | + nodeLabelOutgoing = dbUtil.getMapFromSQL(statement); |
101 | 65 |
|
102 |
| - ArrayList<String> node2Fields = new ArrayList<>(Arrays.asList( |
103 |
| - "studio", "releaseDate", "imdbId", "runtime", "description", |
104 |
| - "language", "title", "version", "trailer", "imageUrl", "genre", |
105 |
| - "tagline", "lastModified", "id", "homepage" |
106 |
| - )); |
107 |
| - |
108 |
| - for(String field : node2Fields){ |
109 |
| - statement = "SELECT COUNT(distinct " + field + ") FROM Movie;"; |
110 |
| - Integer counts = getIntegerFromSQL(statement); |
111 |
| - Integer prevCount = propertyCountOfNodes.getOrDefault(field, 0); |
112 |
| - propertyCountOfNodes.put(field, prevCount + counts); |
113 |
| - } |
114 |
| - |
115 |
| - statement = "SELECT label, COUNT(DISTINCT eid) from (Edge e LEFT JOIN NodeLabel n ON e.pid = n.pid) GROUP BY (label);"; |
116 |
| - nodeLabelOutgoing = getMapFromSQL(statement); |
| 66 | + // Get the number of edges that go into nodes with the same label. |
| 67 | + statement = "SELECT label, COUNT(DISTINCT eid) from (Edge e LEFT JOIN NodeLabel n ON e.node2 = n.gid) GROUP BY (label);"; |
| 68 | + nodeLabelIncoming = dbUtil.getMapFromSQL(statement); |
117 | 69 |
|
118 |
| - statement = "SELECT label, COUNT(DISTINCT eid) from (Edge e LEFT JOIN NodeLabel n ON e.mid = n.mid) GROUP BY (label);"; |
119 |
| - nodeLabelIncoming = getMapFromSQL(statement); |
120 | 70 |
|
121 |
| - for(String nodeLabel : labelNodes.keySet()){ |
122 |
| - nodeRelationOutEdgeCount.put(nodeLabel, new HashMap<>()); |
| 71 | + for(String label : labelNodes.keySet()){ |
| 72 | + nodeRelationOutEdgeCount.put(label, new HashMap<>()); |
123 | 73 | for(String relationLabel : labelRelation.keySet()){
|
124 | 74 | statement =
|
125 | 75 | "SELECT COUNT(*)\n" +
|
126 |
| - "from Edge LEFT JOIN Person ON Edge.pid = Person.id " + |
127 |
| - " LEFT JOIN NodeLabel ON Person.id = NodeLabel.pid " + |
128 |
| - "WHERE label = \"" + nodeLabel + "\" AND rel_type = \"" + relationLabel + "\""; |
129 |
| - Integer edges = getIntegerFromSQL(statement); |
130 |
| - nodeRelationOutEdgeCount.get(nodeLabel).put(relationLabel, edges); |
| 76 | + "from Edge LEFT JOIN NodeLabel ON Edge.node1 = NodeLabel.gid " + |
| 77 | + "WHERE label = \"" + label + "\" AND rel_type = \"" + relationLabel + "\""; |
| 78 | + Integer edges = dbUtil.getIntegerFromSQL(statement); |
| 79 | + nodeRelationOutEdgeCount.get(label).put(relationLabel, edges); |
131 | 80 | }
|
132 | 81 | }
|
133 | 82 |
|
134 |
| - for(String nodeLabel : labelNodes.keySet()){ |
135 |
| - nodeRelationInEdgeCount.put(nodeLabel, new HashMap<>()); |
| 83 | + for(String label : labelNodes.keySet()){ |
| 84 | + nodeRelationInEdgeCount.put(label, new HashMap<>()); |
136 | 85 | for(String relationLabel : labelRelation.keySet()){
|
137 | 86 | statement =
|
138 | 87 | "SELECT COUNT(*)\n" +
|
139 |
| - "from Edge LEFT JOIN Movie ON Edge.mid = Movie.id " + |
140 |
| - " LEFT JOIN NodeLabel ON Movie.id = NodeLabel.mid " + |
141 |
| - "WHERE label = \"" + nodeLabel + "\" AND rel_type = \"" + relationLabel + "\""; |
142 |
| - Integer edges = getIntegerFromSQL(statement); |
143 |
| - nodeRelationInEdgeCount.get(nodeLabel).put(relationLabel, edges); |
| 88 | + "from Edge LEFT JOIN NodeLabel ON Edge.node2 = NodeLabel.gid " + |
| 89 | + "WHERE label = \"" + label + "\" AND rel_type = \"" + relationLabel + "\""; |
| 90 | + Integer edges = dbUtil.getIntegerFromSQL(statement); |
| 91 | + nodeRelationInEdgeCount.get(label).put(relationLabel, edges); |
144 | 92 | }
|
145 | 93 | }
|
146 | 94 |
|
|
0 commit comments