Skip to content

Oracle of Bacon - Agathe HAINAUT, Kévin LETHUILLIER #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import com.serli.oracle.of.bacon.repository.ElasticSearchRepository;
import com.serli.oracle.of.bacon.repository.Neo4JRepository;
import com.serli.oracle.of.bacon.repository.Neo4JRepository.GraphItem;
import com.serli.oracle.of.bacon.repository.RedisRepository;
import net.codestory.http.annotations.Get;

Expand All @@ -24,62 +25,22 @@ public APIEndPoint() {
/**
 * Records the search, then returns the graph linking the given actor to Kevin Bacon.
 *
 * <p>The graph items come from {@code neo4JRepository.getConnectionsToKevinBacon}; each
 * item is wrapped as <code>{ "data": &lt;item&gt; }</code> using the item's own
 * {@code toString()} rendering, and the elements are joined by {@link Arrays#toString}
 * (square brackets, comma-separated).
 *
 * @param actorName name of the actor to look up
 * @return the bracketed, comma-separated list of wrapped items; {@code "[]"} when the
 *         repository yields nothing
 */
@Get("bacon-to?actor=:actorName")
// TODO change return type
public String getConnectionsToKevinBacon(String actorName) {
    redisRepository.addSearch(actorName);
    List<Map<String, GraphItem>> graph = neo4JRepository.getConnectionsToKevinBacon(actorName);
    if (graph == null) {
        return "[]";
    }
    // Flatten the map values instead of calling iterator().next(), so an empty map is
    // skipped rather than raising NoSuchElementException.
    return Arrays.toString(
            graph.stream()
                    .flatMap(entry -> entry.values().stream())
                    .map(item -> String.format("{ \"data\": %s }", item))
                    .toArray());
}

/**
 * Returns actor-name suggestions for a partial query by delegating to
 * {@code elasticSearchRepository.getActorsSuggests}.
 *
 * @param searchQuery text typed by the user
 * @return the suggestions produced by the repository
 * @throws IOException propagated from the repository call
 */
@Get("suggest?q=:searchQuery")
public List<String> getActorSuggestion(String searchQuery) throws IOException {
    return elasticSearchRepository.getActorsSuggests(searchQuery);
}

/**
 * Returns the ten most recent searches by delegating to
 * {@code redisRepository.getLastXSearches(10)}.
 *
 * @return the list produced by the repository
 */
@Get("last-searches")
public List<String> last10Searches() {
    return redisRepository.getLastXSearches(10);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public static void main(String[] args) throws IOException, InterruptedException
bufferedReader
.lines()
.forEach(line -> {
// TODO
// TODO: See JS
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,21 @@

import java.io.IOException;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;

import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;
// import org.elasticsearch.index.query.QueryStringQueryBuilder;
// import org.elasticsearch.search.suggest.SuggestBuilder;
// import org.elasticsearch.search.suggest.term.TermSuggestionBuilder;
import org.elasticsearch.search.SearchHit;

public class ElasticSearchRepository {

Expand All @@ -25,7 +36,27 @@ public static RestHighLevelClient createClient() {
}

/**
 * Searches the {@code actor} index for documents whose {@code suggest} field matches
 * the query and returns the {@code name} field of up to ten hits.
 *
 * @param searchQuery text to match against the {@code suggest} field
 * @return the {@code name} values of the matching hits, in the order returned by
 *         Elasticsearch; hits without a source or without a {@code name} are skipped
 * @throws IOException if the search request fails
 */
public List<String> getActorsSuggests(String searchQuery) throws IOException {
    MatchQueryBuilder queryBuilder = new MatchQueryBuilder("suggest", searchQuery);
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder().query(queryBuilder).size(10);
    SearchRequest request = new SearchRequest(new String[] { "actor" }, sourceBuilder);
    SearchResponse response = this.client.search(request, RequestOptions.DEFAULT);

    List<String> suggests = new ArrayList<>();
    for (SearchHit hit : response.getHits().getHits()) {
        Map<String, Object> source = hit.getSourceAsMap();
        // A hit may carry no _source, or a document may lack "name"; skip it rather
        // than fail the whole request with a NullPointerException.
        Object name = (source == null) ? null : source.get("name");
        if (name != null) {
            suggests.add(name.toString());
        }
    }
    return suggests;
}
}
Original file line number Diff line number Diff line change
@@ -1,34 +1,62 @@
package com.serli.oracle.of.bacon.repository;

import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.util.HashMap;

import org.neo4j.driver.AuthTokens;
import org.neo4j.driver.Driver;
import org.neo4j.driver.GraphDatabase;
import org.neo4j.driver.Session;
import org.neo4j.driver.types.Node;
import org.neo4j.driver.types.Relationship;
import org.neo4j.driver.types.Path;
import org.neo4j.driver.Result;
import org.neo4j.driver.Value;
import org.neo4j.driver.Transaction;
import org.neo4j.driver.TransactionWork;

import static org.neo4j.driver.Values.parameters;

public class Neo4JRepository {
private final Driver driver;

/**
 * Creates the repository with a Bolt driver pointing at {@code bolt://localhost:7687}.
 */
public Neo4JRepository() {
    // The final field must be assigned exactly once.
    // NOTE(review): URI and credentials are hard-coded to a local development setup;
    // consider moving them to configuration before deploying anywhere else.
    this.driver = GraphDatabase.driver("bolt://localhost:7687", AuthTokens.basic("neo4j", "a"));
}

/**
 * Finds the shortest {@code PLAYED_IN} path between "Bacon, Kevin (I)" and the given
 * actor and returns its nodes followed by its relationships.
 *
 * <p>Each list element is a single-entry map whose key is the Neo4j id (as a string)
 * and whose value is the corresponding {@code GraphItem}.
 *
 * @param actorName exact value of the actor's {@code name} property
 * @return the path's nodes, then its relationships; an empty list when the query
 *         returns no record (no such actor, or no path)
 */
public List<Map<String, GraphItem>> getConnectionsToKevinBacon(String actorName) {
    // try-with-resources: the session must be closed to release its connection.
    try (Session session = driver.session()) {
        // The query only reads, so a read transaction is sufficient.
        return session.readTransaction(tx -> {
            Result result = tx.run("MATCH (bacon:Actor {name: 'Bacon, Kevin (I)'} ), " +
                    "(actor:Actor {name: $actor}), " +
                    "path = shortestPath((bacon)-[:PLAYED_IN*]-(actor)) RETURN path",
                    parameters("actor", actorName));

            List<Map<String, GraphItem>> items = new ArrayList<>();
            if (!result.hasNext()) {
                // No record: report "no connection" instead of throwing.
                return items;
            }

            Path path = result.next().get("path").asPath();
            for (Node node : path.nodes()) {
                Map<String, GraphItem> entry = new HashMap<>();
                entry.put(String.valueOf(node.id()), mapNodeToGraphNode(node));
                items.add(entry);
            }
            for (Relationship relationship : path.relationships()) {
                Map<String, GraphItem> entry = new HashMap<>();
                entry.put(String.valueOf(relationship.id()), mapRelationShipToGraphEdge(relationship));
                items.add(entry);
            }
            return items;
        });
    }
}

private GraphEdge mapRelationShipToNodeEdge(Relationship relationship) {
private GraphEdge mapRelationShipToGraphEdge(Relationship relationship) {
return new GraphEdge(relationship.id(), relationship.startNodeId(), relationship.endNodeId(), relationship.type());
}

private GraphNode mapNodeToGrapNode(Node node) {
private GraphNode mapNodeToGraphNode(Node node) {
String type = node.labels().iterator().next();
String value = null;
if (!node.get("name").isNull()) {
Expand Down Expand Up @@ -72,6 +100,11 @@ public GraphNode(long id, String value, String type) {
this.value = value;
this.type = type;
}

/**
 * Renders this node as a JSON object with string members {@code id}, {@code value}
 * and {@code type}.
 *
 * @return the JSON text; {@code value} and {@code type} are escaped so that quotes,
 *         backslashes and control characters cannot break the document
 */
@Override
public String toString() {
    return String.format("{ \"id\": \"%s\", \"value\": \"%s\", \"type\": \"%s\" }",
            String.valueOf(this.id), escapeJson(this.value), escapeJson(this.type));
}

/** Escapes a string for use inside a JSON string literal; {@code null} becomes "null". */
private String escapeJson(String raw) {
    String text = String.valueOf(raw);
    StringBuilder escaped = new StringBuilder(text.length() + 8);
    for (int i = 0; i < text.length(); i++) {
        char c = text.charAt(i);
        if (c == '"' || c == '\\') {
            escaped.append('\\').append(c);
        } else if (c < 0x20) {
            escaped.append(String.format("\\u%04x", (int) c));
        } else {
            escaped.append(c);
        }
    }
    return escaped.toString();
}
}

private static class GraphEdge extends GraphItem {
Expand All @@ -85,5 +118,10 @@ public GraphEdge(long id, long source, long target, String value) {
this.target = target;
this.value = value;
}

/**
 * Renders this edge as a JSON object with string members {@code id}, {@code source},
 * {@code target} and {@code value}.
 *
 * @return the JSON text; {@code value} is escaped so that quotes, backslashes and
 *         control characters cannot break the document
 */
@Override
public String toString() {
    return String.format("{ \"id\": \"%s\", \"source\": \"%s\", \"target\": \"%s\", \"value\": \"%s\" }",
            String.valueOf(this.id), this.source, this.target, escapeJson(this.value));
}

/** Escapes a string for use inside a JSON string literal; {@code null} becomes "null". */
private String escapeJson(String raw) {
    String text = String.valueOf(raw);
    StringBuilder escaped = new StringBuilder(text.length() + 8);
    for (int i = 0; i < text.length(); i++) {
        char c = text.charAt(i);
        if (c == '"' || c == '\\') {
            escaped.append('\\').append(c);
        } else if (c < 0x20) {
            escaped.append(String.format("\\u%04x", (int) c));
        } else {
            escaped.append(c);
        }
    }
    return escaped.toString();
}
}
}
Original file line number Diff line number Diff line change
@@ -1,18 +1,27 @@
package com.serli.oracle.of.bacon.repository;

import java.util.List;
import java.util.Collections;

import redis.clients.jedis.Jedis;

public class RedisRepository {
private final Jedis jedis;

private final String search_key = "oracle_of_bacon:searches";

public RedisRepository() {
this.jedis = new Jedis("localhost");
}

public void addSearch(String search) {
this.jedis.rpush(this.search_key, search);
}

public List<String> getLastTenSearches() {
// TODO
return null;
public List<String> getLastXSearches(int x) {
List<String> lastX = this.jedis.lrange(this.search_key, -x, -1);
// We invert the list's order to make the first element of the list correspond to the last search
Collections.reverse(lastX);
return lastX;
}
}
31 changes: 25 additions & 6 deletions script/insert.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,28 @@ const { Client } = require("@elastic/elasticsearch");
const client = new Client({ node: "http://localhost:9200" });

/**
 * Creates the Elasticsearch index, reads every actor name from the CSV file and
 * bulk-inserts them once the file has been fully read.
 */
async function insert() {
  // Wait for the index creation to settle before streaming, so the bulk requests can
  // never race with it. A failure (e.g. the index already exists on a re-run) is only
  // traced, as before.
  // NOTE(review): the index created here is named 'name'; confirm it matches the index
  // targeted by the bulk queries and by the code that reads the data back.
  try {
    await client.indices.create({ index: 'name' });
  } catch (err) {
    console.trace(err.message);
  }

  const actors = [];
  fs.createReadStream("../imdb-data/actors.csv")
    .pipe(csv())
    // For each CSV row, keep the actor's name
    .on("data", (data) => {
      actors.push(data["name:ID"]);
    })
    // Once the file is exhausted, insert all actors into Elasticsearch in bulks
    .on("end", () => {
      recBulk(client, createBulkInsertQueries(actors, actors.length / 10000));
    });
}

function recBulk(client, bulks) {
console.log("remaining bulks " + bulks.length);
if (bulks.length <= 0) {
client.close();
return Promise.resolve();
}

Expand All @@ -35,7 +38,23 @@ function recBulk(client, bulks) {
}

/**
 * Splits the actor names into consecutive chunks and turns each chunk into a bulk
 * insert query via createBulkInsertQuery.
 *
 * @param {string[]} names  actor names to insert
 * @param {number}   length desired number of chunks (may be fractional)
 * @returns {Array} one bulk query per chunk; empty when there are no names
 */
function createBulkInsertQueries(names, length) {
  // Nothing to insert: avoid emitting a bulk query with an empty body.
  if (names.length === 0) {
    return [];
  }

  // Step 1: derive a uniform chunk size. A zero, negative or NaN `length` falls back
  // to a single chunk holding everything.
  const wanted = names.length / length;
  const chunkSize = Number.isFinite(wanted) && wanted > 0 ? Math.ceil(wanted) : names.length;

  // Step 2: slice the names and turn each slice into a query. Slicing gives every
  // chunk the same size (except possibly the last).
  const queries = [];
  for (let start = 0; start < names.length; start += chunkSize) {
    queries.push(createBulkInsertQuery(names.slice(start, start + chunkSize)));
  }
  return queries;
}

// Fonction utilitaire permettant de formatter les données pour l'insertion "bulk" dans elastic
Expand Down