add search pages

BruceHenry · May 6, 2018 · 55fd186 · 55fd186
1 parent 6be528b
commit 55fd186
Show file tree

Hide file tree

Showing 10 changed files with 194 additions and 75 deletions.
diff --git a/MovieHunter/views.py b/MovieHunter/views.py
@@ -8,31 +8,27 @@
 
 @csrf_protect
 def index(request):
-    if request.POST:
-        if request.POST.get('Search'):
-            content = request.POST.get('title')
-            return redirect('/movie/search/' + content)
-    else:
-        data = {}
-        movie_dict = search_index.data_in_memory['movie_dict']
-        if request.user.is_authenticated:
-            data = {'username': request.user.get_username()}
-        popular_movies = Popularity.objects.all().order_by('-weight')
-        popular = []
-        for movie in popular_movies[:5]:
-            try:
-                popular.append({'movieid': movie.movieid_id, 'poster': movie_dict[movie.movieid_id].poster})
-            except:
-                continue
-        data['popular'] = popular
-        popular_movie_list = [movie_dict[movie.movieid_id] for movie in popular_movies[:5]]
-        data['recommendation'] = get_recommendation(request, popular_movie_list)
-        return render(request, 'base.html', data)
+    data = {}
+    movie_dict = search_index.data_in_memory['movie_dict']
+    if request.user.is_authenticated:
+        data = {'username': request.user.get_username()}
+    popular_movies = Popularity.objects.all().order_by('-weight')
+    popular = []
+    for movie in popular_movies[:5]:
+        try:
+            popular.append({'movieid': movie.movieid_id, 'poster': movie_dict[movie.movieid_id].poster})
+        except:
+            continue
+    data['popular'] = popular
+    popular_movie_list = [movie_dict[movie.movieid_id] for movie in popular_movies[:5]]
+    data['recommendation'] = get_recommendation(request, popular_movie_list)
+    return render(request, 'base.html', data)
 
 
 def get_recommendation(request, popular_movie_list):
     result = []
     movie_dict = search_index.data_in_memory['movie_dict']
+    added_movie_list = []
     if request.user.is_authenticated:
         username = request.user.get_username()
         watched_movies = set([movie_dict[movie.movieid_id] for movie in Seen.objects.filter(username=username)] +
@@ -44,19 +40,20 @@ def get_recommendation(request, popular_movie_list):
                 genre_stats[genre] = genre_stats.get(genre, 0) + 1
         movie_score = {}
         for movie in unwatched_movies:
+            movie_score[movie.movieid] = movie.rate
             for genre in movie.genres.split('|'):
-                movie_score[movie.movieid] = genre_stats.get(genre, 0) / len(watched_movies) + movie.rate
+                movie_score[movie.movieid] += genre_stats.get(genre, 0) / len(watched_movies)
         sorted_list = sorted(movie_score.items(), key=operator.itemgetter(1), reverse=True)
         for item in sorted_list:
             movie = movie_dict[item[0]]
             result.append({'movieid': movie.movieid, 'poster': movie.poster})
+            added_movie_list.append(movie)
             if len(result) == 8:
                 break
-    sorted_list = sorted(search_index.data_in_memory['movie_rating'].items(), key=operator.itemgetter(1),
-                         reverse=True)
+    sorted_list = sorted(search_index.data_in_memory['movie_rating'].items(), key=operator.itemgetter(1), reverse=True)
     for item in sorted_list:
         movie = movie_dict[item[0]]
-        if movie not in popular_movie_list:
+        if movie not in popular_movie_list and movie not in added_movie_list:
             result.append({'movieid': movie.movieid, 'poster': movie.poster})
         if len(result) == 10:
             break

diff --git a/README.md b/README.md
@@ -7,7 +7,7 @@ For the current version, the following features are implemented,
 - Login via Facebook (**OAuth** with Facebook API)
 - **Recommender** according to user's taste
 
-I deployed the website at [https://baohan08.pythonanywhere.com/](https://baohan08.pythonanywhere.com/).
+This website is deployed at [https://baohan08.pythonanywhere.com/](https://baohan08.pythonanywhere.com/).
 
 ***
 
@@ -21,7 +21,9 @@ I deployed the website at [https://baohan08.pythonanywhere.com/](https://baohan0
 <a name="data-and-database"></a>
 
 ## Data and Database 
-I got 5000+ movie_ids from this [movie dataset](https://www.kaggle.com/oxanozaep/imdb-eda/data). With these movie_ids, I utilized a python lib called [imdbpie](https://pypi.org/project/imdbpie/) to collect other columns of data. Currently, there are about 3000 movies in the database.
+The original data source is a set of 5000+ movie_ids from this [movie dataset](https://www.kaggle.com/oxanozaep/imdb-eda/data). With these movie_ids, I used a Python library called [imdbpie](https://pypi.org/project/imdbpie/) to collect the other columns of data.
+
+Currently, there are about **3000 movies** in the database.
 
 To make it easy to deploy, SQLite is chosen as database. The database file is `movie.db` in the root directory.
 
@@ -30,24 +32,27 @@ To make it easy to deploy, SQLite is chosen as database. The database file is `m
 
 ## Search Engine and Cache
 
-### Approaches
-- An Inverted Permuterm Index in B-Tree Structure
-- LRU Cache for Frequent Query
-- Results Sorted by Rating
+- **Search Index**: Built an inverted index structure with wildcard support to enable fuzzy search.
+- **Rank**: Movie search results are sorted by rating, while actor search results are sorted by the number of movies the actor has appeared in.
+- **Cache**: Implemented an LRU cache that stores search results to make search suggestions faster.
 
 
 <a name="recommender"></a>
 
 ## Recommender
 
-- **Item-based** : Do the item-based recommendation based on users’ seens and expects
+An **item-based** recommender is implemented.
+
+Based on the movies in a user's movie list, movies with the **same genres** are recommended to that user. If the user's movie list is empty, or there are not enough movies to recommend, the movies with the highest ratings are recommended instead.
+
+The final recommendation is **randomly** chosen from a set of candidate movies, so the result will be slightly different each time.
 
 
 <a name="deployment-instructions"></a>
 
 ## Deployment Instructions
 1. Install [**Python 3**]( https://www.python.org/) in your computer, and make sure to set environment variable correctly.
-2. Install **Django** and **Sklearn** for the Python environment. The easiest way is to use pip by running `pip install django` and `pip install sklearn` in a terminal.
+2. Install **Django** for the Python environment. The easiest way is to use pip by running `pip install django`.
 3. Open a terminal, input command: `python manage.py runserver 8080`
 4. Open your web browser, input `localhost:8080` in the address bar.
 - P.S. If you fail running `python manage.py runserver 8080`, try another port numbers, like 8081 or 8000.
diff --git a/movie.db b/movie.db
diff --git a/movie/search_index.py b/movie/search_index.py
@@ -43,46 +43,50 @@ def __build_index(self):
                         self.actor_index[permuted_term] = set()
                     self.actor_index[permuted_term].add(actor.actorid)
 
-    def search(self, query_string):
+    def search_movie(self, query_string):
         high_matching_movies, middle_matching_movies, low_matching_movies = set(), set(), set()
-        high_matching_actors, middle_matching_actors, low_matching_actors = set(), set(), set()
         for token in self.tokenize(query_string):
             start_with_token = self.rotate(token + "*")
             end_with_token = self.rotate("*" + token)
-            movie_result, actor_result = set(), set()
+            movie_result = set()
             for movie in self.search_index(self.movie_index, [start_with_token, end_with_token]):
                 movie_result.add(movie)
-            for actor in self.search_index(self.actor_index, [start_with_token, end_with_token]):
-                actor_result.add(actor)
             wild_tokens = self.add_wild_card(token)
             for movie in self.search_index(self.movie_index, [self.rotate(t) for t in wild_tokens]):
                 low_matching_movies.add(movie)
-            for actor in self.search_index(self.actor_index, [self.rotate(t) for t in wild_tokens]):
-                low_matching_actors.add(actor)
-
             if len(high_matching_movies) == 0:
                 high_matching_movies = high_matching_movies.union(movie_result)
             else:
                 high_matching_movies = high_matching_movies.intersection(movie_result)
+            middle_matching_movies = middle_matching_movies.union(movie_result)
+        middle_matching_movies = middle_matching_movies - high_matching_movies
+        low_matching_movies = low_matching_movies - high_matching_movies - middle_matching_movies
+        return (sorted(high_matching_movies, key=self.get_movie_rating, reverse=True) +
+                sorted(middle_matching_movies, key=self.get_movie_rating, reverse=True) +
+                sorted(low_matching_movies, key=self.get_movie_rating, reverse=True))
+
+    def search_actor(self, query_string):
+        high_matching_actors, middle_matching_actors, low_matching_actors = set(), set(), set()
+        for token in self.tokenize(query_string):
+            start_with_token = self.rotate(token + "*")
+            end_with_token = self.rotate("*" + token)
+            actor_result = set()
+            for actor in self.search_index(self.actor_index, [start_with_token, end_with_token]):
+                actor_result.add(actor)
+            wild_tokens = self.add_wild_card(token)
+            for actor in self.search_index(self.actor_index, [self.rotate(t) for t in wild_tokens]):
+                low_matching_actors.add(actor)
+
             if len(high_matching_actors) == 0:
                 high_matching_actors = high_matching_actors.union(actor_result)
             else:
                 high_matching_actors = high_matching_actors.intersection(actor_result)
-            middle_matching_movies = middle_matching_movies.union(movie_result)
             middle_matching_actors = middle_matching_actors.union(actor_result)
-
-        middle_matching_movies = middle_matching_movies - high_matching_movies
         middle_matching_actors = middle_matching_actors - high_matching_actors
-        low_matching_movies = low_matching_movies - high_matching_movies - middle_matching_movies
         low_matching_actors = low_matching_actors - high_matching_actors - middle_matching_actors
-
-        movie_result = sorted(high_matching_movies, key=self.get_movie_rating, reverse=True) + \
-                       sorted(middle_matching_movies, key=self.get_movie_rating, reverse=True) + \
-                       sorted(low_matching_movies, key=self.get_movie_rating, reverse=True)
-        actor_result = sorted(high_matching_actors, key=self.get_actor_act_num, reverse=True) + \
-                       sorted(middle_matching_actors, key=self.get_actor_act_num, reverse=True) + \
-                       sorted(low_matching_actors, key=self.get_actor_act_num, reverse=True)
-        return [movie_result, actor_result]
+        return (sorted(high_matching_actors, key=self.get_actor_act_num, reverse=True) +
+                sorted(middle_matching_actors, key=self.get_actor_act_num, reverse=True) +
+                sorted(low_matching_actors, key=self.get_actor_act_num, reverse=True))
 
     def search_suggest(self, query_string):
         movie_flag, actor_flag = False, False

diff --git a/movie/urls.py b/movie/urls.py
@@ -7,7 +7,7 @@
     url(r'^actor_all/(?P<page>\d*)', views.whole_list, {'model': models.Actor}, name='whole_list'),
     url(r'^movie_detail/(?P<id>.*)', views.detail, {'model': models.Movie}, name='movie_detail'),
     url(r'^actor_detail/(?P<id>.*)', views.detail, {'model': models.Actor}, name='actor_detail'),
-    url(r'^search/(?P<pattern>.*)', views.search, name='search'),
+    url(r'^search/(?P<item>.*)/(?P<query_string>.*)/(?P<page>\d*).*', views.search, name='search'),
     url(r'^seen/(?P<movie_id>.*)', views.seen, name='seen'),
     url(r'^add_seen/(?P<movie_id>.*)', views.add_seen, name='seen'),
     url(r'^expect/(?P<movie_id>.*)', views.expect, name='expect'),

diff --git a/movie/views.py b/movie/views.py
@@ -104,18 +104,33 @@ def whole_list(request, model, page):
     return render(request, '{}_list.html'.format(model.get_name()), data)
 
 
-def search(request, pattern):
-    pattern = pattern.replace("%20", " ")
-    search_results = search_index.search(pattern)
-    movies, actors = [], []
-    for movieid in search_results[0]:
-        movies.append(search_index.data_in_memory['movie_dict'].get(movieid))
-    for actorid in search_results[1]:
-        actors.append(search_index.data_in_memory['actor_dict'].get(actorid))
-    return render(request, 'searchresult.html',
-                  {'items1': movies, 'search1': pattern, 'number1': len(movies),
-                   'items2': actors,
-                   'search2': pattern, 'number2': len(actors)})
+def search(request, item, query_string, page):
+    if item is None or query_string is None or page is None:
+        return render(request, '404.html')
+    query_string = query_string.replace("%20", " ")
+    if item == 'movie':
+        result = [search_index.data_in_memory['movie_dict'][movie_id] for movie_id in
+                  search_index.search_movie(query_string)]
+    elif item == 'actor':
+        result = [search_index.data_in_memory['actor_dict'][actor_id] for actor_id in
+                  search_index.search_actor(query_string)]
+    else:
+        return render(request, '404.html')
+    page = int(page)
+    total_page = int(math.ceil(len(result) / 10))
+    if page > total_page and total_page != 0:
+        return render(request, '404.html')
+    last_item_index = 10 * page if page != total_page else len(result)
+    pages = []
+    end_distance = total_page - page
+    start_page_num = page - 5 if end_distance >= 5 else page - 10 + end_distance
+    end_page_num = page + 5 if page > 5 else 10
+    for i in range(start_page_num, end_page_num + 1):
+        if 1 <= i <= total_page:
+            pages.append(i)
+    return render(request, item + '_search.html',
+                  {'items': result[10 * (page - 1):last_item_index], 'length': len(result),
+                   'query_string': query_string, 'current_page': page, 'page_number': total_page, 'pages': pages})
 
 
 def search_suggest(request, query_string):

diff --git a/templates/actor_search.html b/templates/actor_search.html
@@ -0,0 +1,47 @@
+{% extends "base.html" %}
+
+{% block mainbody %}
+
+    <a href="/movie/search/movie/{{ query_string }}/{{ 1 }}">
+        <button class="button search_button">
+            <span class="glyphicon glyphicon-film"></span> Movie
+        </button>
+    </a>
+    <a href="/movie/search/actor/{{ query_string }}/{{ 1 }}">
+        <button class="button search_button" style="background-color: #008CBA;color: white;">
+            <span class="glyphicon glyphicon-user"></span> Actor
+        </button>
+    </a>
+    <div>
+        <h2>{{ length }} actor(s) with "{{ query_string }}":</h2>
+        <hr>
+
+        {% for actor in items %}
+            <h2><a href="/movie/actor_detail/{{ actor.actorid }}" target="_self">{{ actor.name }}</a></h2>
+            <img border="0" src="{{ actor.photo }}" alt="Cannot load photo, sorry!" width="200" height="280">
+            <hr>
+        {% endfor %}
+
+        {% if pages %}
+            <p>You are in the {{ current_page }} page, {{ page_number }} pages in total</p>
+            {% if current_page != 1 %}
+                <a href="/movie/search/actor/{{ query_string }}/{{ 1 }}" target="_self">First</a>
+                <a href="/movie/search/actor/{{ query_string }}/{{ current_page|add:-1 }}" target="_self">Previous</a>
+            {% endif %}
+
+            {% for page in pages %}
+                {% if page == current_page %}
+                    <b>{{ page }}</b>
+                {% else %}
+                    <a href="/movie/search/actor/{{ query_string }}/{{ page }}" target="_self">{{ page }}</a>
+                {% endif %}
+            {% endfor %}
+
+            {% if current_page != page_number %}
+                <a href="/movie/search/actor/{{ query_string }}/{{ current_page|add:1 }}" target="_self">Next</a>
+                <a href="/movie/search/actor/{{ query_string }}/{{ page_number }}" target="_self">Last</a>
+            {% endif %}
+        {% endif %}
+
+    </div>
+{% endblock %}
diff --git a/templates/base.html b/templates/base.html
@@ -37,21 +37,20 @@
                 <li><a href="/movie/movie_all/1"><span class="glyphicon glyphicon-film"></span> Movies</a></li>
                 <li><a href="/movie/actor_all/1"><span class="glyphicon glyphicon-user"></span> Actors</a></li>
             </ul>
-            <form class="navbar-form navbar-left" action="/" method="post">
+            <form class="navbar-form navbar-left" id="search--form" onsubmit="searchSubmit()">
                 {% csrf_token %}
 
                 <input type="text" name="title" id="search-input" class="form-control"
                        placeholder="Search Movies or Actors"
                        onkeyup="start_timer()" size="55" autocomplete="off" required/>
-                <input type="submit" class="btn btn-default" name="Search" value="Submit"/>
+                <input type="submit" class="btn btn-default" name="search" value="Submit"/>
 
                 <div class="searchlistdiv"
                      style="position:absolute;top:42px;left:295px;user-select:none;display:none;width: 380px;"
                      aria-expanded="false">
                     <div class="search-movie">Movie:</div>
                     <div class="search-actor">Actor:</div>
                 </div>
-
             </form>
 
             <ul class="nav navbar-nav navbar-right">
@@ -190,6 +189,11 @@
     }
 </script>
 <script>
+    function searchSubmit() {
+        var action_src = '/movie/search/movie/' + document.getElementById("search-input").value + '/1';
+        document.getElementById('search--form').action = action_src;
+    }
+
     $(".form-control").bind('blur', function () {
         setTimeout(function () {
             $(".searchlistdiv").hide();

diff --git a/templates/movie_search.html b/templates/movie_search.html
@@ -0,0 +1,47 @@
+{% extends "base.html" %}
+
+{% block mainbody %}
+
+    <a href="/movie/search/movie/{{ query_string }}/{{ 1 }}">
+        <button class="button search_button" style="background-color: #008CBA;color: white;">
+            <span class="glyphicon glyphicon-film"></span> Movie
+        </button>
+    </a>
+    <a href="/movie/search/actor/{{ query_string }}/{{ 1 }}">
+        <button class="button search_button">
+            <span class="glyphicon glyphicon-user"></span> Actor
+        </button>
+    </a>
+    <div>
+        <h2>{{ length }} movie(s) with "{{ query_string }}":</h2>
+        <hr>
+
+        {% for movie in items %}
+            <h2><a href="/movie/movie_detail/{{ movie.movieid }}" target="_self">{{ movie.title }}</a></h2>
+            <img border="0" src="{{ movie.poster }}" alt="Cannot load photo, sorry!" width="200" height="320">
+            <hr>
+        {% endfor %}
+
+        {% if pages %}
+            <p>You are in the {{ current_page }} page, {{ page_number }} pages in total</p>
+            {% if current_page != 1 %}
+                <a href="/movie/search/movie/{{ query_string }}/{{ 1 }}" target="_self">First</a>
+                <a href="/movie/search/movie/{{ query_string }}/{{ current_page|add:-1 }}" target="_self">Previous</a>
+            {% endif %}
+
+            {% for page in pages %}
+                {% if page == current_page %}
+                    <b>{{ page }}</b>
+                {% else %}
+                    <a href="/movie/search/movie/{{ query_string }}/{{ page }}" target="_self">{{ page }}</a>
+                {% endif %}
+            {% endfor %}
+
+            {% if current_page != page_number %}
+                <a href="/movie/search/movie/{{ query_string }}/{{ current_page|add:1 }}" target="_self">Next</a>
+                <a href="/movie/search/movie/{{ query_string }}/{{ page_number }}" target="_self">Last</a>
+            {% endif %}
+        {% endif %}
+
+    </div>
+{% endblock %}