Skip to content

Commit

Permalink
Merge pull request readthedocs#4211 from safwanrahman/search
Browse files Browse the repository at this point in the history
Upgrade Elasticsearch to version 6.x
  • Loading branch information
ericholscher authored and safwanrahman committed Jul 16, 2018
2 parents dfdf4df + 0965a94 commit 8d7942b
Show file tree
Hide file tree
Showing 29 changed files with 443 additions and 173 deletions.
3 changes: 2 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ python:
- 3.6
sudo: false
env:
- ES_VERSION=1.3.9 ES_DOWNLOAD_URL=https://download.elastic.co/elasticsearch/elasticsearch/elasticsearch-${ES_VERSION}.tar.gz
- ES_VERSION=6.2.4 ES_DOWNLOAD_URL=https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-${ES_VERSION}.tar.gz
matrix:
include:
- python: 2.7
Expand Down Expand Up @@ -42,3 +42,4 @@ notifications:
branches:
only:
- master
- search_upgrade
3 changes: 2 additions & 1 deletion readthedocs/projects/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from .forms import FeatureForm
from .models import (Project, ImportedFile, Feature,
ProjectRelationship, EmailHook, WebHook, Domain)
ProjectRelationship, EmailHook, WebHook, Domain, HTMLFile)
from .notifications import ResourceUsageNotification
from .tasks import remove_dir

Expand Down Expand Up @@ -206,3 +206,4 @@ def project_count(self, feature):
admin.site.register(Feature, FeatureAdmin)
admin.site.register(EmailHook)
admin.site.register(WebHook)
admin.site.register(HTMLFile)
1 change: 1 addition & 0 deletions readthedocs/projects/apps.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@ class ProjectsConfig(AppConfig):
def ready(self):
from readthedocs.projects import tasks
from readthedocs.worker import app

app.tasks.register(tasks.SyncRepositoryTask)
app.tasks.register(tasks.UpdateDocsTask)
7 changes: 7 additions & 0 deletions readthedocs/projects/managers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from django.db import models


class HTMLFileManager(models.Manager):

    """Manager whose default queryset keeps only rows with a ``.html`` name."""

    def get_queryset(self):
        """Return the parent queryset narrowed to names ending in ``.html``."""
        base_queryset = super(HTMLFileManager, self).get_queryset()
        html_only = base_queryset.filter(name__endswith='.html')
        return html_only
54 changes: 54 additions & 0 deletions readthedocs/projects/migrations/0026_add_htmlfile_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9.13 on 2018-06-18 16:45
from __future__ import unicode_literals

from django.db import migrations, models


# Migration for the ``projects`` app: declares the ``HTMLFile`` proxy model and
# re-declares several ``Project`` field definitions.
class Migration(migrations.Migration):

# Must be applied after migration 0025 of the same app.
dependencies = [
('projects', '0025_show-version-warning-existing-projects'),
]

operations = [
# ``HTMLFile`` is declared as a proxy (``'proxy': True``) with no fields of
# its own; its only base is ``projects.importedfile``.
migrations.CreateModel(
name='HTMLFile',
fields=[
],
options={
'proxy': True,
},
bases=('projects.importedfile',),
),
# The AlterField operations below restate ``Project`` field definitions
# (choices, defaults, help text, verbose names).
# NOTE(review): these look auto-generated alongside the proxy model rather
# than hand-written for it -- confirm they are intended in this migration.
migrations.AlterField(
model_name='project',
name='comment_moderation',
field=models.BooleanField(default=False, verbose_name='Comment Moderation'),
),
migrations.AlterField(
model_name='project',
name='documentation_type',
field=models.CharField(choices=[('auto', 'Automatically Choose'), ('sphinx', 'Sphinx Html'), ('mkdocs', 'Mkdocs (Markdown)'), ('sphinx_htmldir', 'Sphinx HtmlDir'), ('sphinx_singlehtml', 'Sphinx Single Page HTML')], default='sphinx', help_text='Type of documentation you are building. <a href="http://www.sphinx-doc.org/en/stable/builders.html#sphinx.builders.html.DirectoryHTMLBuilder">More info</a>.', max_length=20, verbose_name='Documentation type'),
),
migrations.AlterField(
model_name='project',
name='language',
field=models.CharField(choices=[('aa', 'Afar'), ('ab', 'Abkhaz'), ('af', 'Afrikaans'), ('am', 'Amharic'), ('ar', 'Arabic'), ('as', 'Assamese'), ('ay', 'Aymara'), ('az', 'Azerbaijani'), ('ba', 'Bashkir'), ('be', 'Belarusian'), ('bg', 'Bulgarian'), ('bh', 'Bihari'), ('bi', 'Bislama'), ('bn', 'Bengali'), ('bo', 'Tibetan'), ('br', 'Breton'), ('ca', 'Catalan'), ('co', 'Corsican'), ('cs', 'Czech'), ('cy', 'Welsh'), ('da', 'Danish'), ('de', 'German'), ('dz', 'Dzongkha'), ('el', 'Greek'), ('en', 'English'), ('eo', 'Esperanto'), ('es', 'Spanish'), ('et', 'Estonian'), ('eu', 'Basque'), ('fa', 'Iranian'), ('fi', 'Finnish'), ('fj', 'Fijian'), ('fo', 'Faroese'), ('fr', 'French'), ('fy', 'Western Frisian'), ('ga', 'Irish'), ('gd', 'Scottish Gaelic'), ('gl', 'Galician'), ('gn', 'Guarani'), ('gu', 'Gujarati'), ('ha', 'Hausa'), ('hi', 'Hindi'), ('he', 'Hebrew'), ('hr', 'Croatian'), ('hu', 'Hungarian'), ('hy', 'Armenian'), ('ia', 'Interlingua'), ('id', 'Indonesian'), ('ie', 'Interlingue'), ('ik', 'Inupiaq'), ('is', 'Icelandic'), ('it', 'Italian'), ('iu', 'Inuktitut'), ('ja', 'Japanese'), ('jv', 'Javanese'), ('ka', 'Georgian'), ('kk', 'Kazakh'), ('kl', 'Kalaallisut'), ('km', 'Khmer'), ('kn', 'Kannada'), ('ko', 'Korean'), ('ks', 'Kashmiri'), ('ku', 'Kurdish'), ('ky', 'Kyrgyz'), ('la', 'Latin'), ('ln', 'Lingala'), ('lo', 'Lao'), ('lt', 'Lithuanian'), ('lv', 'Latvian'), ('mg', 'Malagasy'), ('mi', 'Maori'), ('mk', 'Macedonian'), ('ml', 'Malayalam'), ('mn', 'Mongolian'), ('mr', 'Marathi'), ('ms', 'Malay'), ('mt', 'Maltese'), ('my', 'Burmese'), ('na', 'Nauru'), ('ne', 'Nepali'), ('nl', 'Dutch'), ('no', 'Norwegian'), ('oc', 'Occitan'), ('om', 'Oromo'), ('or', 'Oriya'), ('pa', 'Panjabi'), ('pl', 'Polish'), ('ps', 'Pashto'), ('pt', 'Portuguese'), ('qu', 'Quechua'), ('rm', 'Romansh'), ('rn', 'Kirundi'), ('ro', 'Romanian'), ('ru', 'Russian'), ('rw', 'Kinyarwanda'), ('sa', 'Sanskrit'), ('sd', 'Sindhi'), ('sg', 'Sango'), ('si', 'Sinhala'), ('sk', 'Slovak'), ('sl', 'Slovenian'), ('sm', 'Samoan'), 
('sn', 'Shona'), ('so', 'Somali'), ('sq', 'Albanian'), ('sr', 'Serbian'), ('ss', 'Swati'), ('st', 'Southern Sotho'), ('su', 'Sudanese'), ('sv', 'Swedish'), ('sw', 'Swahili'), ('ta', 'Tamil'), ('te', 'Telugu'), ('tg', 'Tajik'), ('th', 'Thai'), ('ti', 'Tigrinya'), ('tk', 'Turkmen'), ('tl', 'Tagalog'), ('tn', 'Tswana'), ('to', 'Tonga'), ('tr', 'Turkish'), ('ts', 'Tsonga'), ('tt', 'Tatar'), ('tw', 'Twi'), ('ug', 'Uyghur'), ('uk', 'Ukrainian'), ('ur', 'Urdu'), ('uz', 'Uzbek'), ('vi', 'Vietnamese'), ('vo', 'Volapuk'), ('wo', 'Wolof'), ('xh', 'Xhosa'), ('yi', 'Yiddish'), ('yo', 'Yoruba'), ('za', 'Zhuang'), ('zh', 'Chinese'), ('zu', 'Zulu'), ('nb_NO', 'Norwegian Bokmal'), ('pt_BR', 'Brazilian Portuguese'), ('es_MX', 'Mexican Spanish'), ('uk_UA', 'Ukrainian'), ('zh_CN', 'Simplified Chinese'), ('zh_TW', 'Traditional Chinese')], default='en', help_text="The language the project documentation is rendered in. Note: this affects your project's URL.", max_length=20, verbose_name='Language'),
),
migrations.AlterField(
model_name='project',
name='privacy_level',
field=models.CharField(choices=[('public', 'Public'), ('protected', 'Protected'), ('private', 'Private')], default='public', help_text='Level of privacy that you want on the repository. Protected means public but not in listings.', max_length=20, verbose_name='Privacy Level'),
),
migrations.AlterField(
model_name='project',
name='python_interpreter',
field=models.CharField(choices=[('python', 'CPython 2.x'), ('python3', 'CPython 3.x')], default='python', help_text='The Python interpreter used to create the virtual environment.', max_length=20, verbose_name='Python Interpreter'),
),
migrations.AlterField(
model_name='project',
name='version_privacy_level',
field=models.CharField(choices=[('public', 'Public'), ('protected', 'Protected'), ('private', 'Private')], default='public', help_text='Default level of privacy you want on built versions of documentation.', max_length=20, verbose_name='Version Privacy Level'),
),
]
39 changes: 38 additions & 1 deletion readthedocs/projects/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@
import fnmatch
import logging
import os
from builtins import object # pylint: disable=redefined-builtin

from builtins import object # pylint: disable=redefined-builtin
from django.conf import settings
from django.contrib.auth.models import User
from django.core.urlresolvers import NoReverseMatch, reverse
from django.db import models
from django.utils.encoding import python_2_unicode_compatible
from django.utils.functional import cached_property
from django.utils.translation import ugettext_lazy as _
from future.backports.urllib.parse import urlparse # noqa
from guardian.shortcuts import assign
Expand All @@ -24,6 +25,7 @@
from readthedocs.core.utils import broadcast, slugify
from readthedocs.projects import constants
from readthedocs.projects.exceptions import ProjectConfigurationError
from readthedocs.projects.managers import HTMLFileManager
from readthedocs.projects.querysets import (
ChildRelatedProjectQuerySet, FeatureQuerySet, ProjectQuerySet,
RelatedProjectQuerySet)
Expand All @@ -32,6 +34,7 @@
from readthedocs.projects.version_handling import (
determine_stable_version, version_windows)
from readthedocs.restapi.client import api
from readthedocs.search.parse_json import process_file
from readthedocs.vcs_support.backends import backend_cls
from readthedocs.vcs_support.utils import Lock, NonBlockingLock

Expand Down Expand Up @@ -916,6 +919,40 @@ def __str__(self):
return '%s: %s' % (self.name, self.project)


class HTMLFile(ImportedFile):

    """
    Proxy model over ``ImportedFile`` for HTML files.

    It exists to track only the HTML files that get indexed for search.
    """

    class Meta(object):
        proxy = True

    objects = HTMLFileManager()

    @cached_property
    def json_file_path(self):
        """Path of the ``.fjson`` file that corresponds to this file's ``path``."""
        # Swap whatever extension ``path`` carries for ``.fjson``.
        relative_json = os.path.splitext(self.path)[0] + '.fjson'

        json_root = self.project.get_production_media_path(
            type_='json',
            version_slug=self.version.slug,
            include_file=False,
        )
        return os.path.join(json_root, relative_json)

    def get_processed_json(self):
        """Run ``process_file`` on ``json_file_path`` and return its result."""
        return process_file(self.json_file_path)

    @cached_property
    def processed_json(self):
        """Cached result of :meth:`get_processed_json`."""
        return self.get_processed_json()


class Notification(models.Model):
project = models.ForeignKey(Project,
related_name='%(class)s_notifications')
Expand Down
13 changes: 12 additions & 1 deletion readthedocs/projects/signals.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# -*- coding: utf-8 -*-
"""Project signals"""

from __future__ import absolute_import
import django.dispatch
from django.dispatch import receiver

from readthedocs.oauth.utils import attach_webhook


before_vcs = django.dispatch.Signal(providing_args=["version"])
Expand All @@ -14,3 +16,12 @@
project_import = django.dispatch.Signal(providing_args=["project"])

files_changed = django.dispatch.Signal(providing_args=["project", "files"])


@receiver(project_import)
def handle_project_import(sender, **kwargs):
    """
    Attach a webhook when the ``project_import`` signal fires.

    The signal sender is used as the project; ``request`` is taken from the
    signal keyword arguments and is ``None`` when it was not supplied.
    """
    attach_webhook(project=sender, request=kwargs.get('request'))
20 changes: 16 additions & 4 deletions readthedocs/projects/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
absolute_import, division, print_function, unicode_literals)

import datetime
import fnmatch
import hashlib
import json
import logging
Expand All @@ -30,7 +31,7 @@

from .constants import LOG_TEMPLATE
from .exceptions import RepositoryError
from .models import ImportedFile, Project, Domain, Feature
from .models import ImportedFile, Project, Domain, Feature, HTMLFile
from .signals import before_vcs, after_vcs, before_build, after_build, files_changed
from readthedocs.builds.constants import (
BUILD_STATE_BUILDING, BUILD_STATE_CLONING, BUILD_STATE_FINISHED,
Expand Down Expand Up @@ -987,18 +988,24 @@ def _manage_imported_files(version, path, commit):
changed_files = set()
for root, __, filenames in os.walk(path):
for filename in filenames:
if fnmatch.fnmatch(filename, '*.html'):
model_class = HTMLFile
else:
model_class = ImportedFile

dirpath = os.path.join(root.replace(path, '').lstrip('/'),
filename.lstrip('/'))
full_path = os.path.join(root, filename)
md5 = hashlib.md5(open(full_path, 'rb').read()).hexdigest()
try:
obj, __ = ImportedFile.objects.get_or_create(
# pylint: disable=unpacking-non-sequence
obj, __ = model_class.objects.get_or_create(
project=version.project,
version=version,
path=dirpath,
name=filename,
)
except ImportedFile.MultipleObjectsReturned:
except model_class.MultipleObjectsReturned:
log.warning('Error creating ImportedFile')
continue
if obj.md5 != md5:
Expand All @@ -1007,6 +1014,12 @@ def _manage_imported_files(version, path, commit):
if obj.commit != commit:
obj.commit = commit
obj.save()

# Delete the HTMLFile first from previous versions
HTMLFile.objects.filter(project=version.project,
version=version
).exclude(commit=commit).delete()

# Delete ImportedFiles from previous versions
ImportedFile.objects.filter(project=version.project,
version=version
Expand Down Expand Up @@ -1188,7 +1201,6 @@ def sync_callback(_, version_pk, commit, *args, **kwargs):
The first argument is the result from previous tasks, which we discard.
"""
fileify(version_pk, commit=commit)
update_search(version_pk, commit=commit)


@app.task()
Expand Down
21 changes: 12 additions & 9 deletions readthedocs/projects/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,21 @@ def version_from_slug(slug, version):
return v


def find_file(basename, pattern, path):
    """
    Recursively find the first file matching both a basename and a pattern.

    :param basename: file name without its extension, as produced by
        ``os.path.splitext``
    :param pattern: ``fnmatch``-style pattern the whole file name must match
    :param path: the directory to search for the file
    :returns: path of the first matching file, or ``None`` when no file under
        ``path`` matches
    """
    for root, __, filenames in os.walk(path):
        # Narrow by pattern first, then require the extension-less name to match.
        for filename in fnmatch.filter(filenames, pattern):
            if os.path.splitext(filename)[0] == basename:
                return os.path.join(root, filename)
    return None


def run(*commands):
Expand Down
1 change: 1 addition & 0 deletions readthedocs/search/conf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# HTML file names listed for exclusion from search.
# NOTE(review): presumably read by the search indexing code -- confirm against its consumers.
SEARCH_EXCLUDED_FILE = [
    'search.html',
    'genindex.html',
    'py-modindex.html',
]
Loading

0 comments on commit 8d7942b

Please sign in to comment.