Changeset 94

Show
Ignore:
Timestamp:
10/24/09 17:19:37 (10 months ago)
Author:
max
Message:

Switch from Sphinx to Haystack for search functionality.

Location:
trunk
Files:
2 added
7 removed
10 modified

Legend:

Unmodified
Added
Removed
  • trunk/courant/core/media/__init__.py

    r1 r94  
     1import search_indexes 
  • trunk/courant/core/news/__init__.py

    r1 r94  
     1import search_indexes 
  • trunk/courant/core/news/search_indexes.py

    r56 r94  
    22from haystack import site 
    33from models import Article 
     4from django.contrib.contenttypes.models import ContentType 
    45 
    56class ArticleIndex(indexes.SearchIndex): 
    67    text = indexes.CharField(document=True, use_template=True) 
    78    title = indexes.CharField(model_attr='heading') 
    8     section = indexes.CharField(model_attr='section__name') 
     9    section = indexes.IntegerField(model_attr='section__pk') 
    910    published_at = indexes.DateTimeField(model_attr='published_at') 
     11    type = indexes.IntegerField() 
     12    staffers = indexes.MultiValueField() 
    1013     
    11     #rendered = indexes.CharField(use_template=True, indexed=False) 
    12      
     14    def prepare_type(self, object): 
     15        return ContentType.objects.get_for_model(Article).pk 
     16       
     17    def prepare_staffers(self, object): 
     18        return [s.id for s in object.authors.all()] 
     19         
    1320    def get_queryset(self): 
    1421        return Article.live.all() 
  • trunk/courant/core/search/__init__.py

    r1 r94  
    1 class AlreadyRegistered(Exception): 
    2     """ 
    3     An attempt was made to register a model for get tag more than once. 
    4     """ 
    5     pass 
    6  
    7 class SearchRegistry(object): 
    8     def __init__(self): 
    9         self._registry = {} 
    10  
    11     def register(self, 
    12                  model, 
    13                  fields=None, 
    14                  filter_fields=None, 
    15                  date_field='published_at', 
    16                  use_delta=False): 
    17         """ 
    18         Sets the given model class up for working with tags. 
    19         """ 
    20         if model in self._registry: 
    21             raise AlreadyRegistered( 
    22                 _('The model %s has already been registered.') % model.__name__) 
    23          
    24         self._registry[model] = {'fields': fields, 
    25                                  'filter_fields': filter_fields, 
    26                                  'date_field': date_field, 
    27                                  'use_delta': use_delta} 
    28          
    29     def unregister(self, model): 
    30         if model in self._registry: 
    31             del self._registry[model]        
    32 search = SearchRegistry() 
  • trunk/courant/core/search/forms.py

    r1 r94  
    22from django.forms.extras.widgets import SelectDateWidget 
    33import datetime 
     4from haystack.forms import FacetedSearchForm 
     5from courant.core.news.models import Article 
    46 
    5 class SearchForm(forms.Form): 
    6     q = forms.CharField(min_length=2, max_length=100, label='Search Terms', required=False) 
    7     start_date = forms.DateField(required=False, )#widget=SelectDateWidget(years=range(1996,datetime.date.today().year+1))) 
    8     end_date = forms.DateField(required=False, )#widget=SelectDateWidget(years=range(1996,datetime.date.today().year+1))) 
    9     sort_by = forms.ChoiceField(choices=( 
    10                                         ('relevance', 'Relevance',), 
    11                                         ('date', 'Date',) 
    12                                         ,), 
    13                                 required=False, 
    14                                ) 
    15     indexes = forms.MultipleChoiceField(choices=( 
    16                                         ('articles', 'Articles',), 
    17                                         ('events', 'Events',) 
    18                                         ,), 
    19                                 label="What To Search", 
    20                                 widget=forms.CheckboxSelectMultiple, 
    21                                 required=False, 
    22                                ) 
     7class CourantSearchForm(FacetedSearchForm): 
     8    start_date = forms.DateField(required=False, widget=SelectDateWidget(years=range(Article.objects.order_by('published_at')[0].published_at.year,datetime.date.today().year+1))) 
     9    end_date = forms.DateField(required=False, widget=SelectDateWidget(years=range(Article.objects.order_by('published_at')[0].published_at.year,datetime.date.today().year+1))) 
     10     
     11    def __init__(self, *args, **kwargs): 
     12        super(CourantSearchForm, self).__init__(*args, **kwargs) 
     13        oldest_article_date = Article.objects.order_by('published_at')[0].published_at 
     14        self.fields['start_date'].widget = SelectDateWidget(years=range(oldest_article_date.year,datetime.date.today().year+1)) 
     15        self.fields['end_date'].widget = SelectDateWidget(years=range(oldest_article_date.year,datetime.date.today().year+1)) 
     16     
     17    def search(self): 
     18        sqs = super(CourantSearchForm, self).search() 
     19         
     20        if self.cleaned_data['start_date']: 
     21            sqs = sqs.filter(published_at__gte=self.cleaned_data['start_date']) 
     22         
     23        if self.cleaned_data['end_date']: 
     24            sqs = sqs.filter(published_at__lte=self.cleaned_data['end_date']) 
     25             
     26        return sqs 
  • trunk/courant/core/search/templatetags/search.py

    r33 r94  
    11from django.template import Library, Node, Variable 
    2 from courant.core.search.forms import SearchForm 
     2from courant.core.search.forms import CourantSearchForm 
    33 
    44register = Library() 
     5 
     6class SearchFacetCheck(Node): 
     7    def __init__(self, facet, value, varname): 
     8        self.facet = facet 
     9        self.value = value 
     10        self.varname = varname 
     11         
     12    def render(self, context): 
     13        request = context['request'] 
     14        facets = request.GET.getlist('selected_facets') 
     15        found = False 
     16        facet_type = unicode(Variable(self.facet).resolve(context)) 
     17        value = unicode(Variable(self.value).resolve(context)) 
     18        for facet in facets: 
     19            name, id = facet.split(':') 
     20            if name == facet_type and id == value: 
     21                found = True 
     22                break 
     23        context[self.varname] = found 
     24        return '' 
     25     
     26def do_search_facet_check(parser, token): 
     27    bits = token.contents.split() 
     28    if not len(bits) == 5: 
     29        raise TemplateSyntaxError, "search_facet_check syntax error" 
     30    return SearchFacetCheck(bits[1], bits[2], bits[4]) 
     31do_search_facet_check = register.tag('search_facet_check', do_search_facet_check) 
     32 
     33def strip_facet(url, facet, value): 
     34    to_remove = "&selected_facets=%s:%s" % (facet, value) 
     35    return url.replace('%3A', ':').replace(to_remove, '') 
     36register.simple_tag(strip_facet) 
    537 
    638class SearchFormNode(Node): 
     
    840        self.varname = varname 
    941    def render(self, context): 
    10         context[self.varname] = SearchForm(context['request'].GET) 
     42        context[self.varname] = CourantSearchForm(context['request'].GET) 
    1143        return '' 
    1244 
  • trunk/courant/core/search/urls.py

    r59 r94  
    11from django.conf.urls.defaults import * 
    2 from courant.core.search.views import * 
    32 
    4 from haystack.forms import ModelSearchForm 
     3import datetime 
     4 
    55from haystack.query import SearchQuerySet 
    6 from haystack.views import SearchView 
     6 
     7from courant.core.search.views import CourantSearchView 
     8from courant.core.search.forms import CourantSearchForm 
    79 
    810urlpatterns = patterns('', 
    911    url(r'', CourantSearchView(template='search/results_page.html', 
    10                                form_class=ModelSearchForm, 
    11                                searchqueryset=SearchQuerySet().all()), name="search"), 
     12                               form_class=CourantSearchForm, 
     13                               searchqueryset=SearchQuerySet().facet('section').facet('staffers').facet('type')), name="search"), 
    1214) 
  • trunk/courant/core/search/views.py

    r57 r94  
    1 from courant.core.search.forms import SearchForm 
     1from courant.core.search.forms import CourantSearchForm 
    22from courant.core.utils import render 
    33 
    4 from haystack.forms import ModelSearchForm 
    54from haystack.query import SearchQuerySet 
    6 from haystack.views import SearchView 
     5from haystack.views import FacetedSearchView 
    76 
    8 class CourantSearchView(SearchView): 
     7class CourantSearchView(FacetedSearchView): 
     8    def __call__(self, request): 
     9        self.request = request 
     10         
     11        self.date_sort = (request.GET.get('order', '') == 'date') 
     12         
     13        self.form = self.build_form() 
     14        self.query = self.get_query() 
     15        self.results = self.get_results() 
     16         
     17        return self.create_response() 
     18         
     19    def get_results(self): 
     20        if self.query: 
     21            if self.date_sort: 
     22                return self.form.search().order_by('-published_at') 
     23            return self.form.search() 
     24         
     25        return [] 
    926     
    10     def get_results(self): 
    11         r = super(CourantSearchView, self).get_results() 
    12         if isinstance(r, SearchQuerySet): 
    13             return r.load_all() 
    14         return r 
     27    def extra_context(self): 
     28        extra = {} 
     29         
     30        if self.query: 
     31            facets = self.form.search().facet_counts() 
     32             
     33            for field, values in facets['fields'].items(): 
     34                # sort in descending order 
     35                values.sort(lambda x,y:cmp(y[1],x[1])) 
     36                 
     37                # remove any null values or empty facets 
     38                # e.g., media don't have sections, so section facets show up as null 
     39                to_remove = [] 
     40                for index, value in enumerate(values): 
     41                    if value[0] == None or value[1] == 0: 
     42                        # must delete from end of list towards front, or else indexes will change 
     43                        to_remove.insert(0, index)  
     44                for index in to_remove: 
     45                    del values[index] 
    1546     
    16     def build_page(self): 
    17         return (None, self.results) 
    18  
    19 #import copy 
    20 #def search(request): 
    21 #    results = {} 
    22 #    params = request.GET.copy() #Since request.GET is immutable, we need to create a copy to manipulate 
    23 #     
    24 #    params.setlistdefault('indexes', ['articles',]) #Make sure at least articles is checked 
    25 #    indexes = copy.deepcopy(params.getlist('indexes')) #deepcopy so articles_delta isn't injected into params, messing up the form 
    26 #    if 'articles' in indexes: 
    27 #        indexes.append('articles_delta') 
    28 #    indexes = str(' '.join(indexes)) #Sphinx wants a string, not a unicode 
    29 #     
    30 #    form = SearchForm(params) 
    31 #     
    32 #    if request.GET['q']: 
    33 #        if form.is_valid(): 
    34 #            results = SphinxQuerySet(index=indexes).query(form.cleaned_data['q']).set_options(mode=SPH_MATCH_EXTENDED) 
    35 #            if form.cleaned_data['end_date']: 
    36 #                results = results.filter(date__lte=form.cleaned_data['end_date']) 
    37 #            if form.cleaned_data['start_date']: 
    38 #                results = results.filter(date__gte=form.cleaned_data['start_date']) 
    39 #            if form.cleaned_data['sort_by'] == 'date': 
    40 #                results = results.order_by('-date') 
    41 #    return render(request, ['search/results_page'], {'form': form, 'results':results, 'terms':request.GET['q'] }) 
     47            extra['facets'] = facets 
     48            extra['results'] = self.results 
     49            extra['sort_order'] = 'date' if self.date_sort else 'relevance' 
     50        return extra 
  • trunk/courant/core/staff/__init__.py

    r1 r94  
     1import search_indexes 
  • trunk/docs/ref/core/search.txt

    r14 r94  
    88    :synopsis: Powerful search of all content types. 
    99     
    10 Courant News currently builds upon `django-sphinx <http://code.google.com/p/django-sphinx/>`_, 
    11 but plans to switch to `haystack <http://haystacksearch.org/>`_ in the near future. 
     10Courant News currently builds upon the `Haystack <http://haystacksearch.org/>`_ 
     11search library, which allows for use of various search backend options 
     12depending on your hosting situation and needs. 
     13 
     14For example, if you are on shared hosting that does not allow the installation 
     15of Java or custom C++ programs, you can use the `Whoosh <http://whoosh.ca/>`_ 
     16engine, which is written in pure Python. However, if you can use Java, it is recommended 
     17that you use the `Solr <http://lucene.apache.org/solr/>`_ backend, which is far 
     18faster and can handle larger datasets. Read the Haystack documentation 
     19for more information on choosing a backend. 
     20 
     21Courant News makes some assumptions about what information to display on the 
     22search results page, but if you desire different behavior, simply define 
     23your own search views, forms, and URLs and include them in your site URLconf.