Jump to content

User:SBachenberg

From mediawiki.org

My Projects:

  1. SMW Extension:SolrStore

My Public SSH Key

How to use SolrStore as Remote Autocomplete for Semantic Forms

[edit]
  1. Edit your Schema.xml
    1. Add the new field types needed for good autocompletion
      <fieldType name="text_suggest" class="solr.TextField" positionIncrementGap="100">
              <!-- Index side: tokens are split into word and number parts, the parts are
                   also concatenated, and the unsplit token is kept (preserveOriginal). -->
              <analyzer type="index">
                  <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
                  <tokenizer class="solr.StandardTokenizerFactory"/>
                  <filter class="solr.WordDelimiterFilterFactory"
                          generateWordParts="1"
                          generateNumberParts="1"
                          catenateWords="1"
                          catenateNumbers="1"
                          catenateAll="1"
                          splitOnCaseChange="1"
                          splitOnNumerics="1"
                          preserveOriginal="1"/>
                  <filter class="solr.LowerCaseFilterFactory"/>
                  <!-- Anything that is not a word character, digit, "*", æøåÆØÅ or a space becomes a space. -->
                  <filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ ])" replacement=" " replace="all"/>
              </analyzer>
              <!-- Query side: same char filter, tokenizer and clean-up, but every
                   word-delimiter split/catenate option is switched off. -->
              <analyzer type="query">
                  <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
                  <tokenizer class="solr.StandardTokenizerFactory"/>
                  <filter class="solr.WordDelimiterFilterFactory"
                          generateWordParts="0"
                          generateNumberParts="0"
                          catenateWords="0"
                          catenateNumbers="0"
                          catenateAll="0"
                          splitOnCaseChange="0"
                          splitOnNumerics="0"/>
                  <filter class="solr.LowerCaseFilterFactory"/>
                  <filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ ])" replacement=" " replace="all"/>
              </analyzer>
          </fieldType>
      
          <fieldType name="text_suggest_edge" class="solr.TextField">
              <!-- Prefix matching on the whole value: the keyword tokenizer keeps the input as
                   one token, which is lowercased, cleaned up and expanded into edge n-grams of
                   1 to 30 characters. Changing this type requires a re-index. -->
              <analyzer type="index">
                  <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
                  <tokenizer class="solr.KeywordTokenizerFactory"/>
                  <filter class="solr.LowerCaseFilterFactory"/>
                  <!-- Turn the punctuation . , ; : _ and the hyphen into spaces. The hyphen is
                       written last in the character class so that it is a literal; placed
                       between ":" and "_" it formed a range that skipped the hyphen itself
                       and matched unrelated characters such as = ? @ ^ instead. -->
                  <filter class="solr.PatternReplaceFilterFactory" pattern="([\.,;:_-])" replacement=" " replace="all"/>
                  <filter class="solr.EdgeNGramFilterFactory" maxGramSize="30" minGramSize="1"/>
                  <!-- Drop whatever is left that is not a word character, digit, "*", æøåÆØÅ or a space. -->
                  <filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ ])" replacement="" replace="all"/>
              </analyzer>
              <analyzer type="query">
                  <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
                  <tokenizer class="solr.KeywordTokenizerFactory"/>
                  <filter class="solr.LowerCaseFilterFactory"/>
                  <!-- Same punctuation handling as on the index side so both analyzers agree. -->
                  <filter class="solr.PatternReplaceFilterFactory" pattern="([\.,;:_-])" replacement=" " replace="all"/>
                  <filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ ])" replacement="" replace="all"/>
                  <!-- Keep only the first 30 characters of the query, matching maxGramSize above. -->
                  <filter class="solr.PatternReplaceFilterFactory" pattern="^(.{30})(.*)?" replacement="$1" replace="all"/>
              </analyzer>
          </fieldType>
      
          <fieldType name="text_suggest_ngram" class="solr.TextField">
              <!-- Index side: split into words, lowercase, then emit edge n-grams of
                   1 to 20 characters for every token. -->
              <analyzer type="index">
                  <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
                  <tokenizer class="solr.StandardTokenizerFactory"/>
                  <filter class="solr.WordDelimiterFilterFactory"
                          generateWordParts="1"
                          generateNumberParts="1"
                          catenateWords="0"
                          catenateNumbers="0"
                          catenateAll="0"
                          splitOnCaseChange="1"/>
                  <filter class="solr.LowerCaseFilterFactory"/>
                  <filter class="solr.EdgeNGramFilterFactory" maxGramSize="20" minGramSize="1"/>
                  <!-- Remove anything that is not a word character, digit, "*", æøåÆØÅ or a space. -->
                  <filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ ])" replacement="" replace="all"/>
              </analyzer>
              <!-- Query side: no n-grams and no word-delimiter splitting; each token is
                   cleaned up and cut to its first 20 characters, matching maxGramSize above. -->
              <analyzer type="query">
                  <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
                  <tokenizer class="solr.StandardTokenizerFactory"/>
                  <filter class="solr.WordDelimiterFilterFactory"
                          generateWordParts="0"
                          generateNumberParts="0"
                          catenateWords="0"
                          catenateNumbers="0"
                          catenateAll="0"
                          splitOnCaseChange="0"/>
                  <filter class="solr.LowerCaseFilterFactory"/>
                  <filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ ])" replacement="" replace="all"/>
                  <filter class="solr.PatternReplaceFilterFactory" pattern="^(.{20})(.*)?" replacement="$1" replace="all"/>
              </analyzer>
          </fieldType>
      
          <fieldType name="alphaNumericSort"
                     class="solr.TextField"
                     sortMissingLast="true"
                     omitNorms="true">
              <!-- Builds a single sort key per value: the whole input is one token that is
                   lowercased and trimmed before the two replacements below run. -->
              <analyzer>
                  <tokenizer class="solr.KeywordTokenizerFactory"/>
                  <filter class="solr.LowerCaseFilterFactory"/>
                  <filter class="solr.TrimFilterFactory"/>
                  <!-- Strip one leading English or French article ("a ", "the ", "les ", ...). -->
                  <filter class="solr.PatternReplaceFilterFactory" pattern="^(a |the |les |la |le |l'|de la |du |des )" replacement="" replace="all"/>
                  <!-- Remove every character that is not a-z or 0-9. -->
                  <filter class="solr.PatternReplaceFilterFactory" pattern="([^a-z0-9])" replacement="" replace="all"/>
              </analyzer>
          </fieldType>
      
          <fieldType name="text_auto" class="solr.TextField">
              <!-- The whole value is kept as a single token and lowercased. -->
              <analyzer>
                  <tokenizer class="solr.KeywordTokenizerFactory"/>
                  <filter class="solr.LowerCaseFilterFactory"/>
              </analyzer>
          </fieldType>
      
    2. Add the Fields needed
      <!-- One suggest field per analysis strategy; each uses the field type of the same name. -->
      <field name="person_suggest" type="text_suggest" indexed="true" stored="true"/>
      <field name="person_suggest_edge" type="text_suggest_edge" indexed="true" stored="true"/>
      <field name="person_suggest_ngram" type="text_suggest_ngram" indexed="true" stored="true"/>
      <!-- Explicit declaration of the source field. multiValued is spelled out so the
           field stays single-valued regardless of the schema version's default. -->
      <field name="Person_s" type="text_auto" indexed="true" stored="true" multiValued="false"/>
      

      The field we use in our wiki is called "Person", so the automatically generated Solr field is called "Person_s", because it's a string. We need to define it ourselves because the auto-generated field is multivalued, and this setup doesn't work with multivalued fields!

    3. Create CopyFields for your new defined Fields
      <!-- Copy Person_s into each of the three suggest fields. -->
      <copyField source="Person_s" dest="person_suggest"/>
      <copyField source="Person_s" dest="person_suggest_edge"/>
      <copyField source="Person_s" dest="person_suggest_ngram"/>
      
  2. Edit your solrconfig.xml
    <requestHandler name="/suggestPerson" class="solr.SearchHandler">
            <!-- Defaults applied to every request sent to /suggestPerson. -->
            <lst name="defaults">
                <!-- Query parsing: edismax over the suggest fields, with the edge field as phrase boost. -->
                <str name="defType">edismax</str>
                <str name="qf">person_suggest^15 person_suggest_ngram^25.0</str>
                <str name="pf">person_suggest_edge^25.0</str>
                <!-- Result shape: ten rows, best score first, dbkey as tie-breaker. -->
                <str name="rows">10</str>
                <str name="fl">Person_s,dbkey,score</str>
                <str name="sort">score desc,dbkey asc</str>
                <str name="echoParams">explicit</str>
            </lst>
        </requestHandler>
    
  3. Restart your Solr and start a re-index
  4. Edit your Semantic Forms Extension
    1. go to <your-wiki>/extensions/SemanticForms/includes/
    2. Edit SF_Utils.php at line 615. Before:
      $return_values = array();
      foreach ( $data->sfautocomplete as $val ) {
             $return_values[] = (array)$val;
      }
      return $return_values;
      

      after:

      $return_values = array();
      if ( strpos( $external_url_alias, 'solr' ) === false ) {
             // Alias does not contain "solr": keep the stock handling and cast each
             // entry of sfautocomplete to an array.
             foreach ( $data->sfautocomplete as $val ) {
                  $return_values[] = (array)$val;
             }
      } elseif ( isset( $data->response->docs ) ) {
             // Solr JSON response: map every document's Person_s value to a "title" entry.
             // The isset() guards keep an error response (no response->docs) or a document
             // without Person_s from raising notices or producing null titles.
             foreach ( $data->response->docs as $doc ) {
                  if ( isset( $doc->{'Person_s'} ) ) {
                       $return_values[] = array( "title" => $doc->{'Person_s'} );
                  }
             }
      }
      return $return_values;
      
  5. Edit your LocalSettings.php
    // Register the "solrPerson" URL alias, pointing at the /suggestPerson handler below $wgSolrUrl.
    $sfgAutocompletionURLs['solrPerson'] = $wgSolrUrl . '/suggestPerson?q=<substr>&wt=json&indent=true&omitHeader=true';
    
  6. Edit your Form
    {{{field|Person|input type=textarea with autocomplete|autogrow|values from url=solrPerson}}}