<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
 <record>
  <leader>     caa a22        4500</leader>
  <controlfield tag="001">463251431</controlfield>
  <controlfield tag="003">CHVBK</controlfield>
  <controlfield tag="005">20180405153341.0</controlfield>
  <controlfield tag="007">cr unu---uuuuu</controlfield>
  <controlfield tag="008">170326e20071201xx      s     000 0 eng  </controlfield>
  <datafield tag="024" ind1="7" ind2="0">
   <subfield code="a">10.1007/s10791-007-9033-9</subfield>
   <subfield code="2">doi</subfield>
  </datafield>
  <datafield tag="035" ind1=" " ind2=" ">
   <subfield code="a">(NATIONALLICENCE)springer-10.1007/s10791-007-9033-9</subfield>
  </datafield>
  <datafield tag="100" ind1="1" ind2=" ">
   <subfield code="a">Savoy</subfield>
   <subfield code="D">Jacques</subfield>
   <subfield code="u">Computer Science Department, University of Neuchatel, Rue Emile Argand 11, 2009, Neuchatel, Switzerland</subfield>
   <subfield code="4">aut</subfield>
  </datafield>
  <datafield tag="245" ind1="1" ind2="0">
   <subfield code="a">Searching strategies for the Bulgarian language</subfield>
   <subfield code="h">[Elektronische Daten]</subfield>
   <subfield code="c">[Jacques Savoy]</subfield>
  </datafield>
  <datafield tag="520" ind1="3" ind2=" ">
   <subfield code="a">This paper reports on the underlying IR problems encountered when indexing and searching with the Bulgarian language. For this language we propose a general light stemmer and demonstrate that it can be quite effective, producing significantly better MAP (around + 34%) than an approach not applying stemming. We implement the GL2 model derived from the Divergence from Randomness paradigm and find its retrieval effectiveness better than other probabilistic, vector-space and language models. The resulting MAP is found to be about 50% better than the classical tf idf approach. Moreover, increasing the query size enhances the MAP by around 10% (from T to TD). In order to compare the retrieval effectiveness of our suggested stopword list and the light stemmer developed for the Bulgarian language, we conduct a set of experiments on another stopword list and also a more complex and aggressive stemmer. Results tend to indicate that there is no statistically significant difference between these variants and our suggested approach. This paper evaluates other indexing strategies such as 4-gram indexing and indexing based on the automatic decompounding of compound words. Finally, we analyze certain queries to discover why we obtained poor results, when indexing Bulgarian documents using the suggested word-based approach.</subfield>
  </datafield>
  <datafield tag="540" ind1=" " ind2=" ">
   <subfield code="a">Springer Science+Business Media, LLC, 2007</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2="7">
   <subfield code="a">Cross-language information retrieval</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2="7">
   <subfield code="a">Bulgarian IR</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2="7">
   <subfield code="a">Stemmer</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2="7">
   <subfield code="a">Evaluation</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2="7">
   <subfield code="a">Morphology</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="773" ind1="0" ind2=" ">
   <subfield code="t">Information Retrieval</subfield>
   <subfield code="d">Springer Netherlands</subfield>
   <subfield code="g">10/6(2007-12-01), 509-529</subfield>
   <subfield code="x">1386-4564</subfield>
   <subfield code="q">10:6&lt;509</subfield>
   <subfield code="1">2007</subfield>
   <subfield code="2">10</subfield>
   <subfield code="o">10791</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2="0">
   <subfield code="u">https://doi.org/10.1007/s10791-007-9033-9</subfield>
   <subfield code="q">text/html</subfield>
   <subfield code="z">Onlinezugriff via DOI</subfield>
  </datafield>
  <datafield tag="908" ind1=" " ind2=" ">
   <subfield code="D">1</subfield>
   <subfield code="a">research-article</subfield>
   <subfield code="2">jats</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">NATIONALLICENCE</subfield>
   <subfield code="P">856</subfield>
   <subfield code="E">40</subfield>
   <subfield code="u">https://doi.org/10.1007/s10791-007-9033-9</subfield>
   <subfield code="q">text/html</subfield>
   <subfield code="z">Onlinezugriff via DOI</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">NATIONALLICENCE</subfield>
   <subfield code="P">100</subfield>
   <subfield code="E">1-</subfield>
   <subfield code="a">Savoy</subfield>
   <subfield code="D">Jacques</subfield>
   <subfield code="u">Computer Science Department, University of Neuchatel, Rue Emile Argand 11, 2009, Neuchatel, Switzerland</subfield>
   <subfield code="4">aut</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">NATIONALLICENCE</subfield>
   <subfield code="P">773</subfield>
   <subfield code="E">0-</subfield>
   <subfield code="t">Information Retrieval</subfield>
   <subfield code="d">Springer Netherlands</subfield>
   <subfield code="g">10/6(2007-12-01), 509-529</subfield>
   <subfield code="x">1386-4564</subfield>
   <subfield code="q">10:6&lt;509</subfield>
   <subfield code="1">2007</subfield>
   <subfield code="2">10</subfield>
   <subfield code="o">10791</subfield>
  </datafield>
  <datafield tag="900" ind1=" " ind2="7">
   <subfield code="a">Metadata rights reserved</subfield>
   <subfield code="b">Springer special CC-BY-NC licence</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="898" ind1=" " ind2=" ">
   <subfield code="a">BK010053</subfield>
   <subfield code="b">XK010053</subfield>
   <subfield code="c">XK010000</subfield>
  </datafield>
  <datafield tag="949" ind1=" " ind2=" ">
   <subfield code="B">NATIONALLICENCE</subfield>
   <subfield code="F">NATIONALLICENCE</subfield>
   <subfield code="b">NL-springer</subfield>
  </datafield>
 </record>
</collection>
