<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
 <record>
  <leader>     caa a22        4500</leader>
  <controlfield tag="001">605477949</controlfield>
  <controlfield tag="003">CHVBK</controlfield>
  <controlfield tag="005">20210128100403.0</controlfield>
  <controlfield tag="007">cr unu---uuuuu</controlfield>
  <controlfield tag="008">210128e20151001xx      s     000 0 eng  </controlfield>
  <datafield tag="024" ind1="7" ind2="0">
   <subfield code="a">10.1007/s10994-014-5476-6</subfield>
   <subfield code="2">doi</subfield>
  </datafield>
  <datafield tag="035" ind1=" " ind2=" ">
   <subfield code="a">(NATIONALLICENCE)springer-10.1007/s10994-014-5476-6</subfield>
  </datafield>
  <datafield tag="245" ind1="0" ind2="0">
   <subfield code="a">Additive regularization of topic models</subfield>
   <subfield code="h">[Elektronische Daten]</subfield>
   <subfield code="c">[Konstantin Vorontsov, Anna Potapenko]</subfield>
  </datafield>
  <datafield tag="520" ind1="3" ind2=" ">
   <subfield code="a">Probabilistic topic modeling of text collections has been recently developed mainly within the framework of graphical models and Bayesian inference. In this paper we introduce an alternative semi-probabilistic approach, which we call additive regularization of topic models (ARTM). Instead of building a purely probabilistic generative model of text we regularize an ill-posed problem of stochastic matrix factorization by maximizing a weighted sum of the log-likelihood and additional criteria. This approach enables us to combine probabilistic assumptions with linguistic and problem-specific requirements in a single multi-objective topic model. In the theoretical part of the work we derive the regularized EM-algorithm and provide a pool of regularizers, which can be applied together in any combination. We show that many models previously developed within Bayesian framework can be inferred easier within ARTM and in some cases generalized. In the experimental part we show that a combination of sparsing, smoothing, and decorrelation improves several quality measures at once with almost no loss of the likelihood.</subfield>
  </datafield>
  <datafield tag="540" ind1=" " ind2=" ">
   <subfield code="a">The Author(s), 2014</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2="7">
   <subfield code="a">Probabilistic topic modeling</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2="7">
   <subfield code="a">Regularization of ill-posed problems</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2="7">
   <subfield code="a">Probabilistic latent semantic analysis</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2="7">
   <subfield code="a">Latent Dirichlet allocation</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2="7">
   <subfield code="a">EM-algorithm</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="700" ind1="1" ind2=" ">
   <subfield code="a">Vorontsov</subfield>
   <subfield code="D">Konstantin</subfield>
   <subfield code="u">Department of Intelligent Systems at Dorodnicyn Computing Centre of RAS, Institute of Physics and Technology, Moscow, Russia</subfield>
   <subfield code="4">aut</subfield>
  </datafield>
  <datafield tag="700" ind1="1" ind2=" ">
   <subfield code="a">Potapenko</subfield>
   <subfield code="D">Anna</subfield>
   <subfield code="u">Computer Science Department, The Higher School of Economics, Moscow, Russia</subfield>
   <subfield code="4">aut</subfield>
  </datafield>
  <datafield tag="773" ind1="0" ind2=" ">
   <subfield code="t">Machine Learning</subfield>
   <subfield code="d">Springer US; http://www.springer-ny.com</subfield>
   <subfield code="g">101/1-3(2015-10-01), 303-323</subfield>
   <subfield code="x">0885-6125</subfield>
   <subfield code="q">101:1-3&lt;303</subfield>
   <subfield code="1">2015</subfield>
   <subfield code="2">101</subfield>
   <subfield code="o">10994</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2="0">
   <subfield code="u">https://doi.org/10.1007/s10994-014-5476-6</subfield>
   <subfield code="q">text/html</subfield>
   <subfield code="z">Onlinezugriff via DOI</subfield>
  </datafield>
  <datafield tag="898" ind1=" " ind2=" ">
   <subfield code="a">BK010053</subfield>
   <subfield code="b">XK010053</subfield>
   <subfield code="c">XK010000</subfield>
  </datafield>
  <datafield tag="900" ind1=" " ind2="7">
   <subfield code="a">Metadata rights reserved</subfield>
   <subfield code="b">Springer special CC-BY-NC licence</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="908" ind1=" " ind2=" ">
   <subfield code="D">1</subfield>
   <subfield code="a">research-article</subfield>
   <subfield code="2">jats</subfield>
  </datafield>
  <datafield tag="949" ind1=" " ind2=" ">
   <subfield code="B">NATIONALLICENCE</subfield>
   <subfield code="F">NATIONALLICENCE</subfield>
   <subfield code="b">NL-springer</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">NATIONALLICENCE</subfield>
   <subfield code="P">856</subfield>
   <subfield code="E">40</subfield>
   <subfield code="u">https://doi.org/10.1007/s10994-014-5476-6</subfield>
   <subfield code="q">text/html</subfield>
   <subfield code="z">Onlinezugriff via DOI</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">NATIONALLICENCE</subfield>
   <subfield code="P">700</subfield>
   <subfield code="E">1-</subfield>
   <subfield code="a">Vorontsov</subfield>
   <subfield code="D">Konstantin</subfield>
   <subfield code="u">Department of Intelligent Systems at Dorodnicyn Computing Centre of RAS, Institute of Physics and Technology, Moscow, Russia</subfield>
   <subfield code="4">aut</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">NATIONALLICENCE</subfield>
   <subfield code="P">700</subfield>
   <subfield code="E">1-</subfield>
   <subfield code="a">Potapenko</subfield>
   <subfield code="D">Anna</subfield>
   <subfield code="u">Computer Science Department, The Higher School of Economics, Moscow, Russia</subfield>
   <subfield code="4">aut</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">NATIONALLICENCE</subfield>
   <subfield code="P">773</subfield>
   <subfield code="E">0-</subfield>
   <subfield code="t">Machine Learning</subfield>
   <subfield code="d">Springer US; http://www.springer-ny.com</subfield>
   <subfield code="g">101/1-3(2015-10-01), 303-323</subfield>
   <subfield code="x">0885-6125</subfield>
   <subfield code="q">101:1-3&lt;303</subfield>
   <subfield code="1">2015</subfield>
   <subfield code="2">101</subfield>
   <subfield code="o">10994</subfield>
  </datafield>
 </record>
</collection>
