<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
 <record>
  <leader>     caa a22        4500</leader>
  <controlfield tag="001">605478279</controlfield>
  <controlfield tag="003">CHVBK</controlfield>
  <controlfield tag="005">20210128100404.0</controlfield>
  <controlfield tag="007">cr unu---uuuuu</controlfield>
  <controlfield tag="008">210128e20150901xx      s     000 0 eng  </controlfield>
  <datafield tag="024" ind1="7" ind2="0">
   <subfield code="a">10.1007/s10994-015-5484-1</subfield>
   <subfield code="2">doi</subfield>
  </datafield>
  <datafield tag="035" ind1=" " ind2=" ">
   <subfield code="a">(NATIONALLICENCE)springer-10.1007/s10994-015-5484-1</subfield>
  </datafield>
  <datafield tag="245" ind1="0" ind2="0">
   <subfield code="a">Policy gradient in Lipschitz Markov Decision Processes</subfield>
   <subfield code="h">[Elektronische Daten]</subfield>
   <subfield code="c">[Matteo Pirotta, Marcello Restelli, Luca Bascetta]</subfield>
  </datafield>
  <datafield tag="520" ind1="3" ind2=" ">
   <subfield code="a">This paper is about the exploitation of Lipschitz continuity properties for Markov Decision Processes to safely speed up policy-gradient algorithms. Starting from assumptions about the Lipschitz continuity of the state-transition model, the reward function, and the policies considered in the learning process, we show that both the expected return of a policy and its gradient are Lipschitz continuous w.r.t. policy parameters. By leveraging such properties, we define policy-parameter updates that guarantee a performance improvement at each iteration. The proposed methods are empirically evaluated and compared to other related approaches using different configurations of three popular control scenarios: the linear quadratic regulator, the mass-spring-damper system and the ship-steering control.</subfield>
  </datafield>
  <datafield tag="540" ind1=" " ind2=" ">
   <subfield code="a">The Author(s), 2015</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2="7">
   <subfield code="a">Reinforcement learning</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2="7">
   <subfield code="a">Markov Decision Process</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2="7">
   <subfield code="a">Lipschitz continuity</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2="7">
   <subfield code="a">Policy gradient algorithm</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="700" ind1="1" ind2=" ">
   <subfield code="a">Pirotta</subfield>
   <subfield code="D">Matteo</subfield>
   <subfield code="u">Department of Electronics, Information, and Bioengineering, Politecnico di Milano, Piazza Leonardo Da Vinci, 32, I-20133, Milan, Italy</subfield>
   <subfield code="4">aut</subfield>
  </datafield>
  <datafield tag="700" ind1="1" ind2=" ">
   <subfield code="a">Restelli</subfield>
   <subfield code="D">Marcello</subfield>
   <subfield code="u">Department of Electronics, Information, and Bioengineering, Politecnico di Milano, Piazza Leonardo Da Vinci, 32, I-20133, Milan, Italy</subfield>
   <subfield code="4">aut</subfield>
  </datafield>
  <datafield tag="700" ind1="1" ind2=" ">
   <subfield code="a">Bascetta</subfield>
   <subfield code="D">Luca</subfield>
   <subfield code="u">Department of Electronics, Information, and Bioengineering, Politecnico di Milano, Piazza Leonardo Da Vinci, 32, I-20133, Milan, Italy</subfield>
   <subfield code="4">aut</subfield>
  </datafield>
  <datafield tag="773" ind1="0" ind2=" ">
   <subfield code="t">Machine Learning</subfield>
   <subfield code="d">Springer US; http://www.springer-ny.com</subfield>
   <subfield code="g">100/2-3(2015-09-01), 255-283</subfield>
   <subfield code="x">0885-6125</subfield>
   <subfield code="q">100:2-3&lt;255</subfield>
   <subfield code="1">2015</subfield>
   <subfield code="2">100</subfield>
   <subfield code="o">10994</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2="0">
   <subfield code="u">https://doi.org/10.1007/s10994-015-5484-1</subfield>
   <subfield code="q">text/html</subfield>
   <subfield code="z">Onlinezugriff via DOI</subfield>
  </datafield>
  <datafield tag="898" ind1=" " ind2=" ">
   <subfield code="a">BK010053</subfield>
   <subfield code="b">XK010053</subfield>
   <subfield code="c">XK010000</subfield>
  </datafield>
  <datafield tag="900" ind1=" " ind2="7">
   <subfield code="a">Metadata rights reserved</subfield>
   <subfield code="b">Springer special CC-BY-NC licence</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="908" ind1=" " ind2=" ">
   <subfield code="D">1</subfield>
   <subfield code="a">research-article</subfield>
   <subfield code="2">jats</subfield>
  </datafield>
  <datafield tag="949" ind1=" " ind2=" ">
   <subfield code="B">NATIONALLICENCE</subfield>
   <subfield code="F">NATIONALLICENCE</subfield>
   <subfield code="b">NL-springer</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">NATIONALLICENCE</subfield>
   <subfield code="P">856</subfield>
   <subfield code="E">40</subfield>
   <subfield code="u">https://doi.org/10.1007/s10994-015-5484-1</subfield>
   <subfield code="q">text/html</subfield>
   <subfield code="z">Onlinezugriff via DOI</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">NATIONALLICENCE</subfield>
   <subfield code="P">700</subfield>
   <subfield code="E">1-</subfield>
   <subfield code="a">Pirotta</subfield>
   <subfield code="D">Matteo</subfield>
   <subfield code="u">Department of Electronics, Information, and Bioengineering, Politecnico di Milano, Piazza Leonardo Da Vinci, 32, I-20133, Milan, Italy</subfield>
   <subfield code="4">aut</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">NATIONALLICENCE</subfield>
   <subfield code="P">700</subfield>
   <subfield code="E">1-</subfield>
   <subfield code="a">Restelli</subfield>
   <subfield code="D">Marcello</subfield>
   <subfield code="u">Department of Electronics, Information, and Bioengineering, Politecnico di Milano, Piazza Leonardo Da Vinci, 32, I-20133, Milan, Italy</subfield>
   <subfield code="4">aut</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">NATIONALLICENCE</subfield>
   <subfield code="P">700</subfield>
   <subfield code="E">1-</subfield>
   <subfield code="a">Bascetta</subfield>
   <subfield code="D">Luca</subfield>
   <subfield code="u">Department of Electronics, Information, and Bioengineering, Politecnico di Milano, Piazza Leonardo Da Vinci, 32, I-20133, Milan, Italy</subfield>
   <subfield code="4">aut</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">NATIONALLICENCE</subfield>
   <subfield code="P">773</subfield>
   <subfield code="E">0-</subfield>
   <subfield code="t">Machine Learning</subfield>
   <subfield code="d">Springer US; http://www.springer-ny.com</subfield>
   <subfield code="g">100/2-3(2015-09-01), 255-283</subfield>
   <subfield code="x">0885-6125</subfield>
   <subfield code="q">100:2-3&lt;255</subfield>
   <subfield code="1">2015</subfield>
   <subfield code="2">100</subfield>
   <subfield code="o">10994</subfield>
  </datafield>
 </record>
</collection>
