<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
 <record>
  <leader>     caa a22        4500</leader>
  <controlfield tag="001">606159711</controlfield>
  <controlfield tag="003">CHVBK</controlfield>
  <controlfield tag="005">20210128100626.0</controlfield>
  <controlfield tag="007">cr unu---uuuuu</controlfield>
  <controlfield tag="008">210128e20150501xx      s     000 0 eng  </controlfield>
  <datafield tag="024" ind1="7" ind2="0">
   <subfield code="a">10.1007/s00521-014-1738-2</subfield>
   <subfield code="2">doi</subfield>
  </datafield>
  <datafield tag="035" ind1=" " ind2=" ">
   <subfield code="a">(NATIONALLICENCE)springer-10.1007/s00521-014-1738-2</subfield>
  </datafield>
  <datafield tag="245" ind1="0" ind2="2">
   <subfield code="a">A data-based online reinforcement learning algorithm satisfying probably approximately correct principle</subfield>
   <subfield code="h">[Elektronische Daten]</subfield>
   <subfield code="c">[Yuanheng Zhu, Dongbin Zhao]</subfield>
  </datafield>
  <datafield tag="520" ind1="3" ind2=" ">
   <subfield code="a">This paper proposes a probably approximately correct (PAC) algorithm that directly utilizes online data efficiently to solve the optimal control problem of continuous deterministic systems without system parameters for the first time. The dependence on some specific approximation structures is crucial to limit the wide application of online reinforcement learning (RL) algorithms. We utilize the online data directly with the kd-tree technique to remove this limitation. Moreover, we design the algorithm in the PAC principle. Complete theoretical proofs are presented, and three examples are simulated to verify its good performance. It draws the conclusion that the proposed RL algorithm specifies the maximum running time to reach a near-optimal control policy with only online data.</subfield>
  </datafield>
  <datafield tag="540" ind1=" " ind2=" ">
   <subfield code="a">The Natural Computing Applications Forum, 2014</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2="7">
   <subfield code="a">Reinforcement learning</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2="7">
   <subfield code="a">Probably approximately correct</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2="7">
   <subfield code="a">Kd-tree</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="700" ind1="1" ind2=" ">
   <subfield code="a">Zhu</subfield>
   <subfield code="D">Yuanheng</subfield>
   <subfield code="u">The State Key Laboratory of Management and Control for Complex Systems, Institution of Automation, Chinese Academy of Sciences, Beijing, China</subfield>
   <subfield code="4">aut</subfield>
  </datafield>
  <datafield tag="700" ind1="1" ind2=" ">
   <subfield code="a">Zhao</subfield>
   <subfield code="D">Dongbin</subfield>
   <subfield code="u">The State Key Laboratory of Management and Control for Complex Systems, Institution of Automation, Chinese Academy of Sciences, Beijing, China</subfield>
   <subfield code="4">aut</subfield>
  </datafield>
  <datafield tag="773" ind1="0" ind2=" ">
   <subfield code="t">Neural Computing and Applications</subfield>
   <subfield code="d">Springer London</subfield>
   <subfield code="g">26/4(2015-05-01), 775-787</subfield>
   <subfield code="x">0941-0643</subfield>
   <subfield code="q">26:4&lt;775</subfield>
   <subfield code="1">2015</subfield>
   <subfield code="2">26</subfield>
   <subfield code="o">521</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2="0">
   <subfield code="u">https://doi.org/10.1007/s00521-014-1738-2</subfield>
   <subfield code="q">text/html</subfield>
   <subfield code="z">Onlinezugriff via DOI</subfield>
  </datafield>
  <datafield tag="898" ind1=" " ind2=" ">
   <subfield code="a">BK010053</subfield>
   <subfield code="b">XK010053</subfield>
   <subfield code="c">XK010000</subfield>
  </datafield>
  <datafield tag="900" ind1=" " ind2="7">
   <subfield code="a">Metadata rights reserved</subfield>
   <subfield code="b">Springer special CC-BY-NC licence</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="908" ind1=" " ind2=" ">
   <subfield code="D">1</subfield>
   <subfield code="a">research-article</subfield>
   <subfield code="2">jats</subfield>
  </datafield>
  <datafield tag="949" ind1=" " ind2=" ">
   <subfield code="B">NATIONALLICENCE</subfield>
   <subfield code="F">NATIONALLICENCE</subfield>
   <subfield code="b">NL-springer</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">NATIONALLICENCE</subfield>
   <subfield code="P">856</subfield>
   <subfield code="E">40</subfield>
   <subfield code="u">https://doi.org/10.1007/s00521-014-1738-2</subfield>
   <subfield code="q">text/html</subfield>
   <subfield code="z">Onlinezugriff via DOI</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">NATIONALLICENCE</subfield>
   <subfield code="P">700</subfield>
   <subfield code="E">1-</subfield>
   <subfield code="a">Zhu</subfield>
   <subfield code="D">Yuanheng</subfield>
   <subfield code="u">The State Key Laboratory of Management and Control for Complex Systems, Institution of Automation, Chinese Academy of Sciences, Beijing, China</subfield>
   <subfield code="4">aut</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">NATIONALLICENCE</subfield>
   <subfield code="P">700</subfield>
   <subfield code="E">1-</subfield>
   <subfield code="a">Zhao</subfield>
   <subfield code="D">Dongbin</subfield>
   <subfield code="u">The State Key Laboratory of Management and Control for Complex Systems, Institution of Automation, Chinese Academy of Sciences, Beijing, China</subfield>
   <subfield code="4">aut</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">NATIONALLICENCE</subfield>
   <subfield code="P">773</subfield>
   <subfield code="E">0-</subfield>
   <subfield code="t">Neural Computing and Applications</subfield>
   <subfield code="d">Springer London</subfield>
   <subfield code="g">26/4(2015-05-01), 775-787</subfield>
   <subfield code="x">0941-0643</subfield>
   <subfield code="q">26:4&lt;775</subfield>
   <subfield code="1">2015</subfield>
   <subfield code="2">26</subfield>
   <subfield code="o">521</subfield>
  </datafield>
 </record>
</collection>
