<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
 <record>
  <leader>     naa a22        4500</leader>
  <controlfield tag="001">528783246</controlfield>
  <controlfield tag="005">20180924065515.0</controlfield>
  <controlfield tag="007">cr unu---uuuuu</controlfield>
  <controlfield tag="008">180924e201706  xx      s     100 0 eng  </controlfield>
  <datafield tag="024" ind1="7" ind2="0">
   <subfield code="a">10.3929/ethz-b-000234616</subfield>
   <subfield code="2">doi</subfield>
  </datafield>
  <datafield tag="035" ind1=" " ind2=" ">
   <subfield code="a">(ETHRESEARCH)oai:www.research-collection.ethz.ch:20.500.11850/234616</subfield>
  </datafield>
  <datafield tag="245" ind1="0" ind2="0">
   <subfield code="a">Mison: A Fast JSON Parser for Data Analytics</subfield>
   <subfield code="h">[Elektronische Daten]</subfield>
   <subfield code="c">[Yinan Li, Nikos Katsipoulakis, Badrish Chandramouli, Jonathan Goldstein, Donald Kossman]</subfield>
  </datafield>
  <datafield tag="260" ind1=" " ind2=" ">
   <subfield code="b">Association for Computing Machinery</subfield>
   <subfield code="c">2017</subfield>
  </datafield>
  <datafield tag="506" ind1=" " ind2=" ">
   <subfield code="a">Open access</subfield>
   <subfield code="2">ethresearch</subfield>
  </datafield>
  <datafield tag="520" ind1="3" ind2=" ">
   <subfield code="a">The growing popularity of the JSON format has fueled increased interest in loading and processing JSON data within analytical data processing systems. However, in many applications, JSON parsing dominates performance and cost. In this paper, we present a new JSON parser called Mison that is particularly tailored to this class of applications, by pushing down both projection and filter operators of analytical queries into the parser. To achieve these features, we propose to deviate from the traditional approach of building parsers using finite state machines (FSMs). Instead, we follow a two-level approach that enables the parser to jump directly to the correct position of a queried field without having to perform expensive tokenizing steps to find the field. At the upper level, Mison speculatively predicts the logical locations of queried fields based on previously seen patterns in a dataset. At the lower level, Mison builds structural indices on JSON data to map logical locations to physical locations. Unlike all existing FSM-based parsers, building structural indices converts control flow into data flow, thereby largely eliminating inherently unpredictable branches in the program and exploiting the parallelism available in modern processors. We experimentally evaluate Mison using representative real-world JSON datasets and the TPC-H benchmark, and show that Mison produces significant performance benefits over the best existing JSON parsers; in some cases, the performance improvement is over one order of magnitude.</subfield>
  </datafield>
  <datafield tag="520" ind1="2" ind2=" ">
   <subfield code="a">43rd International Conference on Very Large Data Bases (VLDB 2017) in Munich, Germany (August 28 - September 1, 2017)</subfield>
  </datafield>
  <datafield tag="540" ind1=" " ind2=" ">
   <subfield code="a">Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International</subfield>
   <subfield code="u">http://creativecommons.org/licenses/by-nc-nd/4.0</subfield>
   <subfield code="2">ethresearch</subfield>
  </datafield>
  <datafield tag="700" ind1="1" ind2=" ">
   <subfield code="a">Li</subfield>
   <subfield code="D">Yinan</subfield>
   <subfield code="e">joint author</subfield>
  </datafield>
  <datafield tag="700" ind1="1" ind2=" ">
   <subfield code="a">Katsipoulakis</subfield>
   <subfield code="D">Nikos</subfield>
   <subfield code="e">joint author</subfield>
  </datafield>
  <datafield tag="700" ind1="1" ind2=" ">
   <subfield code="a">Chandramouli</subfield>
   <subfield code="D">Badrish</subfield>
   <subfield code="e">joint author</subfield>
  </datafield>
  <datafield tag="700" ind1="1" ind2=" ">
   <subfield code="a">Goldstein</subfield>
   <subfield code="D">Jonathan</subfield>
   <subfield code="e">joint author</subfield>
  </datafield>
  <datafield tag="700" ind1="1" ind2=" ">
   <subfield code="a">Kossman</subfield>
   <subfield code="D">Donald</subfield>
   <subfield code="e">joint author</subfield>
  </datafield>
  <datafield tag="773" ind1="0" ind2=" ">
   <subfield code="t">Proceedings of the VLDB Endowment</subfield>
   <subfield code="d">Association for Computing Machinery</subfield>
   <subfield code="g">pp. 1118-1129</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2="0">
   <subfield code="u">http://hdl.handle.net/20.500.11850/234616</subfield>
   <subfield code="q">text/html</subfield>
   <subfield code="z">WWW-Backlink auf das Repository (Open access)</subfield>
  </datafield>
  <datafield tag="908" ind1=" " ind2=" ">
   <subfield code="D">1</subfield>
   <subfield code="a">Conference Paper</subfield>
   <subfield code="2">ethresearch</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">ETHRESEARCH</subfield>
   <subfield code="P">856</subfield>
   <subfield code="E">40</subfield>
   <subfield code="u">http://hdl.handle.net/20.500.11850/234616</subfield>
   <subfield code="q">text/html</subfield>
   <subfield code="z">WWW-Backlink auf das Repository (Open access)</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">ETHRESEARCH</subfield>
   <subfield code="P">700</subfield>
   <subfield code="E">1-</subfield>
   <subfield code="a">Li</subfield>
   <subfield code="D">Yinan</subfield>
   <subfield code="e">joint author</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">ETHRESEARCH</subfield>
   <subfield code="P">700</subfield>
   <subfield code="E">1-</subfield>
   <subfield code="a">Katsipoulakis</subfield>
   <subfield code="D">Nikos</subfield>
   <subfield code="e">joint author</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">ETHRESEARCH</subfield>
   <subfield code="P">700</subfield>
   <subfield code="E">1-</subfield>
   <subfield code="a">Chandramouli</subfield>
   <subfield code="D">Badrish</subfield>
   <subfield code="e">joint author</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">ETHRESEARCH</subfield>
   <subfield code="P">700</subfield>
   <subfield code="E">1-</subfield>
   <subfield code="a">Goldstein</subfield>
   <subfield code="D">Jonathan</subfield>
   <subfield code="e">joint author</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">ETHRESEARCH</subfield>
   <subfield code="P">700</subfield>
   <subfield code="E">1-</subfield>
   <subfield code="a">Kossman</subfield>
   <subfield code="D">Donald</subfield>
   <subfield code="e">joint author</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">ETHRESEARCH</subfield>
   <subfield code="P">773</subfield>
   <subfield code="E">0-</subfield>
   <subfield code="t">Proceedings of the VLDB Endowment</subfield>
   <subfield code="d">Association for Computing Machinery</subfield>
   <subfield code="g">pp. 1118-1129</subfield>
  </datafield>
  <datafield tag="898" ind1=" " ind2=" ">
   <subfield code="a">BK010053</subfield>
   <subfield code="b">XK010053</subfield>
   <subfield code="c">XK010000</subfield>
  </datafield>
  <datafield tag="949" ind1=" " ind2=" ">
   <subfield code="B">ETHRESEARCH</subfield>
   <subfield code="F">ETHRESEARCH</subfield>
   <subfield code="b">ETHRESEARCH</subfield>
   <subfield code="j">Conference Paper</subfield>
   <subfield code="c">Open access</subfield>
  </datafield>
 </record>
</collection>
