<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
 <record>
  <leader>     caa a22        4500</leader>
  <controlfield tag="001">463251350</controlfield>
  <controlfield tag="003">CHVBK</controlfield>
  <controlfield tag="005">20180405153341.0</controlfield>
  <controlfield tag="007">cr unu---uuuuu</controlfield>
  <controlfield tag="008">170326e20070101xx      s     000 0 eng  </controlfield>
  <datafield tag="024" ind1="7" ind2="0">
   <subfield code="a">10.1007/s10791-006-9004-6</subfield>
   <subfield code="2">doi</subfield>
  </datafield>
  <datafield tag="035" ind1=" " ind2=" ">
   <subfield code="a">(NATIONALLICENCE)springer-10.1007/s10791-006-9004-6</subfield>
  </datafield>
  <datafield tag="245" ind1="0" ind2="0">
   <subfield code="a">Extending WHIRL with background knowledge for improved text classification</subfield>
   <subfield code="h">[Elektronische Daten]</subfield>
   <subfield code="c">[Sarah Zelikovitz, William Cohen, Haym Hirsh]</subfield>
  </datafield>
  <datafield tag="520" ind1="3" ind2=" ">
   <subfield code="a">Intelligent use of the many diverse forms of data available on the Internet requires new tools for managing and manipulating heterogeneous forms of information. This paper uses WHIRL, an extension of relational databases that can manipulate textual data using statistical similarity measures developed by the information retrieval community. We show that although WHIRL is designed for more general similarity-based reasoning tasks, it is competitive with mature systems designed explicitly for inductive classification. In particular, WHIRL is well suited for combining different sources of knowledge in the classification process. We show on a diverse set of tasks that the use of appropriate sets of unlabeled background knowledge often decreases error rates, particularly if the number of examples or the size of the strings in the training set is small. This is especially useful when labeling text is a labor-intensive job and when there is a large amount of information available about a particular problem on the World Wide Web.</subfield>
  </datafield>
  <datafield tag="540" ind1=" " ind2=" ">
   <subfield code="a">Springer Science + Business Media, LLC, 2006</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2="7">
   <subfield code="a">Text categorization</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2="7">
   <subfield code="a">Background knowledge</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2="7">
   <subfield code="a">Semi-supervised learning</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2="7">
   <subfield code="a">Information retrieval</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="690" ind1=" " ind2="7">
   <subfield code="a">Query processing</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="700" ind1="1" ind2=" ">
   <subfield code="a">Zelikovitz</subfield>
   <subfield code="D">Sarah</subfield>
   <subfield code="u">Computer Science Department, College of Staten Island of CUNY, 2800 Victory Blvd, 10314, Staten Island, NY, USA</subfield>
   <subfield code="4">aut</subfield>
  </datafield>
  <datafield tag="700" ind1="1" ind2=" ">
   <subfield code="a">Cohen</subfield>
   <subfield code="D">William</subfield>
   <subfield code="u">Center for Automated Learning and Discovery, Carnegie Mellon University, 500 Forbes Avenue, 15213, Pittsburgh, PA, USA</subfield>
   <subfield code="4">aut</subfield>
  </datafield>
  <datafield tag="700" ind1="1" ind2=" ">
   <subfield code="a">Hirsh</subfield>
   <subfield code="D">Haym</subfield>
   <subfield code="u">Department of Computer Science, Rutgers University, 110 Frelinghuysen Road, 08854-8019, Piscataway, NJ, USA</subfield>
   <subfield code="4">aut</subfield>
  </datafield>
  <datafield tag="773" ind1="0" ind2=" ">
   <subfield code="t">Information Retrieval</subfield>
   <subfield code="d">Kluwer Academic Publishers</subfield>
   <subfield code="g">10/1(2007-01-01), 35-67</subfield>
   <subfield code="x">1386-4564</subfield>
   <subfield code="q">10:1&lt;35</subfield>
   <subfield code="1">2007</subfield>
   <subfield code="2">10</subfield>
   <subfield code="o">10791</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2="0">
   <subfield code="u">https://doi.org/10.1007/s10791-006-9004-6</subfield>
   <subfield code="q">text/html</subfield>
   <subfield code="z">Onlinezugriff via DOI</subfield>
  </datafield>
  <datafield tag="908" ind1=" " ind2=" ">
   <subfield code="D">1</subfield>
   <subfield code="a">research-article</subfield>
   <subfield code="2">jats</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">NATIONALLICENCE</subfield>
   <subfield code="P">856</subfield>
   <subfield code="E">40</subfield>
   <subfield code="u">https://doi.org/10.1007/s10791-006-9004-6</subfield>
   <subfield code="q">text/html</subfield>
   <subfield code="z">Onlinezugriff via DOI</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">NATIONALLICENCE</subfield>
   <subfield code="P">700</subfield>
   <subfield code="E">1-</subfield>
   <subfield code="a">Zelikovitz</subfield>
   <subfield code="D">Sarah</subfield>
   <subfield code="u">Computer Science Department, College of Staten Island of CUNY, 2800 Victory Blvd, 10314, Staten Island, NY, USA</subfield>
   <subfield code="4">aut</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">NATIONALLICENCE</subfield>
   <subfield code="P">700</subfield>
   <subfield code="E">1-</subfield>
   <subfield code="a">Cohen</subfield>
   <subfield code="D">William</subfield>
   <subfield code="u">Center for Automated Learning and Discovery, Carnegie Mellon University, 500 Forbes Avenue, 15213, Pittsburgh, PA, USA</subfield>
   <subfield code="4">aut</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">NATIONALLICENCE</subfield>
   <subfield code="P">700</subfield>
   <subfield code="E">1-</subfield>
   <subfield code="a">Hirsh</subfield>
   <subfield code="D">Haym</subfield>
   <subfield code="u">Department of Computer Science, Rutgers University, 110 Frelinghuysen Road, 08854-8019, Piscataway, NJ, USA</subfield>
   <subfield code="4">aut</subfield>
  </datafield>
  <datafield tag="950" ind1=" " ind2=" ">
   <subfield code="B">NATIONALLICENCE</subfield>
   <subfield code="P">773</subfield>
   <subfield code="E">0-</subfield>
   <subfield code="t">Information Retrieval</subfield>
   <subfield code="d">Kluwer Academic Publishers</subfield>
   <subfield code="g">10/1(2007-01-01), 35-67</subfield>
   <subfield code="x">1386-4564</subfield>
   <subfield code="q">10:1&lt;35</subfield>
   <subfield code="1">2007</subfield>
   <subfield code="2">10</subfield>
   <subfield code="o">10791</subfield>
  </datafield>
  <datafield tag="900" ind1=" " ind2="7">
   <subfield code="a">Metadata rights reserved</subfield>
   <subfield code="b">Springer special CC-BY-NC licence</subfield>
   <subfield code="2">nationallicence</subfield>
  </datafield>
  <datafield tag="898" ind1=" " ind2=" ">
   <subfield code="a">BK010053</subfield>
   <subfield code="b">XK010053</subfield>
   <subfield code="c">XK010000</subfield>
  </datafield>
  <datafield tag="949" ind1=" " ind2=" ">
   <subfield code="B">NATIONALLICENCE</subfield>
   <subfield code="F">NATIONALLICENCE</subfield>
   <subfield code="b">NL-springer</subfield>
  </datafield>
 </record>
</collection>
