<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	xmlns:georss="http://www.georss.org/georss" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:media="http://search.yahoo.com/mrss/"
	>

<channel>
	<title>Unlocking data</title>
	<atom:link href="http://unlockdata.wordpress.com/feed/" rel="self" type="application/rss+xml" />
	<link>http://unlockdata.wordpress.com</link>
	<description>Web services to find data in your data</description>
	<lastBuildDate>Thu, 22 Jul 2010 09:25:37 +0000</lastBuildDate>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.com/</generator>
<cloud domain='unlockdata.wordpress.com' port='80' path='/?rsscloud=notify' registerProcedure='' protocol='http-post' />
<image>
		<url>http://s2.wp.com/i/buttonw-com.png</url>
		<title>Unlocking data</title>
		<link>http://unlockdata.wordpress.com</link>
	</image>
	<atom:link rel="search" type="application/opensearchdescription+xml" href="http://unlockdata.wordpress.com/osd.xml" title="Unlocking data" />
	<atom:link rel='hub' href='http://unlockdata.wordpress.com/?pushpress=hub'/>
		<item>
		<title>Moving the Unlock blog</title>
		<link>http://unlockdata.wordpress.com/2010/04/30/moving-the-unlock-blog-to-posterous/</link>
		<comments>http://unlockdata.wordpress.com/2010/04/30/moving-the-unlock-blog-to-posterous/#comments</comments>
		<pubDate>Fri, 30 Apr 2010 15:44:10 +0000</pubDate>
		<dc:creator>Jo Walsh</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://unlockdata.wordpress.com/2010/04/30/moving-the-unlock-blog-to-posterous/</guid>
		<description><![CDATA[We have proper blog hosting set up at EDINA so we&#8217;re moving the Unlock service blog to a new home: http://unlock.blogs.edina.ac.uk/ The past contents will stay here and also be duplicated at the new blog. Thanks.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unlockdata.wordpress.com&amp;blog=10290434&amp;post=232&amp;subd=unlockdata&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>We have proper blog hosting set up at <a href="http://edina.ac.uk">EDINA</a> so we&#8217;re moving the <a href="http://unlock.edina.ac.uk">Unlock service</a> blog to a new home:</p>
<h2><a title="Unlock service blog" href="http://unlock.blogs.edina.ac.uk/"><strong>http://unlock.blogs.edina.ac.uk/</strong></a></h2>
<p>The past contents will stay here and also be duplicated at the new blog. Thanks.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/unlockdata.wordpress.com/232/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/unlockdata.wordpress.com/232/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/unlockdata.wordpress.com/232/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/unlockdata.wordpress.com/232/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/unlockdata.wordpress.com/232/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/unlockdata.wordpress.com/232/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/unlockdata.wordpress.com/232/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/unlockdata.wordpress.com/232/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/unlockdata.wordpress.com/232/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/unlockdata.wordpress.com/232/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/unlockdata.wordpress.com/232/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/unlockdata.wordpress.com/232/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/unlockdata.wordpress.com/232/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/unlockdata.wordpress.com/232/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unlockdata.wordpress.com&amp;blog=10290434&amp;post=232&amp;subd=unlockdata&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://unlockdata.wordpress.com/2010/04/30/moving-the-unlock-blog-to-posterous/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6510bf72cbae293e2d25fc17b2a75e15?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">unlockjo</media:title>
		</media:content>
	</item>
		<item>
		<title>Unlock Places API — version 2.2</title>
		<link>http://unlockdata.wordpress.com/2010/04/21/unlock-places-api-version-2-2/</link>
		<comments>http://unlockdata.wordpress.com/2010/04/21/unlock-places-api-version-2-2/#comments</comments>
		<pubDate>Wed, 21 Apr 2010 11:42:12 +0000</pubDate>
		<dc:creator>Joe Vernon</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://unlockdata.wordpress.com/?p=222</guid>
		<description><![CDATA[The Unlock Places API was recently upgraded to include Ordnance Survey&#8217;s Open data. This feature rich data from Code-Point Open, Boundary-Line and the 1:50,000 gazetteer includes placenames and locations (points, boxes and shapes) and is now open for all to use! You can just get started with the API. We&#8217;ve also added new functionality to [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unlockdata.wordpress.com&amp;blog=10290434&amp;post=222&amp;subd=unlockdata&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p><a href="http://unlockdata.files.wordpress.com/2010/04/edinburgh_feature.png"><img class="size-medium wp-image-224 alignright" title="Feature (9656), Edinburgh, HTML view" src="http://unlockdata.files.wordpress.com/2010/04/edinburgh_feature.png?w=300&#038;h=157" alt="" width="300" height="157" /></a>The Unlock Places API was recently upgraded to include <strong>Ordnance Survey&#8217;s <a href="http://www.ordnancesurvey.co.uk/oswebsite/opendata/">Open</a> data</strong>. This feature rich data from Code-Point Open, Boundary-Line and the 1:50,000 gazetteer includes placenames and locations (points, boxes and shapes) and is now open for all to use! You can <a href="http://unlock.edina.ac.uk/getstarted.html">just get started with the API</a>.</p>
<p>We&#8217;ve also added new functionality to the service, including an HTML view for features, more feature attributes, the ability to request results in different coordinate systems as well as the usual speed improvements and bug-fixes.</p>
<p>The new data and features are available from Tuesday, 20th April 2010. Please visit the <a href="http://unlock.edina.ac.uk/queries.html">example queries</a> page to try out some of the queries.</p>
<p>We welcome any feedback on the new features &#8211; and if there&#8217;s anything you&#8217;d like to see in future versions of <a href="http://unlock.edina.ac.uk/">Unlock</a>, please let us know.  Alternatively, why not just get in touch to let us know how you&#8217;re using the service, we&#8217;d love to hear from you!</p>
<p>Full details of the changes are listed below the fold.</p>
<p><span id="more-222"></span></p>
<p><strong>Ordnance Survey Open data</strong><br />
Version 2.2 of the Unlock Places API brings a huge open data set from the Ordnance Survey, free for all to use! We have included data from:</p>
<ul>
<li>Boundary Line (open)</li>
<li>Code Point (open)</li>
<li>1:50,000 gazetteer (open)</li>
<li>Meridian2</li>
<li>OS Grid References</li>
</ul>
<p>Users from Digimap subscribing academic institutions can still register for an API key, which will grant them further access to additional, richer Ordnance Survey products:</p>
<ul>
<li>Boundary Line (non-open)</li>
<li>1:50,000 gazetteer (non-open)</li>
<li>Code Point Polygon (non-open)</li>
<li>MasterMap</li>
</ul>
<p>We&#8217;re working on including richer sources of data that provide global coverage, including the <a href="http://www.naturalearthdata.com/">Natural Earth</a> administrative boundary shapes, and shapes of things in <a href="http://www.openstreetmap.org/">OpenStreetmap</a>. Please <a href="http://unlock.edina.ac.uk/contact.html">contact us</a> if you know an open data source you&#8217;d really like to see included.</p>
<p><strong>HTML feature &amp; footprint views</strong><br />
A new HTML output format is available for features and footprints. This provides a web based view of the search results, with brief information. When performing a &#8216;featureLookup&#8217; to retrieve a single feature (or if a search returns a single result), more detailed information is listed with a web map showing the available geometry for its footprint. Links to footprint geometries in different output formats are also available. To see this format, simply add the format parameter &#8216;format=html&#8217; to your request, for example:</p>
<p><a href="http://unlockdata.files.wordpress.com/2010/04/edinburgh_121_found.png"><img class="aligncenter size-medium wp-image-227" title="Searching for 'Edinburgh', 121 features found" src="http://unlockdata.files.wordpress.com/2010/04/edinburgh_121_found.png?w=300&#038;h=182" alt="" width="300" height="182" /></a></p>
<p style="text-align:center;"><a href="http://unlock.edina.ac.uk/ws/nameSearch?name=edinburgh&amp;format=html">http://unlock.edina.ac.uk/ws/nameSearch?name=edinburgh&amp;format=html</a></p>
<p style="text-align:left;">Features also still have unique URLs, which can also be addressed as:</p>
<p style="text-align:left;"><a href="http://unlock.edina.ac.uk/feature/9657.html">http://unlock.edina.ac.uk/feature/9657.html</a></p>
<p><strong>Administration Boundaries</strong><br />
Features now include a hierarchy of administration boundaries (counties, unitary authorities, countries etc) to which they belong. For example, the village of Aviemore (feature 9762049) exists in:</p>
<ul>
<li>Level 1: Scotland Euro Region</li>
<li>Level 2: Highland</li>
</ul>
<p><strong>Source IDs and Alternative feature IDs</strong><br />
Feature data responses also contain some additional attributes, including the source identifier number &#8211; i.e. the original feature ID from Ordnance Survey and GeoNames.</p>
<p>Where available, feature data now also includes a link to the identifiers of duplicate features from different gazetteers. There may be entries for the same real-world feature in multiple gazetteers &#8211; this feature allows us to see entries matching the same place.</p>
<p><strong>Spatial Reference System (srs) parameter</strong><br />
Requests can now include an SRS parameter, which currently supports 4326, 27700 and 900913. Coordinates will be reprojected and returned in the desired spatial reference system &#8211; for example:</p>
<p><a href="http://unlock.edina.ac.uk/ws/nameSearch?name=Edinburgh&amp;srs=27700">http://unlock.edina.ac.uk/ws/nameSearch?name=Edinburgh&amp;srs=27700</a></p>
<p><strong>Various bug fixes, refactoring and speed improvements and updates to webpages.</strong></p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/unlockdata.wordpress.com/222/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/unlockdata.wordpress.com/222/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/unlockdata.wordpress.com/222/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/unlockdata.wordpress.com/222/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/unlockdata.wordpress.com/222/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/unlockdata.wordpress.com/222/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/unlockdata.wordpress.com/222/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/unlockdata.wordpress.com/222/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/unlockdata.wordpress.com/222/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/unlockdata.wordpress.com/222/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/unlockdata.wordpress.com/222/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/unlockdata.wordpress.com/222/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/unlockdata.wordpress.com/222/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/unlockdata.wordpress.com/222/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unlockdata.wordpress.com&amp;blog=10290434&amp;post=222&amp;subd=unlockdata&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://unlockdata.wordpress.com/2010/04/21/unlock-places-api-version-2-2/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		<georss:point>55.943593 -3.181962</georss:point>
		<geo:lat>55.943593</geo:lat>
		<geo:long>-3.181962</geo:long>
		<media:content url="http://0.gravatar.com/avatar/ad7559c147ce3d26503b44cdfc26710b?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">joevernon</media:title>
		</media:content>

		<media:content url="http://unlockdata.files.wordpress.com/2010/04/edinburgh_feature.png?w=300" medium="image">
			<media:title type="html">Feature (9656), Edinburgh, HTML view</media:title>
		</media:content>

		<media:content url="http://unlockdata.files.wordpress.com/2010/04/edinburgh_121_found.png?w=300" medium="image">
			<media:title type="html">Searching for 'Edinburgh', 121 features found</media:title>
		</media:content>
	</item>
		<item>
		<title>Linking Placename Authorities</title>
		<link>http://unlockdata.wordpress.com/2010/04/09/linking-placename-authorities/</link>
		<comments>http://unlockdata.wordpress.com/2010/04/09/linking-placename-authorities/#comments</comments>
		<pubDate>Fri, 09 Apr 2010 10:22:01 +0000</pubDate>
		<dc:creator>Jo Walsh</dc:creator>
				<category><![CDATA[gazetteer]]></category>
		<category><![CDATA[Historic]]></category>
		<category><![CDATA[JISC]]></category>
		<category><![CDATA[Linked Data]]></category>
		<category><![CDATA[placenames]]></category>

		<guid isPermaLink="false">http://unlockdata.wordpress.com/?p=208</guid>
		<description><![CDATA[Putting together a proposal for JISC call 02/10 based on a suggestion from Paul Ell at CDDA in Belfast. Why post it here? I think there&#8217;s value in working on these things in a more public way, and I&#8217;d like to know who else would find the work useful. Summary Generating a gazetteer of historic [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unlockdata.wordpress.com&amp;blog=10290434&amp;post=208&amp;subd=unlockdata&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p><img src="http://unlockdata.files.wordpress.com/2010/04/epns1.png?w=450" alt="" title="EPNS"   class="alignright size-full wp-image-214" /><br />
Putting together a proposal for <a href="http://www.jisc.ac.uk/fundingopportunities/funding_calls/2010/03/210depositexpose.aspx">JISC call 02/10</a> based on a suggestion from Paul Ell at <a href="http://www.qub.ac.uk/research-centres/CentreforDataDigitisationandAnalysis/">CDDA</a> in Belfast. Why post it here? I think there&#8217;s value in working on these things in a more public way, and I&#8217;d like to know who else would find the work useful.</p>
<h3>Summary</h3>
<p>Generating a gazetteer of historic UK placenames, linked to documents and authority files in Linked Data form. Both working with existing placename <a href="http://en.wikipedia.org/wiki/Authority_control">authority files</a>, and generating new authority files by extracting geographic names from text documents. Using the Edinburgh Geoparser to &#8220;georesolve&#8221; placenames and link them to widely-used geographic entities on the Linked Data web.</p>
<h4>Background</h4>
<p><a href="http://geodigref.edina.ac.uk/">GeoDigRef</a> was a JISC project to extract references to people and places from several very large digitised collections, to make them easier to search. The <a href="http://unlockdata.wordpress.com/2010/03/04/the-edinburgh-geoparser-and-the-stormont-hansards/">Edinburgh Geoparser</a> was adapted to extract place references from large collections.</p>
<p>One roadblock in this and other projects has been the lack of an open historic placename gazetteer for the UK.</p>
<p>Placenames in authority files, and placenames text-mined from documents, can be turned into geographic links that connect items in collections with each other and with the Linked Data web; a historic gazetteer for the UK can be built as a byproduct.</p>
<h4>Proposal</h4>
<p><strong>Firstly</strong>, working with placename authority files from existing collections, starting with the existing digitised volumes from the <a href="http://www.nottingham.ac.uk/~aezins//survey/">English Place Name Survey</a> as a basis.</p>
<p>Where place names are found, they can be linked to the corresponding Linked Data entity in <a href="http://geonames.org/">geonames.org</a>, the motherlode of place name links on the Linked Data web, using the <em>georesolver</em> component of the Edinburgh Geoparser.  </p>
<p><strong>Secondly</strong>, using the geoparser to extract placename references from documents and using those placenames to seed an authority file, which can then be resolved in the same way.</p>
<p>An open source web-based tool will help users link places to one another, remove false positives found by the geoparser, and publish the results as RDF using an open data license. </p>
<p>Historic names will be imported back into the Unlock place search service.</p>
<h4>Context</h4>
<p>This will leave behind a toolset for others to use, as well as creating new reference data. </p>
<p>Building on work done at the Open Knowledge Foundation to convert MARC/MADS bibliographic resources to RDF and add geographic links.</p>
<p>Making re-use of existing digitised resources from CDDA to help make them discoverable, provide a path in to researchers.</p>
<p>Geonames.org has some historic coverage, but it is hit and miss (E.g. &#8220;London&#8221; has &#8220;Londinium&#8221; as an alternate name, but at the contemporary location). The new OS OpenData sources are all contemporary.</p>
<p>Once a placename is found in a text, it may not be found in a gazetteer. The more places correctly located, the higher the likelihood that other places mentioned in a document will also be correctly located. More historic coverage means better georeferencing for more archival collections.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/unlockdata.wordpress.com/208/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/unlockdata.wordpress.com/208/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/unlockdata.wordpress.com/208/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/unlockdata.wordpress.com/208/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/unlockdata.wordpress.com/208/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/unlockdata.wordpress.com/208/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/unlockdata.wordpress.com/208/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/unlockdata.wordpress.com/208/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/unlockdata.wordpress.com/208/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/unlockdata.wordpress.com/208/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/unlockdata.wordpress.com/208/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/unlockdata.wordpress.com/208/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/unlockdata.wordpress.com/208/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/unlockdata.wordpress.com/208/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unlockdata.wordpress.com&amp;blog=10290434&amp;post=208&amp;subd=unlockdata&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://unlockdata.wordpress.com/2010/04/09/linking-placename-authorities/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6510bf72cbae293e2d25fc17b2a75e15?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">unlockjo</media:title>
		</media:content>

		<media:content url="http://unlockdata.files.wordpress.com/2010/04/epns1.png" medium="image">
			<media:title type="html">EPNS</media:title>
		</media:content>
	</item>
		<item>
		<title>Work in progress with OS Open Data</title>
		<link>http://unlockdata.wordpress.com/2010/04/02/work-in-progress/</link>
		<comments>http://unlockdata.wordpress.com/2010/04/02/work-in-progress/#comments</comments>
		<pubDate>Fri, 02 Apr 2010 13:23:28 +0000</pubDate>
		<dc:creator>Jo Walsh</dc:creator>
				<category><![CDATA[gazetteer]]></category>
		<category><![CDATA[open data]]></category>
		<category><![CDATA[OS data]]></category>
		<category><![CDATA[placenames]]></category>

		<guid isPermaLink="false">http://unlockdata.wordpress.com/?p=193</guid>
		<description><![CDATA[The April 1st release of many Ordnance Survey datasets as open data is great news for us at Unlock. As hoped for, Boundary-Line (administrative boundaries), the 50K gazetteer of placenames and a modified version of Code-Point (postal locations) are now open data. We&#8217;ll be putting these datasets into the open access part of Unlock Places, [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unlockdata.wordpress.com&amp;blog=10290434&amp;post=193&amp;subd=unlockdata&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>The April 1st release of many <a href="http://opendata.ordnancesurvey.co.uk/">Ordnance Survey datasets as open data</a> is great news for us at Unlock. As hoped for, Boundary-Line (administrative boundaries), the 50K gazetteer of placenames and a modified version of Code-Point (postal locations) are now open data.</p>
<p><img class="alignright size-medium wp-image-197" title="Boundary Line of Edinburgh shown on Google earth. Contains Ordnance Survey data © Crown copyright and database right 2010" src="http://unlockdata.files.wordpress.com/2010/04/unlock_screenshot_1.png?w=150&#038;h=89" alt="Boundary Line of Edinburgh shown on Google earth. Contains Ordnance Survey data © Crown copyright and database right 2010" width="150" height="89" /></p>
<p>We&#8217;ll be putting these datasets into the open access part of <a href="http://unlock.edina.ac.uk/places.html">Unlock Places</a>, our place search service, and opening up <a href="http://unlock.edina.ac.uk/geocodes.html">Unlock Geocodes</a> based on Code-Point Open. However, this is going to take a week or two, because we&#8217;re also adding some new features to Unlock&#8217;s search and results.</p>
<p>Currently, registered academic users are able to:</p>
<ul>
<li>Grab shapes and bounding boxes in KML or GeoJSON &#8211; no need for GIS software, re-use in web applications</li>
<li>Search by bounding box and feature type as well as place name</li>
<li>See properties of shapes (area, perimeter, central point) useful for statistics visualisation</li>
</ul>
<p>And soon we&#8217;ll be publishing these new features currently in testing:</p>
<ul>
<li>Relationships between places &#8211; cities, counties and regions containing found places &#8211; in the default results</li>
<li>Re-project points and shapes into different coordinate reference systems</li>
</ul>
<p>These have been added so we can finally plug the Unlock Places search into EDINA&#8217;s <a href="http://digimap.edina.ac.uk/">Digimap</a> service.</p>
<p>Having Boundary-Line shapes in our open data gazetteer will mean we can return bounding boxes or polygons through <a href="http://unlock.edina.ac.uk/text.html">Unlock Text</a>, which extracts placenames from documents and metadata. This will help to open up new research directions for our work with the <a href="http://www.ltg.ed.ac.uk/">Language Technology Group</a> at Informatics in Edinburgh.</p>
<p>There are some organisations we&#8217;d love to collaborate with (almost next door, the <a href="http://geo.nls.uk/">Map Library at the National Library of Scotland</a> and the <a href="http://www.scotlandsplaces.gov.uk">Royal Commission on Ancient and Historical Monuments of Scotland</a>) but have been unable to, because Unlock and its predecessor GeoCrossWalk were limited by license to academic use only. I look forward to seeing all the things the OS Open Data release has now made possible.</p>
<p>I&#8217;m also excited to see what re-use we and others could make of the <a href="http://data.ordnancesurvey.co.uk/">Linked Data published by Ordnance Survey Research</a>, and what their approach will be to connecting shapes to their administrative model.</p>
<p>MasterMap, the highest-detail OS dataset, wasn&#8217;t included in the open release. Academic subscribers to the Digimap Ordnance Survey Collection get access to places extracted from MasterMap, and improvements to other datasets created using MasterMap, with an <a href="http://unlock.edina.ac.uk/register">Unlock Places API key</a>.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/unlockdata.wordpress.com/193/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/unlockdata.wordpress.com/193/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/unlockdata.wordpress.com/193/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/unlockdata.wordpress.com/193/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/unlockdata.wordpress.com/193/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/unlockdata.wordpress.com/193/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/unlockdata.wordpress.com/193/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/unlockdata.wordpress.com/193/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/unlockdata.wordpress.com/193/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/unlockdata.wordpress.com/193/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/unlockdata.wordpress.com/193/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/unlockdata.wordpress.com/193/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/unlockdata.wordpress.com/193/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/unlockdata.wordpress.com/193/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unlockdata.wordpress.com&amp;blog=10290434&amp;post=193&amp;subd=unlockdata&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://unlockdata.wordpress.com/2010/04/02/work-in-progress/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		<georss:point>0.000000 0.000000</georss:point>
		<geo:lat>0.000000</geo:lat>
		<geo:long>0.000000</geo:long>
		<media:content url="http://0.gravatar.com/avatar/6510bf72cbae293e2d25fc17b2a75e15?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">unlockjo</media:title>
		</media:content>

		<media:content url="http://unlockdata.files.wordpress.com/2010/04/unlock_screenshot_1.png?w=150" medium="image">
			<media:title type="html">Boundary Line of Edinburgh shown on Google earth. Contains Ordnance Survey data © Crown copyright and database right 2010</media:title>
		</media:content>
	</item>
		<item>
		<title>Notes from EEO talk on population modelling with GIS</title>
		<link>http://unlockdata.wordpress.com/2010/03/22/notes-from-eeo-talk-on-population-modelling-with-gis/</link>
		<comments>http://unlockdata.wordpress.com/2010/03/22/notes-from-eeo-talk-on-population-modelling-with-gis/#comments</comments>
		<pubDate>Mon, 22 Mar 2010 11:42:04 +0000</pubDate>
		<dc:creator>Jo Walsh</dc:creator>
				<category><![CDATA[census]]></category>
		<category><![CDATA[statistics]]></category>

		<guid isPermaLink="false">http://unlockdata.wordpress.com/?p=187</guid>
		<description><![CDATA[David Martin spoke in the EEO seminar series last Friday. Here are my notes: In the last decades we have become &#8220;sophisticated in our tools, but our fundamental techniques and results aren&#8217;t very different&#8221;. Census data is not the same as demographic data, however census approaches to modelling population have become dominant &#8211; a &#8220;long-term [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unlockdata.wordpress.com&amp;blog=10290434&amp;post=187&amp;subd=unlockdata&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p><a href="http://www.southampton.ac.uk/geography/staff_profiles/academic/djm1.html">David Martin</a> spoke in the EEO seminar series last Friday. Here are my notes:</p>
<p>In the last decades we have become &#8220;sophisticated in our tools, but our fundamental techniques and results aren&#8217;t very different&#8221;. Census data is not the same as demographic data, however census approaches to modelling population have become dominant &#8211; a &#8220;long-term reliance on census-based shaded area map to inform spatial decision-making&#8221;.</p>
<p>Importance of small area population mapping for policy &#8211; resource allocation and site location decisions, calculation of prevalence rates. &#8220;Who is present in a small area, and what characteristics do they have&#8221;. A house or flat becomes a &#8220;proxy&#8221; for a person, who is tied to the space.</p>
<p>This doesn&#8217;t give a clear usage picture, specifically it is night-time activity rather than day time which has very different patterns of repetition and variation of movement.</p>
<p>More general problems with census-taking &#8211; </p>
<ul>
<li>underenumeration</li>
<li>infrequency</li>
<li>spatially concentrated error</li>
</ul>
<p>&#8220;We could cut the city differently and produce variations in the pattern&#8221; &#8211; research in automated generation of census zones, looking for areas with social homogeneity, size, population, based on previous samplings.</p>
<p>&#8220;Population distribution is not space-filling but is quasi-continuous&#8221;. </p>
<p>&#8220;Interest in surfaces, grids and <a href="http://en.wikipedia.org/wiki/Dasymetric_map">dasymetric</a> approaches&#8221;. Using a grid to slice and visualise population data. The grid gives us a finer grained depiction of actual activity.</p>
<p>Interestingly, shift in government policy regarding census taking. Rapid development of space, and new tech, cause problems &#8211; people are more mobile, with multiple bases; concerns about data privacy are more mainstream.<br />
The US Census Bureau has dropped the &#8220;long-form&#8221; return which used to go to one in six recipients. In France the idea of a periodic census has been dropped completely, they now conduct a &#8220;rolling census&#8221; compiled from different data sources.</p>
<p>&#8220;Register-based sources&#8221; &#8211; e.g. demographic data is held by health services, local government, transport providers, business associations, communications companies. It&#8217;s possible to &#8220;produce something census-like&#8221;, but richer, by correlating these sources.</p>
<p>Also the cross-section of other sources gives an idea of where census records are flawed and persistently inaccurate, e.g. council tax records not corresponding to where people claim they live.</p>
<h4>Towards new representations of time-space</h4>
<p>Temporal issues still neglected by geodata specialists, in fact some of the issues are gnarlier and trickier than spatial representation is.</p>
<p><a href="http://www.southampton.ac.uk/geography/research/rssa/pop247/index.html">space–time specific population surface modelling</a>.</p>
<p>Dr Martin identified &#8220;emergent issues&#8221; affecting this practice &#8211; &#8220;Spatial units, data sources as streams, representational concepts&#8221;. His group has some software in development to document the algorithm for gridding data space &#8211; I wanted to ask whether the software and implicitly the algorithm would be released as open source.</p>
<p>A thought about gridded data is that it&#8217;s straightforward to recombine (given grid cells for different sources are the same size). Something like <a href="http://www.opengeospatial.org/standards/wcs">OGC WCS</a> but much simpler.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/unlockdata.wordpress.com/187/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/unlockdata.wordpress.com/187/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/unlockdata.wordpress.com/187/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/unlockdata.wordpress.com/187/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/unlockdata.wordpress.com/187/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/unlockdata.wordpress.com/187/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/unlockdata.wordpress.com/187/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/unlockdata.wordpress.com/187/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/unlockdata.wordpress.com/187/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/unlockdata.wordpress.com/187/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/unlockdata.wordpress.com/187/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/unlockdata.wordpress.com/187/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/unlockdata.wordpress.com/187/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/unlockdata.wordpress.com/187/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unlockdata.wordpress.com&amp;blog=10290434&amp;post=187&amp;subd=unlockdata&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://unlockdata.wordpress.com/2010/03/22/notes-from-eeo-talk-on-population-modelling-with-gis/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6510bf72cbae293e2d25fc17b2a75e15?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">unlockjo</media:title>
		</media:content>
	</item>
		<item>
		<title>OpenSearch Geospatial in progress</title>
		<link>http://unlockdata.wordpress.com/2010/03/15/opensearch-geospatial/</link>
		<comments>http://unlockdata.wordpress.com/2010/03/15/opensearch-geospatial/#comments</comments>
		<pubDate>Mon, 15 Mar 2010 16:59:57 +0000</pubDate>
		<dc:creator>Jo Walsh</dc:creator>
				<category><![CDATA[gazetteer]]></category>
		<category><![CDATA[opensearch]]></category>
		<category><![CDATA[search]]></category>

		<guid isPermaLink="false">http://unlockdata.wordpress.com/?p=171</guid>
		<description><![CDATA[One promising presentation I saw last week at the Jornadas SIG Libre &#8211; Oscar Fonts&#8217; work in the Geographic Information Group at the Universitat Jaume I building on OpenSearch Geospatial interfaces to different services. The demonstrator shown during the talk was an OpenLayers map display hooked up to various OpenSearch Geo services. Some are &#8220;native&#8221; [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unlockdata.wordpress.com&amp;blog=10290434&amp;post=171&amp;subd=unlockdata&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>One promising presentation I saw last week at the <a href="http://www.sigte.udg.edu/jornadassiglibre/programa">Jornadas SIG Libre</a> &#8211; Oscar Fonts&#8217; work in the <a href="http://www.geoinfo.uji.es/">Geographic Information Group at the Universitat Jaume I</a> building on OpenSearch Geospatial interfaces to different services. <a href="http://unlockdata.files.wordpress.com/2010/03/opensearch-edinburgh.png"><img src="http://unlockdata.files.wordpress.com/2010/03/opensearch-edinburgh.png?w=300&#038;h=150" alt="OpenSearch geo query of OSM" title="opensearch-edinburgh" width="300" height="150" class="alignright size-medium wp-image-179" /></a></p>
<p>The <a href="http://geoportal.dlsi.uji.es/OpenSearch/">demonstrator shown during the talk</a> was an <a href="http://www.openlayers.org/">OpenLayers</a> map display hooked up to various OpenSearch Geo services.</p>
<p>Some are &#8220;native&#8221; OpenSearch services, like the <a href="http://www.geocommons.com">GeoCommons</a> data deposit and mapmaking service, the interfaces published by Terradue as part of the European <a href="http://www.genesi-dr.eu/">GENESI-DR</a> earth observation distributed data repository project. </p>
<p>The UJI demo also includes an API adapter for sensationally popular web services with geographic contents. Through the portal one can search for tweets, geotagged Flickr photos, or individual shapes from OpenStreetmap. </p>
<p>Oscar&#8217;s talk highlighted the problem of seeming incompatibility between the <a href="http://www.opensearch.org/Specifications/OpenSearch/Extensions/Geo/1.0/Draft_1">original draft of the OpenSearch Geospatial extensions</a>, and the version making its way through the Open Geospatial Consortium&#8217;s Catalog working group as a &#8220;part document&#8221; included in the next Catalog Services for the Web specification. </p>
<p>The issues currently breaking backwards-compatibility between the versions are these:</p>
<ol>
<li><code>geo:locationString</code> became <code>geo:name</code> in the OGC draft version.</li>
<li><code>geo:polygon</code> was omitted from the OGC draft version, and replaced with <code>geo:geometry</code> which allows for complex geometries (including multi-polygons) to be passed through using Well Known Text.</li>
</ol>
<p>1) looks like syntactic sugar &#8211; <strong>geo:name</strong> is less typing, and reads better. geo:locationString can be deprecated but supported. </p>
<p>2) <strong>geo:geometry</strong> was introduced into the spec as a result of work on the GENESI-DR project, which had a strong requirement to support multi-polygons (specifically, passes over the earth of a satellite, which crossed the dateline and thus were made up of two polygons meeting on either side of the dateline). </p>
<p><b>geo:polygon</b> has a much simpler syntax, just a list of (latitude, longitude) pairs which join up to make a shape. This also restricts queries to two dimensions.</p>
<p>This seems to be the nub of the discussion &#8211; <em>should geo:polygon be included in the updated version</em> &#8211; risking it being seen as clashing with or superfluous to geo:geometry, leading to end user confusion?  </p>
<p>There is always a balance to be met between simplicity and complexity. Oscar pointed out in his talk what I have heard in OGC Catalog WG discussions too &#8211; that as soon as a use case becomes sufficiently complex, then CSW is available and likely fitter for the job. <code>geo:geometry</code> is already at the top end of acceptable complexity.</p>
<p>It&#8217;s about a year since I helped turn Andrew Turner&#8217;s original draft into an OGC consumable form. Anecdotally it seems like a lot more people are interested in seeing what can be done with OpenSearch Geo now.</p>
<p>The OGC version is not a fork. The wiki draft was turned into a draft OGC spec after talking with Andrew and Raj Singh about the proposed changes, partly on the <a href="http://groups.google.com/group/opensearch/browse_thread/thread/efaac36185fd610/3c251b44a4620e44?lnk=gst&amp;q=geo#3c251b44a4620e44">OpenSearch Google Group</a>. The geo:relation parameter was added on the basis of feedback from the GeoNetwork and GeoTools communities. There&#8217;s been a <a href="http://www.opensearch.org/Specifications/OpenSearch/Extensions/Geo/1.0/Draft_2">Draft 2 page, as yet unmodified, on the OpenSearch wiki</a> since that time.</p>
<p>In order to build the confidence of potential adopters, these backwards-incompatibilities do need to be addressed. Personal point of view would be to update the wiki draft, deprecating locationString and including both polygon and geometry parameters. </p>
<p>I was impressed by the work of Oscar and collaborators, though wondering if they are going to move in to aggregation and indexing, search-engine-style, of the results, or just use the OpenSearch interface to search in realtime fairly fast moving sources of data. I wish I&#8217;d asked this question in the session, now. It all offers reinforcement and inspiration for putting OpenSearch Geo interfaces on services nearby &#8211; <a href="http://gogeo.ac.uk/">Go-Geo!</a>, <a href="http://ckan.net/">CKAN</a>. The <a href="http://ndg.nerc.ac.uk/services/discovery">NERC Data Discovery Service</a> could benefit, as could <a href="http://www.scran.ac.uk/">SCRAN</a>. We&#8217;ll get to see what happens, which I&#8217;m glad of. </p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/unlockdata.wordpress.com/171/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/unlockdata.wordpress.com/171/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/unlockdata.wordpress.com/171/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/unlockdata.wordpress.com/171/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/unlockdata.wordpress.com/171/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/unlockdata.wordpress.com/171/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/unlockdata.wordpress.com/171/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/unlockdata.wordpress.com/171/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/unlockdata.wordpress.com/171/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/unlockdata.wordpress.com/171/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/unlockdata.wordpress.com/171/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/unlockdata.wordpress.com/171/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/unlockdata.wordpress.com/171/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/unlockdata.wordpress.com/171/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unlockdata.wordpress.com&amp;blog=10290434&amp;post=171&amp;subd=unlockdata&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://unlockdata.wordpress.com/2010/03/15/opensearch-geospatial/feed/</wfw:commentRss>
		<slash:comments>2</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6510bf72cbae293e2d25fc17b2a75e15?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">unlockjo</media:title>
		</media:content>

		<media:content url="http://unlockdata.files.wordpress.com/2010/03/opensearch-edinburgh.png?w=300" medium="image">
			<media:title type="html">opensearch-edinburgh</media:title>
		</media:content>
	</item>
		<item>
		<title>Notes on Linked Data and Geodata Quality</title>
		<link>http://unlockdata.wordpress.com/2010/03/15/notes-on-linked-data-and-geodata-quality/</link>
		<comments>http://unlockdata.wordpress.com/2010/03/15/notes-on-linked-data-and-geodata-quality/#comments</comments>
		<pubDate>Mon, 15 Mar 2010 13:43:53 +0000</pubDate>
		<dc:creator>Jo Walsh</dc:creator>
				<category><![CDATA[Data quality]]></category>
		<category><![CDATA[ESDIN]]></category>
		<category><![CDATA[Linked Data]]></category>
		<category><![CDATA[open data]]></category>

		<guid isPermaLink="false">http://unlockdata.wordpress.com/?p=143</guid>
		<description><![CDATA[This is a long post talking about geospatial data quality background before moving on to Linked Data about halfway. I should probably try to break this down into smaller posts &#8211; &#8220;if I had more time, I would write less&#8221;. Through EDINA&#8217;s involvement with the ESDIN project between mapping and cadastral agencies (NMCAs) across Europe, [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unlockdata.wordpress.com&amp;blog=10290434&amp;post=143&amp;subd=unlockdata&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p><em>This is a long post talking about geospatial data quality background before moving on to Linked Data about halfway. I should probably try to break this down into smaller posts &#8211; &#8220;if I had more time, I would write less&#8221;.</em></p>
<p>Through <a href="http://edina.ac.uk/">EDINA</a>&#8217;s involvement with the <a href="http://www.esdin.eu/">ESDIN</a> project between mapping and cadastral agencies (NMCAs) across Europe, I&#8217;ve picked up a bit about data quality theory (at least as it applies to geography). One of ESDIN&#8217;s goals is a common quality model for the network of cooperating NMCAs.</p>
<p>I&#8217;ve also been admiring Muki Haklay&#8217;s work on <a href="http://povesham.wordpress.com/tag/spatial-data-quality/">assessing data quality of collaborative OpenStreetmap data</a> using comparable national mapping agency data. His <a href="http://povesham.wordpress.com/2010/01/29/haiti-%E2%80%93-further-comparisons-and-the-usability-of-geographic-information-in-emergency-situations/">recent assessment of OSM and Google MapMaker&#8217;s Haiti streetmaps</a> showed the benefit of analytical data quality work, helping users assess how what they have matches the world, assisting with <em>conflation</em> to join different spatial databases together.</p>
<p>Today I was pointed at <a href="http://www.slideshare.net/mvexel/zen-of-map-quality">Martijn Van Exel&#8217;s presentation at WhereCamp EU</a> on &#8220;map quality&#8221;, ending with a consideration of how to measure quality in OpenStreetmap. Are map and underlying data quite different when we think about quality?</p>
<p>The ISO specs for data quality have their origins in industrial and military quality assurance &#8211;  &#8220;acceptable lot quality&#8221; for samples from a production line. One measurement, &#8220;<a href="http://en.wikipedia.org/wiki/Circular_error_probable">circular error probable</a>&#8221;, comes from ballistics design &#8211; the circle of error was once a literal circle round successive shots from an automatic weapon, indicating how wide a distance between shots, thus inaccuracy in the weapon, was tolerable.</p>
<p>The ISO 19138 quality models apply to highly detailed data created by national mapping agencies. There&#8217;s a need for reproducible quality assessment of other kinds of data, less detailed and less complete, from both commercial and open sources.</p>
<p>The ISO model presents measures of &#8220;completeness&#8221; and &#8220;consistency&#8221;. For completeness, an object or an attribute of an object is either present, or not present. </p>
<p>Consistency is a bit more complicated than that. In the ISO model there are error <em>elements</em>, and error <em>measures</em>. The elements are different kinds of error &#8211; logical, temporal, positional and thematic. The measures describe how the errors should be reported &#8211; as a total count, as a relative rate for a given lot, as a &#8220;circular error probable&#8221;.</p>
<p>Geographic data quality in this formal sense can be measured, either by a <em>full inspection</em> of a data set or in samples from it, in several ways:</p>
<ul>
<li>Comparison to another data set, ideally of known and high quality</li>
<li>Comparing the contents of the dataset, using rules to describe what is expected.</li>
<li>Comparing samples of the dataset to the world, e.g. by intensive surveying.</li>
</ul>
<p>The ISO specs feature a data production process view of quality measurement. NMCAs apply rules and take measurements before publishing data, submitting data to cross-border efforts with neighbouring EU countries, and later after correcting the data to make sure roads join up. Practitioners definitely think in terms of spatial information as networks or graphs, not in terms of maps.</p>
<h3>Collaborative Quality Mapping</h3>
<p>Muki Haklay&#8217;s group used different comparison techniques &#8211; in one instance comparing variable-quality data to known high-quality data, in another comparing the relative completeness of two variable-quality data sources.</p>
<p>Not so much thought has gone into the data user&#8217;s needs from quality information, as opposed to the data maintainer&#8217;s clearer needs. Relatively few specialised users will benefit from knowing the rate of consistency errors vs topological errors &#8211; for most people this level of detail won&#8217;t provide the confidence needed to reuse the information. The fundamental question is &#8220;<strong>how good is good enough?</strong>&#8221; and there is a wide spectrum of answers depending on the goals of each re-user of data.</p>
<p>I also see several use cases for use of quality information to flag up data which is interesting for research or search purposes, but not appropriate to use for navigation or surveying purposes, where errors can be costly.</p>
<p>An example: the <a href="http://code.flickr.com/blog/2008/10/30/the-shape-of-alpha/">&#8220;alpha shapes&#8221; that were produced by Flickr</a> based on the distribution of geo-tagged images attached to a placename in a gazetteer. </p>
<p>Another example: polygon data produced by bleeding-edge auto-generalisation techniques that may have good results in some areas but bizarre errors in others.</p>
<p>Somewhat obviously, data quality information would be very useful to a data quality improvement drive. <a href="http://tools.geofabrik.de/">GeoFabrik</a> made the OpenStreetmap Inspector tool, highlighting areas where nodes are disconnected or names and feature types for shapes are missing.</p>
<h3>Quality testing</h3>
<p>What about quality testing? When I worked as a perl programmer I enjoyed the <a href="http://search.cpan.org/~pjcj/Devel-Cover-0.65/lib/Devel/Cover.pm">test coverage</a> and <a href="http://search.cpan.org/~rclamp/Pod-Coverage-0.20/lib/Pod/Coverage.pm">documentation coverage</a> packages. A visual interface to show how much progress you&#8217;ve made on clearly documenting your code, to show how many decisions that should be tested for integrity remain untested.</p>
<p>Software packages come with a set of tests &#8211; ideally these tests will have helped with the development process, as well as providing the user with examples of correct and efficient use of the code, and aiding in automatic installation of packages.</p>
<p>Donald Knuth promoted the idea of &#8220;<a href="http://en.wikipedia.org/wiki/Literate_programming">literate programming</a>&#8221;, where code fully explains what it is doing. For code, this concept can be extended to &#8220;literate testing&#8221; of how well software is doing what is expected of it.</p>
<p>At the <a href="http://edina.ac.uk/events/digimap10/">Digimap 10th Birthday</a> event, Glen Hart from <a href="http://www.ordnancesurvey.co.uk/oswebsite/ontology/">Ordnance Survey Research</a> talked about increasing <strong>data usability</strong> for Linked Data efforts. I want to link to this the idea of &#8220;<em>literate data</em>&#8221;, and think about a data-driven approach to quality.</p>
<p>A registry based on <a href="http://ckan.net/">CKAN</a>, like <a href="http://data.gov.uk/">data.gov.uk</a>, could benefit from a quality audit. How can one take a quality approach to Linked Data?</p>
<p>To start with, each record has a set of attributes and to reach completeness they should all be filled in. This ranges from data license to maintainer contact information to resource download. Many records in <a href="http://ckan.net/">CKAN.net</a> are incomplete. Automated tests could be run on the presence or absence of properties for each package. The results can be displayed on the web, with the option to view the relative quality of package collections belonging to groups or tags. The process would help identify areas that needed focus and followup. It would help to plan and follow progress on turning records into downloadable data packages.  Quality testing could help reward groups that were being diligent in maintaining metadata. </p>
<p>The values of properties will have constraints, these can be used to test for quality &#8211; links should be reachable, email contact addresses should make at least one response. Locations in the dataset should be near locations in the metadata. Time ranges matching, ditto. Values that should be numbers, actually are numbers.</p>
<p>Some datasets listed in the data.gov.uk catalogues have URLs that don&#8217;t dereference, i.e. are links that don&#8217;t work. It&#8217;s difficult to find out what packages these datasets are attached to, where to get the actual data or contact the maintainers.</p>
<p>To see this in real data, visit the bare SPARQL endpoint at <a href="http://services.data.gov.uk/analytics/sparql">http://services.data.gov.uk/analytics/sparql</a> and paste this query into the search box (it&#8217;s looking for everything described as a Dataset, using the <a href="http://semanticweb.org/wiki/Scovo">scovo</a> vocabulary for statistical data):</p>
<blockquote><p><code>PREFIX scv: &lt;http://purl.org/NET/scovo#&gt;</code></p>
<p><code>SELECT DISTINCT ?p<br />
WHERE {<br />
    ?p a scv:Dataset .<br />
}</code></p></blockquote>
<p>The response shows a set of URIs which, when you try to look them up to get a full description, return a &#8220;Resource not found&#8221; error. The presence of a quality test suite would catch this kind of incompleteness early in the release schedule, help provide metrics of how fast identified issues with incompleteness and inconsistency were being fixed. </p>
<p>The presence of more information about a resource, from a link, can be agreed on as a quality rule for Linked Data &#8211; it is one of the <a href="http://www.w3.org/DesignIssues/LinkedData.html">Four Principles</a> after all, that one should be able to follow a link and get useful information. </p>
<p>With OWL schemas there is already some modelling of data objects and attributes and their relations. There are rules languages from W3C and elsewhere that could be used to automate some quality measurement &#8211; <a href="http://www.w3.org/2005/rules/wiki/RIF_FAQ">RIF</a> and <a href="http://www.w3.org/Submission/SWRL/">SWRL</a>. These languages require a high level of buy-in to the standards, a rules engine, expertise.</p>
<p>Data package testing can be viewed like software package testing. The rules are built up, piece by piece, growing as the code does, ideally. The methods used can be quite ad-hoc, use different frameworks and structures, as long as the results are repeatable and the coverage is thorough.</p>
<p>Not everyone will have the time or patience to run quality tests on their local copy of the data before use, so we need some way to convey the results. This could be an overall score, a count of completeness errors &#8211; something like the results of a software test run:</p>
<blockquote><p><code>3 items had no tests...<br />
9 tests in 4 items.<br />
9 passed and 0 failed.<br />
Test passed.</code></p></blockquote>
<p>For quality improvement, one needs to see the detail of what is missing. Essentially this is a picture of a data model with missing pieces. It would look a bit like the content of a SPARQL query:</p>
<blockquote><p>
<code> a scv:Dataset .<br />
 dc:title ?title .<br />
 scv:datasetOf ?package .<br />
etc...</code>
</p></blockquote>
<p>After writing this I was pointed at <a href="http://www4.wiwiss.fu-berlin.de/bizer/WIQA/index.htm">WIQA</a>, a Linked Data quality specification language by the group behind <a href="http://dbpedia.org/">dbpedia</a> and <a href="http://linkedgeodata.org/About">Linked GeoData</a>, which basically implements this with a SPARQL-like syntax. I would like to know more about in-the-wild use of WIQA and integration back into annotation tools&#8230;</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/unlockdata.wordpress.com/143/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/unlockdata.wordpress.com/143/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/unlockdata.wordpress.com/143/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/unlockdata.wordpress.com/143/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/unlockdata.wordpress.com/143/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/unlockdata.wordpress.com/143/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/unlockdata.wordpress.com/143/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/unlockdata.wordpress.com/143/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/unlockdata.wordpress.com/143/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/unlockdata.wordpress.com/143/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/unlockdata.wordpress.com/143/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/unlockdata.wordpress.com/143/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/unlockdata.wordpress.com/143/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/unlockdata.wordpress.com/143/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unlockdata.wordpress.com&amp;blog=10290434&amp;post=143&amp;subd=unlockdata&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://unlockdata.wordpress.com/2010/03/15/notes-on-linked-data-and-geodata-quality/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6510bf72cbae293e2d25fc17b2a75e15?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">unlockjo</media:title>
		</media:content>
	</item>
		<item>
		<title>&#8220;At risk&#8221; 7-8am March 23rd, 8-9am March 30th</title>
		<link>http://unlockdata.wordpress.com/2010/03/09/at-risk-7-8am-march-23rd-8-9am-march-30th/</link>
		<comments>http://unlockdata.wordpress.com/2010/03/09/at-risk-7-8am-march-23rd-8-9am-march-30th/#comments</comments>
		<pubDate>Tue, 09 Mar 2010 10:02:02 +0000</pubDate>
		<dc:creator>Jo Walsh</dc:creator>
				<category><![CDATA[Status]]></category>

		<guid isPermaLink="false">http://unlockdata.wordpress.com/?p=148</guid>
		<description><![CDATA[The Unlock services will have a couple of one-hour periods at risk of downtime for network maintenance at the end of this month. 7-8am Tue Mar 23rd a router upgrade planned on the University of Edinburgh network may disrupt access to the Unlock service. 8-9am Tue 30th March is an &#8220;at risk&#8221; period for our [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unlockdata.wordpress.com&amp;blog=10290434&amp;post=148&amp;subd=unlockdata&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>The Unlock services will have a couple of one-hour periods at risk of downtime for network maintenance at the end of this month.</p>
<p><strong>7-8am Tue Mar 23rd</strong> a router upgrade planned on the University of Edinburgh network may disrupt access to the Unlock service.</p>
<p><strong>8-9am Tue 30th March</strong> is an &#8220;at risk&#8221; period for our connection to JANET while it undergoes <a href="http://www.managed-services-index.ed.ac.uk/alerts/index.cfm?fuseaction=view_alert&amp;alert_id=196">resilience testing</a>.</p>
<p>That&#8217;s all. </p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/unlockdata.wordpress.com/148/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/unlockdata.wordpress.com/148/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/unlockdata.wordpress.com/148/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/unlockdata.wordpress.com/148/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/unlockdata.wordpress.com/148/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/unlockdata.wordpress.com/148/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/unlockdata.wordpress.com/148/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/unlockdata.wordpress.com/148/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/unlockdata.wordpress.com/148/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/unlockdata.wordpress.com/148/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/unlockdata.wordpress.com/148/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/unlockdata.wordpress.com/148/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/unlockdata.wordpress.com/148/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/unlockdata.wordpress.com/148/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unlockdata.wordpress.com&amp;blog=10290434&amp;post=148&amp;subd=unlockdata&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://unlockdata.wordpress.com/2010/03/09/at-risk-7-8am-march-23rd-8-9am-march-30th/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6510bf72cbae293e2d25fc17b2a75e15?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">unlockjo</media:title>
		</media:content>
	</item>
		<item>
		<title>Dev8D: JISC Developer Days</title>
		<link>http://unlockdata.wordpress.com/2010/03/05/dev8d-jisc-developer-days/</link>
		<comments>http://unlockdata.wordpress.com/2010/03/05/dev8d-jisc-developer-days/#comments</comments>
		<pubDate>Fri, 05 Mar 2010 12:21:53 +0000</pubDate>
		<dc:creator>Joe Vernon</dc:creator>
				<category><![CDATA[gazetteer]]></category>
		<category><![CDATA[JISC]]></category>
		<category><![CDATA[Linked Data]]></category>
		<category><![CDATA[open data]]></category>
		<category><![CDATA[conference]]></category>
		<category><![CDATA[EDINA]]></category>
		<category><![CDATA[places]]></category>
		<category><![CDATA[Unlock]]></category>

		<guid isPermaLink="false">http://unlockdata.wordpress.com/?p=136</guid>
		<description><![CDATA[The Unlock development team recently attended the Dev8D: JISC Developer Days conference at University College London. The format of the event is fairly loose, with multiple sessions in parallel and the programme created dynamically as the 4 days progressed. Delegates are encouraged to use their feet to seek out what interests them! The idea is [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unlockdata.wordpress.com&amp;blog=10290434&amp;post=136&amp;subd=unlockdata&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>The Unlock development team recently attended the <a href="http://dev8d.org/" target="_blank">Dev8D: JISC Developer Days</a> conference at University College London. The format of the event is fairly loose, with multiple sessions in parallel and the programme created dynamically as the 4 days progressed. Delegates are encouraged to use their feet to seek out what interests them! The idea is simple: developers, mainly (but not exclusively) from academic organisations come together to share ideas, work together and strengthen professional and social connections.</p>
<p>A series of back-to-back 15 minute &#8216;lightning talks&#8217; ran throughout the conference, I delivered two &#8211; describing EDINA&#8217;s Unlock services and showing users how to get started with the <a title="Unlock Places API" href="http://unlock.edina.ac.uk/places.html" target="_blank">Unlock Places APIs</a>. Discussions after the talk focused on the question of open sourcing and the licensing of Unlock Places software generally &#8211; and what future open gazetteer data sources we plan to include.</p>
<p>In parallel with the lightning talks, workshop sessions were held on a variety of topics such as linked data, iPhone application development, working with <a href="http://www.arduino.cc/" target="_blank">Arduino</a> and the <a href="http://code.google.com/appengine/" target="_blank">Google app engine</a>.</p>
<p><strong>Competitions</strong><br />
Throughout Dev8D, several competitions or &#8216;bounties&#8217; were held around different themes. In our competition, delegates had the chance to win a £200 Amazon voucher by entering a prototype application making use of the Unlock Places API. The most innovative and useful application wins!</p>
<p>I gave a quick announcement at the start of the week to discuss the competition, how to get started using the API and then demonstrated a <a href="http://mobilegeo.wordpress.com/2010/03/03/html5s-local-sql-database-openlayers/" target="_blank">mobile client</a> for the Unlock Places gazetteer as an example of the sort of competition entry we were looking for. This application makes use of the new HTML5 web database functionality &#8211; enabling users to download and store Unlock&#8217;s feature data offline on a mobile device. Here are some of the entries:</p>
<p>Marcus Ramsden from Southampton University created a plugin for <a href="http://www.eprints.org/" target="_blank">EPrints</a>, the open access repository software. Using the <a href="http://unlock.edina.ac.uk/text.html" target="_blank">Unlock Text</a> geoparser, &#8216;GeoPrints&#8217; extracts locations from documents uploaded to EPrints then provides a mechanism to browse EPrint documents using maps.</p>
<p>Aidan Slingsby from City University entered some beautiful <a href="http://gicentre.org/point_mashups/" target="_blank">work</a> displaying point data (in this case a gazetteer of British placenames) shown as tag-maps, density estimation surfaces and chi surfaces rather than the usual map-pins! The data was based on GeoNames data accessed through the Unlock Places API.</p>
<p>And the winner was&#8230; Duncan Davidson from <a href="http://www.informatics-ventures.com/" target="_blank">Informatics Ventures</a>, University of Edinburgh. He used the Unlock Places APIs together with <a href="http://pipes.yahoo.com/pipes/" target="_blank">Yahoo Pipes</a> to present data on new start-ups and projects around Scotland. Enabling the conversion of data containing local council names into footprints, Unlock Places allowed the data to be mapped using KML and Google Maps, enabling his users to navigate around the data using maps &#8211; and search the data using spatial constraints.</p>
<p><strong>Some other interesting items at Dev8D&#8230;</strong></p>
<ul>
<li>&lt;sameAs&gt;<br />
Hugh Glaser from the University of Southampton discussed how <a href="http://sameas.org" target="_blank">sameAs.org</a> works to establish linkage between datasets by managing multiple URIs for Linked Data without an authority. Hugh demonstrated using sameAs.org to locate co-references between different data sets.</li>
<li>Mendeley<br />
<a href="http://www.mendeley.com/" target="_blank">Mendeley</a> is a research network built around the same principle as last.fm. Jan Reichelt and Ben Dowling discussed how by tracking, sharing and organising journal/article history, Mendeley is designed to help users to discover and keep in touch with similarly minded researchers. I heard of Mendeley last year and was surprised by the large (and rapidly increasing) user base &#8211; the collective data from its users is already proving a very powerful resource.</li>
<li>Processing<br />
Need to do rapid visualisation of images, animations or interactions? <a href="http://processing.org/" target="_blank">Processing</a> is a Java-based sketchbox/IDE which will help you to visualise your data much quicker. Ross McFarlane from the University of Liverpool gave a quick tutorial of <a href="http://processingjs.org" target="_blank">Processing.js</a>, a JavaScript port using <a href="http://dev.w3.org/html5/spec/Overview.html" target="_blank">&lt;Canvas&gt;</a>, illustrating the power and versatility of this library.</li>
<li>Genetic Programming<br />
This session centred around some basic aspects of Genetic Algorithms/Evolutionary Computing and Emergent properties of evolutionary systems. Delegates focused on creating virtual ants (with <a href="http://python.org" target="_blank">Python</a>) to solve mazes and by visualising their creatures with Processing (above), Richard Jones enabled developers to work on something a bit different!</li>
<li>Web Security<br />
Ben Charlton from the University of Kent delivered an excellent <a href="http://spod.cx/dev8d2010/websec.html" target="_blank">walk-through</a> of the most significant and very common threats to web applications. Working from the <a href="http://www.owasp.org/index.php/Top_10_2007" target="_blank">OWASP Top 10 project</a>, he discussed each threat with real world examples. Great stuff &#8211; important for all developers to see.</li>
<li>Replicating 3D Printer: RepRap<br />
Adrian Bowyer demonstrated <a href="http://reprap.org/bin/view/Main/WebHome" target="_blank">RepRap</a> &#8211; short for Replicating Rapid-prototyper. It&#8217;s an open source (GPL) device, able to create robust 3D plastic components (including around half of its own components). Its novel capability of being able to self-copy, with material costs of only €350 makes it accessible to small communities in the developing world as well as individuals in the developed world. His inspiring talk was well received and this super illustration of open information&#8217;s far reaching implications captured everyone&#8217;s imagination.</li>
</ul>
<p>All in all, a great conference. A broad spread of topics, with the right mix of sit-and-listen to get-involved activities. Whilst Dev8D is a fairly chaotic event, it&#8217;s clear that it generates a wealth of great ideas, contacts and even new products and services for academia. See Dev8D&#8217;s <a title="Happy Stories" href="http://wiki.2010.dev8d.org/w/Happy_Stories" target="_blank">Happy Stories</a> page for a record of some of the outcomes. I&#8217;m now looking forward to seeing how some of the prototypes evolve and I&#8217;m definitely looking forward to Dev8D 2011.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/unlockdata.wordpress.com/136/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/unlockdata.wordpress.com/136/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/unlockdata.wordpress.com/136/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/unlockdata.wordpress.com/136/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/unlockdata.wordpress.com/136/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/unlockdata.wordpress.com/136/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/unlockdata.wordpress.com/136/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/unlockdata.wordpress.com/136/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/unlockdata.wordpress.com/136/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/unlockdata.wordpress.com/136/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/unlockdata.wordpress.com/136/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/unlockdata.wordpress.com/136/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/unlockdata.wordpress.com/136/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/unlockdata.wordpress.com/136/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unlockdata.wordpress.com&amp;blog=10290434&amp;post=136&amp;subd=unlockdata&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://unlockdata.wordpress.com/2010/03/05/dev8d-jisc-developer-days/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
		<georss:point>55.943593 -3.181962</georss:point>
		<geo:lat>55.943593</geo:lat>
		<geo:long>-3.181962</geo:long>
		<media:content url="http://0.gravatar.com/avatar/ad7559c147ce3d26503b44cdfc26710b?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">joevernon</media:title>
		</media:content>
	</item>
		<item>
		<title>The Edinburgh Geoparser and the Stormont Hansards</title>
		<link>http://unlockdata.wordpress.com/2010/03/04/the-edinburgh-geoparser-and-the-stormont-hansards/</link>
		<comments>http://unlockdata.wordpress.com/2010/03/04/the-edinburgh-geoparser-and-the-stormont-hansards/#comments</comments>
		<pubDate>Thu, 04 Mar 2010 12:12:49 +0000</pubDate>
		<dc:creator>Jo Walsh</dc:creator>
				<category><![CDATA[Historic]]></category>

		<guid isPermaLink="false">http://unlockdata.wordpress.com/?p=117</guid>
		<description><![CDATA[Stuart Dunn (of the Centre for e-Research at Kings College London) organised a stimulating workshop on the Edinburgh Geoparser. We discussed the work done extracting and mapping location references in several recently digitised archives (including the Stormont Papers, debates from the Stormont Parliament which ran in Northern Ireland from 1921 to 1972.) Paul Ell talked [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unlockdata.wordpress.com&amp;blog=10290434&amp;post=117&amp;subd=unlockdata&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Stuart Dunn (of the <a href="http://www.kcl.ac.uk/iss/cerch">Centre for e-Research</a> at Kings College London) organised a stimulating workshop on the Edinburgh Geoparser. We discussed the work done extracting and mapping location references in several recently digitised archives (including the Stormont Papers, debates from the <a href="http://en.wikipedia.org/wiki/Parliament_of_Northern_Ireland">Stormont Parliament</a> which ran in Northern Ireland from 1921 to 1972.)</p>
<p>Paul Ell talked about the role of the <a href="http://www.qub.ac.uk/research-centres/CentreforDataDigitisationandAnalysis/">Centre for Digitisation and Data Analysis</a> in Belfast in accelerating the &#8220;digital deluge&#8221; &#8211; over the last 3 or 4 years they have seen a dramatic decrease in digitisation cost, accompanied by an increase in quality and verifiability of the results. </p>
<p>However, as Paul commented later in the day, research funding invested in &#8220;<strong>development of digital resources has not followed through with a step change in scholarship</strong>&#8221;. So the work by the Language Technology Group in the Edinburgh geoparser, and other research groups such as the <a href="http://www.nactem.ac.uk/">National Centre for Text Mining</a> in Manchester, becomes essential to &#8220;interrogate [digital archives] in different ways&#8221;, including spatially.</p>
<p>&#8220;<strong>Changing an image into knowledge</strong>&#8221;, and translating an image into a machine-readable text is only the beginning of this process.</p>
<p>There was mention of a Westminster-funded project to digitise and extract reference data from historic Hansards (parliamentary proceedings) &#8211; it would be a kind of &#8220;They Worked For You&#8221;. I found this <a href="http://hansard.millbanksystems.com/">prototype site which looks inactive</a> and the <a href="http://www.hansard-archive.parliament.uk/">source data from the Hansard archives</a> &#8211; perhaps this is a new effort at exploiting the data-richness in the archives.</p>
<p>The place search service used was GeoCrossWalk, the predecessor to <a href="http://unlock.edina.ac.uk/places.html">Unlock Places</a>. The Edinburgh Geoparser, written by the <a href="http://www.ltg.ed.ac.uk/">Language Technology Group</a> in the School of Informatics, sits behind the <a href="http://unlock.edina.ac.uk/text.html">Unlock Text</a> geo-text-mining service, which uses the Places service to search for places across gazetteers.</p>
<p>Claire Grover spoke about LTG&#8217;s work on <em>event extraction</em>, making it clear that the geoparser does a subset of what LTG&#8217;s full toolset is capable of. LTG has some work in development extracting events from textual metadata associated with news imagery in the <a href="http://newsfilm.bufvc.ac.uk/">NewsFilmOnline</a> archive. </p>
<p>This includes some automated parsing of relative time expressions, like &#8220;last Tuesday&#8221;, &#8220;next year&#8221;, grounding events against a timeline and connecting them with action words in the text. I&#8217;m really looking forward to seeing the results of this &#8211; mostly because &#8220;Unlock Time&#8221; will be a great name for an online service.</p>
<p>The big takeaway for me was the idea of searching and linking value implicit in the non-narrative parts of digitised works &#8211; indexes, footnotes, lists of participants, tables of statistics. If the OCR techniques are smart enough to (mostly) automatically drop this reference data into spreadsheets, without much more effort it can become Linked Data, pointing back to passages in the text at paragraph or sentence level.</p>
<p>At several points during the workshop there were pleas for more historical gazetteer of placename and location information, available for re-use outside a pure research context (such as enriching the archives of the Northern Irish assembly). Claire raised the intriguing possibility of generating names for a gazetteer, or placename authority files, automatically as a result of the geo-text-parsing process &#8211; &#8220;the authority file is in effect derived from the sources&#8221;.</p>
<p>At this point the idea of a gazetteer goes back beyond simply place references, to include references to people, to concepts, and to events. One could begin to call this an ontology, but for some that has a <a href="http://en.wikipedia.org/wiki/Web_Ontology_Language">very specific technical meaning</a>.</p>
<p>The closing session discussed research challenges, including the challenge of getting support for further work. On the one hand we have scholarly infrastructure, on the other scholarly applications. There is a breadth of disciplines that can benefit from infrastructure, but they need applications; applications may be developed for small research niches, but have as yet unknown benefit for researchers looking at the same places or times in different ways.</p>
<p>Links:<br />
<a href="http://www.jisc.ac.uk/media/documents/programmes/sharedservices/geoxwalk_embedding_report_final.pdf">Embedding GeoCrossWalk final report (PDF)</a><br />
</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/unlockdata.wordpress.com/117/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/unlockdata.wordpress.com/117/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/unlockdata.wordpress.com/117/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/unlockdata.wordpress.com/117/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/unlockdata.wordpress.com/117/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/unlockdata.wordpress.com/117/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/unlockdata.wordpress.com/117/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/unlockdata.wordpress.com/117/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/unlockdata.wordpress.com/117/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/unlockdata.wordpress.com/117/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/unlockdata.wordpress.com/117/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/unlockdata.wordpress.com/117/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/unlockdata.wordpress.com/117/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/unlockdata.wordpress.com/117/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unlockdata.wordpress.com&amp;blog=10290434&amp;post=117&amp;subd=unlockdata&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://unlockdata.wordpress.com/2010/03/04/the-edinburgh-geoparser-and-the-stormont-hansards/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/6510bf72cbae293e2d25fc17b2a75e15?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">unlockjo</media:title>
		</media:content>
	</item>
	</channel>
</rss>
