<?xml version="1.0" encoding="utf-8" ?>
<project>
	<!-- Free-text description of what this transform project is for. -->
	<comment>This is the transform for article pages</comment>
	<!-- Verbose output is switched off for this project. -->
	<verbose>false</verbose>
	
	<!--
	Debug settings. Debugging is currently disabled (enabled is false).
	NOTE(review): displayScript, displayContent and logFile presumably only take
	effect once enabled is set to true; confirm against the tool.
	-->
	<debug>
		<enabled>false</enabled>
		<displayScript>true</displayScript>
		<displayContent>true</displayContent>
		<logFile>output.log</logFile>
	</debug>

  <!--
  Source files: every *.htm file found recursively under the given path,
  narrowed by the exclude and includeDepth filters explained inline below.
  -->
  <input>
    <path>C:\developer\afloatBC\AFLOATMagazine\AFLOATMagazine</path>
    <pattern>*.htm</pattern>
    <recursive>true</recursive>
    <exclude>Register</exclude> <!-- Exclude any directory with Register in the name -->
    <includeDepth>5</includeDepth> <!-- We only want files which occur 5 folders deep. -->
  </input>

  <!--
  Destination folder for the transformed pages.
  NOTE(review): clean presumably empties the folder before a run and flatten
  presumably writes every file into this single folder; confirm against the tool.
  NOTE(review): renameRules is a comma-separated rule list. As written it appears
  to act on names starting with "_tabid" (drop) and to turn an "html" extension
  into "htm"; confirm the exact rule semantics before editing.
  -->
  <output>
    <path>C:\developer\afloatBC\AFLOATMagazine\ArticleOutput</path>
    <clean>true</clean>
    <flatten>true</flatten>
    <renameRules>@drop:startsWith=_tabid,@extension:match=html:replaceWith=htm</renameRules>
  </output>


	<!--
	This is the work here!
	-->
  <transformations>
	
		<!--
		Read the article template file into the "template" label. It is kept
		aside here and filled in with the extracted content further down.
		-->
		<inject>
			<toLabel>template</toLabel>
			<filename>C:\developer\afloatBC\article_template.html</filename>
		</inject>

		<!--
		Hard work goes here looking for patterns in the content and using that as the 
		basis for our rules of extraction. We look for patterns and then store the result
		in labels which we then use to apply in to the template container.
		
		I have created 'tags' in the template and named my labels here exactly the same to 
		make sure there is no confusion where this content is being inserted in to the templates.
		-->

		<!--
		Grab the links for the month: capture the block that begins at the
		menu list opening tag and runs to the next closing ul tag, and keep
		it in the "monthLinks" label.
		-->
		<store>
			<toLabel>monthLinks</toLabel>
			<startsWith><![CDATA[<ul id="menu">]]></startsWith>
			<endsWith><![CDATA[</ul>]]></endsWith>
		</store>

		<!--
		Cover image: capture the text that starts at the magazine assets path
		and ends at the cover file name, and keep it in the "coverSmallImg" label.
		-->
		<store>
			<toLabel>coverSmallImg</toLabel>
			<startsWith><![CDATA[/a/Portals/0/assets/MAGAZINE]]></startsWith>
			<endsWith><![CDATA[AFLOAT-cover.jpg]]></endsWith>
		</store>

		<!--
		Front cover description: capture the text that follows the
		"Front cover:" marker up to the next closing div tag.
		NOTE(review): skipEnd presumably keeps the closing div fragment itself
		out of the stored text; confirm against the tool.
		-->
		<store>
			<toLabel>frontCoverDescription</toLabel>
			<startsWith><![CDATA[Front cover:&nbsp;]]></startsWith>
			<endsWith><![CDATA[</div]]></endsWith>
			<skipEnd><![CDATA[</div]]></skipEnd>
		</store>

		<!--
		Remove every remaining tag from the description: each block from an
		opening angle bracket to the next closing angle bracket is deleted,
		and the result is written back to the same label.
		-->
		<delete>
			<fromLabel>frontCoverDescription</fromLabel>
			<toLabel>frontCoverDescription</toLabel>
			<startsWith><![CDATA[<]]></startsWith>
			<endsWith><![CDATA[>]]></endsWith>
		</delete>

		<!--
		Article content (and associated content clean up).
		Note: it is important to understand HOW the delete and replace
		commands work. If you use the startsWith and endsWith, it will
		look for the text in the startsWith, and the NEXT INSTANCE of the
		endsWith defines the block of text being manipulated. This is REALLY
		powerful and is the basis for how we process the cleanup.

		This first step captures everything from the main content container
		div up to the bottom-left content container div into "articleText".
		-->
		<store>
			<toLabel>articleText</toLabel>
			<startsWith><![CDATA[<div id="mainContentContainer">]]></startsWith>
			<endsWith><![CDATA[<div id="bottomLeftContentContainer">]]></endsWith>
		</store>
		
		<!-- Remove every opening div tag (with its attributes) from the article text. -->
		<delete>
			<fromLabel>articleText</fromLabel>
			<toLabel>articleText</toLabel>
			<startsWith><![CDATA[<div]]></startsWith>
			<endsWith><![CDATA[>]]></endsWith>
		</delete>

		<!-- Remove every opening span tag (with its attributes) from the article text. -->
		<delete>
			<fromLabel>articleText</fromLabel>
			<toLabel>articleText</toLabel>
			<startsWith><![CDATA[<span]]></startsWith>
			<endsWith><![CDATA[>]]></endsWith>
		</delete>
		
		<!--
		Strip HTML comments from the article text. The deleted block runs from
		the full comment opener to the full comment terminator, so a comment
		whose body contains a ">" (commented-out markup, conditional comments)
		is removed whole rather than being cut at the first ">" and leaving
		its tail behind in the article.
		-->
		<delete>
			<fromLabel>articleText</fromLabel>
			<toLabel>articleText</toLabel>
			<startsWith><![CDATA[<!--]]></startsWith>
			<endsWith><![CDATA[-->]]></endsWith>
		</delete>

		<!-- Remove every closing span tag from the article text. -->
		<delete>
			<fromLabel>articleText</fromLabel>
			<toLabel>articleText</toLabel>
			<startsWith><![CDATA[</span]]></startsWith>
			<endsWith><![CDATA[>]]></endsWith>
		</delete>
		
		<!-- Remove every closing div tag from the article text. -->
		<delete>
			<fromLabel>articleText</fromLabel>
			<toLabel>articleText</toLabel>
			<startsWith><![CDATA[</div]]></startsWith>
			<endsWith><![CDATA[>]]></endsWith>
		</delete>
		
		<!--
		Retro article content. Some pages carry their content in an iframe
		(the sample below points at a PDF). Capture the whole iframe element,
		from its opening tag to its closing tag, into the "iframe" label.
		Sample of the markup being matched:
		<iframe id="dnn_ctr859_IFrame_IFrame_htmIFrame" src="../../../../../../../Portals/0/assets/MAGAZINE/2002/0802/0802_jc_Balmain_coalmine.pdf"  webstripperwas="/a/Portals/0/assets/MAGAZINE/2002/0802/0802_jc_Balmain_coalmine.pdf" frameborder="no" width="790" allowtransparency="False" name="AFLOAT August 2002 Jack Clark - Sydney's Forgotten Coal Mine" scrolling="auto" height="1095">Your browser does not support inline frames</iframe>
		-->
		<store>
			<toLabel>iframe</toLabel>
			<startsWith><![CDATA[<iframe]]></startsWith>
			<endsWith><![CDATA[</iframe>]]></endsWith>
		</store>

		<!--
		Here we grab the stuff we stashed away and start putting it
		all into the template. The ${thing} refers to the placeholder text
		we have inserted in the template, and replaceWithLabel grabs the
		content we stashed earlier from the original page. Every step reads
		the "template" label and writes the result back to it.
		-->
		<!-- ${monthLinks} placeholder receives the stored month link list. -->
		<replace>
			<fromLabel>template</fromLabel>
			<toLabel>template</toLabel>
			<match>${monthLinks}</match>
			<replaceWithLabel>monthLinks</replaceWithLabel>
		</replace>
		
		<!-- ${coverSmallImg} placeholder receives the stored cover image path. -->
		<replace>
			<fromLabel>template</fromLabel>
			<toLabel>template</toLabel>
			<match>${coverSmallImg}</match>
			<replaceWithLabel>coverSmallImg</replaceWithLabel>
		</replace>

		<!-- ${frontCoverDescription} placeholder receives the tag-free cover description. -->
		<replace>
			<fromLabel>template</fromLabel>
			<toLabel>template</toLabel>
			<match>${frontCoverDescription}</match>
			<replaceWithLabel>frontCoverDescription</replaceWithLabel>
		</replace>

		<!-- ${articleText} placeholder receives the cleaned article body. -->
		<replace>
			<fromLabel>template</fromLabel>
			<toLabel>template</toLabel>
			<match>${articleText}</match>
			<replaceWithLabel>articleText</replaceWithLabel>
		</replace>
		
		<!-- ${iframe} placeholder receives the stored iframe element. -->
		<replace>
			<fromLabel>template</fromLabel>
			<toLabel>template</toLabel>
			<match>${iframe}</match>
			<replaceWithLabel>iframe</replaceWithLabel>
		</replace>

		<!--
		When we are done with the template we put the finished template
		into the main content pipeline and start cleaning it up. The
		commands after this point carry no labels and work on that
		loaded content.
		-->
		<load>
			<fromLabel>template</fromLabel>	
		</load>
  	
  	<!-- 
  	Global fixes to the final page should go here. Things like image references which
  	could be screwed up everywhere. Notice that we use the CDATA sections only where
  	we need them due to the funny HTML characters we are trying to match.
  	
  	Also note that we are not referring to labels in these commands because we are 
  	working with the final output.
  	 -->
    <!--
    Relative link repair: each block that starts at ="../ and runs to the next
    webstripperwas=" is collapsed to =" so that the value which followed
    webstripperwas (the original site-absolute URL) becomes the attribute value.
    -->
    <replace>
      <startsWith><![CDATA[="../]]></startsWith>
      <endsWith><![CDATA[webstripperwas="]]></endsWith>
      <replaceWith><![CDATA[="]]></replaceWith>
    </replace>
    
    <!-- Image tidy: move asset references from the old portal path to the new image folder. -->
    <replace>
      <match>/a/Portals/0/assets/</match>
      <replaceWith>/images/magazine-articles/</replaceWith>
    </replace>
    
    <!--
    URL tidy: point links at the preview host to the live host instead.
    The replacement keeps the trailing slash that the match consumes, so the
    path stays separated from the host name (without it "a/HOME/..." would be
    glued straight onto "afloat.com.au" and the archive rules that look for
    "http://www.afloat.com.au/a/HOME/AFLOATMagazine" could not match).
    -->
    <replace>
      <match>http://preview.afloat.com.au/</match>
      <replaceWith>http://www.afloat.com.au/</replaceWith>
    </replace>
		
		<!--
		Page link tidy: every block that starts at "/tabid" and runs to the
		next ".aspx" is swapped for ".htm".
		-->
		<replace>
			<startsWith>/tabid</startsWith>
			<endsWith>.aspx</endsWith>
			<replaceWith>.htm</replaceWith>
		</replace>

		<!--
		Where a link was rewritten to href="Default.html" followed by a
		webstripperwas attribute, drop the Default.html target and the attribute
		name so the value that followed webstripperwas becomes the href.
		Matching ignores letter case.
		-->
		<replace>
			<match>href="Default.html"  webstripperwas="</match>
			<replaceWith>href="</replaceWith>
			<ignoreCase>true</ignoreCase>
		</replace>

		<!--
		Flatten archive links: inside every block that starts with the
		AFLOATMagazine URL prefix and ends at the next ".htm", turn each "/"
		into "_".
		NOTE(review): skipStart and skipEnd presumably keep the URL prefix and
		the ".htm" suffix themselves out of the substitution; confirm against
		the tool.
		-->
		<replace>
			<startsWith>http://www.afloat.com.au/a/HOME/AFLOATMagazine</startsWith>
			<endsWith>.htm</endsWith>
			<skipStart>http://www.afloat.com.au/a/HOME/AFLOATMagazine</skipStart>
			<skipEnd>.htm</skipEnd>
			<match>/</match>
			<replaceWith>_</replaceWith>
		</replace>

		<!-- Swap the absolute AFLOATMagazine URL prefix for the new site-relative archive folder. -->
		<replace>
			<match>http://www.afloat.com.au/a/HOME/AFLOATMagazine</match>
			<replaceWith>/afloat-magazine/archive/</replaceWith>
		</replace>
		
		<!-- Drop an underscore that sits directly after the archive folder prefix. -->
		<replace>
			<match>/afloat-magazine/archive/_</match>
			<replaceWith>/afloat-magazine/archive/</replaceWith>
		</replace>
		
		<!-- Remove each DESIGNTIMEURL attribute, from its name through the closing quote of its value. -->
		<delete>
			<startsWith>DESIGNTIMEURL="</startsWith>
			<endsWith>"</endsWith>
		</delete>

				    
  </transformations>
</project>
