

// Clip definitions: how a captured web page (Web_Page) or one section of a
// page (Web_Section) is extracted and sanitized before it is displayed.
//
// The methods depend on helpers that are defined elsewhere in the application
// and are not visible in this file:
//   - String extensions: DeleteTag, DeleteR, strip, Tag, BTA, BT, TagByNum,
//     arrayByTag, IsContains, Match, Range, count
//   - globals: Tag, EndTag, getEntire, HTML, WEBBROWSER, $r, o, fa
// Their exact semantics are assumed from the original inline comments.
var clipper = {

	Web_Page: {

		// Format (sanitize) a whole web page so it can be embedded in the host document.
		//		h:String - HTML source of the page
		// Returns the cleaned HTML string.
		$fmt: function(h) {
			return h
				// Eliminate ALL null characters. A string pattern would only replace the
				// first one, and the legacy "\00" octal escape is illegal in strict mode.
				.replace(/\x00/g,"")
				.DeleteTag("SCRIPT")							// Remove SCRIPT TAG contents
				.DeleteTag("HEAD")								// Remove HEAD TAG contents
				.DeleteTag("STYLE")								// Remove STYLE TAG contents
				.DeleteR("<!--","-->")							// Remove HTML comments
				.strip(Tag("FONT|HTML"),EndTag("FONT|HTML"))	// Remove some of the formatting
				.replace(/([\s'"])on\w+\s*=/gi,"$1onevent=")	// Disable inline JavaScript
				// Disable IDs so they don't conflict with namespace. Only the real
				// "id" attribute is renamed; attributes such as "idx" are left alone.
				.replace(/\sid\s*=/gi," id_=")
				.replace(/<body/gi,"<DIV")						// Change BODY to a DIV
				.replace(/<\/body/gi,"</DIV")					// Change BODY to a DIV
				// Assign custom onsubmit handler. Done after inline handlers were disabled
				// above so it survives; matches "<form" followed by any whitespace or ">".
				.replace(/<form([\s>])/gi,"<form onsubmit='EVENT.onsubmit(this)'$1")
				;
		},

		// Return the FRAMESET portion of the loaded page with its references expanded,
		// or "" when the page has no frameset.
		$frameset: function() {
			var h = getEntire()
				.DeleteTag("SCRIPT")							// Remove SCRIPT TAG contents
				.DeleteTag("STYLE")								// Remove STYLE TAG contents
				.DeleteR("<!--","-->")							// Remove HTML comments
				.Tag("FRAMESET");
			if (!h) return "";

			// In case the actual URL loaded was different from the original request
			var u = getEntire("$url2");

			// Convert to explicit URL references so that the src attributes will be absolute references
			return HTML.expandRefs(h,u);
		},

		// Individual frames from the frameset.
		// NOTE(review): each entry presumably copies the "Web_Frame" definition (not in
		// this file) with $n as the frame index - confirm against the framework.
		$f0: {_Copye: "Web_Frame",$n: 0},
		$f1: {_Copye: "Web_Frame",$n: 1},
		$f2: {_Copye: "Web_Frame",$n: 2},
		$f3: {_Copye: "Web_Frame",$n: 3},
		$f4: {_Copye: "Web_Frame",$n: 4},
		$f5: {_Copye: "Web_Frame",$n: 5},
		$f6: {_Copye: "Web_Frame",$n: 6},
		$f7: {_Copye: "Web_Frame",$n: 7},
		$f8: {_Copye: "Web_Frame",$n: 8},
		$f9: {_Copye: "Web_Frame",$n: 9}
	},

	Web_Section: {

		// Settings of the dialog used to pick the section to clip
		_listing: {
			url: "../dialog/input_custom.htm",
			name:"剪辑",
			dim:{h:620,w:800},
			input: {
				//js: (GLOBAL.ver < .3 ? null : "web_section" )
			}
		},
		_noCacheInputs: true,
		_multiInputs: true,
		_TimeOut: 10,

		_Name: "网页剪辑",

		// Input properties
		$Obj: {_inputType:5, _Name:" "},

		// Extraction function
		// Returns an array of choice objects {x:<extraction descriptor>, h:<formatted HTML>}
		//		html:String - HTML source
		//		url:String - URL that loaded the page
		//		fmt:String - Format to use for results ("BODY", "A", "FORM", anything else = tables)
		// Sections no longer than half of the page are listed first, longer ones last.
		$choices: function(html,url,fmt) {
			// Pre-strip portions of the page
			var h = this.$strip(html,url);

			// Start with an empty array
			var a = [];

			// Template for individual extraction items
			var obj = {x:{},h:""};
			var x = obj.x;

			// Remember the source URL for all items
			x.url = url;

			if (fmt == "BODY") {
				// need this condition to fix bug 2189
				if (h) {
					x.xf = "Web_Page";
					obj.h = clipper.Web_Page.$fmt(h);				// Use standard web page function
					a[0] = obj;
				}
				return a;
			}

			if (fmt == "A") x.xa = "A";

			// Matching pattern; its first capture group becomes the id of a section
			var match = null;
			if (fmt == "FORM") {
				x.xf = "FORM";
				match = /\saction\s*=\s*'?"?([^'"\s>]+)/i;
				a = h.BTA(match,"FORM");
			} else if (url.IsContains("my.yahoo.com")) {
				x.xf = "yahoo_my";
				match = /&\.m=(\w+)&/;
				a = h.BTA(match,"TABLE",1,2);
			} else if (url.IsContains("www.excite.com")) {
				x.xf = "excite_my";
				match = /\/mesp\/change\/excite\/(x\d\/\w+)/;
				a = h.BTA(match,"TABLE",2);
			} else if (url.IsContains("www.msn.com")) {
				x.xf = "msn_my";
				match = /delCM\.asp\?CAT=(\d+)/;
				a = h.BTA(match,"TABLE",1,2);
			} else if (url.IsContains("my.aol.com")) {
				x.xf = "aol_my";
				match = /map>\s*(\w[\w\s]+)</i;		// Match the section title
				a = h.BTA(match,"TABLE");
			} else {
				x.xf = "TagByNum";
				x.TAG = "TABLE";
				match = />\s*(\w[^<]+)\s*</;
				a = h.arrayByTag("TABLE");
			}

			var a1 = [];		// Array of short sections
			var a2 = [];		// Array of long sections
			// (A third bucket for one-row tables used to exist, but the filter that
			// filled it was disabled, so it was always empty and has been dropped.)

			var big = Math.round(h.length / 2);

			// Creates objects corresponding to each choice
			if (a) {
				for (var i=0; i < a.length; i++) {
					var r = {};											// Start with empty object
					o.applyPropsHier(r,obj);							// Copy standard values
					r.x.seq = i;										// Remember sequence value

					var h2 = a[i];
					if (match) {
						r.x.id = h2.Match(match,1,1);
						r.x.n = h2.Range(Tag.TABLE,match).count(Tag.TABLE) + 1;
						r.x.len = h2.length;
					}

					if (h2) {
						r.h = this.$fmt(h2,r.x);						// Format the value
						var an = (h2.length > big) ? a2 : a1;
						an[an.length] = r;
					}
				}
			}

			// Short sections first, long sections after them
			return a1.concat(a2);
		},

		// Pre-strip advertisements or other content that should be ignored
		//		h:String - HTML source
		//		url:String - URL of the page (currently unused)
		$strip: function(h,url) {
			h = h
				.replace(/\x00/g,"")			// Eliminate ALL null characters (see Web_Page.$fmt)
				.DeleteTag("HEAD")				// Remove HEAD TAG contents
				.DeleteTag("SCRIPT")			// Remove SCRIPT TAG contents
				.DeleteR("<!--","-->")		// Remove HTML comments
				;
			return h;
		},

		// Extraction functions
		// Re-extracts a previously chosen section from freshly loaded HTML.
		//		html:String - HTML source
		//		x:Object - extraction descriptor built by $choices (xf, id, n, len, TAG, seq)
		// Returns the extracted HTML, "" for an unknown extraction function, or "no"
		// when the tag search returns undefined.
		// Side effects: x.id is sanitized in place and x.TAG defaults to "FORM" for forms.
		$xf: function(html,x) {

			// Pick the candidate whose length is closest to the originally clipped length
			function closest(list,len) {
				var best = list[0];
				if (list.length == 1) return best;
				var bestDiff = Math.abs(len - best.length);
				for (var i=1; i<list.length; i++) {
					var diff = Math.abs(len - list[i].length);
					if (diff < bestDiff) {
						bestDiff = diff;
						best = list[i];
					}
				}
				return best;
			}

			if (x.id) {
				x.id = x.id.replace(/[^\w]/g,".");		// Make sure the ID does not contain characters that cause the expression to fail on IE4 (e.g. char(10))
			}

			html = this.$strip(html);	// Strip certain content from the page

			var xn = (typeof x.n != "undefined") ? x.n : 1;
			var a;

			switch (x.xf) {				// Check which extraction function was specified
			case "TABLE":
			case "TagByNum":
				if (x.id) {
					a = html.BTA(new RegExp(">\\s*" + x.id + "\\s*<"),"TABLE",xn);
					if (typeof a == "undefined") return "no";
					// null or empty result: fall back to positional lookup below
					if (a && a.length > 0) return closest(a,x.len);
				}
				return html.TagByNum(x.TAG,x.seq);

			case "FORM":				// Find the closest match to the original action
				if (x.id) {
					a = html.BTA(new RegExp("\\saction\\s*=\\s*'?\"?" + x.id,"i"),"FORM",xn);
					if (typeof a == "undefined") return "no";
					// null or empty result: fall back to positional lookup below
					if (a && a.length > 0) return closest(a,x.len);
				}
				if (typeof x.TAG == 'undefined') {
					x.TAG = "FORM";
				}
				return html.TagByNum(x.TAG,x.seq);

			case "Web_Page":
				return clipper.Web_Page.$fmt(html);

			case "yahoo_my":
				return html.BT("&.m=" + x.id + "&","TABLE",1,2);
			case "msn_my":
				return html.BT("delCM.asp?CAT=" + x.id,"TABLE",1,2);
			case "aol_my":
				return html.BT(new RegExp("map>\\s*" + x.id + "<","i"),"TABLE");
			case "excite_my":
				return html.BT("/mesp/change/excite/" + x.id,"TABLE",2);
			}
			return "";					// In case of invalid extraction function
		},

		// Formatting functions
		//		html:String - HTML source to format
		//		x:Object - extraction descriptor; x.xa == "A" selects link-list formatting
		//		bOut:Boolean - flag that result is for output (vs. selection in chooser)
		$fmt: function(html,x,bOut) {

			if (!html) return "";

			// Tolerate a missing descriptor: fall back to the default formatting
			var xa = (x && x.xa) ? x.xa : "";

			switch (xa) {

			case "A":
				html = html
					.arrayByTag("A")					// Extract the A TAG contents
					.apply(fa.clean.A)			// Clean the A tags
					.deleteDup(1)					// Eliminate duplicates
					.clean()						// Eliminate blank entries
					;
				if (bOut) {						// Check if it is being displayed within a section ...
					html = html.toUL();				// Display as bullets with More... indicator
				} else {						// ... or in the element chooser
					html = html.toList();				// Display as a list
				}
				break;

			default:
				// Clean up the HTML contents
				html = html
					.strip(Tag("FONT"),EndTag("FONT"))					// Remove some of the formatting
					.replace(/\sbgcolor\s*=/gi," bgcolorx=")			// Disable background color
					.replace(/([\s'"])on\w+\s*=/gi,"$1onevent=")			// Disable inline JavaScript
					;
				break;

			}
			return html;
		},

		// Return the source URL stored in the saved clip descriptor ("/$Obj"), or ""
		$url: function() {
			var s = getEntire("/$Obj");
			if (!s) return "";

			s = s.replace(/\\054/g,",");				// Restore commas that have been escaped
			s = s.replace(/\\073/g,";");				// Restore semicolons that have been escaped

			// NOTE(review): this evaluates stored text as code; make sure "/$Obj"
			// can only ever contain data written by the application itself.
			var obj = WEBBROWSER.eval(s);						// Convert to an object

			// Make sure the object could be evaluated
			if (!obj || !obj.url) return "";

			return obj.url;
		},

		// Output properties
		// _Function renders the clipped section: a prompt when nothing has been clipped
		// yet, the extracted HTML otherwise, or a link to the source when nothing matches.
		_DefaultV: {_Title:" ", _Function:function() {

			// Escape text for inclusion in HTML markup and quoted attributes
			function esc(s) {
				return String(s)
					.replace(/&/g,"&amp;")
					.replace(/</g,"&lt;")
					.replace(/>/g,"&gt;")
					.replace(/"/g,"&quot;")
					.replace(/'/g,"&#39;");
			}

			var s = getEntire("/$Obj");
			if (!s) return "<div nowrap>点击 “剪辑”可把从网页中剪辑的内容粘贴到此视窗中。</div>";

			s = s.replace(/\\054/g,",");				// Restore commas that have been escaped
			s = s.replace(/\\073/g,";");				// Restore semicolons that have been escaped

			// NOTE(review): this evaluates stored text as code; make sure "/$Obj"
			// can only ever contain data written by the application itself.
			var x = WEBBROWSER.eval(s);							// Convert to an object

			// Make sure the object could be evaluated
			if (!x) return "";

			var h = getEntire("_r");
			if (!h) return "";

			// Extract the content
			h = $r("Web_Section").$xf(h,x);

			// Format the content
			h = $r("Web_Section").$fmt(h,x,true);

			// Convert to explicit URL references
			h = HTML.expandRefs(h,getEntire("$baseURL"));

			// Format the results
			if (h) return h;

			// Nothing could be extracted: link to the source page instead. The URL is
			// escaped because it ends up inside a quoted attribute and in text.
			var safeUrl = esc(x.url);
			return "<div class='sUrl'>Source: <a href='" + safeUrl + "'>" + safeUrl + "</a></div>";

		}}
	}
};
