diff -r -u ./Makefile Makefile
--- ./Makefile	2005-01-14 18:27:00.000000000 +0100
+++ Makefile	2006-06-29 13:05:26.000000000 +0200
@@ -1,4 +1,4 @@
-VERSION=	1.4.4
+VERSION=	1.4.8
 
 CHAR_SIZE=16
 
diff -r -u ./ctype16.h ctype16.h
--- ./ctype16.h	2004-03-17 16:53:21.000000000 +0100
+++ ctype16.h	2005-11-04 15:53:06.000000000 +0100
@@ -1,7 +1,7 @@
 #ifndef CTYPE16_H
 #define CTYPE16_H
 
-#ifndef FOR_LT
+#ifndef STD_API
 #define STD_API
 #endif
 
diff -r -u ./dtd.c dtd.c
--- ./dtd.c	2004-11-02 19:20:20.000000000 +0100
+++ dtd.c	2006-05-30 17:11:06.000000000 +0200
@@ -469,6 +469,7 @@
     e->xml_space_attribute = 0;
     e->xml_lang_attribute = 0;
     e->xml_id_attribute = 0;
+    e->xml_base_attribute = 0;
     e->notation_attribute = 0;
     e->cached_nsdef = 0;
     e->is_externally_declared = 0;
@@ -635,6 +636,7 @@
     static Char xml_space[] = {'x','m','l',':','s','p','a','c','e',0}; 
     static Char xml_lang[] = {'x','m','l',':','l','a','n','g',0};
     static Char xml_id[] = {'x','m','l',':','i','d',0};
+    static Char xml_base[] = {'x','m','l',':','b','a','s','e',0};
     static Char xmlns[] = {'x','m','l','n','s',0};
     Char *t;
 
@@ -695,6 +697,8 @@
 	element->xml_lang_attribute = a;
     else if(Strcmp(name, xml_id) == 0)
 	element->xml_id_attribute = a;
+    else if(Strcmp(name, xml_base) == 0)
+	element->xml_base_attribute = a;
 
     a->cached_nsdef = 0;
 
diff -r -u ./dtd.h dtd.h
--- ./dtd.h	2004-11-02 18:46:07.000000000 +0100
+++ dtd.h	2006-05-30 17:09:37.000000000 +0200
@@ -147,6 +147,7 @@
     AttributeDefinition xml_space_attribute; /* xml:space attribute, if it has one */
     AttributeDefinition xml_lang_attribute; /* xml:lang attribute, if it has one */
     AttributeDefinition xml_id_attribute; /* xml:id attribute, if it has one */
+    AttributeDefinition xml_base_attribute; /* xml:base attribute, if it has one */
     AttributeDefinition notation_attribute; /* NOTATION attribute, if it has one */
     NSElementDefinition cached_nsdef;
     const Char *prefix, *local;
diff -r -u ./input.c input.c
--- ./input.c	2005-01-14 17:10:00.000000000 +0100
+++ input.c	2005-10-05 13:44:46.000000000 +0200
@@ -740,7 +740,10 @@
 	/* There are never more characters than bytes in the input */
 	if(s->line_alloc < s->line_length + (s->insize - s->nextin))
 	{
-	    s->line_alloc = s->line_length + (s->insize - s->nextin);
+	    if(s->line_alloc == 0)
+		s->line_alloc = 1024;
+	    while(s->line_alloc < s->line_length + (s->insize - s->nextin))
+		s->line_alloc *= 2;
 	    s->line = Realloc(s->line, s->line_alloc * sizeof(Char));
 	}
 
diff -r -u ./rxp.1 rxp.1
--- ./rxp.1	2004-12-14 14:29:03.000000000 +0100
+++ rxp.1	2006-03-28 13:17:34.000000000 +0200
@@ -171,7 +171,8 @@
 set the initial mode for catalog processing; the default is \f3system\f1.
 
 If the variable
-.B RXPURL is set, it is used as the URL of the document to parse.  This may
+.B RXPURL
+is set, it is used as the URL of the document to parse.  This may
 be useful in CGI scripts and the like to avoid shell parsing of a
 user-supplied argument.
 
diff -r -u ./rxp.c rxp.c
--- ./rxp.c	2005-01-14 17:57:49.000000000 +0100
+++ rxp.c	2006-06-12 16:14:59.000000000 +0200
@@ -37,7 +37,7 @@
 void print_ns_attrs(NamespaceBinding ns, int count);
 void print_namespaces(NamespaceBinding ns);
 void print_attrs(ElementDefinition e, Attribute a);
-void print_text(Char *text);
+void print_text(Char *text, int is_attr);
 int printable(int c);
 void print_special(int c);
 void print_text_bit(Char *text);
@@ -51,7 +51,7 @@
     attr_defaults = 0, merge = 0, strict_xml = 0, tree = 0, validate = 0,
     xml_space = 0, namespaces = 0, simple_error = 0, experiment = 0,
     read_dtd = 0, unicode_check = 0, xml_id = 0;
-enum {o_unspec, o_none, o_bits, o_plain, o_can1, o_can2, o_can3, o_infoset, o_diff} output_format = o_unspec;
+enum {o_unspec, o_none, o_bits, o_plain, o_can1, o_can2, o_can3, o_infoset, o_diff, o_diff2} output_format = o_unspec;
 char *enc_name = 0, *base_uri = 0, *my_dtd_name, *my_dtd_sysid = 0;
 CharacterEncoding encoding = CE_unknown;
 InputSource source = 0;
@@ -140,6 +140,9 @@
 		case 'd':
 		    output_format = o_diff;
 		    break;
+		case 'D':
+		    output_format = o_diff2;
+		    break;
 		default:
 		    fprintf(stderr, "bad output format %s\n", argv[i]);
 		    return 1;
@@ -181,7 +184,7 @@
 		}
 		time_limit = atoi(argv[i]);
 #else
-		fprintf("-R not supported on this system\n");
+		fprintf(stderr,"-R not supported on this system\n");
 		return 1;
 #endif
 		break;
@@ -423,7 +426,7 @@
 	    return 1;
 	}
     }
-    else if(strict_xml)
+    else if(strict_xml || canonical_output)
 	encoding = CE_UTF_8;
     else
 	encoding = source->entity->encoding;
@@ -688,37 +691,58 @@
 	    print_ns_attrs(bit->ns_dict, bit->nsc);
 	    if(bit->type == XBIT_start)
 		Printf(">");
-	    else if(canonical_output)
+	    else if(canonical_output && output_format < o_diff)
+		Printf("></%S>", bit->element_definition->name);
+	    else if(output_format == o_diff)
 		Printf("></%S>", bit->element_definition->name);
+	    else if(output_format > o_diff)
+		Printf(">\n</%S>", bit->element_definition->name);
 	    else
 		Printf("/>");
+	    if(output_format == o_diff2)
+		Printf("\n");
 	    break;
 	case XBIT_end:
 	    Printf("</%S>", bit->element_definition->name);
+	    if(output_format == o_diff2)
+		Printf("\n");
 	    break;
 	case XBIT_pi:
 	    Printf("<?%S %S%s", 
 		   bit->pi_name, bit->pi_chars, nsgml ? ">" : "?>");
-	    if(p->state <= PS_prolog2 && !canonical_output)
+	    if((p->state <= PS_prolog2 && !canonical_output) ||
+	       output_format == o_diff2)
 		Printf("\n");
 	    break;
 	case XBIT_cdsect:
 	    if(canonical_output)
 		/* Print CDATA sections as plain PCDATA in canonical XML */
-		print_text(bit->cdsect_chars);
+		print_text(bit->cdsect_chars, 0);
 	    else
 		Printf("<![CDATA[%S]]>", bit->cdsect_chars);
+	    if(output_format == o_diff2)
+		Printf("\n");
 	    break;
 	case XBIT_pcdata:
-	    if(output_format != o_can3 || !bit->pcdata_ignorable_whitespace)
-		print_text(bit->pcdata_chars);
+	    if(output_format == o_diff2)
+	    {
+		if(bit->pcdata_chars[0] == '\n')
+		    /* we have already printed a linefeed */
+		    print_text(bit->pcdata_chars+1, 0);
+		else
+		    print_text(bit->pcdata_chars, 0);
+		if(bit->pcdata_chars[Strlen(bit->pcdata_chars) - 1] != '\n')
+		    Printf("\n");
+	    }
+	    else if(output_format != o_can3 || !bit->pcdata_ignorable_whitespace)
+		print_text(bit->pcdata_chars, 0);
 	    break;
 	case XBIT_comment:
 	    if(canonical_output)
 		/* no comments in canonical XML */
 		break;
 	    Printf("<!--%S-->", bit->comment_chars);
-	    if(p->state <= PS_prolog2)
+	    if(p->state <= PS_prolog2 || output_format == o_diff2)
 		Printf("\n");
 	    break;
 	default:
@@ -764,7 +788,7 @@
 		   aa[i]->definition->local);
 	else
 	    Printf(" %S=\"", aa[i]->definition->name);
-	print_text(aa[i]->value);
+	print_text(aa[i]->value, 1);
 	Printf("\"");
     }
 
@@ -801,7 +825,7 @@
     VectorPush(dtd_bits, copy);
 }
 	
-void print_text(Char *text)
+void print_text(Char *text, int is_attr)
 {
     Char *pc, *last;
     
@@ -816,15 +840,18 @@
 	int c = *pc, type = 0;
 	
 	if(c == '&' || c == '<' || c == '>' || c == '"' || 
+	   c == 0x0d ||
 	   (c > 127 && !printable(c)))
 	    type = 1;
-	else if(c == 9 || c == 10 || c == 13)
+	else if(c == 9 || c == 10)
 	    type = 2;
-	else if(c < 0x20 || (c >= 0x7f && c < 0xa0))
+	else if(c < 0x20 || (c >= 0x7f && c < 0xa0) ||
+		c == 0x85 || c == 0x2028)
 	    type = 3;
 
 	if(type == 1 ||
-	   (canonical_output && output_format != o_diff && type == 2) ||
+	   (canonical_output && output_format < o_diff && type == 2) ||
+	   (is_attr && type == 2) ||
 	   (xml_version > XV_1_0 && type == 3))
 	{
 	    if(pc > last)
diff -r -u ./stdio16.c stdio16.c
--- ./stdio16.c	2005-01-14 17:10:54.000000000 +0100
+++ stdio16.c	2005-10-04 17:47:23.000000000 +0200
@@ -55,6 +55,8 @@
 
 #define BufferSize 4096
 
+static char8 null = 0;
+
 typedef int ReadProc(FILE16 *file, unsigned char *buf, int max_count);
 typedef int WriteProc(FILE16 *file, const unsigned char *buf, int count);
 typedef int SeekProc(FILE16 *file, long offset, int ptrname);
@@ -106,6 +108,12 @@
 static int StringClose(FILE16 *file);
 static int StringFlush(FILE16 *file);
 
+static int MStringRead(FILE16 *file, unsigned char *buf, int max_count);
+static int MStringWrite(FILE16 *file, const unsigned char *buf, int count);
+static int MStringSeek(FILE16 *file, long offset, int ptrname);
+static int MStringClose(FILE16 *file);
+static int MStringFlush(FILE16 *file);
+
 #if defined(WIN32) && ! defined(__CYGWIN__)
 #ifdef SOCKETS_IMPLEMENTED
 static int WinsockRead(FILE16 *file, unsigned char *buf, int max_count);
@@ -1158,8 +1166,6 @@
 
 static int StringClose(FILE16 *file)
 {
-    static char8 null = 0;
-
     if(file->flags & FILE16_write)
 	ConvertASCII(&null, 1, file); /* null terminate */
 
@@ -1171,6 +1177,115 @@
 
 static int StringFlush(FILE16 *file)
 {
+    if(file->flags & FILE16_write)
+    {
+	/* null terminate, but leave position unchanged */
+	int save = file->handle2;
+	ConvertASCII(&null, 1, file);
+	file->handle2 = save;
+    }
+
+    return 0;
+}
+
+FILE16 *MakeStringFILE16(const char *type)
+{
+    FILE16 *file;
+
+    if(!(file = MakeFILE16(type)))
+	return 0;
+
+    file->read = MStringRead;
+    file->write = MStringWrite;
+    file->seek = MStringSeek;
+    file->close = MStringClose;
+    file->flush = MStringFlush;
+
+    file->handle = 0;
+    file->handle2 = 0;
+    file->handle3 = 0;
+
+    return file;
+}
+
+void *StringFILE16String(FILE16 *file)
+{
+    return file->handle;
+}
+
+int StringFILE16StringLength(FILE16 *file)
+{
+    return file->handle2;
+}
+
+static int MStringRead(FILE16 *file, unsigned char *buf, int max_count)
+{
+    return 0;
+}
+
+static int MStringWrite(FILE16 *file, const unsigned char *buf, int count)
+{
+    char *p;
+
+    if(file->handle2 + count > file->handle3)
+    {
+	int newsize = (file->handle3 == 0 ? 32 : file->handle3);
+	
+	while(file->handle2 + count > newsize)
+	    newsize *= 2;
+
+	file->handle = Realloc(file->handle, newsize);
+	if(!file->handle)
+	    return -1;
+	file->handle3 = newsize;
+    }
+
+    p = (char *)file->handle + file->handle2;
+    memcpy(p, buf, count);
+    file->handle2 += count;
+
+    return 0;
+}
+
+static int MStringSeek(FILE16 *file, long offset, int ptrname)
+{
+    switch(ptrname)
+    {
+    case SEEK_CUR:
+	offset = file->handle2 + offset;
+	break;
+    case SEEK_END:
+	if(file->handle3 < 0)
+	    return -1;
+	offset = file->handle3 + offset;
+	break;
+    }
+
+    if(file->handle3 >= 0 && offset > file->handle3)
+	return -1;
+
+    file->handle2 = offset;
+
+    return 0;
+}
+
+static int MStringClose(FILE16 *file)
+{
+    Free(file->handle);
+
+    return 0;
+}
+
+static int MStringFlush(FILE16 *file)
+{
+    if(file->flags & FILE16_write)
+    {
+	/* null terminate, but leave position unchanged */
+	int save = file->handle2;
+	ConvertASCII(&null, 1, file);
+	file->handle2 = save;
+    }
+
     return 0;
 }
 
diff -r -u ./stdio16.h stdio16.h
--- ./stdio16.h	2002-10-03 16:37:44.000000000 +0200
+++ stdio16.h	2005-10-04 17:46:45.000000000 +0200
@@ -15,6 +15,9 @@
 STD_API FILE16 *MakeFILE16FromFILE(FILE *f, const char *type);
 STD_API FILE16 *MakeFILE16FromFD(int fd, const char *type);
 STD_API FILE16 *MakeFILE16FromString(void *buf, long size, const char *type);
+STD_API FILE16 *MakeStringFILE16(const char *type);
+STD_API void *StringFILE16String(FILE16 *file);
+STD_API int StringFILE16StringLength(FILE16 *file);
 #ifdef WIN32
 #ifdef SOCKETS_IMPLEMENTED
 STD_API FILE16 *MakeFILE16FromWinsock(int sock, const char *type);
diff -r -u ./system.h system.h
--- ./system.h	2003-05-20 02:17:27.000000000 +0200
+++ system.h	2005-11-04 15:53:06.000000000 +0100
@@ -1,11 +1,35 @@
+#if defined(WIN32) && ! defined(__GNUC__)
+#undef HAVE_LONG_LONG
+#else
 #define HAVE_LONG_LONG
+#endif
 
 #define SOCKETS_IMPLEMENTED
 
+#define EXPRT
+#define IMPRT
+
+#if defined(WIN32) && ! defined(__CYGWIN__) && ! defined(STD_API)
+ #ifdef __BORLAND__     
+  #undef EXPRT
+  #undef IMPRT
+  #define EXPRT _export
+  #define IMPRT _import
+ #else
+  #undef STD_API
+  #ifdef _BUILD_STD
+   #define STD_API __declspec(dllexport)
+  #else
+   #define STD_API __declspec(dllimport)
+  #endif
+  #define XML_API STD_API
+ #endif
+#else
+#if ! defined(STD_API)
 #define STD_API
 #define XML_API
-#define WIN_IMP
-#define EXPRT
+#endif
+#endif
 
 void *Malloc(int bytes);
 void *Realloc(void *mem, int bytes);
diff -r -u ./url.c url.c
--- ./url.c	2004-06-11 13:39:17.000000000 +0200
+++ url.c	2006-05-30 18:29:22.000000000 +0200
@@ -175,6 +175,8 @@
  * The parts of the URL are returned in scheme, host, port and path
  * if these are non-null.
  * Caller should free the results.
+ *
+ * XXX we probably don't handle the query component properly.
  */
 
 char *url_merge(const char *url, const char *base,
@@ -255,6 +257,7 @@
 	    if(j - i == 2 && p[i+1] == '.')
 	    {
 		strcpy(&p[i+1], p[j] ? &p[j+1] : &p[j]);
+		i = 0;		/* start again from beginning */
 		continue;
 	    }
 
@@ -559,6 +562,14 @@
     *scheme = *host = *path = 0;
     *port = -1;
 
+    /* Check for degenerate case */
+    
+    if(url[0] == 0)
+    {
+	*path = strdup8("");
+	return;
+    }
+
     /* Does it start with a scheme? */
     
     for(p = (char *)url; *p; p++)
diff -r -u ./version.c version.c
--- ./version.c	2005-01-14 18:09:25.000000000 +0100
+++ version.c	2006-06-29 13:04:14.000000000 +0200
@@ -1,2 +1,2 @@
 char *rxp_version_string = 
-    "RXP 1.4.4 Copyright Richard Tobin, LTG, HCRC, University of Edinburgh";
+    "RXP 1.4.8 Copyright Richard Tobin, LTG, HCRC, University of Edinburgh";
diff -r -u ./xmlparser.c xmlparser.c
--- ./xmlparser.c	2004-11-03 14:47:31.000000000 +0100
+++ xmlparser.c	2005-06-17 20:14:34.000000000 +0200
@@ -1,10 +1,10 @@
-/* 	$Id: xmlparser.c,v 1.131 2004/11/03 13:47:31 richard Exp $
+/* 	$Id: xmlparser.c,v 1.133 2005/06/17 18:14:34 richard Exp $
 */
 
 #define DEBUG_FSM 0
 
 #ifndef lint
-static char vcid[] = "$Id: xmlparser.c,v 1.131 2004/11/03 13:47:31 richard Exp $";
+static char vcid[] = "$Id: xmlparser.c,v 1.133 2005/06/17 18:14:34 richard Exp $";
 #endif /* lint */
 
 /* 
@@ -3436,7 +3436,7 @@
 		      e->name));
     }
     else if(ParserGetFlag(p, Validate) && p->standalone == SDD_yes &&
-	    e->is_externally_declared)
+	    p->state == PS_body && e->is_externally_declared)
     {
 	require(validity_error(p, "Reference to externally declared entity "
 			          "\"%S\" in document declared standalone",
@@ -4257,6 +4257,8 @@
 	   element->xml_id_attribute == a && a->type != AT_id)
 	{
 	    warn(p, "xml:id error: xml:id attribute must be declared as type ID");
+	    /* Fix the declaration so that we treat it as type ID */
+	    a->type = AT_id;
 	}
 	if(ParserGetFlag(p, XMLNamespaces))
 	{
@@ -4517,6 +4519,8 @@
     return 1;
 }
 
+/* NB assumes we are parsing the DTD */
+
 static int parsing_external_subset(Parser p)
 {
     Entity e = p->source->entity;
