在C++ / MSXML中针对嵌入式XSD资源(含变音符)验证XML文件

Validate XML file against embedded XSD resource with umlauts in C++ / MSXML

我想使用MSXML6解析器在C++中验证XML文件,并按照http://msdn.microsoft.com/zh-cn/library/ms762774(v=vs.85).aspx上的说明进行操作。但是,我正在处理的项目要求将XSD模式嵌入到二进制文件中。

这是需要验证的XML文件(为便于演示,所有文件均已简化):

<?xml version="1.0" encoding="UTF-8"?>
<Document xsi:schemaLocation="urn:test schema.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="urn:test">
  <Party>
    <Id>1</Id>
    <Name>Bob</Name>
    <Salary>100.00</Salary>
  </Party>
  <Party>
    <Id>2</Id>
    <Name>Alice</Name>
    <Salary>200.00</Salary>
  </Party>
  <Party>
    <Id>3</Id>
    <Name>Günther</Name>
    <Salary>300.00</Salary>
  </Party>
</Document>

这是XSD架构:

<?xml version="1.0" encoding="utf-8" standalone="no"?>
<xs:schema xmlns="urn:test" xmlns:xs="http://www.w3.org/2001/XMLSchema" targetNamespace="urn:test" elementFormDefault="qualified">

  <xs:simpleType name="NameType">
    <xs:restriction base="xs:string">
      <xs:pattern value="([A-Za-z0-9ÄÖÜäöü]){1,10}"/>
    </xs:restriction>
  </xs:simpleType>

  <xs:complexType name="PartyType">
    <xs:sequence>
      <xs:element name="Id" type="xs:integer"/>
      <xs:element name="Name" type="NameType"/>
      <xs:element name="Salary" type="xs:decimal"/>
    </xs:sequence>
  </xs:complexType>

  <xs:element name="Document">
    <xs:complexType>
      <xs:choice minOccurs="1" maxOccurs="9999999">
        <xs:element name="Party" type="PartyType"/>
      </xs:choice>
    </xs:complexType>
  </xs:element>

</xs:schema>

上述XSD架构作为Win32资源嵌入在可执行文件中,可以通过标识符"IDR_XSDSCHEMA1"进行引用(请参见代码中标有OPTION 1的注释行):

#include <stdio.h>
#include <tchar.h>
#include <windows.h>
#import <msxml6.dll>
#include"resource.h"

// Evaluates a COM call that yields an HRESULT and stores the value in `hr`.
// On failure it returns `bstrResult` from the enclosing function, so both
// `hr` and `bstrResult` must be declared in the scope where the macro is used.
// The do { ... } while(0) wrapper lets the macro be used as a single statement.
#define CHK_HR(stmt) do { hr=(stmt); if (FAILED(hr)) return bstrResult; } while(0)

// Returns the allocation base of the memory region that holds this function's
// code, reinterpreted as an HMODULE; returns NULL when the memory query fails.
HMODULE GetThisDllHandle()
{
    MEMORY_BASIC_INFORMATION region;
    const size_t bytesReturned =
        VirtualQueryEx(GetCurrentProcess(), (void*)GetThisDllHandle, &region, sizeof(region));

    // A zero return means the query produced no information.
    if (bytesReturned == 0)
        return NULL;

    return (HMODULE)region.AllocationBase;
}

_bstr_t validateFile(_bstr_t bstrFile)
{
    //Schema collection
    MSXML2::IXMLDOMSchemaCollectionPtr pXS;

    //XML document
    MSXML2::IXMLDOMDocument2Ptr pXD;

    //XSD document
    MSXML2::IXMLDOMDocument2Ptr pXSD;

    //Validation object
    MSXML2::IXMLDOMParseErrorPtr pErr;

    _bstr_t bstrResult = L"";
    HRESULT hr = S_OK;

    //Load XSD schema from resource
    HMODULE handle = GetThisDllHandle();
    HRSRC rc = ::FindResource(handle, MAKEINTRESOURCE(IDR_XSDSCHEMA1), L"XSDSCHEMA");
    HGLOBAL rcData = ::LoadResource(handle, rc);
    LPVOID data = (::LockResource(rcData));
    ::FreeResource(rcData);

    //Load schema stream into document
    CHK_HR(pXSD.CreateInstance(__uuidof(MSXML2::DOMDocument60), NULL, CLSCTX_INPROC_SERVER));

    if (pXSD->loadXML((LPCSTR)data) != VARIANT_TRUE)
        return bstrResult;

    // Create a schema cache
    CHK_HR(pXS.CreateInstance(__uuidof(MSXML2::XMLSchemaCache60), NULL, CLSCTX_INPROC_SERVER));

    //--> OPTION 1: VALIDATING AGAINST EMBEDDED XSD RESOURCE; DOESN'T WORK <--
    CHK_HR(pXS->add(L"urn:test", pXSD.GetInterfacePtr()));

    //--> OPTION 2: VALIDATING AGAINST PHYSICAL XSD FILE; WORKS FINE <--
    //CHK_HR(pXS->add(L"urn:test", L"schema.xsd"));

    // Create a DOMDocument and set its properties.
    CHK_HR(pXD.CreateInstance(__uuidof(MSXML2::DOMDocument60), NULL, CLSCTX_INPROC_SERVER));

    pXD->async = VARIANT_FALSE;
    pXD->validateOnParse = VARIANT_TRUE;
    pXD->preserveWhiteSpace = VARIANT_TRUE;

    //Assign the schema cache to the Document's schema collection
    pXD->schemas = pXS.GetInterfacePtr();

    //Load XML file
    if(pXD->load(bstrFile) != VARIANT_TRUE)
    {
        pErr = pXD->parseError;

        bstrResult = _bstr_t(L"Validation failed on") + bstrFile +
        _bstr_t(L"\
====================="
) +
        _bstr_t(L"\
Reason:"
) + _bstr_t(pErr->Getreason()) +      
        _bstr_t(L"\
Source:"
) + _bstr_t(pErr->GetsrcText()) +
        _bstr_t(L"\
Line:"
) + _bstr_t(pErr->Getline());
    }

    else
    {
        bstrResult = _bstr_t(L"Validation succeeded for") + bstrFile +
        _bstr_t(L"\
======================\
"
) +
        _bstr_t(pXD->xml);
    }

    return bstrResult;    
}

int _tmain(int argc, _TCHAR* argv[])
{
    HRESULT hr = CoInitialize(NULL);
    if(SUCCEEDED(hr))
    {
        try
        {
            _bstr_t bstrOutput = validateFile(L"Document.xml");
            MessageBox(NULL, bstrOutput, L"schemaCache",MB_OK);
        }

        catch(_com_error &e)
        {
              MessageBox(NULL, e.Description(), L"schemaCache",MB_OK);
        }
        CoUninitialize();
    }
    return 0;
}

不幸的是,在运行验证例程(见OPTION 1注释)时,我遇到了一些奇怪的行为。XSD资源中的变音符在加载到流中时似乎没有被正确解码,导致用于验证的模式内容错乱,结果如下所示:

fail

但是,当直接从磁盘加载模式文件时(OPTION 2注释),验证例程运行得很好:

ok2

我已经尝试将已加载的流从Unicode转换为多字节,以及反向转换,但都无济于事。我是不是遗漏了什么?还是说Win32资源仅限于特定的字符集?感谢您的任何建议。


请参见WhozCraig的评论:调用MultiByteToWideChar()并以CP_UTF8作为代码页参数,即可得到有效的Unicode字符串。