本帖最后由 WHY 于 2020-6-27 13:41 编辑
// Test.js
//
// Windows Script Host (JScript) script. Recursively scans srcDir for *.xml
// files and writes one CSV row per <char> element to dstFile (UTF-8).
//
// Row layout (always 5 columns):
//   charName, first <value>, second <value>, unicode mapping, PUA mapping
// A field that is not found is written as an unquoted 0.
//
// The host is JScript (ES3), so only `var` and function expressions are used.

var srcDir = 'E:/Test/X42';
var dstFile = 'result.csv';
var out = [];       // finished CSV lines, one per <char> element
var skipped = [];   // paths of files that could not be parsed
var xml = new ActiveXObject('Microsoft.XMLDOM');
var fso = new ActiveXObject('Scripting.FileSystemObject');

// Make load() block until the document is fully parsed; otherwise
// selectNodes() could run against a document that is still loading.
xml.async = false;

// Patterns are applied to the serialized text of each <char> node.
// Hoisted so they are built once instead of once per node.
var reCharName = /<charName>([^<>]*)</;
var reValue = /<value>([^<>]*)</g;
var reUnicode = /[_"]unicode">([^<>]*)</;
var rePua = /"PUA">([^<>]*)</;

// Number of <value> columns in every row; extra <value> elements are ignored
// so that all rows keep the same column count.
var VALUE_COLUMNS = 2;

// Turns the predefined XML entities found in serialized node text back into
// the characters they stand for. `&amp;` is handled last so that text such as
// `&amp;lt;` becomes `&lt;` rather than `<`.
var decodeXmlEntities = function(text) {
    return text
        .replace(/&lt;/g, '<')
        .replace(/&gt;/g, '>')
        .replace(/&quot;/g, '"')
        .replace(/&apos;/g, "'")
        .replace(/&amp;/g, '&');
};

// Wraps a raw captured value in double quotes for CSV, doubling any embedded
// double quote so the field stays well-formed.
var csvQuote = function(raw) {
    return '"' + decodeXmlEntities(raw).replace(/"/g, '""') + '"';
};

// Parses the XML file at `fp` and appends one CSV line per <char> element to
// `out`. A file that fails to parse is recorded in `skipped` and contributes
// no rows.
var getXMLData = function(fp) {
    if (!xml.load(fp)) {
        skipped.push(fp);
        return;
    }

    var nodes = xml.selectNodes('//char');
    var count = nodes.length;
    for (var i = 0; i < count; i++) {
        var s = nodes.item(i).xml;
        var row = ['0', '0', '0', '0', '0'];
        var m;

        m = s.match(reCharName);
        if (m) row[0] = csvQuote(m[1]);

        // reValue is global and shared between nodes: the loop below may stop
        // before exec() returns null, so lastIndex must be reset explicitly.
        reValue.lastIndex = 0;
        var filled = 0;
        while (filled < VALUE_COLUMNS && (m = reValue.exec(s))) {
            row[1 + filled] = csvQuote(m[1]);
            filled++;
        }

        m = s.match(reUnicode);
        if (m) row[1 + VALUE_COLUMNS] = csvQuote(m[1]);

        m = s.match(rePua);
        if (m) row[2 + VALUE_COLUMNS] = csvQuote(m[1]);

        out.push(row.join(','));
    }
};

// Writes all collected lines to `dstFile` as UTF-8 text, overwriting any
// existing file, and always closes the stream.
var writeToCsv = function(dstFile) {
    var ado = new ActiveXObject('ADODB.Stream');
    ado.Mode = 3;            // read/write
    ado.Type = 2;            // text stream
    ado.Charset = 'utf-8';
    ado.Open();
    try {
        ado.WriteText(out.join('\r\n'));
        ado.SaveToFile(dstFile, 2);   // 2 = overwrite if the file exists
    } finally {
        ado.Close();
    }
};

// Processes every *.xml file directly inside folder `fd`, then recurses into
// each of its subfolders.
var getXmlFile = function(fd) {
    var folder = fso.getFolder(fd);
    var e1 = new Enumerator(folder.Files);
    var e2 = new Enumerator(folder.SubFolders);
    for (; !e1.atEnd(); e1.moveNext()) {
        var fp = e1.item().Path;
        if (!/\.xml$/i.test(fp)) continue;
        getXMLData(fp);
    }
    for (; !e2.atEnd(); e2.moveNext()) {
        getXmlFile(e2.item().Path);
    }
};

getXmlFile(srcDir);
writeToCsv(dstFile);

// Report unparsable files once, instead of dropping them silently.
if (skipped.length) {
    WSH.Echo('Skipped ' + skipped.length + ' unparsable file(s):\r\n' + skipped.join('\r\n'));
}
WSH.Echo('Done');
out.push(a) 效率非常低,改为 out.push(a.join(','))
# An alternative that does not use regular expressions: it reads the fields
# through the XML DOM instead.
#
# Recursively reads every *.xml file under $srcDir and writes one CSV row per
# <char> element to $dstFile (UTF-8). Row layout (always 5 columns):
#   charName, first value, second value, unicode mapping, PUA mapping
# A field that is not found is written as an unquoted 0.
$srcDir  = 'E:\Test\X42';
$dstFile = 'Result.CSV';

# Wraps a value in double quotes for CSV, doubling any embedded double quote.
function ConvertTo-CsvField([string]$text) {
    return '"' + $text.Replace('"', '""') + '"';
}

$fsw = New-Object System.IO.StreamWriter($dstFile, $false, [Text.Encoding]::UTF8);
try {
    # @() keeps .Count and indexing well-defined for zero or one matching file.
    $files = @(Get-ChildItem -LiteralPath $srcDir -Filter '*.xml' -Recurse -File);
    $count = $files.Count;

    for ($i = 0; $i -lt $count; $i++) {
        # Read the file as one string so the parser sees the document as
        # written, rather than an array of lines converted to a string.
        [xml]$xml = [IO.File]::ReadAllText($files[$i].FullName, [Text.Encoding]::UTF8);
        $node = $xml.GetElementsByTagName('char');
        $Len = $node.Count;

        for ($j = 0; $j -lt $Len; $j++) {
            $arr = @('0', '0', '0', '0', '0');
            $charName = $node[$j].charName;
            $value = @( $node[$j].charProp.Value );
            $type  = @( $node[$j].mapping.type );
            $text  = @( $node[$j].mapping.innerText );

            if ($null -ne $charName) { $arr[0] = ConvertTo-CsvField $charName; }

            # Only the first two values have a column.
            for ($k = 0; $k -lt 2; $k++) {
                if ($null -ne $value[$k]) { $arr[1 + $k] = ConvertTo-CsvField $value[$k]; }
            }

            # Look at every mapping, not just the first two, so a unicode/PUA
            # mapping at a later position is not missed. As before, a later
            # mapping of the same kind overwrites an earlier one.
            for ($k = 0; $k -lt $type.Count; $k++) {
                if ($null -ne $type[$k]) {
                    if ($type[$k].EndsWith('unicode')) { $arr[3] = ConvertTo-CsvField $text[$k]; }
                    if ($type[$k] -eq 'PUA')           { $arr[4] = ConvertTo-CsvField $text[$k]; }
                }
            }

            $fsw.WriteLine( $arr -join ',' );
        }

        if ($i % 1000 -eq 0) { $fsw.Flush(); }
    }
    $fsw.Flush();
}
finally {
    # Always release the output file, even if a source file fails to parse.
    $fsw.Close();
}
pause
|