use strict;
use warnings;
use IO::Handle;    # makes STDOUT->autoflush available on perls older than 5.14
use File::Slurp;
use File::Basename;
use Term::ReadKey;

# Line-by-line subtitle merger.
#
# For every *.txt file in the Chinese directory that has a same-named file in
# the English directory, write a merged file in which each English line is
# followed by a space and the Chinese line with the same line number.
# Index lines, time-axis lines and blank lines are dropped from the result.

STDOUT->autoflush(1);

# Prefer a data directory next to the script, fall back to the absolute path.
chdir '.\TEDTXTUNICODE'
    or chdir 'C:\Users\CH6\Desktop\TEDTXTUNICODE'
    or quit($!);

my $path_eng   = '.\eng1246';
my $path_chs   = '.\chs1203';
my $path_merge = '.\merge';

unless ( -e $path_merge ) {
    mkdir $path_merge or quit("mkdir $path_merge: $!");
}

for my $cn_file ( glob "$path_chs\\*.txt" ) {
    my $name       = basename($cn_file);
    my $en_file    = "$path_eng\\$name";
    my $merge_file = "$path_merge\\$name";

    # Only files present in both directories are merged.
    merge( $en_file, $cn_file, $merge_file ) if -e $en_file;
}

quit("Done");

# merge( $en, $cn, $merge )
#   $en    - path of the English subtitle file
#   $cn    - path of the Chinese subtitle file
#   $merge - path of the file to write
# Pairs the two files by line number, filters out non-text lines, writes the
# result to $merge and prints the output path.
sub merge {
    my ( $en, $cn, $merge ) = @_;

    # NOTE(review): read_file is called without any encoding option; if the
    # inputs are UTF-16 the patterns below cannot match - confirm the encoding.
    my @en_lines = read_file($en);
    my @cn_lines = read_file($cn);

    my @mix;
    for my $i ( 0 .. $#en_lines ) {
        my $line = $en_lines[$i];
        $line =~ s/\r?\n//;

        # The Chinese line keeps its own line terminator. When the Chinese
        # file is shorter, supply a terminator so output lines do not run
        # together (and no "uninitialized" warning is raised).
        my $cn_line = defined $cn_lines[$i] ? $cn_lines[$i] : "\n";
        $line .= " " . $cn_line;

        # Drop merged index lines ("12 12"), time-axis lines and blank lines.
        next
            if $line =~ /^\d+\s+\d+$/
            || $line =~ /\d+:\d+:\d+/
            || $line =~ /^\s+$/;

        push @mix, $line;
    }

    write_file( $merge, @mix );
    print "$merge\n";
    return;
}

# quit( $message )
# Prints $message, polls the keyboard once and terminates the script.
sub quit {
    my ($message) = @_;
    print $message;

    # NOTE(review): Term::ReadKey documents mode -1 as a non-blocking read, so
    # this does not pause; confirm whether a blocking wait (mode 0) was meant.
    ReadKey(-1);
    exit;
}
复制代码
# Merge same-named English and Chinese subtitle files.
# The stride-4 loop below reads line 0 of each block as the index, line 1 as
# the time axis and line 2 as the subtitle text; the fourth line is skipped.
$SD_eng   = Get-ChildItem -Path "C:\Users\CH6\Desktop\TEDTXTUNICODE\eng1246"
$chsDir   = "C:\Users\CH6\Desktop\TEDTXTUNICODE\chs1203"
$OD_merge = "C:\Users\CH6\Desktop\TEDTXTUNICODE\merge"

# Out-File does not create missing directories.
if (!(Test-Path -LiteralPath $OD_merge)) {
    $null = New-Item -ItemType Directory -Path $OD_merge
}

foreach ($file_eng in $SD_eng) {
    if ($file_eng.PSIsContainer) { continue }

    # Look the counterpart up directly instead of scanning the whole directory.
    $chsPath = Join-Path $chsDir $file_eng.Name
    if (!(Test-Path -LiteralPath $chsPath -PathType Leaf)) { continue }

    # @() keeps a one-line file an array, so [$i] indexes lines, not characters.
    $content_eng = @(Get-Content -LiteralPath $file_eng.FullName)
    $content_chs = @(Get-Content -LiteralPath $chsPath)
    $count = [Math]::Max($content_eng.Count, $content_chs.Count)

    $merged = New-Object 'System.Collections.Generic.List[string]'

    # $i points at the text line of each block. Valid indexes end at $count - 1,
    # hence -lt (using -le would read one element past the end).
    for ($i = 2; $i -lt $count; $i += 4) {
        $block = ($i - 2) / 4 + 1
        if ($content_eng[$i - 2] -ne $content_chs[$i - 2]) { -join('文件', $file_eng.Name, "`t在第", $block, '行号不对应') }
        if ($content_eng[$i - 1] -ne $content_chs[$i - 1]) { -join('文件', $file_eng.Name, "`t在第", $block, '时间轴不对应') }
        if (!($content_eng[$i])) { -join('文件', $file_eng.Name, "`t在第", $block, '无英文字幕') }
        if (!($content_chs[$i])) { -join('文件', $file_eng.Name, "`t在第", $block, '无中文字幕') }

        # One list entry per output line; Out-File supplies the line endings.
        $merged.Add((-join($content_eng[$i], " ", $content_chs[$i])))
    }

    if ($merged.Count -gt 0) {
        $OF_name = Join-Path $OD_merge $file_eng.Name
        $merged | Out-File -FilePath $OF_name
    }
}
pause
复制代码
@echo off
rem For every .txt file in txtPath1 that also exists in txtPath2, print the
rem lines of both files side by side, paired by line number.
rem Delayed expansion is disabled on purpose: nothing here uses !var!, and
rem with it enabled every "!" in the echoed text or file names would be lost.
setlocal disabledelayedexpansion
set "txtPath1=C:\Users\CH6\Desktop\TEDTXTUNICODE\eng1246"
set "txtPath2=C:\Users\CH6\Desktop\TEDTXTUNICODE\chs1203"
for /f "delims=" %%i in ('dir /b/l/aa "%txtPath1%\*.txt"') do (
    rem Test for the counterpart directly instead of listing txtPath2 each time.
    if exist "%txtPath2%\%%i" (
        echo,%%i %%i 组合后。
        rem findstr /n prefixes "N:"; %%t / %%c receive N, %%u / %%d the text.
        rem The "." pattern means empty lines are never listed.
        for /f "usebackq tokens=1,* delims=:" %%t in (`findstr /n "." "%txtPath1%\%%i"`) do (
            for /f "usebackq tokens=1,* delims=:" %%c in (`findstr /n "." "%txtPath2%\%%i"`) do (
                if "%%c"=="%%t" echo,%%u %%d
            )
        )
    )
)
pause
复制代码
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;

/**
 * Prints same-named subtitle files from two directories side by side.
 *
 * <p>For every {@code .txt} file in the first directory that also exists in
 * the second, each line of the first file is printed followed by a space and
 * the line with the same line number from the second file.
 */
public class BRDemo {

    /**
     * Entry point; the two directories are hard-coded.
     *
     * @param args unused
     * @throws IOException if a subtitle file cannot be opened or read
     */
    public static void main(String args[]) throws IOException {
        String txtPath1 = "C:\\Users\\CH6\\Desktop\\TEDTXTUNICODE\\eng1246";
        String txtPath2 = "C:\\Users\\CH6\\Desktop\\TEDTXTUNICODE\\chs1203";
        File f1 = new File(txtPath1);
        File f2 = new File(txtPath2);

        // listFiles returns null when the path is not a readable directory.
        File[] fs1 = f1.listFiles((File dir, String name) -> name.endsWith(".txt"));
        if (fs1 == null) {
            System.err.println("Not a readable directory: " + f1.getPath());
            return;
        }

        for (File fa : fs1) {
            System.out.println("------------------------");
            System.out.println(fa.getPath());

            // Direct lookup of the counterpart instead of scanning the directory.
            File fb = new File(f2, fa.getName());
            if (!fb.isFile()) {
                continue;
            }

            // NOTE(review): FileReader decodes with the platform default
            // charset; if the subtitle files are UTF-16 an explicit charset is
            // needed here - confirm the input encoding.
            try (BufferedReader bra = new BufferedReader(new FileReader(fa));
                 BufferedReader brb = new BufferedReader(new FileReader(fb))) {
                String braStr;
                String brbStr;
                // Advance both readers in lock-step and stop as soon as either
                // file ends, so no "null" is printed and the loop terminates.
                while ((braStr = bra.readLine()) != null
                        && (brbStr = brb.readLine()) != null) {
                    System.out.println(braStr + " " + brbStr);
                }
            }
        }
    }
}
2. 增加时间轴匹配：时间对应才合并；如果没有匹配到时间，会提示 missing $time at $filename
复制代码
- chdir '.\TEDTXTUNICODE' or
- chdir 'C:\Users\CH6\Desktop\TEDTXTUNICODE' or quit( $! );
use strict;
use warnings;

=info

523066680@163.com

Match subtitles on the time axis; improved progress messages.

=cut

use IO::Handle;    # makes STDOUT->autoflush available on perls older than 5.14
use Encode;
use File::Basename;
use Term::ReadKey;

STDOUT->autoflush(1);

# Prefer a data directory next to the script, fall back to the absolute path.
chdir '.\TEDTXTUNICODE'
    or chdir 'C:\Users\CH6\Desktop\TEDTXTUNICODE'
    or quit($!);

my $path_eng   = '.\eng1246';
my $path_chs   = '.\chs1203';
my $path_merge = '.\merge';

unless ( -e $path_merge ) {
    mkdir $path_merge or quit("mkdir $path_merge: $!");
}

for my $cn_file ( glob "$path_chs\\*.txt" ) {
    my $name       = basename($cn_file);
    my $en_file    = "$path_eng\\$name";
    my $merge_file = "$path_merge\\$name";

    # Only files present in both directories are merged.
    merge( $en_file, $cn_file, $merge_file ) if -e $en_file;
}

quit("Done");

# merge( $en, $cn, $merge )
#   $en    - path of the English subtitle file
#   $cn    - path of the Chinese subtitle file
#   $merge - path of the file to write
# Pairs subtitles whose start time (HH:MM:SS) is identical in both files and
# writes "english chinese" lines, CRLF-terminated, as UTF-16LE with a BOM.
# English entries with no Chinese counterpart are reported and left out.
sub merge {
    my ( $en, $cn, $merge ) = @_;
    my ( %en_text_at, %cn_text_at );

    print "Processing $merge\n";
    load( \%en_text_at, $en );
    load( \%cn_text_at, $cn );

    my $mix = "";
    for my $time ( sort keys %en_text_at ) {
        unless ( exists $cn_text_at{$time} ) {
            print " missing $time at $cn\n";
            next;
        }
        $mix .= $en_text_at{$time} . " " . $cn_text_at{$time} . "\r\n";
    }

    # :raw because the text is encoded by hand; "\xff\xfe" is the UTF-16LE BOM.
    open my $out, ">:raw", $merge
        or do { warn "Cannot write $merge: $!\n"; return; };
    print {$out} "\xff\xfe" . encode( 'utf16-le', $mix )
        or warn "Write to $merge failed: $!\n";
    close $out
        or warn "Closing $merge failed: $!\n";
    return;
}

# load( $href, $file )
#   $href - hash reference that receives start time => subtitle text
#   $file - path of a subtitle file, decoded as UTF-16LE
# For every line holding two H:M:S timestamps, stores the line that follows it
# (line terminator removed) under the first timestamp. A file that cannot be
# opened leaves the hash untouched and produces a warning.
sub load {
    my ( $href, $file ) = @_;

    open my $fh, "<:encoding(utf16-le)", $file
        or do { warn "Cannot read $file: $!\n"; return; };
    my @lines = <$fh>;
    close $fh;

    for my $id ( 0 .. $#lines ) {
        next unless $lines[$id] =~ /(\d+:\d+:\d+).*\d+:\d+:\d+/;
        my $time = $1;

        # A timestamp on the very last line has no text line after it.
        my $text = defined $lines[ $id + 1 ] ? $lines[ $id + 1 ] : '';
        $text =~ s/\r?\n//;
        $href->{$time} = $text;
    }
    return;
}

# quit( $message )
# Prints $message, polls the keyboard once and terminates the script.
sub quit {
    my ($message) = @_;
    print $message;

    # NOTE(review): Term::ReadKey documents mode -1 as a non-blocking read, so
    # this does not pause; confirm whether a blocking wait (mode 0) was meant.
    ReadKey(-1);
    exit;
}
Processing .\merge\Thor.txt
missing 00:00:54 at .\chs1203\Thor.txt
Now, I know what you're thinking. 我知道你在想什么
"Oh, no! Thor's in a cage. How did this happen?" 不 托尔被关在笼子里了 怎么回事
Well, sometimes you have to get captured 有时 你得先被抓住
just to get a straight answer out of somebody. 才能从某人那里问出个所以然来
It's a long story, but basically, I'm a bit of a hero. 说来话长 但其实 我算是个英雄
我现在的处理方式是按时间段匹配，如果没有对应翻译就不输出。
复制代码
- Processing .\merge\A TED speakers worst nightmare.txt
- Processing .\merge\Adam Grosser - A mobile fridge for vaccines.txt
- missing 00:00:28 at .\chs1203\Adam Grosser - A mobile fridge for vaccines.txt
- missing 00:00:29 at .\chs1203\Adam Grosser - A mobile fridge for vaccines.txt
- missing 00:00:31 at .\chs1203\Adam Grosser - A mobile fridge for vaccines.txt
- missing 00:00:33 at .\chs1203\Adam Grosser - A mobile fridge for vaccines.txt
- missing 00:00:37 at .\chs1203\Adam Grosser - A mobile fridge for vaccines.txt
- ...
- Processing .\merge\Ahn Trio - A modern take on piano violin cello.txt
- Done
复制代码
# MergeFiles: pairs subtitle text from two files by the time-axis line that
# precedes it.
#   $chsFile - path of the Chinese subtitle file
#   $engFile - path of the English subtitle file
# Emits one string per English text line: the English text, a space, and the
# Chinese text stored under the same time-axis line (empty when there is none).
function MergeFiles($chsFile, $engFile) {
    $textByTime = @{}

    # A line counts as a time axis when it starts with two digits and a colon;
    # the line right after it is taken as the subtitle text.
    $chsLines = @(Get-Content $chsFile)
    for ($n = 1; $n -lt $chsLines.Count; $n++) {
        $previous = $chsLines[$n - 1]
        if ($previous -like '[0-9][0-9]:*') {
            $textByTime[$previous] = $chsLines[$n]
        }
    }

    $engLines = @(Get-Content $engFile)
    for ($n = 1; $n -lt $engLines.Count; $n++) {
        $previous = $engLines[$n - 1]
        if ($previous -like '[0-9][0-9]:*') {
            $engLines[$n] + ' ' + $textByTime[$previous]
        }
    }
}

$chsDir = 'C:\Users\CH6\Desktop\TEDTXTUNICODE\chs1203'
$engDir = 'C:\Users\CH6\Desktop\TEDTXTUNICODE\eng1246'
$mergeDir = 'C:\Users\CH6\Desktop\TEDTXTUNICODE\merge'

# Create the output directory on first run.
if (-not (Test-Path $mergeDir)) {
    $null = New-Item -ItemType Directory -Path $mergeDir
}

# Only English files with a same-named Chinese file are merged.
foreach ($eng in (Get-ChildItem -Path ($engDir + '\*.txt'))) {
    $chsFile = Join-Path $chsDir $eng.Name
    if (-not (Test-Path $chsFile)) {
        continue
    }
    $arr = MergeFiles $chsFile $eng.FullName
    Set-Content -Path (Join-Path $mergeDir $eng.Name) -Value $arr -Encoding Unicode
}

[Console]::Write('Done')
[Console]::ReadLine()
欢迎光临 批处理之家 (http://bbs.bathome.net/) | Powered by Discuz! 7.2 |