24h購物| | PChome| 登入
2012-10-27 21:24:07| 人氣3,916| 回應1 | 上一篇 | 下一篇

[演算法][程式作業] Edit distance

推薦 0 收藏 0 轉貼0 訂閱站台



課本 page. 406

15-5 Edit distance

Inorder to transform one source string of text x [1 ¬ m] to a target string y [1¬ n], we can perform various transformation operations. Our goal is, given xand y, to produce a series of transformations that change x to y. We use anarray z—assumed to be large enough to hold all the characters it will need—tohold the intermediate results. Initially, z is empty, and at termination, weshould have z[j] = y[j] for j = 1, 2, ..., n. We maintain current indices iinto x and j into z, and the operations are allowed to alter z and theseindices. Initially, i = j = 1. We are required to examine every character in xduring the transformation, which means that at the end of the sequence oftransformation operations, we must have i = m + 1. There are six transformationoperations:
• Copy a character from x to z by setting z[j] ← x[i] and then incrementingboth i and j. This operation examines x[i].
• Replace a character from x by another character c, by setting z[j] ← c, andthen incrementing both i and j. This operation examines x[i].
• Delete a character from x by incrementing i but leaving j alone. Thisoperation examines x[i].
• Insert the character c into z by setting z[j] ← c and then incrementing j,but leaving i alone. This operation examines no characters of x.
• Twiddle (i.e., exchange) the next two characters by copying them from x to zbut in the opposite order; we do so by setting z[j] ← x[i + 1] and z[j + 1] ←x[i] and then setting i ← i + 2 and j ← j + 2. This operation examines x[i] andx [i + 1].
• Kill the remainder of x by setting i ← m + 1. This operation examines allcharacters in x that have not yet been examined. If this operation isperformed, it must be the final operation.


效率 O(n*m), 挺差的

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <algorithm>
using namespace std;
int dp[1000][1000], lx, ly;
int arg_dp[1000][1000];
char x[1000], y[1000], buf[1000];
void printState(int ti, int tj) {
    printf("%-15s", buf);
    static int i;
    for(i = 0; i < lx; i++) {
        if(i == ti) putchar('[');
        printf("%c", x[i]);
        if(i == ti) putchar(']');
    }
    if(i == ti) printf("[]");
    for(; i < 20; i++)  putchar(' ');
    for(i = 0; i < tj; i++)
        printf("%c", y[i]);
    printf("[]");
    puts("");
}
void print(int i, int j) {
    if(i == 0 && j == 0) {
        sprintf(buf, "initial");
        printState(i, j);
        return;
    }
    if(arg_dp[i][j] == -1) {
        print(i-1, j-1);
        sprintf(buf, "copy");
    } else if(arg_dp[i][j] == -2) {
        print(i-1, j-1);
        sprintf(buf, "replace by %c", y[j-1]);
    } else if(arg_dp[i][j] == -3) {
        print(i-1, j);
        sprintf(buf, "delete");
    } else if(arg_dp[i][j] == -4) {
        print(i, j-1);
        sprintf(buf, "insert %c", y[j-1]);
    } else if(arg_dp[i][j] == -5) {
        print(i-2, j-2);
        sprintf(buf, "twiddle");
    } else {
        print(arg_dp[i][j], j);
        sprintf(buf, "kill");
    }
    printState(i, j);
}/*
algorithm altruistic
*/
int main() {
    int i, j;
    while(scanf("%s %s", x, y) == 2) {
        lx = strlen(x), ly = strlen(y);
        for(i = 0; i <= lx; i++) {
            for(j = 0; j <= ly; j++) {
                dp[i][j] = 0xffff;
            }
        }
        dp[0][0] = 0;
        int cost[6] = {1,1,1,1,1,1};
        for(i = 0; i <= lx; i++) {
            for(j = 0; j <= ly; j++) {
                //copy
                if(x[i] == y[j]) {
                    dp[i+1][j+1] = min(dp[i+1][j+1], dp[i][j]+cost[0]);
                    if(dp[i+1][j+1] == dp[i][j]+cost[0])
                        arg_dp[i+1][j+1] = -1;
                }
                //replace
                if(x[i] != y[j]) {
                    dp[i+1][j+1] = min(dp[i+1][j+1], dp[i][j]+cost[1]);
                    if(dp[i+1][j+1] == dp[i][j]+cost[1])
                        arg_dp[i+1][j+1] = -2;
                }
                //delete
                dp[i+1][j] = min(dp[i+1][j], dp[i][j]+cost[2]);
                if(dp[i+1][j] == dp[i][j]+cost[2])
                    arg_dp[i+1][j] = -3;
                //insert
                dp[i][j+1] = min(dp[i][j+1], dp[i][j]+cost[3]);
                if(dp[i][j+1] == dp[i][j]+cost[3])
                    arg_dp[i][j+1] = -4;
                //twiddle
                if(x[i] == y[j+1] && x[i+1] == y[j]) {
                    dp[i+2][j+2] = min(dp[i+2][j+2], dp[i][j]+cost[4]);
                    if(dp[i+2][j+2] == dp[i][j]+cost[4])
                        arg_dp[i+2][j+2] = -5;
                }
                //kill
                dp[lx][j] = min(dp[lx][j], dp[i][j]+cost[5]);
                if(dp[lx][j] == dp[i][j]+cost[5])
                    arg_dp[lx][j] = i;
            }
        }
        printf("min cost %d\n", dp[lx][ly]);
        print(lx, ly);
    }
    return 0;
}


後面有一個延伸問題 DNA Sequence Alignment, 利用原本的 edit distance 求此問題, 很明顯地就是 copy, replace delete 的花費對照
O(n*n)

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <algorithm>
using namespace std;
int dp[1000][1000], lx, ly;
int arg_dp[1000][1000];
char x[1000], y[1000], buf[1000];
char mx[1000], my[1000], *px, *py;
void print(int i, int j) {
    if(i == 0 && j == 0)
        return;
    if(arg_dp[i][j] == -1) {
        print(i-1, j-1);
        *px = x[i-1], *py = y[j-1];
        px++, py++;
    } else if(arg_dp[i][j] == -2) {
        print(i-1, j-1);
        *px = x[i-1], *py = y[j-1];
        px++, py++;
    } else if(arg_dp[i][j] == -3) {
        print(i-1, j);
        *px = x[i-1], *py = ' ';
        px++, py++;
    } else if(arg_dp[i][j] == -4) {
        print(i, j-1);
        *px = ' ', *py = y[j-1];
        px++, py++;
    }
}/*
GATCGGCAT CAATGTGAATC
CAATGTGAATC GATCGGCAT
*/
int main() {
    int i, j;
    while(scanf("%s %s", x, y) == 2) {
        lx = strlen(x), ly = strlen(y);
        for(i = 0; i <= lx; i++) {
            for(j = 0; j <= ly; j++) {
                dp[i][j] = -0xffff;
            }
        }
        dp[0][0] = 0;
        int cost[6] = {1,-1,-2,-2};
        for(i = 0; i <= lx; i++) {
            for(j = 0; j <= ly; j++) {
                //copy
                if(x[i] == y[j]) {
                    dp[i+1][j+1] = max(dp[i+1][j+1], dp[i][j]+cost[0]);
                    if(dp[i+1][j+1] == dp[i][j]+cost[0])
                        arg_dp[i+1][j+1] = -1;
                }
                //replace
                if(x[i] != y[j]) {
                    dp[i+1][j+1] = max(dp[i+1][j+1], dp[i][j]+cost[1]);
                    if(dp[i+1][j+1] == dp[i][j]+cost[1])
                        arg_dp[i+1][j+1] = -2;
                }
                //delete
                dp[i+1][j] = max(dp[i+1][j], dp[i][j]+cost[2]);
                if(dp[i+1][j] == dp[i][j]+cost[2])
                    arg_dp[i+1][j] = -3;
                //insert
                dp[i][j+1] = max(dp[i][j+1], dp[i][j]+cost[3]);
                if(dp[i][j+1] == dp[i][j]+cost[3])
                    arg_dp[i][j+1] = -4;
            }
        }
        printf("max score %d\n", dp[lx][ly]);
        px = mx, py = my;
        print(lx, ly);
        *px = '\0', *py = '\0';
        printf("%s\n%s\n", mx, my);
    }
    return 0;
}


台長: Morris
人氣(3,916) | 回應(1)| 推薦 (0)| 收藏 (0)| 轉寄
全站分類: 不分類 | 個人分類: 其他題目 |
此分類下一篇:[演算法][程式作業] huffman code 壓縮與解壓縮
此分類上一篇:[資料結構][HW] 走迷宮 STACK 找出所有路徑

xem phim online
nice article...
2017-10-03 14:15:58
是 (若未登入"個人新聞台帳號"則看不到回覆唷!)
* 請輸入識別碼:
請輸入圖片中算式的結果(可能為0) 
(有*為必填)
TOP
詳全文