libdiff.c (8137B)
/*
* Copyright (c) 2013 Tatsuhiko Kubo <cubicdaiya@gmail.com>
* Copyright (c) 2018 Kristaps Dzonsons <kristaps@bsd.lv>
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "config.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "libdiff.h"
struct onp_coord {
int x;
int y;
int k;
};
struct onp_diff {
const void *a; /* shorter subsequence */
const void *b; /* longer subsequence */
size_t m; /* length of "a" */
size_t n; /* length of "b" */
diff_cmp cmp; /* comparison function */
int *path;
size_t delta;
size_t offset;
size_t size; /* matrix size */
size_t sz; /* data element width */
struct onp_coord *pathcoords;
size_t pathcoordsz;
int swapped; /* seqs swapped from input */
struct diff *result;
};
#define ONP_CMP(_d, _o1, _o2) \
((_d)->cmp((_d)->a + (_d)->sz * (_o1), \
(_d)->b + (_d)->sz * (_o2)))
/*
* Search shortest path and record the path.
*/
static int
onp_snake(struct onp_diff *diff, int k, int above, int below)
{
int r, y, x;
void *pp;
y = above > below ? above : below;
x = y - k;
r = above > below ?
diff->path[k - 1 + diff->offset] :
diff->path[k + 1 + diff->offset];
while (x < (int)diff->m && y < (int)diff->n &&
ONP_CMP(diff, x, y)) {
++x;
++y;
}
diff->path[k + diff->offset] = diff->pathcoordsz;
pp = reallocarray
(diff->pathcoords,
diff->pathcoordsz + 1,
sizeof(struct onp_coord));
if (NULL == pp)
return -1;
diff->pathcoords = pp;
assert(x >= 0);
assert(y >= 0);
diff->pathcoords[diff->pathcoordsz].x = x;
diff->pathcoords[diff->pathcoordsz].y = y;
diff->pathcoords[diff->pathcoordsz].k = r;
diff->pathcoordsz++;
return y;
}
static int
onp_addlcs(struct onp_diff *diff, const void *e)
{
void *pp;
pp = reallocarray
(diff->result->lcs,
diff->result->lcssz + 1,
sizeof(void *));
if (NULL == pp)
return 0;
diff->result->lcs = pp;
diff->result->lcs[diff->result->lcssz] = e;
diff->result->lcssz++;
return 1;
}
static int
onp_addses(struct onp_diff *diff, const void *e,
size_t originIdx, size_t targetIdx, enum difft type)
{
void *pp;
pp = reallocarray
(diff->result->ses,
diff->result->sessz + 1,
sizeof(struct diff_ses));
if (NULL == pp)
return 0;
diff->result->ses = pp;
diff->result->ses[diff->result->sessz].originIdx = originIdx;
diff->result->ses[diff->result->sessz].targetIdx = targetIdx;
diff->result->ses[diff->result->sessz].type = type;
diff->result->ses[diff->result->sessz].e = e;
diff->result->sessz++;
return 1;
}
static int
onp_genseq(struct onp_diff *diff, const struct onp_coord* v, size_t vsz)
{
size_t xpos, ypos;
size_t x_idx, y_idx; /* offset+1 numbers */
int px_idx, py_idx; /* coordinates */
int complete = 0;
int rc;
size_t i;
x_idx = y_idx = 1;
px_idx = py_idx = 0;
xpos = ypos = 0;
assert(vsz);
for (i = vsz - 1; ! complete; --i) {
while (px_idx < v[i].x || py_idx < v[i].y) {
if (v[i].y - v[i].x > py_idx - px_idx) {
rc = ! diff->swapped ?
onp_addses(diff,
diff->b + (ypos * diff->sz),
0, y_idx, DIFF_ADD) :
onp_addses(diff,
diff->b + (ypos * diff->sz),
y_idx, 0, DIFF_DELETE);
++ypos;
++y_idx;
++py_idx;
} else if (v[i].y - v[i].x < py_idx - px_idx) {
rc = ! diff->swapped ?
onp_addses(diff,
diff->a + (xpos * diff->sz),
x_idx, 0, DIFF_DELETE) :
onp_addses(diff,
diff->a + (xpos * diff->sz),
0, x_idx, DIFF_ADD);
++xpos;
++x_idx;
++px_idx;
} else {
rc = ! diff->swapped ?
onp_addses(diff,
diff->a + (xpos * diff->sz),
x_idx, y_idx, DIFF_COMMON) :
onp_addses(diff,
diff->b + (ypos * diff->sz),
y_idx, x_idx, DIFF_COMMON);
if (rc)
rc = ! diff->swapped ?
onp_addlcs(diff, diff->a +
(xpos * diff->sz)) :
onp_addlcs(diff, diff->b +
(ypos * diff->sz));
++xpos;
++ypos;
++x_idx;
++y_idx;
++px_idx;
++py_idx;
}
if ( ! rc)
return -1;
}
complete = 0 == i;
}
return x_idx > diff->m && y_idx > diff->n;
}
static struct onp_diff *
onp_alloc(diff_cmp cmp, size_t sz,
const void *a, size_t alen,
const void *b, size_t blen)
{
struct onp_diff *diff;
diff = calloc(1, sizeof(struct onp_diff));
if (NULL == diff)
return NULL;
if (alen > blen) {
diff->a = b;
diff->b = a;
diff->m = blen;
diff->n = alen;
diff->swapped = 1;
} else {
diff->a = a;
diff->b = b;
diff->m = alen;
diff->n = blen;
diff->swapped = 0;
}
assert(diff->n >= diff->m);
diff->cmp = cmp;
diff->sz = sz;
diff->delta = diff->n - diff->m;
diff->offset = diff->m + 1;
diff->size = diff->m + diff->n + 3;
return diff;
}
static void
onp_free(struct onp_diff *diff)
{
free(diff->path);
free(diff->pathcoords);
free(diff);
}
static int
onp_compose(struct onp_diff *diff, struct diff *result)
{
int rc = 0;
int p = -1;
int k;
int *fp = NULL;
int r;
struct onp_coord *epc = NULL;
size_t epcsz = 0;
size_t i;
void *pp;
/* Initialise the path from origin to target. */
fp = malloc(sizeof(int) * diff->size);
diff->path = malloc(sizeof(int) * diff->size);
diff->result = result;
if (NULL == fp || NULL == diff->path)
goto out;
for (i = 0; i < diff->size; i++)
fp[i] = diff->path[i] = -1;
/*
* Run the actual algorithm.
* This computes the full path in diff->path from the origin to
* the target.
*/
do {
p++;
for (k = -p;
k <= (ssize_t)diff->delta - 1; k++) {
fp[k + diff->offset] = onp_snake(diff, k,
fp[k - 1 + diff->offset] + 1,
fp[k + 1 + diff->offset]);
if (fp[k + diff->offset] < 0)
goto out;
}
for (k = diff->delta + p;
k >= (ssize_t)diff->delta + 1; k--) {
fp[k + diff->offset] = onp_snake(diff, k,
fp[k - 1 + diff->offset] + 1,
fp[k + 1 + diff->offset]);
if (fp[k + diff->offset] < 0)
goto out;
}
fp[diff->delta + diff->offset] =
onp_snake(diff, diff->delta,
fp[diff->delta - 1 + diff->offset] + 1,
fp[diff->delta + 1 + diff->offset]);
if (fp[diff->delta + diff->offset] < 0)
goto out;
} while (fp[diff->delta + diff->offset] != (ssize_t)diff->n);
/* Now compute edit distance. */
assert(p >= 0);
diff->result->editdist = diff->delta + 2 * p;
/*
* Here we compute the shortest edit script and the least common
* subsequence from the path.
*/
r = diff->path[diff->delta + diff->offset];
while(-1 != r) {
pp = reallocarray
(epc, epcsz + 1,
sizeof(struct onp_coord));
if (NULL == pp)
goto out;
epc = pp;
epc[epcsz].x = diff->pathcoords[r].x;
epc[epcsz].y = diff->pathcoords[r].y;
epcsz++;
r = diff->pathcoords[r].k;
}
if (epcsz)
onp_genseq(diff, epc, epcsz);
rc = 1;
out:
free(fp);
free(epc);
return rc;
}
int
diff(struct diff *d, diff_cmp cmp, size_t size,
const void *base1, size_t nmemb1,
const void *base2, size_t nmemb2)
{
struct onp_diff *p;
int rc;
p = onp_alloc(cmp, size, base1, nmemb1, base2, nmemb2);
if (NULL == p)
return 0;
rc = onp_compose(p, d);
onp_free(p);
return rc;
}