Back (Current repo: lowdown)

fork of lowdown from https://kristaps.bsd.lv
To clone this repository:
git clone https://git.viktor1993.net/lowdown.git
Log | Download | Files | Refs | LICENSE

libdiff.c (8137B)


/*
 * Copyright (c) 2013 Tatsuhiko Kubo <cubicdaiya@gmail.com>
 * Copyright (c) 2018 Kristaps Dzonsons <kristaps@bsd.lv>
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "config.h"

#include <assert.h>
#include <stdlib.h>
#include <string.h>

#include "libdiff.h"

struct 	onp_coord {
	int		 x;
	int		 y;
	int		 k;
};

struct 	onp_diff {
	const void	 *a; /* shorter subsequence */
	const void	 *b; /* longer subsequence */
	size_t		  m; /* length of "a" */
	size_t	 	  n; /* length of "b" */
	diff_cmp	  cmp; /* comparison function */
	int		 *path;
	size_t	 	  delta;
	size_t	 	  offset;
	size_t	 	  size; /* matrix size */
	size_t		  sz; /* data element width */
	struct onp_coord *pathcoords;
	size_t		  pathcoordsz;
	int 		  swapped; /* seqs swapped from input */
	struct diff	 *result;
};

#define ONP_CMP(_d, _o1, _o2) \
	((_d)->cmp((_d)->a + (_d)->sz * (_o1), \
	           (_d)->b + (_d)->sz * (_o2)))

/*
 * Search shortest path and record the path.
 */
static int
onp_snake(struct onp_diff *diff, int k, int above, int below)
{
	int 	 r, y, x;
	void	*pp;

	y = above > below ? above : below;
	x = y - k;

	r = above > below ?
		diff->path[k - 1 + diff->offset] :
		diff->path[k + 1 + diff->offset];

	while (x < (int)diff->m && y < (int)diff->n &&
	       ONP_CMP(diff, x, y)) {
		++x;
		++y;
	}

	diff->path[k + diff->offset] = diff->pathcoordsz;

	pp = reallocarray
		(diff->pathcoords,
		 diff->pathcoordsz + 1,
		 sizeof(struct onp_coord));
	if (NULL == pp)
		return -1;
	diff->pathcoords = pp;

	assert(x >= 0);
	assert(y >= 0);

	diff->pathcoords[diff->pathcoordsz].x = x;
	diff->pathcoords[diff->pathcoordsz].y = y;
	diff->pathcoords[diff->pathcoordsz].k = r;
	diff->pathcoordsz++;

	return y;
}

static int
onp_addlcs(struct onp_diff *diff, const void *e)
{
	void	*pp;

	pp = reallocarray
		(diff->result->lcs,
		 diff->result->lcssz + 1,
		 sizeof(void *));
	if (NULL == pp)
		return 0;
	diff->result->lcs = pp;
	diff->result->lcs[diff->result->lcssz] = e;
	diff->result->lcssz++;
	return 1;
}

static int
onp_addses(struct onp_diff *diff, const void *e,
	size_t originIdx, size_t targetIdx, enum difft type)
{
	void	*pp;

	pp = reallocarray
		(diff->result->ses,
		 diff->result->sessz + 1,
		 sizeof(struct diff_ses));
	if (NULL == pp)
		return 0;
	diff->result->ses = pp;
	diff->result->ses[diff->result->sessz].originIdx = originIdx;
	diff->result->ses[diff->result->sessz].targetIdx = targetIdx;
	diff->result->ses[diff->result->sessz].type = type;
	diff->result->ses[diff->result->sessz].e = e;
	diff->result->sessz++;
	return 1;
}

static int
onp_genseq(struct onp_diff *diff, const struct onp_coord* v, size_t vsz)
{
	size_t		 xpos, ypos;
	size_t         	 x_idx,  y_idx;  /* offset+1 numbers */
	int		 px_idx, py_idx; /* coordinates */
	int		 complete = 0;
	int		 rc;
	size_t		 i;

	x_idx = y_idx = 1;
	px_idx = py_idx = 0;
	xpos = ypos = 0;

	assert(vsz);

	for (i = vsz - 1; ! complete; --i) {
		while (px_idx < v[i].x || py_idx < v[i].y) {
			if (v[i].y - v[i].x > py_idx - px_idx) {
				rc = ! diff->swapped ?
					onp_addses(diff,
					 diff->b + (ypos * diff->sz),
					 0, y_idx, DIFF_ADD) :
					onp_addses(diff,
					 diff->b + (ypos * diff->sz),
					 y_idx, 0, DIFF_DELETE);
				++ypos;
				++y_idx;
				++py_idx;
			} else if (v[i].y - v[i].x < py_idx - px_idx) {
				rc = ! diff->swapped ?
					onp_addses(diff,
					 diff->a + (xpos * diff->sz),
					 x_idx, 0, DIFF_DELETE) :
					onp_addses(diff,
					 diff->a + (xpos * diff->sz),
					 0, x_idx, DIFF_ADD);
				++xpos;
				++x_idx;
				++px_idx;
			} else {
				rc = ! diff->swapped ?
					onp_addses(diff,
					 diff->a + (xpos * diff->sz),
					 x_idx, y_idx, DIFF_COMMON) :
					onp_addses(diff,
					 diff->b + (ypos * diff->sz),
					 y_idx, x_idx, DIFF_COMMON);
				if (rc)
					rc = ! diff->swapped ?
					  onp_addlcs(diff, diff->a +
						(xpos * diff->sz)) :
					  onp_addlcs(diff, diff->b +
						(ypos * diff->sz));
				++xpos;
				++ypos;
				++x_idx;
				++y_idx;
				++px_idx;
				++py_idx;
			}
			if ( ! rc)
				return -1;
		}
		complete = 0 == i;
	}

	return x_idx > diff->m && y_idx > diff->n;
}

static struct onp_diff *
onp_alloc(diff_cmp cmp, size_t sz,
	const void *a, size_t alen,
	const void *b, size_t blen)
{
	struct onp_diff *diff;

	diff = calloc(1, sizeof(struct onp_diff));

	if (NULL == diff)
		return NULL;

	if (alen > blen) {
		diff->a = b;
		diff->b = a;
		diff->m = blen;
		diff->n = alen;
		diff->swapped = 1;
	} else {
		diff->a = a;
		diff->b = b;
		diff->m = alen;
		diff->n = blen;
		diff->swapped = 0;
	}

	assert(diff->n >= diff->m);
	diff->cmp = cmp;
	diff->sz = sz;
	diff->delta = diff->n - diff->m;
	diff->offset = diff->m + 1;
	diff->size = diff->m + diff->n + 3;

	return diff;
}

static void
onp_free(struct onp_diff *diff)
{

	free(diff->path);
	free(diff->pathcoords);
	free(diff);
}

static int
onp_compose(struct onp_diff *diff, struct diff *result)
{
	int		 rc = 0;
	int		 p = -1;
	int		 k;
	int		*fp = NULL;
	int		 r;
	struct onp_coord	*epc = NULL;
	size_t		 epcsz = 0;
	size_t		 i;
	void		*pp;

	/* Initialise the path from origin to target. */

	fp = malloc(sizeof(int) * diff->size);
	diff->path = malloc(sizeof(int) * diff->size);
	diff->result = result;

	if (NULL == fp || NULL == diff->path)
		goto out;

	for (i = 0; i < diff->size; i++)
		fp[i] = diff->path[i] = -1;

	/*
	 * Run the actual algorithm.
	 * This computes the full path in diff->path from the origin to
	 * the target.
	 */

	do {
		p++;
		for (k = -p;
		     k <= (ssize_t)diff->delta - 1; k++) {
			fp[k + diff->offset] = onp_snake(diff, k,
				fp[k - 1 + diff->offset] + 1,
				fp[k + 1 + diff->offset]);
			if (fp[k + diff->offset] < 0)
				goto out;
		}
		for (k = diff->delta + p;
		     k >= (ssize_t)diff->delta + 1; k--) {
			fp[k + diff->offset] = onp_snake(diff, k,
				fp[k - 1 + diff->offset] + 1,
				fp[k + 1 + diff->offset]);
			if (fp[k + diff->offset] < 0)
				goto out;
		}

		fp[diff->delta + diff->offset] =
			onp_snake(diff, diff->delta,
				fp[diff->delta - 1 + diff->offset] + 1,
				fp[diff->delta + 1 + diff->offset]);
		if (fp[diff->delta + diff->offset] < 0)
			goto out;
	} while (fp[diff->delta + diff->offset] != (ssize_t)diff->n);

	/* Now compute edit distance. */

	assert(p >= 0);
	diff->result->editdist = diff->delta + 2 * p;

	/*
	 * Here we compute the shortest edit script and the least common
	 * subsequence from the path.
	 */

	r = diff->path[diff->delta + diff->offset];

	while(-1 != r) {
		pp = reallocarray
			(epc, epcsz + 1,
			 sizeof(struct onp_coord));
		if (NULL == pp)
			goto out;
		epc = pp;
		epc[epcsz].x = diff->pathcoords[r].x;
		epc[epcsz].y = diff->pathcoords[r].y;
		epcsz++;
		r = diff->pathcoords[r].k;
	}

	if (epcsz)
		onp_genseq(diff, epc, epcsz);

	rc = 1;
out:
	free(fp);
	free(epc);
	return rc;
}

int
diff(struct diff *d, diff_cmp cmp, size_t size,
	const void *base1, size_t nmemb1,
	const void *base2, size_t nmemb2)
{
	struct onp_diff	*p;
	int		 rc;

	p = onp_alloc(cmp, size, base1, nmemb1, base2, nmemb2);
	if (NULL == p)
		return 0;

	rc = onp_compose(p, d);
	onp_free(p);
	return rc;
}