source: trunk/poppler/mypoppler/poppler/ABWOutputDev.cc @ 250

Last change on this file since 250 was 250, checked in by Eugene Romanenko, 13 years ago

PDF plugin: poppler library updated to version 0.8.3

File size: 45.4 KB
Line 
1//========================================================================
2//
3// ABWOutputDev.cc
4//
5// Jauco Noordzij
6//
7// Based somewhat on HtmlOutputDev.cc
8//
9//========================================================================
10
11#ifdef __GNUC__
12#pragma implementation
13#endif
14
15#include "config.h"
16#include <stdio.h>
17#include <stdlib.h>
18#include <stdarg.h>
19#include <stddef.h>
20#include <ctype.h>
21#include <math.h>
22#include "goo/GooString.h"
23#include "goo/GooList.h"
24#include "UnicodeMap.h"
25#include "goo/gmem.h"
26#include "Error.h"
27#include "GfxState.h"
28#include "GlobalParams.h"
29#include "ABWOutputDev.h"
30#include "PDFDoc.h"
31
32#include <libxml/parser.h>
33#include <libxml/tree.h>
34#include <libxml/xpath.h>
35#include <libxml/xpathInternals.h>
36
37
38// Inter-character space width which will cause addChar to start a new
39// word.
40#define minWordBreakSpace 0.1
41
42// Maximum inter-word spacing, as a fraction of the font size.
43#define maxWordSpacing 1.5
44
45// Max distance between baselines of two lines within a block, as a
46// fraction of the font size.
47#define maxLineSpacingDelta 1.5
48
49#define C_maxVCutValue 4
50#define C_maxHCutValue 5
51//------------------------------------------------------------------------
52// ABWOutputDev
53//------------------------------------------------------------------------
54
55ABWOutputDev::ABWOutputDev(xmlDocPtr ext_doc)
56{
57  pdfdoc = NULL;
58  N_page = N_style = N_text = N_styleset = N_Block = N_word = NULL;
59  doc = ext_doc;
60  N_root = xmlNewNode(NULL, BAD_CAST "abiword");
61  xmlDocSetRootElement(doc, N_root);
62  N_styleset = xmlNewChild(N_root, NULL, BAD_CAST "styles", NULL);
63  N_content = xmlNewChild(N_root, NULL, BAD_CAST "content", NULL);
64  uMap = globalParams->getTextEncoding();
65  maxStyle = Style = 1;
66}
67
68ABWOutputDev::~ABWOutputDev() {
69  xmlCleanupParser();
70}
71
72void ABWOutputDev::startPage(int pageNum, GfxState *state) {
73  /*While reading a pdf page this node acts as a placeholder parent.
74  when conversion is finished and the page is structured as we like it
75  all text fragments are moved from N_page to N_content.*/
76  N_page = xmlNewNode(NULL, BAD_CAST "page");
77  G_pageNum = pageNum;
78} 
79
80/*Callback to denote that poppler reached the end of a page
81here I insert most of the interesting processing stuff*/
82void ABWOutputDev::endPage() {
83  //make sure all words are closed
84  endTextBlock();
85  cleanUpNode(N_page, true);
86  //xmlAddChild(N_content, N_page);
87  //xmlSaveFormatFileEnc("pre-cut.xml", doc, "UTF-8", 1);
88  //xmlUnlinkNode(N_page);
89  //call the top down cutting mechanism
90  recursiveXYC(N_page);
91  //by stopping to worry about creating empty nodes I made the code quite a
92  //bit more robust. This function makes sure we have a nice'n'clean tree
93  cleanUpNode(N_page, true);
94  //xmlAddChild(N_content, N_page);
95  //xmlSaveFormatFileEnc("raw.xml", doc, "UTF-8", 1);
96  //xmlUnlinkNode(N_page);
97 
98  //Interpret the XY tree and infer text blocks and columns
99  interpretXYTree();
100  cleanUpNode(N_page, true);
101  //xmlAddChild(N_content, N_page);
102  //xmlSaveFormatFileEnc("interpreted.xml", doc, "UTF-8", 1);
103  //xmlUnlinkNode(N_page);
104 
105  //I have blocks and columns, this function will turn that into paragraphs and
106  //columns
107  generateParagraphs();
108  cleanUpNode(N_page, true);
109  xmlAddChild(N_content, N_page);
110  N_page = NULL;
111}
112
113void ABWOutputDev::recursiveXYC(xmlNodePtr nodeset) {
114  /*This function implements the recursive XY Cut. basically, it gets
115  the largest piece of whitespace (using getBiggestSeperator()) and then
116  splits the page using splitNodes on that whitespace. It calls itself again
117  with both the halves*/
118  float bhs, bvs, X1, X2, Y1, Y2;
119
120  bvs = getBiggestSeperator(nodeset, VERTICAL, &X1, &X2);
121  bhs = getBiggestSeperator(nodeset, HORIZONTAL, &Y1, &Y2);
122 
123  if (bvs == -1){
124    if (bhs == -1){//both -1
125      //FIXME: add assertions that bvs and bhs are >=-1
126      printf("No seperators\n");
127      return;
128    }
129    else { //only bhs > -1
130      splitNodes(Y1, HORIZONTAL, nodeset, bhs);
131    }
132  }
133  else {
134    if (bhs == -1){//only bvs > -1
135      splitNodes(X1, VERTICAL, nodeset, bvs);
136    }
137    else {//both > -1
138      if (bvs >= (bhs/1.7)){
139        //When people read a text they prefer vertical cuts over horizontal
140        //ones. I'm not that sure about the 1.7 value, but it seems to work.
141        splitNodes(X1, VERTICAL, nodeset, bvs);
142      }
143      else {
144        splitNodes(Y1, HORIZONTAL, nodeset, bhs);
145      }
146    }
147  }
148  recursiveXYC(nodeset->children);
149  recursiveXYC(nodeset->children->next);
150}
151
152void ABWOutputDev::splitNodes(float splitValue, unsigned int direction, xmlNodePtr N_parent, double seperator){
153  //This function takes a nodeset and splits it based on a cut value. It returns
154  //the nodePtr with two childnodes, the both chunks.
155  xmlNodePtr N_move, N_cur, N_newH, N_newL;
156  char * propName;
157  const char *nodeName;
158  char buf[20];
159  if (direction == HORIZONTAL) {
160    propName = "Y1"; 
161    nodeName = "horizontal";
162  }
163  else { 
164    propName = "X1"; 
165    nodeName = "vertical";
166  }
167  N_newH = xmlNewNode(NULL, BAD_CAST nodeName);
168  N_newL = xmlNewNode(NULL, BAD_CAST nodeName);
169  sprintf(buf, "%f", seperator); 
170  xmlNewProp(N_newH, BAD_CAST "diff", BAD_CAST buf);
171  sprintf(buf, "%f", seperator); 
172  xmlNewProp(N_newL, BAD_CAST "diff", BAD_CAST buf);
173  N_cur = N_parent->children;
174  while (N_cur){
175    N_move = N_cur->next;
176    xmlUnlinkNode(N_cur);
177    if (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST propName)) > splitValue){
178      xmlAddChild(N_newH, N_cur);
179    }
180    else {
181      xmlAddChild(N_newL, N_cur);
182    }
183    N_cur = N_move;
184  }
185  xmlAddChild(N_parent, N_newL);
186  xmlAddChild(N_parent, N_newH);
187}
188
189float ABWOutputDev::getBiggestSeperator(xmlNodePtr N_set, unsigned int direction, float * C1, float * C2)
190{
191  int i = 0;
192  int nodeCount = xmlLsCountNode(N_set);
193  float store;
194  int min;
195  float gap, endV;
196  float * stt;
197  float * end;
198  if (nodeCount == 0){
199    //Add assertion that this shouldn't happen
200    fprintf(stderr,"No child nodes");
201    return -1;
202  }
203  stt = new float[nodeCount];
204  end = new float[nodeCount];
205  //store all variables in two arrays (one for start, one for end coordinates)
206  if (direction == VERTICAL) {
207    for (xmlNodePtr N_cur = N_set->children; N_cur != NULL; N_cur = N_cur->next){
208      stt[i] = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "X1"));
209      end[i] = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "X2"));
210      i++;
211    }
212  }
213  else {
214    for (xmlNodePtr N_cur = N_set->children; N_cur != NULL; N_cur = N_cur->next){
215      stt[i] = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "Y1"));
216      end[i] = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "Y2"));
217      i++;
218    }
219  }
220  //Sort them
221  for (i = 0; i < nodeCount - 1; i++){
222    min = i;
223    for (int j = i + 1; j < nodeCount; j++)
224      if (stt[j] < stt[i])
225        min = j;
226    store = stt[i];
227    stt[i] = stt[min];
228    stt[min] = store;
229    store = end[i];
230    end[i] = end[min];
231    end[min] = store;
232  }
233  //find the largest gap
234  gap = -1;
235  endV = end[0];
236  *C1 = 0;
237  *C2 = 0;
238  for (int inspect = 1; inspect < nodeCount; inspect++){
239    //no gap
240    if (((stt[inspect] - endV) - gap) < 0.5){ //FIXME:This is copied almost directly from the previous function, needs checking out
241      //partial overlap instead of complete one
242      if (end[inspect] > endV)
243        endV = end[inspect];
244    }
245    //gap
246    else{
247      //gap is larger than any previous gap
248      if (gap < (stt[inspect] - endV)){
249        gap = stt[inspect] - endV;
250        *C1 = endV;
251        *C2 = stt[inspect];
252      }
253      endV = end[inspect];
254    }
255  }
256  return gap;
257}
258
259void ABWOutputDev::updateFont(GfxState *state) {
260  char buf[160];
261  xmlNodePtr N_cur;
262  GfxFont *font;
263  bool found = false;
264  bool isBold, isItalic, S_isBold, S_isItalic;
265  isBold = isItalic = S_isBold =  S_isItalic = false;
266  font = state->getFont();
267  GooString *ftName;
268  char *fnEnd, *fnName;
269  int fnStart, ftSize;
270  //the first time this function is called there is no funt.
271  //Fixme: find out if that isn'y a bug
272  if (font){
273    isBold = (font->isBold() || font->getWeight() >6 || (strstr(font->getOrigName()->getCString(), "Bold")-font->getOrigName()->getCString() == (font->getOrigName()->getLength()-4)));
274    isItalic =  (font->isItalic() || (strstr(font->getOrigName()->getCString(), "Italic")-font->getOrigName()->getCString() == (font->getOrigName()->getLength()-6)));
275    ftSize = int(state->getTransformedFontSize())-1;
276    ftName = new GooString(font->getOrigName());
277    fnStart = strcspn(ftName->getCString(), "+");
278    if (fnStart < ftName->getLength())
279      ftName->del(0,fnStart+1);
280    fnEnd = strrchr(ftName->getCString(), 44);
281    if (fnEnd == 0)
282      fnEnd = strrchr(ftName->getCString(), 45);
283    if (fnEnd != 0)
284      ftName->del(fnEnd-ftName->getCString(),ftName->getLength()-1);
285   
286/*    fnName = ftName;
287    if (isBold or isItalic){
288      fnStart = strcspn(fnName, "+");
289      if (fnStart == font->getOrigName()->getLength())
290        fnStart = 0;
291      else fnStart++;
292
293      fnEnd = strstr(fnName, ",");
294      if (fnEnd == 0)
295        fnEnd = strstr(fnName, "-");
296      if (fnEnd != 0)
297        fnName[fnEnd-fnName] = 0;
298//      char fntName[fnLength];
299//      strncpy (fntName,fnName+fnStart+1,fnLength);
300      fnName+=fnStart;
301//      fnName = fntName;
302    }
303    else {*/
304      fnName = ftName->getCString();
305//    }
306    for (N_cur = N_styleset->children; N_cur; N_cur = N_cur ->next){
307      if (
308       isBold == (xmlStrcasecmp(xmlGetProp(N_cur,BAD_CAST "bold"),BAD_CAST "bold;") == 0)
309       &&
310       isItalic == (xmlStrcasecmp(xmlGetProp(N_cur,BAD_CAST "italic"),BAD_CAST "italic") == 0)
311       &&
312       xmlStrcasecmp(xmlGetProp(N_cur,BAD_CAST "font"),BAD_CAST fnName) == 0
313       &&
314       xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "size")) == ftSize
315      ) {
316        found = true;
317        Style = int(xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "id")));
318      }
319    }
320    if (!found){
321      N_cur = xmlNewChild(N_styleset, NULL, BAD_CAST "s", NULL);
322      xmlSetProp(N_cur, BAD_CAST "type", BAD_CAST "P");
323      sprintf(buf, "%d", maxStyle++);
324      xmlSetProp(N_cur, BAD_CAST "name", BAD_CAST buf);
325      xmlSetProp(N_cur, BAD_CAST "id", BAD_CAST buf);
326      Style = maxStyle;
327      sprintf(buf, "%d", ftSize); xmlSetProp(N_cur, BAD_CAST "size", BAD_CAST buf);
328      isBold   ? xmlSetProp(N_cur, BAD_CAST "bold", BAD_CAST "bold;")  : xmlSetProp(N_cur, BAD_CAST "bold", BAD_CAST "normal;");
329      isItalic ? xmlSetProp(N_cur, BAD_CAST "italic", BAD_CAST "italic"): xmlSetProp(N_cur, BAD_CAST "italic", BAD_CAST "normal");
330      xmlSetProp(N_cur, BAD_CAST "font", BAD_CAST fnName);
331    }
332  }
333}
334
335void ABWOutputDev::drawChar(GfxState *state, double x, double y,
336                        double dx, double dy,
337                        double originX, double originY,
338                        CharCode code, int nBytes, Unicode *u, int uLen)
339{
340  //I wouldn't know what size this should safely be. I guess 64 bytes should be
341  //enough for any unicode character
342  char buf[64];
343  int charLen;
344  x = dx;
345  y = dy;
346  //state->textTransformDelta(dx * state->getHorizScaling(), dy, &dx, &dy);
347  //state->transformDelta(dx, dy, &dx, &dy);
348  if (uLen == 1 && code == 0x20) {
349    //If we break a text sequence on space, then the X1 should be increased
350    //but the Y1 and Y2 should remain the same.
351    beginWord(state,X2+dx,Y2);
352  }
353  else {
354    X2    += dx;
355    Y2    += dy;
356    charLen = uMap->mapUnicode(*u,buf,sizeof(buf));
357    //Getting Unicode to libxml is something I need to fix.
358    //simply passing it using a bad-cast isn't working.
359    //I assume that CharCode code it the U+value of the unicode character
360    //But for a ligature code gives me DF which is the ringel-s, I guess
361    //code should be two bytes wide?
362    xmlNodeAddContentLen(N_word, BAD_CAST buf, charLen);
363  }
364}
365
366void ABWOutputDev::beginString(GfxState *state, GooString *s) {
367  double x,y;
368  //state->textTransform(x, y, &x, &y);
369  state->transform(state->getCurX(), state->getCurY(), &x, &y);
370  if (N_word) {
371    verDist = y-Y2;
372    horDist = x-X2;
373    //TEST:changed fabs(horDist) to horDist
374    //FIXME: this if statement seems awkward to me.
375    if (horDist > (state->getTransformedFontSize()*maxWordSpacing) || (fabs(verDist) > (state->getTransformedFontSize()/maxLineSpacingDelta))) {
376      beginTextBlock(state,x,y);
377    }
378    else {
379      if ((horDist > (state->getTransformedFontSize()*minWordBreakSpace)) || (fabs(verDist) > (state->getTransformedFontSize()/maxLineSpacingDelta))) {
380        beginWord(state,x,y);
381      }
382    }
383  }
384  else {
385  //This is the first word. Clear all values and call beginWord;
386    X2 = x;
387    Y2 = y;
388    horDist = 0;
389    verDist = 0;
390    height  = 0;
391    beginTextBlock(state,x,y);
392  }
393}
394
395void ABWOutputDev::endString(GfxState *state) {
396
397}
398
399void ABWOutputDev::beginWord(GfxState *state, double x, double y){
400  char buf[20];
401//  printf("***BREAK!***\n");
402  endWord();
403  X1 = x;
404  Y2 = y;
405
406  horDist = X1-X2;
407  verDist = Y1-Y2;
408
409  X2 = X1;
410  height = state->getFont()->getAscent() * state->getTransformedFontSize();
411  Y1 = Y2-height;
412
413  N_word = xmlNewChild(N_Block, NULL, BAD_CAST "word", NULL);
414  sprintf(buf, "%f", X1); xmlNewProp(N_word, BAD_CAST "X1", BAD_CAST buf);
415  sprintf(buf, "%f", Y1); xmlNewProp(N_word, BAD_CAST "Y1", BAD_CAST buf);
416  sprintf(buf, "%d", Style); xmlNewProp(N_word, BAD_CAST "style", BAD_CAST buf);
417}
418
419void ABWOutputDev::endWord(){
420  char buf[20];
421  if (N_word) {
422    sprintf(buf, "%f", X2);    xmlNewProp(N_word, BAD_CAST "X2", BAD_CAST buf);
423    sprintf(buf, "%f", Y2);    xmlNewProp(N_word, BAD_CAST "Y2", BAD_CAST buf);
424    sprintf(buf, "%f", X2-X1); xmlNewProp(N_word, BAD_CAST "width", BAD_CAST buf);
425    sprintf(buf, "%f", Y2-Y1); xmlNewProp(N_word, BAD_CAST "height", BAD_CAST buf);
426    N_word = NULL;
427  }
428}
429
430void ABWOutputDev::beginTextBlock(GfxState *state, double x, double y){
431  endTextBlock();
432  N_Block = xmlNewChild(N_page, NULL, BAD_CAST "Textblock", NULL);
433  beginWord(state,x,y);
434}
435
436void ABWOutputDev::endTextBlock(){
437  if (N_Block) {
438    endWord();
439    N_Block = NULL; 
440  }
441}
442/*
443This will be a function to retrieve coherent text blocks from the chunk tree.*/
444void ABWOutputDev::interpretXYTree(){
445  xmlNodePtr N_oldPage;
446  N_oldPage = N_page;
447  N_page = xmlNewNode(NULL, BAD_CAST "page");
448  N_column = N_page;
449  //xmlAddChild(N_content, N_page);
450  N_Block = xmlNewChild(N_column, NULL, BAD_CAST "chunk", NULL);
451  ATP_recursive(N_oldPage);
452}
453
454void ABWOutputDev::ATP_recursive(xmlNodePtr N_parent){
455  xmlNodePtr N_first, N_second, N_line, N_tempCol, N_tempColset;
456
457  N_first  = N_parent->children;
458  if (!N_first)
459    return;
460
461  N_second = N_first->next;
462/*
463  Possibilities:
464  there is one child node
465    Because we cleaned up before the only case where we allow one childnode is
466    within Textblocks and textBlocks within 'vertical' nodes.
467      basically one text node means: add it to the current block.
468  There are two childnodes
469    This can be two verticals, two horizontals or one horizontal and a text node.
470    verticals:
471      If the first is vertical, the second is as well.
472      verticals mean: create a new Block, add a column per vertical make the
473      vertical the block and recurse inside.
474      then make the second vertical the block and recurse inside
475      then finish the block (ie. create a new one)
476    horizontal and or Textblocks
477        if first is textnode
478          add first to block
479          if second is textnode
480            at to block
481          else
482            call again
483        else
484          begin new block
485            call again
486          begin new block
487          if second is text node
488            add to block
489          else
490            call again
491  there are more then two child nodes
492    this can be a number of Textblocks and horizontals
493    add the textNodes to the current Block
494    if a horizontal is encountered enter it and generate a new block afterwards
495  */
496  //fprintf(stderr,"**********************************************************************\n");
497  //xmlSaveFormatFileEnc("-", doc, "UTF-8", 1);
498  switch (xmlLsCountNode(N_parent)) {
499  case 1:
500    //fprintf(stderr,"case 1\n");
501    N_line = xmlNewChild(N_Block, NULL, BAD_CAST "line", NULL);
502    xmlUnlinkNode(N_first);
503    xmlAddChild(N_line, N_first);
504    break;
505  case 2:
506    //fprintf(stderr,"case 2\n");
507    if (xmlStrcasecmp(N_first->name,BAD_CAST "vertical") == 0){
508      //store the column for the moment
509      N_tempCol = N_column;
510      /*If we have three columns they will turn up in the tree as:
511      <vertical>
512        <vertical/>
513        <vertical/>
514      </vertical>
515      <vertical/>
516      */
517      //if the parent is a vertical as well, we can skip the colset generation
518      //thing here we can also remove the just added column and block, because
519      //these are going to replace them
520      if (xmlStrcasecmp(N_parent->name,BAD_CAST "vertical") != 0){
521        //fprintf(stderr,"first time column\n");
522        N_tempColset = N_colset;
523        N_colset = xmlNewChild(N_column, NULL, BAD_CAST "colset", NULL);
524        N_column = xmlNewChild(N_colset, NULL, BAD_CAST "column", NULL);
525        N_Block = xmlNewChild(N_column, NULL, BAD_CAST "chunk", NULL);
526      }
527      else {
528        //fprintf(stderr,"second time column\n");
529        xmlUnlinkNode(N_column);
530        N_column = xmlNewChild(N_colset, NULL, BAD_CAST "column", NULL);
531        N_Block = xmlNewChild(N_column, NULL, BAD_CAST "chunk", NULL);
532      }
533      //fprintf(stderr,"Building first column...\n");
534      ATP_recursive(N_first);
535      N_column = xmlNewChild(N_colset, NULL, BAD_CAST "column", NULL);
536      N_Block = xmlNewChild(N_column, NULL, BAD_CAST "chunk", NULL);
537      //fprintf(stderr,"Building second column...\n");
538      ATP_recursive(N_second);
539      //make sure we end the column by continuing in the master column and
540      //setting the block and line to it
541      N_column = N_tempCol;
542      if (xmlStrcasecmp(N_parent->name,BAD_CAST "vertical") != 0){
543        if (N_tempColset != NULL)
544          N_colset = N_tempColset;
545        else
546          fprintf(stderr,"N_templColset should not! be empty (line 823)");//FIXME: add assert
547      }
548    }
549    else {
550      if (xmlStrcasecmp(N_first->name,BAD_CAST "Textblock") == 0) {
551        //fprintf(stderr,"add first as textblock\n");
552        N_line = xmlNewChild(N_Block, NULL, BAD_CAST "line", NULL);
553        xmlUnlinkNode(N_first);
554        xmlAddChild(N_line, N_first);
555        if (xmlStrcasecmp(N_second->name,BAD_CAST "Textblock") == 0) {
556          //fprintf(stderr,"add second as textblock\n");
557          //FIXME: this is not neat. We should ignore the cut ignoring when there are only two elements above
558          //line aggregation doesn't work anyway atm.
559          xmlUnlinkNode(N_second);
560          xmlAddChild(N_line, N_second);
561          //We have two textChunks that are going to be added to the line.
562          //the following statements make the line wrap around both textblocks
563          //if the firstX1 is smaller then the second X1 use the first, else use the second etc.
564        }
565        else {
566          //fprintf(stderr,"recursing into second\n");
567          ATP_recursive(N_second);
568        }
569      }
570      else {
571        N_Block = xmlNewChild(N_column, NULL, BAD_CAST "chunk", NULL);
572        //fprintf(stderr,"recursing into first\n");
573        ATP_recursive(N_first);
574        N_Block = xmlNewChild(N_column, NULL, BAD_CAST "chunk", NULL);
575        if (xmlStrcasecmp(N_second->name,BAD_CAST "Textblock") == 0) {
576          //fprintf(stderr,"add second as textblock\n");
577          N_line = xmlNewChild(N_Block, NULL, BAD_CAST "line", NULL);
578          xmlUnlinkNode(N_second);
579          xmlAddChild(N_line, N_second);
580        }
581        else {
582          //fprintf(stderr,"recursing into second\n");
583          ATP_recursive(N_second);
584        }
585      }
586    }
587    break;
588  default:
589    //double tX1=0, tX2=0, tY1=0, tY2=0;
590    //fprintf(stderr,"case default\n");
591    N_line = xmlNewChild(N_Block, NULL, BAD_CAST "line", NULL);
592    while (N_first){
593      //xmlXPathCastStringToNumber(xmlGetProp(N_first,BAD_CAST "X1")) < tX1 ? tX1 = xmlXPathCastStringToNumber(xmlGetProp(N_first,BAD_CAST "X1")) : tX1 = tX1;
594      //xmlXPathCastStringToNumber(xmlGetProp(N_first,BAD_CAST "X2")) > tX2 ? tX2 = xmlXPathCastStringToNumber(xmlGetProp(N_first,BAD_CAST "X2")) : tX2 = tX2;
595      //xmlXPathCastStringToNumber(xmlGetProp(N_first,BAD_CAST "Y1")) < tY1 ? tY1 = xmlXPathCastStringToNumber(xmlGetProp(N_first,BAD_CAST "Y1")) : tY1 = tY1;
596      //xmlXPathCastStringToNumber(xmlGetProp(N_first,BAD_CAST "Y2")) > tY2 ? tY2 = xmlXPathCastStringToNumber(xmlGetProp(N_first,BAD_CAST "Y2")) : tY1 = tY2;
597      N_second = N_first->next;
598      if (xmlStrcasecmp(N_first->name,BAD_CAST "Textblock") == 0){
599        xmlUnlinkNode(N_first);
600        xmlAddChild(N_line, N_first);
601      }
602      else { //fprintf(stderr,"This shouldn't happen! (line 700)\n");
603      }
604      N_first = N_second;
605    }
606    break;
607  }
608}
609
610/*The cleanup function. It started out as a simple function to remove empty nodes
611so that I could call xmladdnewchildnode as often as I liked so that I wouldn't get seg-faults
612It is now a bit more advanced, makes sure the tree is as it's supposed to be and adds information too*/
613void ABWOutputDev::cleanUpNode(xmlNodePtr N_parent, bool aggregateInfo){
614  double tX1=-1, tX2=-1, tY1=-1, tY2=-1;
615  xmlNodePtr N_cur, N_next;
616  N_cur = N_parent->children;
617  char buf[20];
618  int prevStyle = -1;
619  xmlChar *val;
620  int styleLength = xmlLsCountNode(N_styleset)+1;
621  float stylePos;
622  int *styles = new int[styleLength];
623  for (int i=1; i< styleLength; i++) { styles[i] = 0;}
624  /*
625  ignore two horizontal nodes with textBlocks right underneath them. They
626  signal the end of a chunk, and the horizontal seperation needs to be
627  preserved, because it means they are different lines. The second horizontal
628  therefore needs to be kept.
629  */
630  if ((xmlLsCountNode(N_parent) == 2)
631      &&
632     xmlStrcasecmp(N_parent->name,BAD_CAST "horizontal") == 0
633      && 
634     N_cur
635      &&
636     N_cur->next
637      &&
638     xmlStrcasecmp(N_cur->name,BAD_CAST "horizontal") == 0 && xmlStrcasecmp(N_cur->next->name,BAD_CAST "horizontal") == 0
639      &&
640     xmlLsCountNode(N_cur) == 1 && xmlLsCountNode(N_cur->next) == 1
641      &&
642     xmlStrcasecmp(N_cur->children->name,BAD_CAST "Textblock") == 0 && xmlStrcasecmp(N_cur->next->children->name,BAD_CAST "Textblock") == 0
643     ) {
644    xmlAddPrevSibling(N_cur->next,N_cur->children); 
645    xmlUnlinkNode(N_cur);
646  } 
647  /*
648  This removes columns if one of the parts is actually a single letter.
649  I found out I liked the columns better, so I have the code commented out.
650  */
651/*  else if ((xmlLsCountNode(N_parent) == 2)
652             &&
653            N_cur
654             &&
655            N_cur->next
656             &&
657            xmlStrcasecmp(N_cur->name,BAD_CAST "vertical") == 0
658             &&
659            xmlStrcasecmp(N_cur->next->name,BAD_CAST "vertical") == 0
660             &&
661            (N_cur->children)
662             &&
663            (N_cur->children->children)
664             &&
665            (N_cur->children->children->children)
666             &&
667            xmlStrlen(N_cur->children->children->children->content) == 1) {
668    N_next = N_cur->next;
669    xmlAddChild(N_parent, N_next->children);
670    xmlAddPrevSibling(N_next->children->children, N_cur->children);
671    xmlUnlinkNode(N_cur);
672    xmlUnlinkNode(N_next);
673  } */else {
674    while (N_cur){
675      N_next = N_cur->next;
676      cleanUpNode(N_cur, aggregateInfo);
677      if (xmlLsCountNode(N_cur) == 0 && (xmlStrcasecmp(N_cur->name,BAD_CAST "cbr") != 0) && (xmlStrcasecmp(N_cur->name,BAD_CAST "s") != 0))
678        xmlUnlinkNode(N_cur);
679      //If the node is still around
680      N_cur = N_next;
681    }
682  }
683  //If a countainer element has only one child, it can be removed except for vertical
684  //cuts with only one textElement;
685  //the main reason for this code is to remove the crumbs after cleaning up in the loop above
686  if ((xmlLsCountNode(N_parent) == 1) && ((xmlStrcasecmp(N_parent->name,BAD_CAST "horizontal") == 0) || ((xmlStrcasecmp(N_parent->name,BAD_CAST "vertical") == 0) && (xmlStrcasecmp(N_parent->children->name,BAD_CAST "Textblock") != 0)))){
687    N_cur = N_parent->children;
688    xmlAddPrevSibling(N_parent,N_cur);
689    xmlUnlinkNode(N_parent);
690  }
691  //We cannot remove the page element so if it has only one childnode, we remove that childnode instead
692  if ((xmlStrcasecmp(N_parent->name,BAD_CAST "page") == 0) && (xmlLsCountNode(N_parent) == 1)) {
693    N_cur = N_parent->children->children;
694    while (N_cur){
695      N_next = N_cur->next;
696      xmlUnlinkNode(N_cur);
697      xmlAddChild(N_parent, N_cur);
698      N_cur = N_next;
699    }
700    xmlUnlinkNode(N_parent->children);
701  }
702  //Ok, so by this time the N_parent and his children are guaranteed to be clean
703  //this for loop gets information from the 'word' elements and propagates it up
704  //the tree.
705  if (aggregateInfo && xmlStrcasecmp(N_parent->name,BAD_CAST "word") != 0) {
706    for (N_cur = N_parent->children; N_cur; N_cur = N_cur->next){
707      val = xmlGetProp(N_cur,BAD_CAST "style");
708      stylePos = xmlXPathCastStringToNumber(val);
709      //fprintf(stderr,"1: %f, %d\n",stylePos,int(stylePos));
710      styles[int(stylePos)]=styles[int(stylePos)]+1;
711      //fprintf(stderr,"2: styles[%d] = %d\n",int(stylePos),styles[int(stylePos)]);
712      (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "X1")) < tX1 || tX1 == -1)? tX1 = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "X1")) : tX1 = tX1;
713      (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "X2")) > tX2)             ? tX2 = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "X2")) : tX2 = tX2;
714      (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "Y1")) < tY1 || tY1 == -1)? tY1 = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "Y1")) : tY1 = tY1;
715      (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "Y2")) > tY2)             ? tY2 = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "Y2")) : tY2 = tY2;
716    }
717    sprintf(buf, "%f", tX1);     xmlSetProp(N_parent, BAD_CAST "X1", BAD_CAST buf);
718    sprintf(buf, "%f", tX2);     xmlSetProp(N_parent, BAD_CAST "X2", BAD_CAST buf);
719    sprintf(buf, "%f", tY1);     xmlSetProp(N_parent, BAD_CAST "Y1", BAD_CAST buf);
720    sprintf(buf, "%f", tY2);     xmlSetProp(N_parent, BAD_CAST "Y2", BAD_CAST buf);
721    sprintf(buf, "%f", tX2-tX1); xmlSetProp(N_parent, BAD_CAST "width", BAD_CAST buf);
722    sprintf(buf, "%f", tY2-tY1); xmlSetProp(N_parent, BAD_CAST "height", BAD_CAST buf);
723    prevStyle = 0;
724    styles[0] = -1;
725    for (int i=1; i< styleLength; i++) { if (styles[i] > styles[prevStyle]) prevStyle = i; }
726    //fprintf(stderr,"%d\n", prevStyle);
727    if (prevStyle > 0){
728      sprintf(buf, "%d", prevStyle);     xmlSetProp(N_parent, BAD_CAST "style", BAD_CAST buf);
729    }
730  }
731  if (N_parent->children && xmlStrcasecmp(N_parent->children->name,BAD_CAST "line") == 0 && xmlGetProp(N_parent->children,BAD_CAST "alignment") != NULL)
732    xmlSetProp(N_parent, BAD_CAST "alignment", xmlGetProp(N_parent->children,BAD_CAST "alignment"));
733
734   delete styles;
735}
736
737void ABWOutputDev::generateParagraphs() {
738  xmlNodePtr N_cur, N_parent, N_p, N_line, N_next;
739  int lvl;
740  //basically I first detect the text-alignment within blocks.
741  //ASSUMPTION: my block seperation thing is good enough so I don't need to
742  //worry about two alignments in one paragraph
743 
744  X1 = 0;
745  X2 = pdfdoc->getPageCropWidth(G_pageNum);
746  Y1 = 0;
747  Y2 = pdfdoc->getPageCropHeight(G_pageNum);
748  addAlignment(N_page);
749 
750  //then it's a switch per alignement
751  N_cur = N_page->children;
752  N_parent = N_page;
753  lvl = 1;
754  while (N_cur) {
755    if (xmlStrcasecmp(N_cur->name,BAD_CAST "chunk") == 0){
756      N_p = xmlNewNode(NULL, BAD_CAST "chunk");
757      xmlAddPrevSibling(N_cur,N_p);
758      //N_p = xmlNewChild(N_parent, NULL, BAD_CAST "chunk", NULL);
759      //A new paragraph is created when:
760      switch (int(xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "alignment")))){
761      //left
762      case 1: //the distance between the texblock X2 and the last word X2 is more than
763         //the following first word width.
764         N_line = N_cur->children;
765         while (N_line){
766           N_next = N_line->next;
767           xmlUnlinkNode(N_line);
768           xmlAddChild(N_p,N_line);
769           xmlSetProp(N_line, BAD_CAST "alignment", BAD_CAST "1");
770           if (N_next && xmlStrcasecmp(N_next->name,BAD_CAST "line") == 0){
771             if (xmlXPathCastStringToNumber(xmlGetProp(N_next->children->children,BAD_CAST "width")) < (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "width")) - xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "width")))){
772               N_p = xmlNewNode(NULL, BAD_CAST "chunk");
773               xmlAddPrevSibling(N_cur,N_p);
774             }
775           }
776           N_line = N_next;
777         }
778         break;
779      //right
780      case 2: //the same but now with X1 and first word and following last word
781         N_line = N_cur->children;
782         while (N_line){
783           N_next = N_line->next;
784           xmlUnlinkNode(N_line);
785           xmlAddChild(N_p,N_line);
786           xmlSetProp(N_line, BAD_CAST "alignment", BAD_CAST "2");
787           if (N_next && xmlStrcasecmp(N_next->name,BAD_CAST "line") == 0){
788             //fprintf(stderr,"width_next=%f, X2_bl=%f, X2_w=%f\n",xmlXPathCastStringToNumber(xmlGetProp(N_next->children->children,BAD_CAST "width")),xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "width")),xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "width")));
789             if (xmlXPathCastStringToNumber(xmlGetProp(N_next->children->children,BAD_CAST "width")) < (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "width")) - xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "width")))){
790               N_p = xmlNewNode(NULL, BAD_CAST "chunk");
791               xmlAddPrevSibling(N_cur,N_p);
792             }
793           }
794           N_line = N_next;
795         }
796         break;
797      //centered
798      case 3: //the combined left and right space is more than the following first word
799         N_line = N_cur->children;
800         while (N_line){
801           N_next = N_line->next;
802           xmlUnlinkNode(N_line);
803           xmlAddChild(N_p,N_line);
804           xmlSetProp(N_line, BAD_CAST "alignment", BAD_CAST "3");
805           if (N_next && xmlStrcasecmp(N_next->name,BAD_CAST "line") == 0){
806             //fprintf(stderr,"width_next=%f, X2_bl=%f, X2_w=%f\n",xmlXPathCastStringToNumber(xmlGetProp(N_next->children->children,BAD_CAST "width")),xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "width")),xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "width")));
807             if (xmlXPathCastStringToNumber(xmlGetProp(N_next->children->children,BAD_CAST "width")) < (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "width")) - xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "width")))){
808               N_p = xmlNewNode(NULL, BAD_CAST "chunk");
809               xmlAddPrevSibling(N_cur,N_p);
810             }
811           }
812           N_line = N_next;
813         }
814         break;
815      //justified
816      case 4:
817         //we break on all alignment=1 lines. A line with alignment=1 that is the first of a block will
818         //also initiate a paragraph break before.
819         N_line = N_cur->children;
820         if (xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "alignment")) == 1){
821           N_p = xmlNewNode(NULL, BAD_CAST "chunk");
822           xmlAddPrevSibling(N_cur,N_p);
823         }
824         while (N_line){
825           N_next = N_line->next;
826           xmlUnlinkNode(N_line);
827           xmlAddChild(N_p,N_line);
828           if (xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "alignment")) == 1){
829             N_p = xmlNewNode(NULL, BAD_CAST "chunk");
830             xmlAddPrevSibling(N_cur,N_p);
831           }
832           xmlSetProp(N_line, BAD_CAST "alignment", BAD_CAST "4");
833           N_line = N_next;
834         }
835         break;
836      }
837    }
838    else if (xmlStrcasecmp(N_cur->name,BAD_CAST "colset") == 0 || xmlStrcasecmp(N_cur->name,BAD_CAST "column") == 0){
839      N_parent = N_cur;
840      N_cur = N_cur->children;
841      lvl++;
842      N_p = xmlNewNode(NULL, BAD_CAST "chunk");
843      xmlAddPrevSibling(N_cur,N_p);
844      continue;
845    }
846    if (N_cur->next)
847      N_cur = N_cur->next;
848    else while (lvl > 0){
849      N_cur = N_parent;
850      N_parent = N_cur->parent;
851      lvl--;
852      if (N_cur->next){
853        N_cur = N_cur->next;
854        break;
855      }
856    }
857    if (lvl==0)
858      N_cur = NULL;
859  }
860}
861
862//function that adds an 'alignment=' property to the <chunk>s
863void ABWOutputDev::addAlignment(xmlNodePtr N_parent) {
864  xmlNodePtr N_chunk, N_line;
865  double tX1, tX2;
866  bool leftMatch, rightMatch, centerMatch;
867  int leftCnt = 0, rightCnt = 0, cntrCnt = 0, justCnt = 0;
868  //fprintf(stderr,"Entering addAlignment\n");
869  for (N_chunk = N_parent->children; N_chunk; N_chunk = N_chunk->next) {
870    if (xmlStrcasecmp(N_chunk->name,BAD_CAST "chunk") == 0){
871      X1 = xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1"));
872      X2 = xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2"));
873      //fprintf(stderr,"Found chunk\n");
874      //if the chunk contains only one line, we don't need to loop through it.
875      if (xmlLsCountNode(N_chunk) == 1){
876        //fprintf(stderr,"Processing line\n");
877        //fprintf(stderr,"X1=%f, X2=%f, cX1=%f, cX2=%f\n",X1,X2,xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1")), xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2")));
878        //fprintf(stderr,"%f\n",(xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1")) - X1)-(X2 - xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2"))));
879        //fprintf(stderr,"cX1-X1=%f, X2-cX2=%f\n",(xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1")) - X1),(X2 - xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2"))));
880        // a one line chunk, is either centered or left or right-aligned.
881        if ((xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1"))-X1)-(X2-xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2"))) > 1) {
882          xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "2");
883          xmlNewProp(N_chunk->children, BAD_CAST "alignment", BAD_CAST "2");
884          //fprintf(stderr,"alignment = right\n");
885        }
886        else { 
887        if ((xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1"))-X1)-(X2 - xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2")))< -1) {
888          xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "1");
889          xmlNewProp(N_chunk->children, BAD_CAST "alignment", BAD_CAST "1");
890          //fprintf(stderr,"alignment = left\n");
891        }
892        else {
893          xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "3");
894          xmlNewProp(N_chunk->children, BAD_CAST "alignment", BAD_CAST "3");
895          //fprintf(stderr,"alignment = center\n");
896        }
897        }
898      }
899      else {
900      leftCnt = 0;
901      rightCnt = 0;
902      cntrCnt = 0;
903      justCnt = 0;
904      for (N_line = N_chunk->children; N_line; N_line = N_line->next) {
905        //fprintf(stderr,"Processing line\n");
906        /*
907        |X1 - cX1| == 1
908        |X2 - cX2| == 1
909        |(cX1-X1)-(X2-cX2)| == 1
910        ok, each line can be just as wide as the current set,
911        it can be smaller and moved to the right
912        it can be smaller and moved to the left.
913        it can
914        */
915        //fprintf(stderr,"X1=%f, X2=%f, cX1=%f, cX2=%f\n",X1,X2,xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X1")), xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X2")));
916        //fprintf(stderr,"cX1-X1=%f, X2-cX2=%f\n",(xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X1")) - X1),(X2 - xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X2"))));
917        leftMatch =  fabs(xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X1"))-X1) < 2;
918        rightMatch =  fabs(X2-xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X2"))) < 2;
919        centerMatch =  fabs((xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X1"))-X1)-(X2-xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X2")))) < 2;
920        if (leftMatch && rightMatch) {
921          xmlNewProp(N_line, BAD_CAST "alignment", BAD_CAST "4");
922          justCnt++;
923        }
924        else if (centerMatch) {
925          xmlNewProp(N_line, BAD_CAST "alignment", BAD_CAST "3");
926          cntrCnt++;
927        }
928        else if (rightMatch) {
929          xmlNewProp(N_line, BAD_CAST "alignment", BAD_CAST "2");
930          rightCnt++;
931        }
932        else {
933          xmlNewProp(N_line, BAD_CAST "alignment", BAD_CAST "1");
934          leftCnt++;
935        }
936      }
937      //there is almost always one justified line in a centered text
938      //and most justified blocks have at least one left aligned line
939      //fprintf(stderr,"1:%d ,2:%d ,3:%d ,4:%d\n",leftCnt,justCnt,cntrCnt,rightCnt);
940      if ((leftCnt-1 >= justCnt) && (leftCnt >= rightCnt) && (leftCnt >= cntrCnt))
941        xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "1");
942      else if ((justCnt >= leftCnt-1) && (justCnt >= rightCnt) && (justCnt >= cntrCnt))
943        xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "4");
944      else if ((cntrCnt >= justCnt-1) && (cntrCnt >= rightCnt) && (cntrCnt >= leftCnt))
945        xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "3");
946      else
947        xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "2");
948      }
949    } 
950    else {
951      if (xmlStrcasecmp(N_chunk->name,BAD_CAST "colset") == 0){
952        //fprintf(stderr,"Found a colset\n");
953        addAlignment(N_chunk);
954      }
955      else {
956        if (xmlStrcasecmp(N_chunk->name,BAD_CAST "column") == 0){
957          //fprintf(stderr,"Found a column\n");
958          tX1 = X1;
959          tX2 = X2;
960          X1 = xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1"));
961          X2 = xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2"));
962          addAlignment(N_chunk);
963          X1 = tX1;
964          X2 = tX2;
965        }
966        else { //fprintf(stderr,"Found something else\n");
967        }
968      }
969    }
970  }
971//parse all blocks, and all lines within all blocks
972//do a set of checks and tick a flag if the check fails
973//check for line X1 is textBlock X1
974//check for line X2 is textblock X2
975//check if line is centered in textBock (LX1 != TX1 && LX2 != TX2 && LX1-TX1 == TX2=LX2)
976//if the LX1 != TX1 then how much is the difference?
977//a line isn't left aligned if all lines have a different X1 <= not so strong assumption.
978
979//justified if both are straight except for a couple of (same factor sized) indents at the left
980//else centered if above calculation is correct
981//else left aligned if left side is more straight than right (more lines in the same X1 or common factor
982//else right
983}
984
985void ABWOutputDev::setPDFDoc(PDFDoc *priv_pdfdoc) {
986  pdfdoc = priv_pdfdoc;
987}
988
989void ABWOutputDev::createABW() {
990  //*************************************************************
991  //change styles to abiword format
992  xmlNodePtr N_cur, N_next;
993  xmlAttrPtr N_prop;
994  char buf[500];
995  for (N_cur = N_styleset->children; N_cur; N_cur = N_cur->next){
996    sprintf(buf,"margin-top:0pt; color:000000; margin-left:0pt; text-position:normal; widows:2; text-indent:0in; font-variant:normal; margin-right:0pt; lang:nl-NL; line-height:1.0; font-size:%dpt; text-decoration:none; margin-bottom:0pt; bgcolor:transparent; text-align:left; font-stretch:normal;",int(xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "size"))));
997    strncat(buf,"font-family:",12);
998    strncat(buf,(char *)xmlGetProp(N_cur,BAD_CAST "font"),strlen((char *)xmlGetProp(N_cur,BAD_CAST "font")));
999    strncat(buf,";",1);
1000    strncat(buf,"font-weight:",12);
1001    strncat(buf,(char *)xmlGetProp(N_cur,BAD_CAST "bold"),strlen((char *)xmlGetProp(N_cur,BAD_CAST "bold")));
1002    strncat(buf,"font-style:",12);
1003    strncat(buf,(char *)xmlGetProp(N_cur,BAD_CAST "italic"),strlen((char *)xmlGetProp(N_cur,BAD_CAST "italic")));
1004    xmlSetProp(N_cur, BAD_CAST "props", BAD_CAST buf);
1005    N_prop = xmlHasProp(N_cur, BAD_CAST "id");
1006    if (N_prop != NULL) xmlRemoveProp(N_prop);
1007    N_prop = xmlHasProp(N_cur, BAD_CAST "size");
1008    if (N_prop != NULL) xmlRemoveProp(N_prop);
1009    N_prop = xmlHasProp(N_cur, BAD_CAST "bold");
1010    if (N_prop != NULL) xmlRemoveProp(N_prop);
1011    N_prop = xmlHasProp(N_cur, BAD_CAST "italic");
1012    if (N_prop != NULL) xmlRemoveProp(N_prop);
1013    N_prop = xmlHasProp(N_cur, BAD_CAST "font");
1014    if (N_prop != NULL) xmlRemoveProp(N_prop);
1015  }
1016  //*************************************************************
1017  //Change the rest of the document
1018  //each child of N_content is a page
1019  N_cur = N_content->children;
1020  while (N_cur){
1021    //we creat a section node and attach it to the root, it will com after all
1022    //the page nodes. Then we transform the page, and finally remove it
1023    N_next = N_cur->next;
1024    //fprintf(stderr,"***Transforming page\n");
1025    N_Block = xmlNewChild(N_root, NULL, BAD_CAST "section", NULL);
1026    transformPage(N_cur);
1027    xmlUnlinkNode(N_cur);
1028    //fprintf(stderr,"***Finished transforming page\n");
1029    N_cur = N_next;
1030  }
1031  cleanUpNode(N_root, false);
1032}
1033
1034void ABWOutputDev::transformPage(xmlNodePtr N_parent){
1035  char buf[60];
1036  xmlNodePtr N_cur, N_curLine, N_curText, N_curWord, text, space;
1037  //translate the nodes into abiword nodes
1038  if (xmlStrcasecmp(N_parent->name,BAD_CAST "page") == 0){
1039    for (N_cur = N_parent->children; N_cur; N_cur = N_cur->next){
1040      //fprintf(stderr,"**pass a page child\n");
1041      transformPage(N_cur);
1042    }
1043  }
1044  if (xmlStrcasecmp(N_parent->name,BAD_CAST "chunk") == 0){
1045    //fprintf(stderr,"Found a chunk\n");
1046    //I start a <p> on each chunk and add all word containment
1047    N_text = xmlNewChild(N_Block, NULL, BAD_CAST "p", NULL);
1048    if (int(xmlXPathCastStringToNumber(xmlGetProp(N_parent,BAD_CAST "style"))) > 0){
1049      xmlNewProp(N_text, BAD_CAST "style", xmlGetProp(N_parent,BAD_CAST "style"));
1050    }
1051    switch (int(xmlXPathCastStringToNumber(xmlGetProp(N_parent,BAD_CAST "alignment")))){
1052    case 1: xmlNewProp(N_text, BAD_CAST "props", BAD_CAST "text-align:left");
1053           break;
1054    case 2: xmlNewProp(N_text, BAD_CAST "props", BAD_CAST "text-align:right");
1055           break;
1056    case 3: xmlNewProp(N_text, BAD_CAST "props", BAD_CAST "text-align:center");
1057           break;
1058    case 4: xmlNewProp(N_text, BAD_CAST "props", BAD_CAST "text-align:justify");
1059           break;
1060    }
1061    for (N_curLine = N_parent->children; N_curLine; N_curLine = N_curLine->next){
1062      //fprintf(stderr,"A line\n");
1063      for (N_curText = N_curLine->children; N_curText; N_curText = N_curText->next){
1064        //fprintf(stderr,"a textNode\n");
1065        for (N_curWord = N_curText->children; N_curWord; N_curWord = N_curWord->next){
1066          //fprintf(stderr,"a word\n");
1067          text = N_curWord->children;
1068          xmlUnlinkNode(text);
1069          xmlAddChild(N_text,text);
1070          space = xmlNewText(BAD_CAST " ");
1071          xmlAddChild(N_text,space);
1072        }
1073      }
1074    }
1075  }
1076  if (xmlStrcasecmp(N_parent->name,BAD_CAST "column") == 0){
1077    //fprintf(stderr,"Found a column\n");
1078    for (N_cur = N_parent->children; N_cur; N_cur = N_cur->next){
1079      transformPage(N_cur);
1080    }
1081    xmlNewChild(N_text, NULL, BAD_CAST "cbr", NULL);
1082  }
1083  if (xmlStrcasecmp(N_parent->name,BAD_CAST "colset") == 0){
1084    //fprintf(stderr,"Found a colset\n");
1085    //create new section columns: count childNodes of N_cur
1086    //recurse through chunks and create textNodes
1087    N_Block = xmlNewChild(N_root, NULL, BAD_CAST "section", NULL);
1088    sprintf(buf,"columns:%d",xmlLsCountNode(N_parent));
1089    xmlNewProp(N_Block, BAD_CAST "props", BAD_CAST buf);
1090    for (N_cur = N_parent->children; N_cur; N_cur = N_cur->next){
1091      transformPage(N_cur);
1092    }
1093    N_Block = xmlNewChild(N_root, NULL, BAD_CAST "section", NULL);
1094  }
1095  //fprintf(stderr,"at the end\n");
1096}
1097
1098//Count nodes, copied from debugxml.c from libxml
1099// libxml copyright file below
1100/*
1101Except where otherwise noted in the source code (e.g. the files hash.c,
1102list.c and the trio files, which are covered by a similar licence but
1103with different Copyright notices) all the files are:
1104
1105 Copyright (C) 1998-2003 Daniel Veillard.  All Rights Reserved.
1106
1107Permission is hereby granted, free of charge, to any person obtaining a copy
1108of this software and associated documentation files (the "Software"), to deal
1109in the Software without restriction, including without limitation the rights
1110to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1111copies of the Software, and to permit persons to whom the Software is fur-
1112nished to do so, subject to the following conditions:
1113
1114The above copyright notice and this permission notice shall be included in
1115all copies or substantial portions of the Software.
1116
1117THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1118IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FIT-
1119NESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
1120DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
1121IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CON-
1122NECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1123
1124Except as contained in this notice, the name of Daniel Veillard shall not
1125be used in advertising or otherwise to promote the sale, use or other deal-
1126ings in this Software without prior written authorization from him.
1127*/
1128int ABWOutputDev::xmlLsCountNode(xmlNodePtr node) {
1129  int ret = 0;
1130  xmlNodePtr list = NULL;
1131
1132  if (node == NULL)
1133    return(0);
1134
1135  switch (node->type) {
1136    case XML_ELEMENT_NODE:
1137      list = node->children;
1138      break;
1139    case XML_DOCUMENT_NODE:
1140    case XML_HTML_DOCUMENT_NODE:
1141#ifdef LIBXML_DOCB_ENABLED
1142    case XML_DOCB_DOCUMENT_NODE:
1143#endif
1144      list = ((xmlDocPtr) node)->children;
1145      break;
1146    case XML_ATTRIBUTE_NODE:
1147      list = ((xmlAttrPtr) node)->children;
1148      break;
1149    case XML_TEXT_NODE:
1150    case XML_CDATA_SECTION_NODE:
1151    case XML_PI_NODE:
1152    case XML_COMMENT_NODE:
1153      if (node->content != NULL) {
1154        ret = xmlStrlen(node->content);
1155      }
1156      break;
1157    case XML_ENTITY_REF_NODE:
1158    case XML_DOCUMENT_TYPE_NODE:
1159    case XML_ENTITY_NODE:
1160    case XML_DOCUMENT_FRAG_NODE:
1161    case XML_NOTATION_NODE:
1162    case XML_DTD_NODE:
1163    case XML_ELEMENT_DECL:
1164    case XML_ATTRIBUTE_DECL:
1165    case XML_ENTITY_DECL:
1166    case XML_NAMESPACE_DECL:
1167    case XML_XINCLUDE_START:
1168    case XML_XINCLUDE_END:
1169      ret = 1;
1170      break;
1171  }
1172  for (;list != NULL;ret++) 
1173    list = list->next;
1174  return(ret);
1175}
Note: See TracBrowser for help on using the repository browser.