@@ -318,6 +318,34 @@ void reference_makeNewInterval(reference *ref, int64_t firstNode, int64_t lastNo
318318 stList_append (ref -> referenceIntervals , rTF );
319319}
320320
321+ void reference_removeIntervals (reference * ref , stSortedSet * firstNodesOfIntervalsToRemove ) {
322+ stList * updatedReferenceIntervals = stList_construct ();
323+ for (int64_t i = 0 ;i < stList_length (ref -> referenceIntervals ); i ++ ) {
324+ int64_t node = reference_getFirstOfInterval (ref , i );
325+ referenceTerm * rT = reference_getTerm (ref , node );
326+ assert (rT != NULL );
327+ stIntTuple * j = stIntTuple_construct1 (node );
328+ if (stSortedSet_search (firstNodesOfIntervalsToRemove , j ) == NULL ) { //It is not in the list of intervals to delete,
329+ //so we add it to the list of intervals to keep
330+ stList_append (updatedReferenceIntervals , reference_getTerm (ref , node ));
331+ }
332+ else {
333+ //Release the term from the node array
334+ assert (rT -> pTerm == NULL );
335+ while (rT != NULL ) {
336+ ref -> nodesInGraph [llabs (rT -> node )- 1 ] = NULL ;
337+ referenceTerm * rTP = rT ;
338+ rT = rT -> nTerm ;
339+ assert (rT == NULL || rT -> pTerm == rTP );
340+ free (rTP );
341+ }
342+ }
343+ stIntTuple_destruct (j );
344+ }
345+ stList_destruct (ref -> referenceIntervals );
346+ ref -> referenceIntervals = updatedReferenceIntervals ;
347+ }
348+
321349void reference_insertNode (reference * ref , int64_t pNode , int64_t node ) {
322350 referenceTerm * rT = st_malloc (sizeof (referenceTerm )), * rTP ;
323351 rT -> node = node ;
@@ -459,33 +487,60 @@ void reference_log(reference *ref) {
459487 }
460488}
461489
490+ static void setFirstPointer (referenceTerm * term , referenceTerm * firstTerm ) {
491+ assert (firstTerm -> pTerm == NULL );
492+ assert (firstTerm -> first == firstTerm );
493+ do {
494+ term -> first = firstTerm ;
495+ term = term -> nTerm ;
496+ } while (term != NULL );
497+ }
498+
499+ void reference_translocateIntervals (reference * ref , int64_t pNode1 , int64_t nNode2 ) {
500+ referenceTerm * pNode1Term = reference_getTerm (ref , pNode1 );
501+ assert (pNode1Term != NULL );
502+ referenceTerm * nNode1Term = pNode1Term -> nTerm ;
503+ assert (nNode1Term != NULL ); //pNode1 is not a stub end.
504+ referenceTerm * nNode2Term = reference_getTerm (ref , nNode2 );
505+ referenceTerm * pNode2Term = nNode2Term -> pTerm ;
506+ assert (pNode2Term != NULL ); //nNode2 is not a stub end.
507+ //The critical translocation lines
508+ pNode1Term -> nTerm = nNode2Term ;
509+ nNode2Term -> pTerm = pNode1Term ;
510+ pNode2Term -> nTerm = nNode1Term ;
511+ nNode1Term -> pTerm = pNode2Term ;
512+ //Correct the "first" pointers.
513+ assert (nNode1Term -> first == pNode1Term -> first );
514+ assert (nNode2Term -> first == pNode2Term -> first );
515+ setFirstPointer (nNode1Term , pNode2Term -> first );
516+ setFirstPointer (nNode2Term , pNode1Term -> first );
517+ }
518+
462519void reference_splitInterval (reference * ref , int64_t pNode , int64_t stub1 , int64_t stub2 ) {
463520 assert (reference_getNext (ref , pNode ) != INT64_MAX );
464521 assert (!reference_inGraph (ref , stub1 ));
465522 assert (!reference_inGraph (ref , stub2 ));
466523 assert (stub1 != stub2 );
467524 reference_makeNewInterval (ref , stub2 , stub1 );
468- referenceTerm * pNodeTerm = reference_getTerm (ref , pNode );
469- assert (pNodeTerm != NULL );
470- referenceTerm * nNodeTerm = pNodeTerm -> nTerm ;
471- assert (nNodeTerm != NULL ); //Is not a stub end.
472- referenceTerm * stub1Term = reference_getTerm (ref , stub1 );
473- referenceTerm * stub2Term = stub1Term -> pTerm ;
474- assert (stub2Term != NULL && stub2Term == reference_getTerm (ref , stub2 ));
475- assert (stub1Term -> nTerm == NULL );
476- assert (stub2Term -> pTerm == NULL );
477- pNodeTerm -> nTerm = stub1Term ;
478- stub1Term -> pTerm = pNodeTerm ;
479- stub2Term -> nTerm = nNodeTerm ;
480- nNodeTerm -> pTerm = stub2Term ;
481- assert (stub1Term -> first == stub2Term );
482- stub1Term -> first = pNodeTerm -> first ;
483- assert (nNodeTerm -> first == pNodeTerm -> first );
484- do {
485- nNodeTerm -> first = stub2Term ;
486- nNodeTerm = nNodeTerm -> nTerm ;
487- } while (nNodeTerm != NULL );
488- assert (stub2Term -> first == stub2Term );
525+ reference_translocateIntervals (ref , pNode , stub1 );
526+ }
527+
528+ /*
529+ * Returns the integer value of the absolute highest valued node in the reference.
530+ */
531+ int64_t reference_getMaximumNode (reference * ref ) {
532+ int64_t maxNode = INT64_MIN ;
533+ for (int64_t interval = 0 ; interval < reference_getIntervalNumber (ref ); interval ++ ) {
534+ int64_t node = reference_getFirstOfInterval (ref , interval );
535+ while (node != INT64_MAX ) {
536+ if (llabs (node ) > maxNode ) {
537+ maxNode = llabs (node );
538+ }
539+ node = reference_getNext (ref , node );
540+ }
541+ }
542+ assert (maxNode != INT64_MAX );
543+ return maxNode ;
489544}
490545
491546/*
@@ -774,6 +829,7 @@ static insertPoint *connectedNodes_popBestInsert(connectedNodes *cN, refAdjList
774829 cN -> misses ++ ;
775830 free (iP );
776831 }
832+ return NULL ;
777833}
778834
779835/*
@@ -1049,46 +1105,74 @@ void reorderReferenceToAvoidBreakpoints(refAdjList *aL, reference *ref) {
10491105 }
10501106}
10511107
1052- void translocateInterval (reference * ref , int64_t n , int64_t length , int64_t pNode ) {
1053- assert (pNode != INT64_MAX );
1054- assert (n != INT64_MAX );
1055- int64_t i = 0 ;
1056- while (i ++ < length ) {
1057- int64_t m = reference_getNext (ref , n );
1058- assert (m != INT64_MAX );
1059- reference_removeNode (ref , n );
1060- reference_insertNode (ref , pNode , n );
1061- pNode = n ;
1062- n = m ;
1063- }
1064- }
1065-
1066- int64_t getShortestInterval (reference * ref ) {
1067- int64_t shortestInterval = INT64_MAX ;
1068- int64_t shortestIntervalLength = INT64_MAX ;
1069- for (int64_t i = 0 ; i < reference_getIntervalNumber (ref ); i ++ ) {
1070- int64_t n = reference_getFirstOfInterval (ref , i );
1071- int64_t length = reference_getRemainingIntervalLength (ref , n );
1072- if (length == 0 ) {
1073- return n ;
1108+ stList * splitReferenceAtIndicatedLocations (reference * ref , bool (* refSplitFn )(int64_t , reference * , void * ), void * extraArgs ) {
1109+ int64_t indexOfFirstNewStub = reference_getMaximumNode (ref )+ 1 ; //The new stubs need to be unique in the reference
1110+ assert (indexOfFirstNewStub < INT64_MAX );
1111+ assert (!reference_inGraph (ref , indexOfFirstNewStub ));
1112+ stList * newStubs = stList_construct3 (0 , (void (* )(void * ))stIntTuple_destruct );
1113+ for (int64_t interval = reference_getIntervalNumber (ref )- 1 ; interval >=0 ; interval -- ) { //Iterate over only the old intervals, not those involving new stubs.
1114+ int64_t pNode = reference_getFirstOfInterval (ref , interval );
1115+ int64_t node = reference_getNext (ref , pNode );
1116+ while (node != INT64_MAX ) {
1117+ if (refSplitFn (pNode , ref , extraArgs )) { //Determine if a split is needed.
1118+ int64_t newStub1 = indexOfFirstNewStub ++ ;
1119+ int64_t newStub2 = indexOfFirstNewStub ++ ;
1120+ stList_append (newStubs , stIntTuple_construct1 (newStub1 ));
1121+ stList_append (newStubs , stIntTuple_construct1 (- newStub2 )); //Invert the sign, because we use signs to refer to sides of a node
1122+ reference_splitInterval (ref , pNode , newStub1 , newStub2 );
1123+ }
1124+ pNode = node ;
1125+ node = reference_getNext (ref , pNode );
1126+ }
1127+ }
1128+ return newStubs ;
1129+ }
1130+
1131+ static void removeStub (stSortedSet * extraStubNodesSet , int64_t node ) {
1132+ stIntTuple * stub = stIntTuple_construct1 (node );
1133+ assert (stSortedSet_search (extraStubNodesSet , stub ) != NULL );
1134+ stSortedSet_remove (extraStubNodesSet , stub );
1135+ stIntTuple_destruct (stub );
1136+ }
1137+
1138+ stList * remakeReferenceIntervals (reference * ref , stList * referenceIntervalsToPreserve , stList * extraStubNodes ) {
1139+ stSortedSet * extraStubNodesSet = stList_getSortedSet (extraStubNodes , (int (* )(const void * , const void * ))stIntTuple_cmpFn );
1140+ stSortedSet * firstNodesOfIntervalsToRemove = stSortedSet_construct3 ((int (* )(const void * , const void * ))stIntTuple_cmpFn , (void (* )(void * ))stIntTuple_destruct );
1141+ for (int64_t i = 0 ; i < stList_length (referenceIntervalsToPreserve ); i ++ ) {
1142+ stIntTuple * intervalToPreserve = stList_get (referenceIntervalsToPreserve , i );
1143+ int64_t startNode = stIntTuple_get (intervalToPreserve , 0 );
1144+ assert (reference_getFirst (ref , startNode ) == startNode );
1145+ int64_t endNode = stIntTuple_get (intervalToPreserve , 1 );
1146+ assert (reference_getLast (ref , endNode ) == endNode );
1147+ int64_t nodeAdjacentStartNode = reference_getLast (ref , startNode );
1148+ int64_t nodeAdjacentEndNode = reference_getFirst (ref , endNode );
1149+ if (nodeAdjacentStartNode != endNode ) { //We have identified a case where we need to scaffold across a break.
1150+ assert (llabs (nodeAdjacentStartNode ) != llabs (endNode ));
1151+ //void reference_translocateIntervals(reference *ref, int64_t pNode1, int64_t nNode2)
1152+ reference_translocateIntervals (ref , reference_getPrevious (ref , nodeAdjacentStartNode ), reference_getNext (ref , nodeAdjacentEndNode ));
1153+ //Check we have the correct connectivity
1154+ assert (reference_getLast (ref , startNode ) == endNode );
1155+ assert (reference_getFirst (ref , endNode ) == startNode );
1156+ assert (reference_getLast (ref , nodeAdjacentEndNode ) == nodeAdjacentStartNode );
1157+ assert (reference_getNext (ref , nodeAdjacentEndNode ) == nodeAdjacentStartNode );
1158+ assert (reference_getFirst (ref , nodeAdjacentStartNode ) == nodeAdjacentEndNode );
1159+ assert (reference_getPrevious (ref , nodeAdjacentStartNode ) == nodeAdjacentEndNode );
1160+ //Append to list of stub intervals to delete.
1161+ assert (reference_getFirst (ref , nodeAdjacentEndNode ) == nodeAdjacentEndNode );
1162+ stSortedSet_insert (firstNodesOfIntervalsToRemove , stIntTuple_construct1 (nodeAdjacentEndNode ));
1163+ //Delete the stubs
1164+ removeStub (extraStubNodesSet , nodeAdjacentStartNode );
1165+ removeStub (extraStubNodesSet , - nodeAdjacentEndNode ); //The minus sign is to refer to the 3' end of the node.
10741166 }
1075- if (length < shortestIntervalLength ) {
1076- shortestInterval = n ;
1077- shortestIntervalLength = length ;
1078- }
1079- }
1080- return shortestInterval ;
1081- }
1082-
1083- void reorderToAvoidOverlargeChromosome (reference * ref , bool (* tooLarge )(reference * , int64_t n )) {
1084- for (int64_t i = 0 ; i < reference_getIntervalNumber (ref ); i ++ ) {
1085- int64_t n = reference_getFirstOfInterval (ref , i );
1086- int64_t length = reference_getRemainingIntervalLength (ref , n );
1087- if (tooLarge (ref , n ) && length > 2 ) {
1088- int64_t m = getShortestInterval (ref );
1089- if (m != n ) {
1090- translocateInterval (ref , reference_getNext (ref , n ), length /2 , m );
1091- }
1167+ else {
1168+ assert (nodeAdjacentEndNode == startNode );
10921169 }
10931170 }
1171+ //Remove the old stub intervals
1172+ reference_removeIntervals (ref , firstNodesOfIntervalsToRemove );
1173+ stSortedSet_destruct (firstNodesOfIntervalsToRemove );
1174+ //Revise list of extra stub nodes.
1175+ extraStubNodes = stSortedSet_getList (extraStubNodesSet );
1176+ stSortedSet_destruct (extraStubNodesSet );
1177+ return extraStubNodes ;
10941178}
0 commit comments