start_list_test.go 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. package e2e
  2. import (
  3. "context"
  4. "fmt"
  5. "syscall"
  6. "testing"
  7. )
  8. // TestStartAndList verifies self-registration and default.json semantics in a fresh CLINE_DIR.
  9. func TestStartAndList(t *testing.T) {
  10. clineDir := setTempClineDir(t)
  11. t.Logf("Using temp CLINE_DIR: %s", clineDir)
  12. ctx, cancel := context.WithTimeout(context.Background(), longTimeout)
  13. defer cancel()
  14. t.Logf("Starting new instance...")
  15. // Start a new instance
  16. startOutput := mustRunCLI(ctx, t, "instance", "new")
  17. t.Logf("Instance start output: %s", startOutput)
  18. t.Logf("Listing instances to check registration...")
  19. // It should appear healthy in list JSON and be the default.
  20. out := listInstancesJSON(ctx, t)
  21. t.Logf("Found %d instances after start", len(out.CoreInstances))
  22. if len(out.CoreInstances) != 1 {
  23. t.Fatalf("expected 1 instance, got %d", len(out.CoreInstances))
  24. }
  25. addr := out.CoreInstances[0].Address
  26. t.Logf("Instance address: %s, status: %s", addr, out.CoreInstances[0].Status)
  27. t.Logf("Waiting for address %s to become healthy...", addr)
  28. waitForAddressHealthy(t, addr, defaultTimeout)
  29. t.Logf("Address %s is now healthy", addr)
  30. t.Logf("Checking default instance configuration...")
  31. // Default should be set to the new instance.
  32. out = listInstancesJSON(ctx, t)
  33. t.Logf("Default instance: %s", out.DefaultInstance)
  34. if out.DefaultInstance == "" {
  35. t.Fatalf("default_instance not set")
  36. }
  37. if out.DefaultInstance != out.CoreInstances[0].Address {
  38. t.Fatalf("expected default_instance=%s, got %s", out.CoreInstances[0].Address, out.DefaultInstance)
  39. }
  40. t.Logf("TestStartAndList completed successfully")
  41. }
  42. // TestTaskNewDefault ensures tasks route to default instance.
  43. func TestTaskNewDefault(t *testing.T) {
  44. _ = setTempClineDir(t)
  45. ctx, cancel := context.WithTimeout(context.Background(), longTimeout)
  46. defer cancel()
  47. // Start one instance and wait for healthy
  48. _ = mustRunCLI(ctx, t, "instance", "new")
  49. out := listInstancesJSON(ctx, t)
  50. if len(out.CoreInstances) != 1 {
  51. t.Fatalf("expected 1 instance, got %d", len(out.CoreInstances))
  52. }
  53. addr := out.CoreInstances[0].Address
  54. waitForAddressHealthy(t, addr, defaultTimeout)
  55. // Create a new task at default (success is sufficient)
  56. _ = mustRunCLI(ctx, t, "task", "new", "hello world")
  57. }
  58. // TestExplicitAddressAutoStart verifies that giving an explicit address auto-starts an instance and routes the task.
  59. func TestExplicitAddressAutoStart(t *testing.T) {
  60. _ = setTempClineDir(t)
  61. ctx, cancel := context.WithTimeout(context.Background(), longTimeout)
  62. defer cancel()
  63. // Find a free port and use explicit address. This should auto-start an instance.
  64. port := findFreePort(t)
  65. addr := "localhost:" + itoa(port)
  66. // Run a task at explicit address (auto-start path)
  67. _ = mustRunCLI(ctx, t, "task", "new", "--address", "localhost:"+itoa(port), "explicit address task")
  68. // Verify the instance is present and healthy
  69. waitForAddressHealthy(t, addr, defaultTimeout)
  70. }
  71. // TestCrashCleanup verifies that after SIGKILL of a local core, the cleanup removes the registry entry.
  72. // Also tests graceful shutdown (SIGTERM) vs crash cleanup and ensures no dangling host processes.
  73. func TestCrashCleanup(t *testing.T) {
  74. _ = setTempClineDir(t)
  75. ctx, cancel := context.WithTimeout(context.Background(), longTimeout)
  76. defer cancel()
  77. // Start two instances for testing both graceful and crash scenarios
  78. _ = mustRunCLI(ctx, t, "instance", "new")
  79. _ = mustRunCLI(ctx, t, "instance", "new")
  80. out := listInstancesJSON(ctx, t)
  81. if len(out.CoreInstances) < 2 {
  82. t.Fatalf("expected at least 2 instances, got %d", len(out.CoreInstances))
  83. }
  84. // Test 1: Graceful shutdown (SIGTERM) - should clean up both processes
  85. gracefulTarget := out.CoreInstances[0]
  86. waitForAddressHealthy(t, gracefulTarget.Address, defaultTimeout)
  87. // Get PID using runtime discovery
  88. gracefulPID := getCorePID(t, gracefulTarget.Address)
  89. if gracefulPID <= 0 {
  90. t.Fatalf("could not find PID for graceful target at %s", gracefulTarget.Address)
  91. }
  92. t.Logf("Testing graceful shutdown (SIGTERM) for instance %s (PID %d)", gracefulTarget.Address, gracefulPID)
  93. if err := syscall.Kill(gracefulPID, syscall.SIGTERM); err != nil {
  94. t.Fatalf("kill SIGTERM pid %d: %v", gracefulPID, err)
  95. }
  96. // Wait for registry cleanup
  97. waitForAddressRemoved(t, gracefulTarget.Address, longTimeout)
  98. // Verify both core and host ports are freed (no dangling processes)
  99. waitForPortsClosed(t, gracefulTarget.CorePort(), gracefulTarget.HostPort(), defaultTimeout)
  100. // Verify the instance is removed from SQLite (no file to check anymore)
  101. // The waitForAddressRemoved already confirms the instance is gone from the registry
  102. // Test 2: Crash cleanup (SIGKILL) - creates dangling host process that we must clean up
  103. crashTarget := out.CoreInstances[1]
  104. waitForAddressHealthy(t, crashTarget.Address, defaultTimeout)
  105. // Get PID using runtime discovery
  106. crashPID := getCorePID(t, crashTarget.Address)
  107. if crashPID <= 0 {
  108. t.Fatalf("could not find PID for crash target at %s", crashTarget.Address)
  109. }
  110. t.Logf("Testing crash cleanup (SIGKILL) for instance %s (PID %d)", crashTarget.Address, crashPID)
  111. if err := syscall.Kill(crashPID, syscall.SIGKILL); err != nil {
  112. t.Fatalf("kill SIGKILL pid %d: %v", crashPID, err)
  113. }
  114. // Wait for registry cleanup
  115. waitForAddressRemoved(t, crashTarget.Address, longTimeout)
  116. // Verify the instance is removed from SQLite (no file to check anymore)
  117. // The waitForAddressRemoved already confirms the instance is gone from the registry
  118. // Clean up dangling host process (SIGKILL leaves these behind by design)
  119. t.Logf("Cleaning up dangling host process %s", crashTarget.HostServiceAddress)
  120. findAndKillHostProcess(t, crashTarget.HostPort())
  121. // Verify both ports are now free
  122. waitForPortsClosed(t, crashTarget.CorePort(), crashTarget.HostPort(), defaultTimeout)
  123. }
  124. // itoa is a small helper for readability
  125. func itoa(i int) string {
  126. return strconvItoa(i)
  127. }
  128. // minimal inline int->string to avoid extra imports in helpers
  129. func strconvItoa(i int) string {
  130. // simple fast path
  131. return fmtInt(i)
  132. }
  133. func fmtInt(i int) string {
  134. // allocate small buffer; ints here are short
  135. return (func(n int) string {
  136. return fmt.Sprintf("%d", n)
  137. })(i)
  138. }